summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/sff,sfp.txt5
-rw-r--r--drivers/net/ethernet/apple/macmace.c25
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.c2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.h2
-rw-r--r--drivers/net/ethernet/marvell/mvpp2.c414
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c206
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169.c12
-rw-r--r--drivers/net/ipvlan/ipvlan.h1
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c34
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c60
-rw-r--r--drivers/net/phy/phylink.c33
-rw-r--r--drivers/net/phy/sfp-bus.c162
-rw-r--r--drivers/net/phy/sfp.c150
-rw-r--r--drivers/net/team/team.c16
-rw-r--r--include/linux/sfp.h18
-rw-r--r--include/net/ethoc.h1
-rw-r--r--include/net/flow.h2
-rw-r--r--include/net/inet_connection_sock.h10
-rw-r--r--include/net/ip.h12
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--include/net/ipv6.h10
-rw-r--r--include/net/pkt_cls.h8
-rw-r--r--include/net/xfrm.h14
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/tunnel4.c2
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv6/anycast.c1
-rw-r--r--net/ipv6/exthdrs_core.c1
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/proc.c1
-rw-r--r--net/ipv6/xfrm6_state.c1
-rw-r--r--net/sched/sch_api.c7
-rw-r--r--net/sched/sch_prio.c45
-rw-r--r--tools/testing/selftests/net/forwarding/.gitignore1
-rw-r--r--tools/testing/selftests/net/forwarding/README56
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh87
-rw-r--r--tools/testing/selftests/net/forwarding/config12
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample31
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh533
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh125
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh322
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh195
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_chains.sh122
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh23
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh196
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_shblocks.sh122
50 files changed, 2799 insertions, 303 deletions
diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt
index f1c441bedf68..929591d52ed6 100644
--- a/Documentation/devicetree/bindings/net/sff,sfp.txt
+++ b/Documentation/devicetree/bindings/net/sff,sfp.txt
@@ -33,6 +33,10 @@ Optional Properties:
Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
high Tx rate. Must not be present for SFF modules
+- maximum-power-milliwatt : Maximum module power consumption
+ Specifies the maximum power consumption allowable by a module in the
+ slot, in milli-Watts. Presently, modules can be up to 1W, 1.5W or 2W.
+
Example #1: Direct serdes to SFP connection
sfp_eth3: sfp-eth3 {
@@ -40,6 +44,7 @@ sfp_eth3: sfp-eth3 {
i2c-bus = <&sfp_1g_i2c>;
los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>;
mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>;
+ maximum-power-milliwatt = <1000>;
pinctrl-names = "default";
pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>;
tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>;
diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index f17a160dbff2..137cbb470af2 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -247,8 +247,8 @@ static int mace_probe(struct platform_device *pdev)
dev->netdev_ops = &mace_netdev_ops;
dev->watchdog_timeo = TX_TIMEOUT;
- printk(KERN_INFO "%s: 68K MACE, hardware address %pM\n",
- dev->name, dev->dev_addr);
+ pr_info("Onboard MACE, hardware address %pM, chip revision 0x%04X\n",
+ dev->dev_addr, mp->chipid);
err = register_netdev(dev);
if (!err)
@@ -589,7 +589,6 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
else if (fs & (UFLO|LCOL|RTRY)) {
++dev->stats.tx_aborted_errors;
if (mb->xmtfs & UFLO) {
- printk(KERN_ERR "%s: DMA underrun.\n", dev->name);
dev->stats.tx_fifo_errors++;
mace_txdma_reset(dev);
}
@@ -644,10 +643,8 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
if (frame_status & (RS_OFLO | RS_CLSN | RS_FRAMERR | RS_FCSERR)) {
dev->stats.rx_errors++;
- if (frame_status & RS_OFLO) {
- printk(KERN_DEBUG "%s: fifo overflow.\n", dev->name);
+ if (frame_status & RS_OFLO)
dev->stats.rx_fifo_errors++;
- }
if (frame_status & RS_CLSN)
dev->stats.collisions++;
if (frame_status & RS_FRAMERR)
@@ -770,18 +767,4 @@ static struct platform_driver mac_mace_driver = {
},
};
-static int __init mac_mace_init_module(void)
-{
- if (!MACH_IS_MAC)
- return -ENODEV;
-
- return platform_driver_register(&mac_mace_driver);
-}
-
-static void __exit mac_mace_cleanup_module(void)
-{
- platform_driver_unregister(&mac_mace_driver);
-}
-
-module_init(mac_mace_init_module);
-module_exit(mac_mace_cleanup_module);
+module_platform_driver(mac_mace_driver);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 1a49297224ed..ff92ab1daeb8 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -19,7 +19,7 @@
#include "be.h"
#include "be_cmds.h"
-char *be_misconfig_evt_port_state[] = {
+const char * const be_misconfig_evt_port_state[] = {
"Physical Link is functional",
"Optics faulted/incorrectly installed/not installed - Reseat optics. If issue not resolved, replace.",
"Optics of two types installed – Remove one optic or install matching pair of optics.",
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 09da2d82c2f0..e8b43cf44b6f 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -201,7 +201,7 @@ enum {
phy_state == BE_PHY_UNQUALIFIED || \
phy_state == BE_PHY_UNCERTIFIED)
-extern char *be_misconfig_evt_port_state[];
+extern const char * const be_misconfig_evt_port_state[];
/* async event indicating misconfigured port */
struct be_async_event_misconfig_port {
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 5a1668cdb461..9418f6eed086 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -65,6 +65,10 @@
#define MVPP2_RXQ_PACKET_OFFSET_MASK 0x70000000
#define MVPP2_RXQ_DISABLE_MASK BIT(31)
+/* Top Registers */
+#define MVPP2_MH_REG(port) (0x5040 + 4 * (port))
+#define MVPP2_DSA_EXTENDED BIT(5)
+
/* Parser Registers */
#define MVPP2_PRS_INIT_LOOKUP_REG 0x1000
#define MVPP2_PRS_PORT_LU_MAX 0xf
@@ -473,6 +477,7 @@
#define MVPP2_ETH_TYPE_LEN 2
#define MVPP2_PPPOE_HDR_SIZE 8
#define MVPP2_VLAN_TAG_LEN 4
+#define MVPP2_VLAN_TAG_EDSA_LEN 8
/* Lbtd 802.3 type */
#define MVPP2_IP_LBDT_TYPE 0xfffa
@@ -609,35 +614,64 @@ enum mvpp2_tag_type {
#define MVPP2_PRS_TCAM_LU_BYTE 20
#define MVPP2_PRS_TCAM_EN_OFFS(offs) ((offs) + 2)
#define MVPP2_PRS_TCAM_INV_WORD 5
+
+#define MVPP2_PRS_VID_TCAM_BYTE 2
+
+/* There is a TCAM range reserved for VLAN filtering entries, range size is 33
+ * 10 VLAN ID filter entries per port
+ * 1 default VLAN filter entry per port
+ * It is assumed that there are 3 ports for filter, not including loopback port
+ */
+#define MVPP2_PRS_VLAN_FILT_MAX 11
+#define MVPP2_PRS_VLAN_FILT_RANGE_SIZE 33
+
+#define MVPP2_PRS_VLAN_FILT_MAX_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 2)
+#define MVPP2_PRS_VLAN_FILT_DFLT_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 1)
+
/* Tcam entries ID */
#define MVPP2_PE_DROP_ALL 0
#define MVPP2_PE_FIRST_FREE_TID 1
-#define MVPP2_PE_LAST_FREE_TID (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+
+/* VLAN filtering range */
+#define MVPP2_PE_VID_FILT_RANGE_END (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+#define MVPP2_PE_VID_FILT_RANGE_START (MVPP2_PE_VID_FILT_RANGE_END - \
+ MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1)
+#define MVPP2_PE_LAST_FREE_TID (MVPP2_PE_VID_FILT_RANGE_START - 1)
#define MVPP2_PE_IP6_EXT_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 30)
#define MVPP2_PE_MAC_MC_IP6 (MVPP2_PRS_TCAM_SRAM_SIZE - 29)
#define MVPP2_PE_IP6_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28)
#define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 27)
#define MVPP2_PE_LAST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 26)
-#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
-#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
-#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
-#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
-#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
-#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
-#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
-#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
-#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
-#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
-#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
-#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
-#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
-#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
+#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 21)
+#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 20)
+#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
+#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
+#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
+#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
+#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
+#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
+#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
+#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
+#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
+#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
+#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
+#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
+#define MVPP2_PE_VID_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
+#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
#define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 5)
#define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 4)
#define MVPP2_PE_MAC_MC_ALL (MVPP2_PRS_TCAM_SRAM_SIZE - 3)
#define MVPP2_PE_MAC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2)
#define MVPP2_PE_MAC_NON_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 1)
+#define MVPP2_PRS_VID_PORT_FIRST(port) (MVPP2_PE_VID_FILT_RANGE_START + \
+ ((port) * MVPP2_PRS_VLAN_FILT_MAX))
+#define MVPP2_PRS_VID_PORT_LAST(port) (MVPP2_PRS_VID_PORT_FIRST(port) \
+ + MVPP2_PRS_VLAN_FILT_MAX_ENTRY)
+/* Index of default vid filter for given port */
+#define MVPP2_PRS_VID_PORT_DFLT(port) (MVPP2_PRS_VID_PORT_FIRST(port) \
+ + MVPP2_PRS_VLAN_FILT_DFLT_ENTRY)
+
/* Sram structure
* The fields are represented by MVPP2_PRS_TCAM_DATA_REG(3)->(0).
*/
@@ -725,6 +759,7 @@ enum mvpp2_tag_type {
#define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT BIT(4)
#define MVPP2_PRS_SINGLE_VLAN_AI 0
#define MVPP2_PRS_DBL_VLAN_AI_BIT BIT(7)
+#define MVPP2_PRS_EDSA_VID_AI_BIT BIT(0)
/* DSA/EDSA type */
#define MVPP2_PRS_TAGGED true
@@ -747,6 +782,7 @@ enum mvpp2_prs_lookup {
MVPP2_PRS_LU_MAC,
MVPP2_PRS_LU_DSA,
MVPP2_PRS_LU_VLAN,
+ MVPP2_PRS_LU_VID,
MVPP2_PRS_LU_L2,
MVPP2_PRS_LU_PPPOE,
MVPP2_PRS_LU_IP4,
@@ -1662,6 +1698,14 @@ static void mvpp2_prs_match_etype(struct mvpp2_prs_entry *pe, int offset,
mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff);
}
+/* Set vid in tcam sw entry */
+static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset,
+ unsigned short vid)
+{
+ mvpp2_prs_tcam_data_byte_set(pe, offset + 0, (vid & 0xf00) >> 8, 0xf);
+ mvpp2_prs_tcam_data_byte_set(pe, offset + 1, vid & 0xff, 0xff);
+}
+
/* Set bits in sram sw entry */
static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num,
int val)
@@ -2029,24 +2073,30 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add,
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA);
pe.index = tid;
- /* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag*/
- mvpp2_prs_sram_shift_set(&pe, shift,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
/* Update shadow table */
mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA);
if (tagged) {
/* Set tagged bit in DSA tag */
mvpp2_prs_tcam_data_byte_set(&pe, 0,
- MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
- MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
- /* Clear all ai bits for next iteration */
- mvpp2_prs_sram_ai_update(&pe, 0,
- MVPP2_PRS_SRAM_AI_MASK);
- /* If packet is tagged continue check vlans */
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+ MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
+ MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
+
+ /* Set ai bits for next iteration */
+ if (extend)
+ mvpp2_prs_sram_ai_update(&pe, 1,
+ MVPP2_PRS_SRAM_AI_MASK);
+ else
+ mvpp2_prs_sram_ai_update(&pe, 0,
+ MVPP2_PRS_SRAM_AI_MASK);
+
+ /* If packet is tagged continue check vid filtering */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
} else {
+ /* Shift 4 bytes for DSA tag or 8 bytes for EDSA tag*/
+ mvpp2_prs_sram_shift_set(&pe, shift,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
/* Set result info bits to 'no vlans' */
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE,
MVPP2_PRS_RI_VLAN_MASK);
@@ -2231,10 +2281,9 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
mvpp2_prs_match_etype(pe, 0, tpid);
- mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2);
- /* Shift 4 bytes - skip 1 vlan tag */
- mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+ /* VLAN tag detected, proceed with VID filtering */
+ mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VID);
+
/* Clear all ai bits for next iteration */
mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK);
@@ -2375,8 +2424,8 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
mvpp2_prs_match_etype(pe, 4, tpid2);
mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN);
- /* Shift 8 bytes - skip 2 vlan tags */
- mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN,
+ /* Shift 4 bytes - skip outer vlan tag */
+ mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE,
MVPP2_PRS_RI_VLAN_MASK);
@@ -2755,6 +2804,62 @@ static void mvpp2_prs_dsa_init(struct mvpp2 *priv)
mvpp2_prs_hw_write(priv, &pe);
}
+/* Initialize parser entries for VID filtering */
+static void mvpp2_prs_vid_init(struct mvpp2 *priv)
+{
+ struct mvpp2_prs_entry pe;
+
+ memset(&pe, 0, sizeof(pe));
+
+ /* Set default vid entry */
+ pe.index = MVPP2_PE_VID_FLTR_DEFAULT;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_EDSA_VID_AI_BIT);
+
+ /* Skip VLAN header - Set offset to 4 bytes */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ /* Set default vid entry for extended DSA*/
+ memset(&pe, 0, sizeof(pe));
+
+ /* Set default vid entry */
+ pe.index = MVPP2_PE_VID_EDSA_FLTR_DEFAULT;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_EDSA_VID_AI_BIT,
+ MVPP2_PRS_EDSA_VID_AI_BIT);
+
+ /* Skip VLAN header - Set offset to 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_EDSA_LEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+}
+
/* Match basic ethertypes */
static int mvpp2_prs_etype_init(struct mvpp2 *priv)
{
@@ -3023,7 +3128,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
pe.index = MVPP2_PE_VLAN_DBL;
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
+
/* Clear ai for next iterations */
mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE,
@@ -3386,6 +3492,192 @@ static int mvpp2_prs_ip6_init(struct mvpp2 *priv)
return 0;
}
+/* Find tcam entry with matched pair <vid,port> */
+static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
+ u16 mask)
+{
+ unsigned char byte[2], enable[2];
+ struct mvpp2_prs_entry pe;
+ u16 rvid, rmask;
+ int tid;
+
+ /* Go through the all entries with MVPP2_PRS_LU_VID */
+ for (tid = MVPP2_PE_VID_FILT_RANGE_START;
+ tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) {
+ if (!priv->prs_shadow[tid].valid ||
+ priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
+ continue;
+
+ pe.index = tid;
+
+ mvpp2_prs_hw_read(priv, &pe);
+ mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
+ mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
+
+ rvid = ((byte[0] & 0xf) << 8) + byte[1];
+ rmask = ((enable[0] & 0xf) << 8) + enable[1];
+
+ if (rvid != vid || rmask != mask)
+ continue;
+
+ return tid;
+ }
+
+ return 0;
+}
+
+/* Write parser entry for VID filtering */
+static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
+{
+ unsigned int vid_start = MVPP2_PE_VID_FILT_RANGE_START +
+ port->id * MVPP2_PRS_VLAN_FILT_MAX;
+ unsigned int mask = 0xfff, reg_val, shift;
+ struct mvpp2 *priv = port->priv;
+ struct mvpp2_prs_entry pe;
+ int tid;
+
+ /* Scan TCAM and see if entry with this <vid,port> already exist */
+ tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask);
+
+ reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+ if (reg_val & MVPP2_DSA_EXTENDED)
+ shift = MVPP2_VLAN_TAG_EDSA_LEN;
+ else
+ shift = MVPP2_VLAN_TAG_LEN;
+
+ /* No such entry */
+ if (!tid) {
+ memset(&pe, 0, sizeof(pe));
+
+ /* Go through all entries from first to last in vlan range */
+ tid = mvpp2_prs_tcam_first_free(priv, vid_start,
+ vid_start +
+ MVPP2_PRS_VLAN_FILT_MAX_ENTRY);
+
+ /* There isn't room for a new VID filter */
+ if (tid < 0)
+ return tid;
+
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+ pe.index = tid;
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+ } else {
+ mvpp2_prs_hw_read(priv, &pe);
+ }
+
+ /* Enable the current port */
+ mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Skip VLAN header - Set offset to 4 or 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Set match on VID */
+ mvpp2_prs_match_vid(&pe, MVPP2_PRS_VID_TCAM_BYTE, vid);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ return 0;
+}
+
+/* Write parser entry for VID filtering */
+static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid)
+{
+ struct mvpp2 *priv = port->priv;
+ int tid;
+
+ /* Scan TCAM and see if entry with this <vid,port> already exist */
+ tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff);
+
+ /* No such entry */
+ if (tid)
+ return;
+
+ mvpp2_prs_hw_inv(priv, tid);
+ priv->prs_shadow[tid].valid = false;
+}
+
+/* Remove all existing VID filters on this port */
+static void mvpp2_prs_vid_remove_all(struct mvpp2_port *port)
+{
+ struct mvpp2 *priv = port->priv;
+ int tid;
+
+ for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
+ tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
+ if (priv->prs_shadow[tid].valid)
+ mvpp2_prs_vid_entry_remove(port, tid);
+ }
+}
+
+/* Remove VID filering entry for this port */
+static void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port)
+{
+ unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+ struct mvpp2 *priv = port->priv;
+
+ /* Invalidate the guard entry */
+ mvpp2_prs_hw_inv(priv, tid);
+
+ priv->prs_shadow[tid].valid = false;
+}
+
+/* Add guard entry that drops packets when no VID is matched on this port */
+static void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port)
+{
+ unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+ struct mvpp2 *priv = port->priv;
+ unsigned int reg_val, shift;
+ struct mvpp2_prs_entry pe;
+
+ if (priv->prs_shadow[tid].valid)
+ return;
+
+ memset(&pe, 0, sizeof(pe));
+
+ pe.index = tid;
+
+ reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+ if (reg_val & MVPP2_DSA_EXTENDED)
+ shift = MVPP2_VLAN_TAG_EDSA_LEN;
+ else
+ shift = MVPP2_VLAN_TAG_LEN;
+
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+
+ /* Update port mask */
+ mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Skip VLAN header - Set offset to 4 or 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Drop VLAN packets that don't belong to any VIDs on this port */
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+ MVPP2_PRS_RI_DROP_MASK);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+}
+
/* Parser default initialization */
static int mvpp2_prs_default_init(struct platform_device *pdev,
struct mvpp2 *priv)
@@ -3429,6 +3721,8 @@ static int mvpp2_prs_default_init(struct platform_device *pdev,
mvpp2_prs_dsa_init(priv);
+ mvpp2_prs_vid_init(priv);
+
err = mvpp2_prs_etype_init(priv);
if (err)
return err;
@@ -7153,6 +7447,12 @@ retry:
}
}
}
+
+ /* Disable VLAN filtering in promiscuous mode */
+ if (dev->flags & IFF_PROMISC)
+ mvpp2_prs_vid_disable_filtering(port);
+ else
+ mvpp2_prs_vid_enable_filtering(port);
}
static int mvpp2_set_mac_address(struct net_device *dev, void *p)
@@ -7292,6 +7592,48 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return ret;
}
+static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ int ret;
+
+ ret = mvpp2_prs_vid_entry_add(port, vid);
+ if (ret)
+ netdev_err(dev, "rx-vlan-filter offloading cannot accept more than %d VIDs per port\n",
+ MVPP2_PRS_VLAN_FILT_MAX - 1);
+ return ret;
+}
+
+static int mvpp2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+
+ mvpp2_prs_vid_entry_remove(port, vid);
+ return 0;
+}
+
+static int mvpp2_set_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ netdev_features_t changed = dev->features ^ features;
+ struct mvpp2_port *port = netdev_priv(dev);
+
+ if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ mvpp2_prs_vid_enable_filtering(port);
+ } else {
+ /* Invalidate all registered VID filters for this
+ * port
+ */
+ mvpp2_prs_vid_remove_all(port);
+
+ mvpp2_prs_vid_disable_filtering(port);
+ }
+ }
+
+ return 0;
+}
+
/* Ethtool methods */
/* Set interrupt coalescing for ethtools */
@@ -7433,6 +7775,9 @@ static const struct net_device_ops mvpp2_netdev_ops = {
.ndo_change_mtu = mvpp2_change_mtu,
.ndo_get_stats64 = mvpp2_get_stats64,
.ndo_do_ioctl = mvpp2_ioctl,
+ .ndo_vlan_rx_add_vid = mvpp2_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = mvpp2_vlan_rx_kill_vid,
+ .ndo_set_features = mvpp2_set_features,
};
static const struct ethtool_ops mvpp2_eth_tool_ops = {
@@ -7945,7 +8290,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
dev->features = features | NETIF_F_RXCSUM;
- dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
+ dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
+ NETIF_F_HW_VLAN_CTAG_FILTER;
dev->vlan_features |= features;
dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 8d2d140d7910..7c6204f701ae 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1040,6 +1040,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev,
xstats->tail_drop[i] =
mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl);
}
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT,
+ i, ppcnt_pl);
+ if (err)
+ continue;
+
+ xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl);
+ xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl);
+ }
}
static void update_stats_cache(struct work_struct *work)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 2673310f92da..d5e711d8ad71 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -210,6 +210,8 @@ struct mlxsw_sp_port_xstats {
u64 wred_drop[TC_MAX_QUEUE];
u64 tail_drop[TC_MAX_QUEUE];
u64 backlog[TC_MAX_QUEUE];
+ u64 tx_bytes[IEEE_8021QAZ_MAX_TCS];
+ u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
};
struct mlxsw_sp_port {
@@ -247,6 +249,7 @@ struct mlxsw_sp_port {
struct mlxsw_sp_port_sample *sample;
struct list_head vlans_list;
struct mlxsw_sp_qdisc *root_qdisc;
+ struct mlxsw_sp_qdisc *tclass_qdiscs;
unsigned acl_rule_count;
struct mlxsw_sp_acl_block *ing_acl_block;
struct mlxsw_sp_acl_block *eg_acl_block;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 0b7670459051..91262b0573e3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -42,6 +42,8 @@
#include "reg.h"
#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1)
+#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \
+ MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1))
enum mlxsw_sp_qdisc_type {
MLXSW_SP_QDISC_NO_QDISC,
@@ -76,6 +78,7 @@ struct mlxsw_sp_qdisc_ops {
struct mlxsw_sp_qdisc {
u32 handle;
u8 tclass_num;
+ u8 prio_bitmap;
union {
struct red_stats red;
} xstats_base;
@@ -99,6 +102,44 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle,
mlxsw_sp_qdisc->handle == handle;
}
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
+ bool root_only)
+{
+ int tclass, child_index;
+
+ if (parent == TC_H_ROOT)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (root_only || !mlxsw_sp_port->root_qdisc ||
+ !mlxsw_sp_port->root_qdisc->ops ||
+ TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle ||
+ TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
+ return NULL;
+
+ child_index = TC_H_MIN(parent);
+ tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
+ return &mlxsw_sp_port->tclass_qdiscs[tclass];
+}
+
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
+{
+ int i;
+
+ if (mlxsw_sp_port->root_qdisc->handle == handle)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC)
+ return NULL;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle)
+ return &mlxsw_sp_port->tclass_qdiscs[i];
+
+ return NULL;
+}
+
static int
mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@@ -185,6 +226,23 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
return -EOPNOTSUPP;
}
+static void
+mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
+ u8 prio_bitmap, u64 *tx_packets,
+ u64 *tx_bytes)
+{
+ int i;
+
+ *tx_packets = 0;
+ *tx_bytes = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (prio_bitmap & BIT(i)) {
+ *tx_packets += xstats->tx_packets[i];
+ *tx_bytes += xstats->tx_bytes[i];
+ }
+ }
+}
+
static int
mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
int tclass_num, u32 min, u32 max,
@@ -230,17 +288,16 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
- struct rtnl_link_stats64 *stats;
struct red_stats *red_base;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
red_base = &mlxsw_sp_qdisc->xstats_base.red;
- stats_base->tx_packets = stats->tx_packets;
- stats_base->tx_bytes = stats->tx_bytes;
-
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &stats_base->tx_packets,
+ &stats_base->tx_bytes);
red_base->prob_mark = xstats->ecn;
red_base->prob_drop = xstats->wred_drop[tclass_num];
red_base->pdrop = xstats->tail_drop[tclass_num];
@@ -255,6 +312,12 @@ static int
mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
+ struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc;
+
+ if (root_qdisc != mlxsw_sp_qdisc)
+ root_qdisc->stats_base.backlog -=
+ mlxsw_sp_qdisc->stats_base.backlog;
+
return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
mlxsw_sp_qdisc->tclass_num);
}
@@ -319,6 +382,7 @@ mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
mlxsw_sp_qdisc->stats_base.backlog);
p->qstats->backlog -= backlog;
+ mlxsw_sp_qdisc->stats_base.backlog = 0;
}
static int
@@ -357,14 +421,16 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
- struct rtnl_link_stats64 *stats;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
- tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
- tx_packets = stats->tx_packets - stats_base->tx_packets;
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &tx_packets, &tx_bytes);
+ tx_bytes = tx_bytes - stats_base->tx_bytes;
+ tx_packets = tx_packets - stats_base->tx_packets;
+
overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
stats_base->overlimits;
drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] -
@@ -406,11 +472,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
- if (p->parent != TC_H_ROOT)
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
+ if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
- mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
-
if (p->command == TC_RED_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
@@ -441,9 +506,13 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
{
int i;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
MLXSW_SP_PORT_DEFAULT_TCLASS);
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[i]);
+ mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
+ }
return 0;
}
@@ -467,16 +536,41 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
void *params)
{
struct tc_prio_qopt_offload_params *p = params;
- int tclass, i;
+ struct mlxsw_sp_qdisc *child_qdisc;
+ int tclass, i, band, backlog;
+ u8 old_priomap;
int err;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
- tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
- err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
- if (err)
- return err;
+ for (band = 0; band < p->bands; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ old_priomap = child_qdisc->prio_bitmap;
+ child_qdisc->prio_bitmap = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (p->priomap[i] == band) {
+ child_qdisc->prio_bitmap |= BIT(i);
+ if (BIT(i) & old_priomap)
+ continue;
+ err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
+ i, tclass);
+ if (err)
+ return err;
+ }
+ }
+ if (old_priomap != child_qdisc->prio_bitmap &&
+ child_qdisc->ops && child_qdisc->ops->clean_stats) {
+ backlog = child_qdisc->stats_base.backlog;
+ child_qdisc->ops->clean_stats(mlxsw_sp_port,
+ child_qdisc);
+ child_qdisc->stats_base.backlog = backlog;
+ }
+ }
+ for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ child_qdisc->prio_bitmap = 0;
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
}
-
return 0;
}
@@ -513,6 +607,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
drops += xstats->tail_drop[i];
+ drops += xstats->wred_drop[i];
backlog += xstats->backlog[i];
}
drops = drops - stats_base->drops;
@@ -548,8 +643,10 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
stats_base->tx_bytes = stats->tx_bytes;
stats_base->drops = 0;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
stats_base->drops += xstats->tail_drop[i];
+ stats_base->drops += xstats->wred_drop[i];
+ }
mlxsw_sp_qdisc->stats_base.backlog = 0;
}
@@ -564,15 +661,48 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
};
+/* Grafting is not supported in mlxsw. It will result in un-offloading of the
+ * grafted qdisc as well as the qdisc in the qdisc new location.
+ * (However, if the graft is to the location where the qdisc is already at, it
+ * will be ignored completely and won't cause un-offloading).
+ */
+static int
+mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct tc_prio_qopt_offload_graft_params *p)
+{
+ int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band);
+ struct mlxsw_sp_qdisc *old_qdisc;
+
+ /* Check if the grafted qdisc is already in its "new" location. If so -
+ * nothing needs to be done.
+ */
+ if (p->band < IEEE_8021QAZ_MAX_TCS &&
+ mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle)
+ return 0;
+
+ /* See if the grafted qdisc is already offloaded on any tclass. If so,
+ * unoffload it.
+ */
+ old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
+ p->child_handle);
+ if (old_qdisc)
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
+
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[tclass_num]);
+ return -EOPNOTSUPP;
+}
+
int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p)
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
- if (p->parent != TC_H_ROOT)
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
+ if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
- mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
if (p->command == TC_PRIO_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
@@ -589,6 +719,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
case TC_PRIO_STATS:
return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->stats);
+ case TC_PRIO_GRAFT:
+ return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+ &p->graft_params);
default:
return -EOPNOTSUPP;
}
@@ -596,17 +729,36 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
- mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc),
- GFP_KERNEL);
- if (!mlxsw_sp_port->root_qdisc)
- return -ENOMEM;
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+ int i;
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_root_qdisc_init;
+
+ mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc;
+ mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff;
mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS,
+ GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_tclass_qdiscs_init;
+
+ mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i;
+
return 0;
+
+err_tclass_qdiscs_init:
+ kfree(mlxsw_sp_port->root_qdisc);
+err_root_qdisc_init:
+ return -ENOMEM;
}
void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
{
+ kfree(mlxsw_sp_port->tclass_qdiscs);
kfree(mlxsw_sp_port->root_qdisc);
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index ca4a81dc1ace..03ad4eeac7f8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -1784,7 +1784,7 @@ enum qed_iwarp_mpa_pkt_type {
/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
#define QED_IWARP_MAX_BDS_PER_FPDU 3
-char *pkt_type_str[] = {
+static const char * const pkt_type_str[] = {
"QED_IWARP_MPA_PKT_PACKED",
"QED_IWARP_MPA_PKT_PARTIAL",
"QED_IWARP_MPA_PKT_UNALIGNED"
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index cc51286ee51f..0a0638d692f9 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -735,10 +735,6 @@ struct ring_info {
u8 __pad[sizeof(void *) - sizeof(u32)];
};
-enum features {
- RTL_FEATURE_GMII = (1 << 0),
-};
-
struct rtl8169_counters {
__le64 tx_packets;
__le64 rx_packets;
@@ -8233,7 +8229,7 @@ static const struct rtl_cfg_info {
unsigned int region;
unsigned int align;
u16 event_slow;
- unsigned features;
+ unsigned int has_gmii:1;
const struct rtl_coalesce_info *coalesce_info;
u8 default_ver;
} rtl_cfg_infos [] = {
@@ -8242,7 +8238,7 @@ static const struct rtl_cfg_info {
.region = 1,
.align = 0,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver,
- .features = RTL_FEATURE_GMII,
+ .has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8169,
.default_ver = RTL_GIGA_MAC_VER_01,
},
@@ -8251,7 +8247,7 @@ static const struct rtl_cfg_info {
.region = 2,
.align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow,
- .features = RTL_FEATURE_GMII,
+ .has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8168_8136,
.default_ver = RTL_GIGA_MAC_VER_11,
},
@@ -8394,7 +8390,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
mii->mdio_write = rtl_mdio_write;
mii->phy_id_mask = 0x1f;
mii->reg_num_mask = 0x1f;
- mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
+ mii->supports_gmii = cfg->has_gmii;
/* disable ASPM completely as that cause random device stop working
* problems as well as full system hangs for some PCIe devices users */
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 5166575a164d..a115f12bf130 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -74,6 +74,7 @@ struct ipvl_dev {
DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
netdev_features_t sfeatures;
u32 msg_enable;
+ spinlock_t addrs_lock;
};
struct ipvl_addr {
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 1b5dc200b573..17daebd19e65 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -109,25 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
const void *iaddr, bool is_v6)
{
- struct ipvl_addr *addr;
+ struct ipvl_addr *addr, *ret = NULL;
- list_for_each_entry(addr, &ipvlan->addrs, anode)
- if (addr_equal(is_v6, addr, iaddr))
- return addr;
- return NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
+ if (addr_equal(is_v6, addr, iaddr)) {
+ ret = addr;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return ret;
}
bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
{
struct ipvl_dev *ipvlan;
+ bool ret = false;
- ASSERT_RTNL();
-
- list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
- if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
- return true;
+ rcu_read_lock();
+ list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+ if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
+ ret = true;
+ break;
+ }
}
- return false;
+ rcu_read_unlock();
+ return ret;
}
static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
@@ -498,8 +506,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
/* In this mode we dont care about multicast and broadcast traffic */
if (is_multicast_ether_addr(ethh->h_dest)) {
- pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
- ntohs(skb->protocol));
+ pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
+ ntohs(skb->protocol));
kfree_skb(skb);
goto out;
}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index d05b902c925b..3efc1c92c6a7 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -227,8 +227,10 @@ static int ipvlan_open(struct net_device *dev)
else
dev->flags &= ~IFF_NOARP;
- list_for_each_entry(addr, &ipvlan->addrs, anode)
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_add(ipvlan, addr);
+ rcu_read_unlock();
return dev_uc_add(phy_dev, phy_dev->dev_addr);
}
@@ -244,8 +246,10 @@ static int ipvlan_stop(struct net_device *dev)
dev_uc_del(phy_dev, phy_dev->dev_addr);
- list_for_each_entry(addr, &ipvlan->addrs, anode)
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_del(addr);
+ rcu_read_unlock();
return 0;
}
@@ -588,6 +592,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
ipvlan->sfeatures = IPVLAN_FEATURES;
ipvlan_adjust_mtu(ipvlan, phy_dev);
INIT_LIST_HEAD(&ipvlan->addrs);
+ spin_lock_init(&ipvlan->addrs_lock);
/* TODO Probably put random address here to be presented to the
* world but keep using the physical-dev address for the outgoing
@@ -665,11 +670,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct ipvl_addr *addr, *next;
+ spin_lock_bh(&ipvlan->addrs_lock);
list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
ipvlan_ht_addr_del(addr);
- list_del(&addr->anode);
+ list_del_rcu(&addr->anode);
kfree_rcu(addr, rcu);
}
+ spin_unlock_bh(&ipvlan->addrs_lock);
ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
list_del_rcu(&ipvlan->pnode);
@@ -760,8 +767,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
if (dev->reg_state != NETREG_UNREGISTERING)
break;
- list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
- pnode)
+ list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
&lst_kill);
unregister_netdevice_many(&lst_kill);
@@ -793,6 +799,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
return NOTIFY_DONE;
}
+/* the caller must held the addrs lock */
static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
@@ -811,7 +818,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
addr->atype = IPVL_IPV6;
#endif
}
- list_add_tail(&addr->anode, &ipvlan->addrs);
+
+ list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
/* If the interface is not up, the address will be added to the hash
* list by ipvlan_open.
@@ -826,15 +834,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
+ spin_lock_bh(&ipvlan->addrs_lock);
addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
- if (!addr)
+ if (!addr) {
+ spin_unlock_bh(&ipvlan->addrs_lock);
return;
+ }
ipvlan_ht_addr_del(addr);
- list_del(&addr->anode);
+ list_del_rcu(&addr->anode);
+ spin_unlock_bh(&ipvlan->addrs_lock);
kfree_rcu(addr, rcu);
-
- return;
}
static bool ipvlan_is_valid_dev(const struct net_device *dev)
@@ -853,14 +863,17 @@ static bool ipvlan_is_valid_dev(const struct net_device *dev)
#if IS_ENABLED(CONFIG_IPV6)
static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
{
- if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
+ int ret = -EINVAL;
+
+ spin_lock_bh(&ipvlan->addrs_lock);
+ if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
netif_err(ipvlan, ifup, ipvlan->dev,
"Failed to add IPv6=%pI6c addr for %s intf\n",
ip6_addr, ipvlan->dev->name);
- return -EINVAL;
- }
-
- return ipvlan_add_addr(ipvlan, ip6_addr, true);
+ else
+ ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
+ spin_unlock_bh(&ipvlan->addrs_lock);
+ return ret;
}
static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
@@ -899,10 +912,6 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
struct ipvl_dev *ipvlan = netdev_priv(dev);
- /* FIXME IPv6 autoconf calls us from bh without RTNL */
- if (in_softirq())
- return NOTIFY_DONE;
-
if (!ipvlan_is_valid_dev(dev))
return NOTIFY_DONE;
@@ -922,14 +931,17 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
{
- if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
+ int ret = -EINVAL;
+
+ spin_lock_bh(&ipvlan->addrs_lock);
+ if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
netif_err(ipvlan, ifup, ipvlan->dev,
"Failed to add IPv4=%pI4 on %s intf.\n",
ip4_addr, ipvlan->dev->name);
- return -EINVAL;
- }
-
- return ipvlan_add_addr(ipvlan, ip4_addr, false);
+ else
+ ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
+ spin_unlock_bh(&ipvlan->addrs_lock);
+ return ret;
}
static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 6ac8b29b2dc3..27327c917a59 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1584,25 +1584,14 @@ static int phylink_sfp_module_insert(void *upstream,
bool changed;
u8 port;
- sfp_parse_support(pl->sfp_bus, id, support);
- port = sfp_parse_port(pl->sfp_bus, id, support);
- iface = sfp_parse_interface(pl->sfp_bus, id);
-
ASSERT_RTNL();
- switch (iface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- case PHY_INTERFACE_MODE_2500BASEX:
- case PHY_INTERFACE_MODE_10GKR:
- break;
- default:
- return -EINVAL;
- }
+ sfp_parse_support(pl->sfp_bus, id, support);
+ port = sfp_parse_port(pl->sfp_bus, id, support);
memset(&config, 0, sizeof(config));
linkmode_copy(config.advertising, support);
- config.interface = iface;
+ config.interface = PHY_INTERFACE_MODE_NA;
config.speed = SPEED_UNKNOWN;
config.duplex = DUPLEX_UNKNOWN;
config.pause = MLO_PAUSE_AN;
@@ -1611,6 +1600,22 @@ static int phylink_sfp_module_insert(void *upstream,
/* Ignore errors if we're expecting a PHY to attach later */
ret = phylink_validate(pl, support, &config);
if (ret) {
+ netdev_err(pl->netdev, "validation with support %*pb failed: %d\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+ return ret;
+ }
+
+ iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
+ if (iface == PHY_INTERFACE_MODE_NA) {
+ netdev_err(pl->netdev,
+ "selection of interface failed, advertisment %*pb\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
+ return -EINVAL;
+ }
+
+ config.interface = iface;
+ ret = phylink_validate(pl, support, &config);
+ if (ret) {
netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
phylink_an_mode_str(MLO_AN_INBAND),
phy_modes(config.interface),
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 8961209ee949..3d4ff5d0d2a6 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -106,68 +106,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
EXPORT_SYMBOL_GPL(sfp_parse_port);
/**
- * sfp_parse_interface() - Parse the phy_interface_t
- * @bus: a pointer to the &struct sfp_bus structure for the sfp module
- * @id: a pointer to the module's &struct sfp_eeprom_id
- *
- * Derive the phy_interface_t mode for the information found in the
- * module's identifying EEPROM. There is no standard or defined way
- * to derive this information, so we use some heuristics.
- *
- * If the encoding is 64b66b, then the module must be >= 10G, so
- * return %PHY_INTERFACE_MODE_10GKR.
- *
- * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre
- * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return
- * %PHY_INTERFACE_MODE_SGMII mode.
- *
- * If the encoding is not known, return %PHY_INTERFACE_MODE_NA.
- */
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id)
-{
- phy_interface_t iface;
-
- /* Setting the serdes link mode is guesswork: there's no field in
- * the EEPROM which indicates what mode should be used.
- *
- * If the module wants 64b66b, then it must be >= 10G.
- *
- * If it's a gigabit-only fiber module, it probably does not have
- * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch
- * to SGMII mode (which is required to support non-gigabit speeds).
- */
- switch (id->base.encoding) {
- case SFP_ENCODING_8472_64B66B:
- iface = PHY_INTERFACE_MODE_10GKR;
- break;
-
- case SFP_ENCODING_8B10B:
- if (!id->base.e1000_base_t &&
- !id->base.e100_base_lx &&
- !id->base.e100_base_fx)
- iface = PHY_INTERFACE_MODE_1000BASEX;
- else
- iface = PHY_INTERFACE_MODE_SGMII;
- break;
-
- default:
- if (id->base.e1000_base_cx) {
- iface = PHY_INTERFACE_MODE_1000BASEX;
- break;
- }
-
- iface = PHY_INTERFACE_MODE_NA;
- dev_err(bus->sfp_dev,
- "SFP module encoding does not support 8b10b nor 64b66b\n");
- break;
- }
-
- return iface;
-}
-EXPORT_SYMBOL_GPL(sfp_parse_interface);
-
-/**
* sfp_parse_support() - Parse the eeprom id for supported link modes
* @bus: a pointer to the &struct sfp_bus structure for the sfp module
* @id: a pointer to the module's &struct sfp_eeprom_id
@@ -180,10 +118,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support)
{
unsigned int br_min, br_nom, br_max;
-
- phylink_set(support, Autoneg);
- phylink_set(support, Pause);
- phylink_set(support, Asym_Pause);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, };
/* Decode the bitrate information to MBd */
br_min = br_nom = br_max = 0;
@@ -201,20 +136,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
/* Set ethtool support from the compliance fields. */
if (id->base.e10g_base_sr)
- phylink_set(support, 10000baseSR_Full);
+ phylink_set(modes, 10000baseSR_Full);
if (id->base.e10g_base_lr)
- phylink_set(support, 10000baseLR_Full);
+ phylink_set(modes, 10000baseLR_Full);
if (id->base.e10g_base_lrm)
- phylink_set(support, 10000baseLRM_Full);
+ phylink_set(modes, 10000baseLRM_Full);
if (id->base.e10g_base_er)
- phylink_set(support, 10000baseER_Full);
+ phylink_set(modes, 10000baseER_Full);
if (id->base.e1000_base_sx ||
id->base.e1000_base_lx ||
id->base.e1000_base_cx)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
if (id->base.e1000_base_t) {
- phylink_set(support, 1000baseT_Half);
- phylink_set(support, 1000baseT_Full);
+ phylink_set(modes, 1000baseT_Half);
+ phylink_set(modes, 1000baseT_Full);
}
/* 1000Base-PX or 1000Base-BX10 */
@@ -228,20 +163,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
if ((id->base.sfp_ct_passive || id->base.sfp_ct_active) && br_nom) {
/* This may look odd, but some manufacturers use 12000MBd */
if (br_min <= 12000 && br_max >= 10300)
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
if (br_min <= 3200 && br_max >= 3100)
- phylink_set(support, 2500baseX_Full);
+ phylink_set(modes, 2500baseX_Full);
if (br_min <= 1300 && br_max >= 1200)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
}
if (id->base.sfp_ct_passive) {
if (id->base.passive.sff8431_app_e)
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
}
if (id->base.sfp_ct_active) {
if (id->base.active.sff8431_app_e ||
id->base.active.sff8431_lim) {
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
}
}
@@ -249,18 +184,18 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
case 0x00: /* Unspecified */
break;
case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
- phylink_set(support, 100000baseSR4_Full);
- phylink_set(support, 25000baseSR_Full);
+ phylink_set(modes, 100000baseSR4_Full);
+ phylink_set(modes, 25000baseSR_Full);
break;
case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
- phylink_set(support, 100000baseLR4_ER4_Full);
+ phylink_set(modes, 100000baseLR4_ER4_Full);
break;
case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
case 0x0c: /* 25Gbase-CR CA-S */
case 0x0d: /* 25Gbase-CR CA-N */
- phylink_set(support, 100000baseCR4_Full);
- phylink_set(support, 25000baseCR_Full);
+ phylink_set(modes, 100000baseCR4_Full);
+ phylink_set(modes, 25000baseCR_Full);
break;
default:
dev_warn(bus->sfp_dev,
@@ -274,13 +209,70 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
id->base.fc_speed_200 ||
id->base.fc_speed_400) {
if (id->base.br_nominal >= 31)
- phylink_set(support, 2500baseX_Full);
+ phylink_set(modes, 2500baseX_Full);
if (id->base.br_nominal >= 12)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
}
+
+ /* If we haven't discovered any modes that this module supports, try
+ * the encoding and bitrate to determine supported modes. Some BiDi
+ * modules (eg, 1310nm/1550nm) are not 1000BASE-BX compliant due to
+ * the differing wavelengths, so do not set any transceiver bits.
+ */
+ if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+ /* If the encoding and bit rate allows 1000baseX */
+ if (id->base.encoding == SFP_ENCODING_8B10B && br_nom &&
+ br_min <= 1300 && br_max >= 1200)
+ phylink_set(modes, 1000baseX_Full);
+ }
+
+ bitmap_or(support, support, modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ phylink_set(support, Autoneg);
+ phylink_set(support, Pause);
+ phylink_set(support, Asym_Pause);
}
EXPORT_SYMBOL_GPL(sfp_parse_support);
+/**
+ * sfp_select_interface() - Select appropriate phy_interface_t mode
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ * @link_modes: ethtool link modes mask
+ *
+ * Derive the phy_interface_t mode for the information found in the
+ * module's identifying EEPROM and the link modes mask. There is no
+ * standard or defined way to derive this information, so we decide
+ * based upon the link mode mask.
+ */
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes)
+{
+ if (phylink_test(link_modes, 10000baseCR_Full) ||
+ phylink_test(link_modes, 10000baseSR_Full) ||
+ phylink_test(link_modes, 10000baseLR_Full) ||
+ phylink_test(link_modes, 10000baseLRM_Full) ||
+ phylink_test(link_modes, 10000baseER_Full))
+ return PHY_INTERFACE_MODE_10GKR;
+
+ if (phylink_test(link_modes, 2500baseX_Full))
+ return PHY_INTERFACE_MODE_2500BASEX;
+
+ if (id->base.e1000_base_t ||
+ id->base.e100_base_lx ||
+ id->base.e100_base_fx)
+ return PHY_INTERFACE_MODE_SGMII;
+
+ if (phylink_test(link_modes, 1000baseX_Full))
+ return PHY_INTERFACE_MODE_1000BASEX;
+
+ dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n");
+
+ return PHY_INTERFACE_MODE_NA;
+}
+EXPORT_SYMBOL_GPL(sfp_select_interface);
+
static LIST_HEAD(sfp_buses);
static DEFINE_MUTEX(sfp_mutex);
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 6c7d9289078d..83bf4959b043 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -42,6 +42,7 @@ enum {
SFP_MOD_EMPTY = 0,
SFP_MOD_PROBE,
+ SFP_MOD_HPOWER,
SFP_MOD_PRESENT,
SFP_MOD_ERROR,
@@ -86,6 +87,7 @@ static const enum gpiod_flags gpio_flags[] = {
* access the I2C EEPROM. However, Avago modules require 300ms.
*/
#define T_PROBE_INIT msecs_to_jiffies(300)
+#define T_HPOWER_LEVEL msecs_to_jiffies(300)
#define T_PROBE_RETRY msecs_to_jiffies(100)
/* SFP modules appear to always have their PHY configured for bus address
@@ -110,10 +112,12 @@ struct sfp {
struct sfp_bus *sfp_bus;
struct phy_device *mod_phy;
const struct sff_data *type;
+ u32 max_power_mW;
unsigned int (*get_state)(struct sfp *);
void (*set_state)(struct sfp *, unsigned int);
int (*read)(struct sfp *, bool, u8, void *, size_t);
+ int (*write)(struct sfp *, bool, u8, void *, size_t);
struct gpio_desc *gpio[GPIO_MAX];
@@ -201,10 +205,11 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
}
}
-static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
- void *buf, size_t len)
+static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+ size_t len)
{
struct i2c_msg msgs[2];
+ u8 bus_addr = a2 ? 0x51 : 0x50;
int ret;
msgs[0].addr = bus_addr;
@@ -216,17 +221,38 @@ static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
msgs[1].len = len;
msgs[1].buf = buf;
- ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+ ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
if (ret < 0)
return ret;
return ret == ARRAY_SIZE(msgs) ? len : 0;
}
-static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf,
- size_t len)
+static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+ size_t len)
{
- return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len);
+ struct i2c_msg msgs[1];
+ u8 bus_addr = a2 ? 0x51 : 0x50;
+ int ret;
+
+ msgs[0].addr = bus_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = 1 + len;
+ msgs[0].buf = kmalloc(1 + len, GFP_KERNEL);
+ if (!msgs[0].buf)
+ return -ENOMEM;
+
+ msgs[0].buf[0] = dev_addr;
+ memcpy(&msgs[0].buf[1], buf, len);
+
+ ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
+
+ kfree(msgs[0].buf);
+
+ if (ret < 0)
+ return ret;
+
+ return ret == ARRAY_SIZE(msgs) ? len : 0;
}
static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
@@ -239,6 +265,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
sfp->i2c = i2c;
sfp->read = sfp_i2c_read;
+ sfp->write = sfp_i2c_write;
i2c_mii = mdio_i2c_alloc(sfp->dev, i2c);
if (IS_ERR(i2c_mii))
@@ -274,6 +301,11 @@ static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
return sfp->read(sfp, a2, addr, buf, len);
}
+static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
+{
+ return sfp->write(sfp, a2, addr, buf, len);
+}
+
static unsigned int sfp_check(void *buf, size_t len)
{
u8 *p, check;
@@ -462,21 +494,83 @@ static void sfp_sm_mod_init(struct sfp *sfp)
sfp_sm_probe_phy(sfp);
}
+static int sfp_sm_mod_hpower(struct sfp *sfp)
+{
+ u32 power;
+ u8 val;
+ int err;
+
+ power = 1000;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
+ power = 1500;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
+ power = 2000;
+
+ if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE &&
+ (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) !=
+ SFP_DIAGMON_DDM) {
+ /* The module appears not to implement bus address 0xa2,
+ * or requires an address change sequence, so assume that
+ * the module powers up in the indicated power mode.
+ */
+ if (power > sfp->max_power_mW) {
+ dev_err(sfp->dev,
+ "Host does not support %u.%uW modules\n",
+ power / 1000, (power / 100) % 10);
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ if (power > sfp->max_power_mW) {
+ dev_warn(sfp->dev,
+ "Host does not support %u.%uW modules, module left in power mode 1\n",
+ power / 1000, (power / 100) % 10);
+ return 0;
+ }
+
+ if (power <= 1000)
+ return 0;
+
+ err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+ if (err != sizeof(val)) {
+ dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err);
+ err = -EAGAIN;
+ goto err;
+ }
+
+ val |= BIT(0);
+
+ err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+ if (err != sizeof(val)) {
+ dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err);
+ err = -EAGAIN;
+ goto err;
+ }
+
+ dev_info(sfp->dev, "Module switched to %u.%uW power level\n",
+ power / 1000, (power / 100) % 10);
+ return T_HPOWER_LEVEL;
+
+err:
+ return err;
+}
+
static int sfp_sm_mod_probe(struct sfp *sfp)
{
/* SFP module inserted - read I2C data */
struct sfp_eeprom_id id;
u8 check;
- int err;
+ int ret;
- err = sfp_read(sfp, false, 0, &id, sizeof(id));
- if (err < 0) {
- dev_err(sfp->dev, "failed to read EEPROM: %d\n", err);
+ ret = sfp_read(sfp, false, 0, &id, sizeof(id));
+ if (ret < 0) {
+ dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
return -EAGAIN;
}
- if (err != sizeof(id)) {
- dev_err(sfp->dev, "EEPROM short read: %d\n", err);
+ if (ret != sizeof(id)) {
+ dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
return -EAGAIN;
}
@@ -521,7 +615,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
dev_warn(sfp->dev,
"module address swap to access page 0xA2 is not supported.\n");
- return sfp_module_insert(sfp->sfp_bus, &sfp->id);
+ ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
+ if (ret < 0)
+ return ret;
+
+ return sfp_sm_mod_hpower(sfp);
}
static void sfp_sm_mod_remove(struct sfp *sfp)
@@ -560,17 +658,25 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
if (event == SFP_E_REMOVE) {
sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
} else if (event == SFP_E_TIMEOUT) {
- int err = sfp_sm_mod_probe(sfp);
+ int val = sfp_sm_mod_probe(sfp);
- if (err == 0)
+ if (val == 0)
sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
- else if (err == -EAGAIN)
- sfp_sm_set_timer(sfp, T_PROBE_RETRY);
- else
+ else if (val > 0)
+ sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val);
+ else if (val != -EAGAIN)
sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
+ else
+ sfp_sm_set_timer(sfp, T_PROBE_RETRY);
}
break;
+ case SFP_MOD_HPOWER:
+ if (event == SFP_E_TIMEOUT) {
+ sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
+ break;
+ }
+ /* fallthrough */
case SFP_MOD_PRESENT:
case SFP_MOD_ERROR:
if (event == SFP_E_REMOVE) {
@@ -889,6 +995,14 @@ static int sfp_probe(struct platform_device *pdev)
if (!(sfp->gpio[GPIO_MODDEF0]))
sfp->get_state = sff_gpio_get_state;
+ device_property_read_u32(&pdev->dev, "maximum-power-milliwatt",
+ &sfp->max_power_mW);
+ if (!sfp->max_power_mW)
+ sfp->max_power_mW = 1000;
+
+ dev_info(sfp->dev, "Host maximum power %u.%uW\n",
+ sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10);
+
sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
if (!sfp->sfp_bus)
return -ENOMEM;
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index a468439969df..5dd781e65958 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1105,14 +1105,15 @@ static void team_port_disable_netpoll(struct team_port *port)
}
#endif
-static int team_upper_dev_link(struct team *team, struct team_port *port)
+static int team_upper_dev_link(struct team *team, struct team_port *port,
+ struct netlink_ext_ack *extack)
{
struct netdev_lag_upper_info lag_upper_info;
int err;
lag_upper_info.tx_type = team->mode->lag_tx_type;
err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
- &lag_upper_info, NULL);
+ &lag_upper_info, extack);
if (err)
return err;
port->dev->priv_flags |= IFF_TEAM_PORT;
@@ -1129,7 +1130,8 @@ static void __team_port_change_port_added(struct team_port *port, bool linkup);
static int team_dev_type_check_change(struct net_device *dev,
struct net_device *port_dev);
-static int team_port_add(struct team *team, struct net_device *port_dev)
+static int team_port_add(struct team *team, struct net_device *port_dev,
+ struct netlink_ext_ack *extack)
{
struct net_device *dev = team->dev;
struct team_port *port;
@@ -1137,12 +1139,14 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
int err;
if (port_dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port");
netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n",
portname);
return -EINVAL;
}
if (team_port_exists(port_dev)) {
+ NL_SET_ERR_MSG(extack, "Device is already a port of a team device");
netdev_err(dev, "Device %s is already a port "
"of a team device\n", portname);
return -EBUSY;
@@ -1150,6 +1154,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",
portname);
return -EPERM;
@@ -1160,6 +1165,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
return err;
if (port_dev->flags & IFF_UP) {
+ NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port");
netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
portname);
return -EBUSY;
@@ -1227,7 +1233,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
goto err_handler_register;
}
- err = team_upper_dev_link(team, port);
+ err = team_upper_dev_link(team, port, extack);
if (err) {
netdev_err(dev, "Device %s failed to set upper link\n",
portname);
@@ -1921,7 +1927,7 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev,
int err;
mutex_lock(&team->lock);
- err = team_port_add(team, port_dev);
+ err = team_port_add(team, port_dev, extack);
mutex_unlock(&team->lock);
if (!err)
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index e724d5a3dd80..ebce9e24906a 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -422,10 +422,11 @@ struct sfp_upstream_ops {
#if IS_ENABLED(CONFIG_SFP)
int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id);
void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes);
int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
@@ -444,18 +445,19 @@ static inline int sfp_parse_port(struct sfp_bus *bus,
return PORT_OTHER;
}
-static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id)
-{
- return PHY_INTERFACE_MODE_NA;
-}
-
static inline void sfp_parse_support(struct sfp_bus *bus,
const struct sfp_eeprom_id *id,
unsigned long *support)
{
}
+static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes)
+{
+ return PHY_INTERFACE_MODE_NA;
+}
+
static inline int sfp_get_module_info(struct sfp_bus *bus,
struct ethtool_modinfo *modinfo)
{
diff --git a/include/net/ethoc.h b/include/net/ethoc.h
index bb7f467da7fc..29ba069a1d93 100644
--- a/include/net/ethoc.h
+++ b/include/net/ethoc.h
@@ -21,4 +21,3 @@ struct ethoc_platform_data {
};
#endif /* !LINUX_NET_ETHOC_H */
-
diff --git a/include/net/flow.h b/include/net/flow.h
index f1624fd5b1d0..64e7ee9cb980 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -125,7 +125,7 @@ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos,
fl4->daddr = daddr;
fl4->saddr = saddr;
}
-
+
struct flowi6 {
struct flowi_common __fl_common;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c1a93ce35e62..b68fea022a82 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -49,9 +49,9 @@ struct inet_connection_sock_af_ops {
u16 net_header_len;
u16 net_frag_header_len;
u16 sockaddr_len;
- int (*setsockopt)(struct sock *sk, int level, int optname,
+ int (*setsockopt)(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
- int (*getsockopt)(struct sock *sk, int level, int optname,
+ int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
#ifdef CONFIG_COMPAT
int (*compat_setsockopt)(struct sock *sk,
@@ -67,7 +67,7 @@ struct inet_connection_sock_af_ops {
/** inet_connection_sock - INET connection oriented sock
*
- * @icsk_accept_queue: FIFO of established children
+ * @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
@@ -122,7 +122,7 @@ struct inet_connection_sock {
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
- __u16 rcv_mss; /* MSS used for delayed ACK decisions */
+ __u16 rcv_mss; /* MSS used for delayed ACK decisions */
} icsk_ack;
struct {
int enabled;
@@ -201,7 +201,7 @@ extern const char inet_csk_timer_bug_msg[];
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
struct inet_connection_sock *icsk = inet_csk(sk);
-
+
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
icsk->icsk_pending = 0;
#ifdef INET_CSK_CLEAR_TIMERS
diff --git a/include/net/ip.h b/include/net/ip.h
index 746abff9ce51..fe63ba95d12b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -186,15 +186,15 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
void ip4_datagram_release_cb(struct sock *sk);
struct ip_reply_arg {
- struct kvec iov[1];
+ struct kvec iov[1];
int flags;
__wsum csum;
int csumoffset; /* u16 offset of csum in iov[0].iov_base */
- /* -1 if not needed */
+ /* -1 if not needed */
int bound_dev_if;
u8 tos;
kuid_t uid;
-};
+};
#define IP_REPLY_ARG_NOSRCCHECK 1
@@ -577,13 +577,13 @@ int ip_frag_mem(struct net *net);
/*
* Functions provided by ip_forward.c
*/
-
+
int ip_forward(struct sk_buff *skb);
-
+
/*
* Functions provided by ip_options.c
*/
-
+
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
__be32 daddr, struct rtable *rt, int is_frag);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index f80524396c06..15e19c5c6f26 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -157,7 +157,7 @@ struct fib_result_nl {
unsigned char nh_sel;
unsigned char type;
unsigned char scope;
- int err;
+ int err;
};
#ifdef CONFIG_IP_ROUTE_MULTIPATH
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7a98cd583c73..cabd3cdd4015 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -105,8 +105,8 @@
#define IPV6_ADDR_ANY 0x0000U
-#define IPV6_ADDR_UNICAST 0x0001U
-#define IPV6_ADDR_MULTICAST 0x0002U
+#define IPV6_ADDR_UNICAST 0x0001U
+#define IPV6_ADDR_MULTICAST 0x0002U
#define IPV6_ADDR_LOOPBACK 0x0010U
#define IPV6_ADDR_LINKLOCAL 0x0020U
@@ -447,7 +447,7 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m,
#endif
}
-static inline void ipv6_addr_prefix(struct in6_addr *pfx,
+static inline void ipv6_addr_prefix(struct in6_addr *pfx,
const struct in6_addr *addr,
int plen)
{
@@ -496,7 +496,7 @@ static inline void __ipv6_addr_set_half(__be32 *addr,
addr[1] = wl;
}
-static inline void ipv6_addr_set(struct in6_addr *addr,
+static inline void ipv6_addr_set(struct in6_addr *addr,
__be32 w1, __be32 w2,
__be32 w3, __be32 w4)
{
@@ -732,7 +732,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int
}
/*
- * we should *never* get to this point since that
+ * we should *never* get to this point since that
* would mean the addrs are equal
*
* However, we do get to it 8) And exacly, when
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 87406252f0a3..e828d31be5da 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -806,6 +806,7 @@ enum tc_prio_command {
TC_PRIO_REPLACE,
TC_PRIO_DESTROY,
TC_PRIO_STATS,
+ TC_PRIO_GRAFT,
};
struct tc_prio_qopt_offload_params {
@@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params {
struct gnet_stats_queue *qstats;
};
+struct tc_prio_qopt_offload_graft_params {
+ u8 band;
+ u32 child_handle;
+};
+
struct tc_prio_qopt_offload {
enum tc_prio_command command;
u32 handle;
@@ -825,6 +831,8 @@ struct tc_prio_qopt_offload {
union {
struct tc_prio_qopt_offload_params replace_params;
struct tc_qopt_offload_stats stats;
+ struct tc_prio_qopt_offload_graft_params graft_params;
};
};
+
#endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 7d2077665c0b..aa027ba1d032 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1267,12 +1267,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
static inline void xfrm_sk_free_policy(struct sock *sk) {}
static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
-static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
-static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
-{
- return 1;
-}
+{
+ return 1;
+}
static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
{
return 1;
@@ -1356,7 +1356,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x,
{
if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
(ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) ||
- ipv6_addr_any((struct in6_addr *)saddr) ||
+ ipv6_addr_any((struct in6_addr *)saddr) ||
ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
return 1;
return 0;
@@ -1666,7 +1666,7 @@ int xfrm_user_policy(struct sock *sk, int optname,
static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
return -ENOPROTOOPT;
-}
+}
static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
{
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index cd46d7666598..f31e6575ab91 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
fnhe = rcu_dereference_protected(hash[i].chain, 1);
while (fnhe) {
struct fib_nh_exception *next;
-
+
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
rt_fibinfo_free(&fnhe->fnhe_rth_input);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index fdabc70283b6..d97e83b2dd33 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -556,4 +556,3 @@ int __init ip_misc_proc_init(void)
{
return register_pernet_subsys(&ip_proc_ops);
}
-
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ec35eaa5c029..c0630013c1ae 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister);
for (handler = rcu_dereference(head); \
handler != NULL; \
handler = rcu_dereference(handler->next)) \
-
+
static int tunnel4_rcv(struct sk_buff *skb)
{
struct xfrm_tunnel *handler;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 796ac4115485..0c752dc3f93b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -379,4 +379,3 @@ void __init xfrm4_init(void)
xfrm4_protocol_init();
register_pernet_subsys(&xfrm4_net_ops);
}
-
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 8e085cc05aeb..d7d0abc7fd0e 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -552,4 +552,3 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
-
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 11025f8d124b..b643f5ce6c80 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
return nexthdr;
}
EXPORT_SYMBOL(ipv6_find_hdr);
-
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 24535169663d..4d780c7f0130 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL(compat_ipv6_getsockopt);
#endif
-
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b8858c546f41..1678cf037688 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -355,4 +355,3 @@ void ipv6_misc_proc_exit(void)
{
unregister_pernet_subsys(&ipv6_proc_ops);
}
-
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b15075a5c227..16f434791763 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -196,4 +196,3 @@ void xfrm6_state_fini(void)
{
xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
}
-
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 27e672c12492..68f9d942bed4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
unsigned int len)
{
+ bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
@@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
* If child was empty even before update then backlog
* counter is screwed and we skip notification because
* parent class is already passive.
+ *
+ * If the original child was offloaded then it is allowed
+ * to be seem as empty, so the parent is notified anyway.
*/
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
+ notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
+ !qdisc_is_offloaded);
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index efbf51f35778..222e53d3d27a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch)
sch->q.qlen = 0;
}
-static int prio_offload(struct Qdisc *sch, bool enable)
+static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
{
- struct prio_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_prio_qopt_offload opt = {
.handle = sch->handle,
@@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable)
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
- if (enable) {
+ if (qopt) {
opt.command = TC_PRIO_REPLACE;
- opt.replace_params.bands = q->bands;
- memcpy(&opt.replace_params.priomap, q->prio2band,
+ opt.replace_params.bands = qopt->bands;
+ memcpy(&opt.replace_params.priomap, qopt->priomap,
TC_PRIO_MAX + 1);
opt.replace_params.qstats = &sch->qstats;
} else {
@@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch)
struct prio_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
- prio_offload(sch, false);
+ prio_offload(sch, NULL);
for (prio = 0; prio < q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
@@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
}
+ prio_offload(sch, qopt);
sch_tree_lock(sch);
q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
@@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
sch_tree_unlock(sch);
- prio_offload(sch, true);
return 0;
}
@@ -309,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct prio_sched_data *q = qdisc_priv(sch);
+ struct tc_prio_qopt_offload graft_offload;
+ struct net_device *dev = qdisc_dev(sch);
unsigned long band = arg - 1;
+ bool any_qdisc_is_offloaded;
+ int err;
if (new == NULL)
new = &noop_qdisc;
*old = qdisc_replace(sch, new, &q->queues[band]);
+
+ if (!tc_can_offload(dev))
+ return 0;
+
+ graft_offload.handle = sch->handle;
+ graft_offload.parent = sch->parent;
+ graft_offload.graft_params.band = band;
+ graft_offload.graft_params.child_handle = new->handle;
+ graft_offload.command = TC_PRIO_GRAFT;
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+ &graft_offload);
+
+ /* Don't report error if the graft is part of destroy operation. */
+ if (err && new != &noop_qdisc) {
+ /* Don't report error if the parent, the old child and the new
+ * one are not offloaded.
+ */
+ any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
+ any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
+ if (*old)
+ any_qdisc_is_offloaded |= (*old)->flags &
+ TCQ_F_OFFLOADED;
+
+ if (any_qdisc_is_offloaded)
+ NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
+ }
+
return 0;
}
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
new file mode 100644
index 000000000000..a793eef5b876
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -0,0 +1 @@
+forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
new file mode 100644
index 000000000000..4a0964c42860
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/README
@@ -0,0 +1,56 @@
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces can not be used with actual switching
+ASICs, as their ports can not be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+ br0
+ +
+ vrf-h1 | vrf-h2
+ + +---+----+ +
+ | | | |
+ 192.0.2.1/24 + + + + 192.0.2.2/24
+ swp1 swp2 swp3 swp4
+ + + + +
+ | | | |
+ +--------+ +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is being tested without noise from
+other system.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-ports LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loopback more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+ of recreating the same topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+ RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+ multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission.
+
+1. https://www.shellcheck.net/
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
new file mode 100755
index 000000000000..651998e70557
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ # 10 Seconds ageing time.
+ ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+ mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
new file mode 100644
index 000000000000..5cd2aed97958
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/config
@@ -0,0 +1,12 @@
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
new file mode 100644
index 000000000000..ab235c124f20
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
new file mode 100644
index 000000000000..23866a685f77
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -0,0 +1,533 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+
+if [[ -f forwarding.config ]]; then
+ source forwarding.config
+fi
+
+##############################################################################
+# Sanity checks
+
+if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit 0
+fi
+
+tc -j &> /dev/null
+if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old, missing JSON support"
+ exit 0
+fi
+
+tc filter help 2>&1 | grep block &> /dev/null
+if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old, missing shared block support"
+ exit 0
+fi
+
+if [[ ! -x "$(command -v jq)" ]]; then
+ echo "SKIP: jq not installed"
+ exit 0
+fi
+
+if [[ ! -x "$(command -v $MZ)" ]]; then
+ echo "SKIP: $MZ not installed"
+ exit 0
+fi
+
+if [[ ! -v NUM_NETIFS ]]; then
+ echo "SKIP: importer does not define \"NUM_NETIFS\""
+ exit 0
+fi
+
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+ if [[ "$count" -eq "0" ]]; then
+ unset NETIFS
+ declare -A NETIFS
+ fi
+ count=$((count + 1))
+ NETIFS[p$count]="$1"
+ shift
+done
+
+##############################################################################
+# Network interfaces configuration
+
+for i in $(eval echo {1..$NUM_NETIFS}); do
+ ip link show dev ${NETIFS[p$i]} &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: could not find all required interfaces"
+ exit 0
+ fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -ne 0 ]]; then
+ RET=$err
+ retmsg=$msg
+ fi
+}
+
+check_fail()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -eq 0 ]]; then
+ RET=1
+ retmsg=$msg
+ fi
+}
+
+log_test()
+{
+ local test_name=$1
+ local opt_str=$2
+
+ if [[ $# -eq 2 ]]; then
+ opt_str="($opt_str)"
+ fi
+
+ if [[ $RET -ne 0 ]]; then
+ EXIT_STATUS=1
+ printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str"
+ if [[ ! -z "$retmsg" ]]; then
+ printf "\t%s\n" "$retmsg"
+ fi
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ return 1
+ fi
+
+ printf "TEST: %-60s [PASS]\n" "$test_name $opt_str"
+ return 0
+}
+
+log_info()
+{
+ local msg=$1
+
+ echo "INFO: $msg"
+}
+
+setup_wait()
+{
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ while true; do
+ ip link show dev ${NETIFS[p$i]} up \
+ | grep 'state UP' &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ sleep 1
+ else
+ break
+ fi
+ done
+ done
+
+ # Make sure links are ready.
+ sleep $WAIT_TIME
+}
+
+pre_cleanup()
+{
+ if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+ echo "Pausing before cleanup, hit any key to continue"
+ read
+ fi
+}
+
+vrf_prepare()
+{
+ ip -4 rule add pref 32765 table local
+ ip -4 rule del pref 0
+ ip -6 rule add pref 32765 table local
+ ip -6 rule del pref 0
+}
+
+vrf_cleanup()
+{
+ ip -6 rule add pref 0 table local
+ ip -6 rule del pref 32765
+ ip -4 rule add pref 0 table local
+ ip -4 rule del pref 32765
+}
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+__vrf_td_id_assign()
+{
+ local vrf_name=$1
+
+ __last_tb_id=$((__last_tb_id + 1))
+ __TB_IDS[$vrf_name]=$__last_tb_id
+ return $__last_tb_id
+}
+
+__vrf_td_id_lookup()
+{
+ local vrf_name=$1
+
+ return ${__TB_IDS[$vrf_name]}
+}
+
+vrf_create()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_assign $vrf_name
+ tb_id=$?
+
+ ip link add dev $vrf_name type vrf table $tb_id
+ ip -4 route add table $tb_id unreachable default metric 4278198272
+ ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_lookup $vrf_name
+ tb_id=$?
+
+ ip -6 route del table $tb_id unreachable default metric 4278198272
+ ip -4 route del table $tb_id unreachable default metric 4278198272
+ ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+ local if_name=$1
+ local add_del=$2
+ local array
+
+ shift
+ shift
+ array=("${@}")
+
+ for addrstr in "${array[@]}"; do
+ ip address $add_del $addrstr dev $if_name
+ done
+}
+
+simple_if_init()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ vrf_create $vrf_name
+ ip link set dev $if_name master $vrf_name
+ ip link set dev $vrf_name up
+ ip link set dev $if_name up
+
+ __addr_add_del $if_name add "${array[@]}"
+}
+
+simple_if_fini()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ __addr_add_del $if_name del "${array[@]}"
+
+ ip link set dev $if_name down
+ vrf_destroy $vrf_name
+}
+
+master_name_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+link_stats_tx_packets_get()
+{
+ local if_name=$1
+
+ ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
+mac_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+bridge_ageing_time_get()
+{
+ local bridge=$1
+ local ageing_time
+
+ # Need to divide by 100 to convert to seconds.
+ ageing_time=$(ip -j -d link show dev $bridge \
+ | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+ echo $((ageing_time / 100))
+}
+
+forwarding_enable()
+{
+ ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
+ ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+
+ sysctl -q -w net.ipv4.conf.all.forwarding=1
+ sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+forwarding_restore()
+{
+ sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
+ sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+}
+
+tc_offload_check()
+{
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ ethtool -k ${NETIFS[p$i]} \
+ | grep "hw-tc-offload: on" &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+##############################################################################
+# Tests
+
+ping_test()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ RET=0
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ check_err $?
+ log_test "ping"
+}
+
+ping6_test()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ RET=0
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ check_err $?
+ log_test "ping6"
+}
+
+learning_test()
+{
+ local bridge=$1
+ local br_port1=$2 # Connected to `host1_if`.
+ local host1_if=$3
+ local host2_if=$4
+ local mac=de:ad:be:ef:13:37
+ local ageing_time
+
+ RET=0
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ # Disable unknown unicast flooding on `br_port1` to make sure
+ # packets are only forwarded through the port after a matching
+ # FDB entry was installed.
+ bridge link set dev $br_port1 flood off
+
+ tc qdisc add dev $host1_if ingress
+ tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_fail $? "Packet reached second host when should not"
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_err $? "Did not find FDB record when should"
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet did not reach second host when should"
+
+ # Wait for 10 seconds after the ageing time to make sure FDB
+ # record was aged-out.
+ ageing_time=$(bridge_ageing_time_get $bridge)
+ sleep $((ageing_time + 10))
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning off
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning on
+
+ tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host1_if ingress
+
+ bridge link set dev $br_port1 flood on
+
+ log_test "FDB learning"
+}
+
+flood_test_do()
+{
+ local should_flood=$1
+ local mac=$2
+ local ip=$3
+ local host1_if=$4
+ local host2_if=$5
+ local err=0
+
+ # Add an ACL on `host2_if` which will tell us whether the packet
+ # was flooded to it or not.
+ tc qdisc add dev $host2_if ingress
+ tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host2_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ if [[ $? -ne 0 && $should_flood == "true" || \
+ $? -eq 0 && $should_flood == "false" ]]; then
+ err=1
+ fi
+
+ tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host2_if ingress
+
+ return $err
+}
+
+flood_unicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=de:ad:be:ef:13:37
+ local ip=192.0.2.100
+
+ RET=0
+
+ bridge link set dev $br_port flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unknown unicast flood"
+}
+
+flood_multicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=01:00:5e:00:00:01
+ local ip=239.0.0.1
+
+ RET=0
+
+ bridge link set dev $br_port mcast_flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port mcast_flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unregistered multicast flood"
+}
+
+flood_test()
+{
+ # `br_port` is connected to `host2_if`
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+
+ flood_unicast_test $br_port $host1_if $host2_if
+ flood_multicast_test $br_port $host1_if $host2_if
+}
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
new file mode 100755
index 000000000000..cc6a14abfa87
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ ip address add 192.0.2.1/24 dev $rp1
+ ip address add 2001:db8:1::1/64 dev $rp1
+
+ ip address add 198.51.100.1/24 dev $rp2
+ ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 2001:db8:2::1/64 dev $rp2
+ ip address del 198.51.100.1/24 dev $rp2
+
+ ip address del 2001:db8:1::1/64 dev $rp1
+ ip address del 192.0.2.1/24 dev $rp1
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
new file mode 100755
index 000000000000..d31888e3133e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -0,0 +1,322 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+
+ ip route add 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ ip route add 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+
+ ip route add 192.0.2.0/24 vrf vrf-r2 \
+ nexthop via 169.254.2.12 dev $rp22 \
+ nexthop via 169.254.3.13 dev $rp23
+ ip route add 2001:db8:1::/64 vrf vrf-r2 \
+ nexthop via fe80:2::12 dev $rp22 \
+ nexthop via fe80:3::13 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+multipath_eval()
+{
+ local weight_rp12=$1
+ local weight_rp13=$2
+ local packets_rp12=$3
+ local packets_rp13=$4
+ local weights_ratio packets_ratio diff
+
+ RET=0
+
+ if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+ weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+ | bc -l)
+ packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+ | bc -l)
+ else
+ weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" | \
+ bc -l)
+ packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" | \
+ bc -l)
+ fi
+
+ diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+ diff=${diff#-}
+
+ test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+ check_err $? "Too large discrepancy between expected and measured ratios"
+ log_test "Multipath"
+ log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
+
+multipath4_test()
+{
+ local weight_rp12=$1
+ local weight_rp13=$2
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+ local hash_policy
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+ nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_test()
+{
+ local weight_rp12=$1
+ local weight_rp13=$2
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec vrf-h1 ping 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+ done
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+multipath_test()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test 1 1
+ multipath4_test 2 1
+ multipath4_test 11 45
+
+ log_info "Running IPv6 multipath tests"
+ multipath6_test 1 1
+ multipath6_test 2 1
+ multipath6_test 11 45
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+multipath_test
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
new file mode 100755
index 000000000000..84234317a25d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 clsact
+
+ simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+ simple_if_fini $swp2 192.0.2.1/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+mirred_egress_redirect_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched without redirect rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match incoming redirected packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "mirred egress redirect ($tcflags)"
+}
+
+gact_drop_and_ok_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ skip_hw dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not dropped"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action ok
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see trapped packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "gact drop and ok ($tcflags)"
+}
+
+gact_trap_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_hw dst_ip 192.0.2.2 action drop
+ tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_fail $? "Saw packet without trap rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action trap
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not trapped"
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see trapped packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "trap ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swp1origmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp1 address $h2mac
+ ip link set $swp2 address $h1mac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+ ip link set $swp1 address $swp1origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+gact_drop_and_ok_test
+mirred_egress_redirect_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ gact_drop_and_ok_test
+ mirred_egress_redirect_test
+ gact_trap_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
new file mode 100755
index 000000000000..94c114ad8b44
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source lib.sh
+source tc_common.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+unreachable_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_fail $? "matched on filter in unreachable chain"
+
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "unreachable chain ($tcflags)"
+}
+
+gact_goto_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $h2mac action goto chain 1
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct filter with goto chain action"
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_err $? "Did not match on correct filter in chain 1"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "gact goto chain ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+unreachable_chain_test
+gact_goto_chain_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ unreachable_chain_test
+ gact_goto_chain_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
new file mode 100644
index 000000000000..acd0b520241c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+tc_check_packets()
+{
+ local id=$1
+ local handle=$2
+ local count=$3
+ local ret
+
+ output="$(tc -j -s filter show $id)"
+ # workaround the jq bug which causes jq to return 0 in case input is ""
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ echo $output | \
+ jq -e ".[] \
+ | select(.options.handle == $handle) \
+ | select(.options.actions[0].stats.packets == $count)" \
+ &> /dev/null
+ return $?
+}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
new file mode 100755
index 000000000000..026a4ea4b2fb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -0,0 +1,196 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source lib.sh
+source tc_common.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+match_dst_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "dst_mac match ($tcflags)"
+}
+
+match_src_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_mac $h1mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "src_mac match ($tcflags)"
+}
+
+match_dst_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 198.51.100.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "dst_ip match ($tcflags)"
+}
+
+match_src_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_ip 198.51.100.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_ip 192.0.2.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags src_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "src_ip match ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+match_dst_mac_test
+match_src_mac_test
+match_dst_ip_test
+match_src_ip_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ match_dst_mac_test
+ match_src_mac_test
+ match_dst_ip_test
+ match_src_ip_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
new file mode 100755
index 000000000000..cfc8a2ace388
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+ simple_if_init $swp2 192.0.2.2/24
+ tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ simple_if_fini $swp2 192.0.2.2/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+ RET=0
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 1
+ check_err $? "Did not match first incoming packet on a block"
+
+ $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 2
+ check_err $? "Did not match second incoming packet on a block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp2 address $swmac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+shared_block_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ shared_block_test
+fi
+
+exit $EXIT_STATUS