summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/bpf_devel_QA.rst10
-rw-r--r--Documentation/bpf/btf.rst4
-rw-r--r--Documentation/bpf/index.rst3
-rw-r--r--Documentation/bpf/llvm_reloc.rst6
-rw-r--r--Documentation/bpf/standardization/index.rst18
-rw-r--r--Documentation/bpf/standardization/instruction-set.rst (renamed from Documentation/bpf/instruction-set.rst)2
-rw-r--r--Documentation/bpf/standardization/linux-notes.rst (renamed from Documentation/bpf/linux-notes.rst)3
-rw-r--r--Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml155
-rw-r--r--Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/can/bosch,m_can.yaml20
-rw-r--r--Documentation/devicetree/bindings/net/can/xilinx,can.yaml3
-rw-r--r--Documentation/devicetree/bindings/net/dsa/dsa.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/mediatek,net.yaml109
-rw-r--r--Documentation/devicetree/bindings/net/motorcomm,yt8xxx.yaml34
-rw-r--r--Documentation/devicetree/bindings/net/qca,ar803x.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/rockchip-dwmac.yaml1
-rw-r--r--Documentation/netlink/specs/netdev.yaml6
-rw-r--r--Documentation/networking/af_xdp.rst211
-rw-r--r--Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst8
-rw-r--r--Documentation/networking/ip-sysctl.rst9
-rw-r--r--Documentation/networking/netconsole.rst11
-rw-r--r--Documentation/networking/page_pool.rst11
-rw-r--r--MAINTAINERS16
-rw-r--r--arch/x86/net/bpf_jit_comp.c246
-rw-r--r--drivers/connector/cn_proc.c111
-rw-r--r--drivers/connector/connector.c40
-rw-r--r--drivers/hid/bpf/entrypoints/Makefile2
-rw-r--r--drivers/net/can/Kconfig5
-rw-r--r--drivers/net/can/grcan.c3
-rw-r--r--drivers/net/can/kvaser_pciefd.c307
-rw-r--r--drivers/net/can/m_can/m_can.c32
-rw-r--r--drivers/net/can/m_can/m_can.h3
-rw-r--r--drivers/net/can/m_can/m_can_platform.c21
-rw-r--r--drivers/net/can/sja1000/ems_pci.c6
-rw-r--r--drivers/net/can/usb/ucan.c2
-rw-r--r--drivers/net/can/xilinx_can.c25
-rw-r--r--drivers/net/dsa/b53/b53_common.c6
-rw-r--r--drivers/net/dsa/b53/b53_mdio.c1
-rw-r--r--drivers/net/dsa/b53/b53_mmap.c1
-rw-r--r--drivers/net/dsa/hirschmann/hellcreek.c1
-rw-r--r--drivers/net/dsa/hirschmann/hellcreek_ptp.c1
-rw-r--r--drivers/net/dsa/lan9303-core.c7
-rw-r--r--drivers/net/dsa/microchip/ksz8863_smi.c3
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c4
-rw-r--r--drivers/net/dsa/mt7530-mmio.c3
-rw-r--r--drivers/net/dsa/mt7530.c6
-rw-r--r--drivers/net/dsa/mv88e6xxx/Makefile3
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c426
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.h33
-rw-r--r--drivers/net/dsa/mv88e6xxx/pcs-6185.c190
-rw-r--r--drivers/net/dsa/mv88e6xxx/pcs-6352.c390
-rw-r--r--drivers/net/dsa/mv88e6xxx/pcs-639x.c898
-rw-r--r--drivers/net/dsa/mv88e6xxx/port.c30
-rw-r--r--drivers/net/dsa/mv88e6xxx/serdes.c1106
-rw-r--r--drivers/net/dsa/mv88e6xxx/serdes.h108
-rw-r--r--drivers/net/dsa/ocelot/felix.c6
-rw-r--r--drivers/net/dsa/ocelot/felix_vsc9959.c1
-rw-r--r--drivers/net/dsa/ocelot/seville_vsc9953.c3
-rw-r--r--drivers/net/dsa/qca/ar9331.c2
-rw-r--r--drivers/net/dsa/qca/qca8k-8xxx.c2
-rw-r--r--drivers/net/dsa/qca/qca8k-leds.c1
-rw-r--r--drivers/net/dsa/realtek/realtek-mdio.c2
-rw-r--r--drivers/net/dsa/realtek/realtek-smi.c1
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c7
-rw-r--r--drivers/net/dsa/vitesse-vsc73xx-core.c1
-rw-r--r--drivers/net/dsa/xrs700x/xrs700x.c2
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c12
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c2
-rw-r--r--drivers/net/ethernet/broadcom/Kconfig12
-rw-r--r--drivers/net/ethernet/broadcom/Makefile1
-rw-r--r--drivers/net/ethernet/broadcom/asp2/Makefile2
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp.c1437
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp.h586
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c503
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c1415
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h257
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c156
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c4
-rw-r--r--drivers/net/ethernet/brocade/bna/bnad.c13
-rw-r--r--drivers/net/ethernet/engleder/tsnep_main.c2
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.c16
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c9
-rw-r--r--drivers/net/ethernet/freescale/fec.h5
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c51
-rw-r--r--drivers/net/ethernet/freescale/fec_mpc52xx.c6
-rw-r--r--drivers/net/ethernet/freescale/fec_mpc52xx_phy.c6
-rw-r--r--drivers/net/ethernet/freescale/fec_ptp.c16
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.c5
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c5
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c6
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/mii-fec.c6
-rw-r--r--drivers/net/ethernet/freescale/fsl_pq_mdio.c6
-rw-r--r--drivers/net/ethernet/freescale/gianfar.c6
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth.c6
-rw-r--r--drivers/net/ethernet/google/gve/gve_desc.h4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns_mdio.c10
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c6
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c65
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c101
-rw-r--r--drivers/net/ethernet/intel/ice/Makefile2
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch.c46
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch_br.c1309
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch_br.h120
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c25
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_repr.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_repr.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c150
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.h6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_trace.h90
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c186
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c84
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c221
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c35
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c10
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h9
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c9
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c6
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h12
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c320
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/qos.c398
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/qos.h11
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_path.c36
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c498
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.h328
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe.c18
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe.h3
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe_offload.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mcg.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c61
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_env.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h166
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/resources.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c351
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c68
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_port_range.c200
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c601
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c170
-rw-r--r--drivers/net/ethernet/microsoft/mana/gdma_main.c5
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c10
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c54
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h1
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c160
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c70
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.h5
-rw-r--r--drivers/net/ethernet/smsc/smsc911x.c4
-rw-r--r--drivers/net/ethernet/smsc/smsc9420.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h39
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c23
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c36
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c14
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c7
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c16
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c12
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c24
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/enh_desc.c20
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.h15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/norm_desc.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c123
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c47
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c254
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h6
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.c68
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.h2
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_type.h6
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c35
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_main.c64
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c1
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c8
-rw-r--r--drivers/net/gtp.c3
-rw-r--r--drivers/net/mdio/mdio-bcm-unimac.c2
-rw-r--r--drivers/net/mdio/mdio-xgene.c4
-rw-r--r--drivers/net/netconsole.c115
-rw-r--r--drivers/net/netdevsim/Makefile4
-rw-r--r--drivers/net/netdevsim/macsec.c356
-rw-r--r--drivers/net/netdevsim/netdev.c3
-rw-r--r--drivers/net/netdevsim/netdevsim.h34
-rw-r--r--drivers/net/pcs/pcs-rzn1-miic.c1
-rw-r--r--drivers/net/phy/Kconfig7
-rw-r--r--drivers/net/phy/Makefile1
-rw-r--r--drivers/net/phy/at803x.c135
-rw-r--r--drivers/net/phy/bcm7xxx.c1
-rw-r--r--drivers/net/phy/marvell-88q2xxx.c263
-rw-r--r--drivers/net/phy/marvell-88x2222.c1
-rw-r--r--drivers/net/phy/mdio_bus.c37
-rw-r--r--drivers/net/phy/mediatek-ge-soc.c2
-rw-r--r--drivers/net/phy/motorcomm.c118
-rw-r--r--drivers/net/phy/phy-c45.c63
-rw-r--r--drivers/net/phy/phylink.c178
-rw-r--r--drivers/net/phy/smsc.c252
-rw-r--r--drivers/net/ppp/pppoe.c4
-rw-r--r--drivers/net/ppp/pptp.c8
-rw-r--r--drivers/net/vxlan/vxlan_core.c44
-rw-r--r--drivers/net/wwan/t7xx/t7xx_hif_cldma.c17
-rw-r--r--drivers/net/wwan/t7xx/t7xx_hif_cldma.h2
-rw-r--r--drivers/net/wwan/t7xx/t7xx_mhccif.h1
-rw-r--r--drivers/net/wwan/t7xx/t7xx_modem_ops.c76
-rw-r--r--drivers/net/wwan/t7xx/t7xx_modem_ops.h2
-rw-r--r--drivers/net/wwan/t7xx/t7xx_port.h6
-rw-r--r--drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c8
-rw-r--r--drivers/net/wwan/t7xx/t7xx_port_proxy.c18
-rw-r--r--drivers/net/wwan/t7xx/t7xx_reg.h2
-rw-r--r--drivers/net/wwan/t7xx/t7xx_state_monitor.c13
-rw-r--r--drivers/net/wwan/t7xx/t7xx_state_monitor.h2
-rw-r--r--drivers/ptp/ptp_qoriq.c2
-rw-r--r--drivers/s390/net/Kconfig5
-rw-r--r--drivers/s390/net/lcs.c39
-rw-r--r--drivers/w1/w1_netlink.c6
-rw-r--r--include/linux/bpf-cgroup.h4
-rw-r--r--include/linux/bpf.h42
-rw-r--r--include/linux/bpf_mem_alloc.h2
-rw-r--r--include/linux/bpf_mprog.h327
-rw-r--r--include/linux/brcmphy.h1
-rw-r--r--include/linux/btf_ids.h1
-rw-r--r--include/linux/connector.h8
-rw-r--r--include/linux/icmpv6.h10
-rw-r--r--include/linux/ipv6.h16
-rw-r--r--include/linux/lsm_hook_defs.h2
-rw-r--r--include/linux/marvell_phy.h1
-rw-r--r--include/linux/mdio.h26
-rw-r--r--include/linux/mlx5/mlx5_ifc.h7
-rw-r--r--include/linux/netdevice.h18
-rw-r--r--include/linux/netlink.h6
-rw-r--r--include/linux/phy.h1
-rw-r--r--include/linux/phylink.h86
-rw-r--r--include/linux/rcutiny.h2
-rw-r--r--include/linux/rcutree.h1
-rw-r--r--include/linux/security.h5
-rw-r--r--include/linux/skbuff.h16
-rw-r--r--include/linux/smscphy.h34
-rw-r--r--include/linux/stmmac.h26
-rw-r--r--include/linux/tcp.h4
-rw-r--r--include/linux/trace_events.h3
-rw-r--r--include/net/dropreason-core.h3
-rw-r--r--include/net/dsa.h3
-rw-r--r--include/net/ip_tunnels.h1
-rw-r--r--include/net/netfilter/nf_conntrack_expect.h2
-rw-r--r--include/net/netns/ipv4.h2
-rw-r--r--include/net/page_pool.h10
-rw-r--r--include/net/pkt_cls.h1
-rw-r--r--include/net/route.h6
-rw-r--r--include/net/sch_generic.h2
-rw-r--r--include/net/sock.h1
-rw-r--r--include/net/switchdev.h6
-rw-r--r--include/net/tcp.h36
-rw-r--r--include/net/tcx.h206
-rw-r--r--include/net/xdp_sock.h7
-rw-r--r--include/net/xdp_sock_drv.h54
-rw-r--r--include/net/xsk_buff_pool.h7
-rw-r--r--include/uapi/linux/bpf.h112
-rw-r--r--include/uapi/linux/cn_proc.h62
-rw-r--r--include/uapi/linux/if_link.h1
-rw-r--r--include/uapi/linux/if_xdp.h13
-rw-r--r--include/uapi/linux/ipv6.h1
-rw-r--r--include/uapi/linux/mdio.h18
-rw-r--r--include/uapi/linux/netdev.h1
-rw-r--r--kernel/bpf/Kconfig1
-rw-r--r--kernel/bpf/Makefile3
-rw-r--r--kernel/bpf/btf.c24
-rw-r--r--kernel/bpf/cpumask.c20
-rw-r--r--kernel/bpf/hashtab.c22
-rw-r--r--kernel/bpf/helpers.c55
-rw-r--r--kernel/bpf/map_iter.c42
-rw-r--r--kernel/bpf/memalloc.c378
-rw-r--r--kernel/bpf/mprog.c445
-rw-r--r--kernel/bpf/preload/iterators/Makefile2
-rw-r--r--kernel/bpf/preload/iterators/iterators.bpf.c9
-rw-r--r--kernel/bpf/preload/iterators/iterators.lskel-little-endian.h526
-rw-r--r--kernel/bpf/ringbuf.c26
-rw-r--r--kernel/bpf/syscall.c262
-rw-r--r--kernel/bpf/tcx.c348
-rw-r--r--kernel/bpf/verifier.c64
-rw-r--r--kernel/rcu/rcu.h2
-rw-r--r--kernel/trace/bpf_trace.c49
-rw-r--r--kernel/trace/trace_kprobe.c13
-rw-r--r--kernel/trace/trace_uprobe.c3
-rw-r--r--lib/test_bpf.c12
-rw-r--r--net/Kconfig5
-rw-r--r--net/bpf/test_run.c18
-rw-r--r--net/bridge/br.c8
-rw-r--r--net/bridge/br_forward.c1
-rw-r--r--net/bridge/br_netlink.c12
-rw-r--r--net/bridge/br_private.h19
-rw-r--r--net/bridge/br_switchdev.c15
-rw-r--r--net/bridge/br_vlan_tunnel.c15
-rw-r--r--net/core/dev.c277
-rw-r--r--net/core/filter.c11
-rw-r--r--net/core/netdev-genl.c8
-rw-r--r--net/core/page_pool.c13
-rw-r--r--net/core/rtnetlink.c11
-rw-r--r--net/core/sock.c15
-rw-r--r--net/dccp/ipv4.c3
-rw-r--r--net/dccp/ipv6.c1
-rw-r--r--net/dccp/ipv6.h4
-rw-r--r--net/devlink/leftover.c4
-rw-r--r--net/dsa/port.c41
-rw-r--r--net/dsa/slave.c9
-rw-r--r--net/ipv4/bpf_tcp_ca.c2
-rw-r--r--net/ipv4/nexthop.c59
-rw-r--r--net/ipv4/tcp.c11
-rw-r--r--net/ipv4/tcp_input.c38
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv6/addrconf.c10
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/datagram.c7
-rw-r--r--net/ipv6/exthdrs.c2
-rw-r--r--net/ipv6/icmp.c6
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/ndisc.c18
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/raw.c1
-rw-r--r--net/ipv6/rpl_iptunnel.c3
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/ipv6/udp.c1
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/l2tp/l2tp_ip6.c4
-rw-r--r--net/mptcp/protocol.c16
-rw-r--r--net/mptcp/protocol.h8
-rw-r--r--net/mptcp/subflow.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_expect.c4
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netlink/af_netlink.c38
-rw-r--r--net/netlink/af_netlink.h4
-rw-r--r--net/netlink/genetlink.c6
-rw-r--r--net/openvswitch/conntrack.c78
-rw-r--r--net/qrtr/af_qrtr.c5
-rw-r--r--net/qrtr/ns.c139
-rw-r--r--net/sched/Kconfig4
-rw-r--r--net/sched/act_ct.c3
-rw-r--r--net/sched/sch_htb.c7
-rw-r--r--net/sched/sch_ingress.c61
-rw-r--r--net/sctp/protocol.c3
-rw-r--r--net/sctp/socket.c1
-rw-r--r--net/switchdev/switchdev.c25
-rw-r--r--net/vmw_vsock/virtio_transport_common.c104
-rw-r--r--net/xdp/xsk.c365
-rw-r--r--net/xdp/xsk_buff_pool.c7
-rw-r--r--net/xdp/xsk_queue.h95
-rw-r--r--net/xfrm/xfrm_device.c13
-rw-r--r--samples/bpf/Makefile6
-rw-r--r--samples/bpf/README.rst14
-rw-r--r--samples/bpf/gnu/stubs.h2
-rw-r--r--samples/bpf/syscall_tp_kern.c4
-rwxr-xr-xsamples/bpf/test_lwt_bpf.sh2
-rw-r--r--samples/hid/Makefile6
-rw-r--r--security/security.c2
-rw-r--r--security/selinux/hooks.c4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-net.rst26
-rw-r--r--tools/bpf/bpftool/Makefile2
-rw-r--r--tools/bpf/bpftool/btf_dumper.c2
-rw-r--r--tools/bpf/bpftool/feature.c2
-rw-r--r--tools/bpf/bpftool/link.c432
-rw-r--r--tools/bpf/bpftool/net.c98
-rw-r--r--tools/bpf/bpftool/netlink_dumper.h8
-rw-r--r--tools/bpf/bpftool/skeleton/pid_iter.bpf.c26
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.bpf.c27
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c6
-rw-r--r--tools/bpf/bpftool/xlated_dumper.h2
-rw-r--r--tools/bpf/runqslower/Makefile2
-rw-r--r--tools/build/feature/Makefile2
-rw-r--r--tools/include/uapi/linux/bpf.h112
-rw-r--r--tools/include/uapi/linux/if_xdp.h9
-rw-r--r--tools/include/uapi/linux/netdev.h1
-rw-r--r--tools/lib/bpf/bpf.c135
-rw-r--r--tools/lib/bpf/bpf.h103
-rw-r--r--tools/lib/bpf/hashmap.h10
-rw-r--r--tools/lib/bpf/libbpf.c328
-rw-r--r--tools/lib/bpf/libbpf.h33
-rw-r--r--tools/lib/bpf/libbpf.map3
-rw-r--r--tools/lib/bpf/libbpf_common.h16
-rw-r--r--tools/lib/bpf/netlink.c5
-rw-r--r--tools/lib/bpf/usdt.c5
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch642
-rw-r--r--tools/testing/selftests/bpf/Makefile13
-rw-r--r--tools/testing/selftests/bpf/bench.c4
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_htab_mem.c350
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c2
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh40
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c49
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c12
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/cgroup_tcp_skb.h35
-rw-r--r--tools/testing/selftests/bpf/gnu/stubs.h2
-rw-r--r--tools/testing/selftests/bpf/map_tests/map_percpu_stats.c447
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c402
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_args_test.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_map_resize.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c78
-rw-r--r--tools/testing/selftests/bpf/prog_tests/modify_return.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c86
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_helpers.h72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_links.c1583
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_opts.c2239
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_struct.c19
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trampoline_count.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c382
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_many_args.c39
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_many_args.c40
-rw-r--r--tools/testing/selftests/bpf/progs/htab_mem_bench.c105
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list.c2
-rw-r--r--tools/testing/selftests/bpf/progs/map_percpu_stats.c24
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c5
-rw-r--r--tools/testing/selftests/bpf/progs/modify_return.c40
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_failure.c16
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_success.c15
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr.c94
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_map_resize.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_ptr_untrusted.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_link.c40
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct.c54
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_typedef.c23
-rw-r--r--tools/testing/selftests/bpf/progs/xsk_xdp_progs.c6
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh5
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c5
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c1
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_skb.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/jmp32.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/map_kptr.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c2
-rw-r--r--tools/testing/selftests/bpf/xsk.c136
-rw-r--r--tools/testing/selftests/bpf/xsk.h2
-rwxr-xr-xtools/testing/selftests/bpf/xsk_prereqs.sh7
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c458
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h21
-rw-r--r--tools/testing/selftests/connector/Makefile6
-rw-r--r--tools/testing/selftests/connector/proc_filter.c310
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh111
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh95
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh31
l---------tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh1
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh1
-rw-r--r--tools/testing/selftests/hid/Makefile6
-rw-r--r--tools/testing/selftests/net/Makefile7
-rw-r--r--tools/testing/selftests/net/csum.c6
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh129
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile3
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib.sh18
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge.sh50
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh155
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan.sh100
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh171
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_port_range.sh228
-rw-r--r--tools/testing/selftests/net/hwtstamp_config.c6
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh7
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh66
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh308
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh66
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh20
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh6
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh4
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh181
-rw-r--r--tools/testing/selftests/net/psock_lib.h4
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh83
-rwxr-xr-xtools/testing/selftests/net/test_bridge_backup_port.sh759
-rw-r--r--tools/testing/selftests/tc-testing/Makefile2
-rw-r--r--tools/testing/vsock/vsock_test.c136
509 files changed, 29676 insertions, 5463 deletions
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 609b71f5747d..de27e1620821 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -635,12 +635,12 @@ test coverage.
Q: clang flag for target bpf?
-----------------------------
-Q: In some cases clang flag ``-target bpf`` is used but in other cases the
+Q: In some cases clang flag ``--target=bpf`` is used but in other cases the
default clang target, which matches the underlying architecture, is used.
What is the difference and when I should use which?
A: Although LLVM IR generation and optimization try to stay architecture
-independent, ``-target <arch>`` still has some impact on generated code:
+independent, ``--target=<arch>`` still has some impact on generated code:
- BPF program may recursively include header file(s) with file scope
inline assembly codes. The default target can handle this well,
@@ -658,7 +658,7 @@ independent, ``-target <arch>`` still has some impact on generated code:
The clang option ``-fno-jump-tables`` can be used to disable
switch table generation.
-- For clang ``-target bpf``, it is guaranteed that pointer or long /
+- For clang ``--target=bpf``, it is guaranteed that pointer or long /
unsigned long types will always have a width of 64 bit, no matter
whether underlying clang binary or default target (or kernel) is
32 bit. However, when native clang target is used, then it will
@@ -668,7 +668,7 @@ independent, ``-target <arch>`` still has some impact on generated code:
while the BPF LLVM back end still operates in 64 bit. The native
target is mostly needed in tracing for the case of walking ``pt_regs``
or other kernel structures where CPU's register width matters.
- Otherwise, ``clang -target bpf`` is generally recommended.
+ Otherwise, ``clang --target=bpf`` is generally recommended.
You should use default target when:
@@ -685,7 +685,7 @@ when:
into these structures is verified by the BPF verifier and may result
in verification failures if the native architecture is not aligned with
the BPF architecture, e.g. 64-bit. An example of this is
- BPF_PROG_TYPE_SK_MSG require ``-target bpf``
+ BPF_PROG_TYPE_SK_MSG require ``--target=bpf``
.. Links
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 7cd7c5415a99..f32db1f44ae9 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -990,7 +990,7 @@ format.::
} g2;
int main() { return 0; }
int test() { return 0; }
- -bash-4.4$ clang -c -g -O2 -target bpf t2.c
+ -bash-4.4$ clang -c -g -O2 --target=bpf t2.c
-bash-4.4$ readelf -S t2.o
......
[ 8] .BTF PROGBITS 0000000000000000 00000247
@@ -1000,7 +1000,7 @@ format.::
[10] .rel.BTF.ext REL 0000000000000000 000007e0
0000000000000040 0000000000000010 16 9 8
......
- -bash-4.4$ clang -S -g -O2 -target bpf t2.c
+ -bash-4.4$ clang -S -g -O2 --target=bpf t2.c
-bash-4.4$ cat t2.s
......
.section .BTF,"",@progbits
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index dbb39e8f9889..1ff177b89d66 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -12,9 +12,9 @@ that goes into great technical depth about the BPF Architecture.
.. toctree::
:maxdepth: 1
- instruction-set
verifier
libbpf/index
+ standardization/index
btf
faq
syscall_api
@@ -29,7 +29,6 @@ that goes into great technical depth about the BPF Architecture.
bpf_licensing
test_debug
clang-notes
- linux-notes
other
redirect
diff --git a/Documentation/bpf/llvm_reloc.rst b/Documentation/bpf/llvm_reloc.rst
index e4a777a6a3a2..450e6403fe3d 100644
--- a/Documentation/bpf/llvm_reloc.rst
+++ b/Documentation/bpf/llvm_reloc.rst
@@ -28,7 +28,7 @@ For example, for the following code::
return g1 + g2 + l1 + l2;
}
-Compiled with ``clang -target bpf -O2 -c test.c``, the following is
+Compiled with ``clang --target=bpf -O2 -c test.c``, the following is
the code with ``llvm-objdump -dr test.o``::
0: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
@@ -157,7 +157,7 @@ and ``call`` instructions. For example::
return gfunc(a, b) + lfunc(a, b) + global;
}
-Compiled with ``clang -target bpf -O2 -c test.c``, we will have
+Compiled with ``clang --target=bpf -O2 -c test.c``, we will have
following code with `llvm-objdump -dr test.o``::
Disassembly of section .text:
@@ -203,7 +203,7 @@ The following is an example to show how R_BPF_64_ABS64 could be generated::
int global() { return 0; }
struct t { void *g; } gbl = { global };
-Compiled with ``clang -target bpf -O2 -g -c test.c``, we will see a
+Compiled with ``clang --target=bpf -O2 -g -c test.c``, we will see a
relocation below in ``.data`` section with command
``llvm-readelf -r test.o``::
diff --git a/Documentation/bpf/standardization/index.rst b/Documentation/bpf/standardization/index.rst
new file mode 100644
index 000000000000..09c6ba055fd7
--- /dev/null
+++ b/Documentation/bpf/standardization/index.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+===================
+BPF Standardization
+===================
+
+This directory contains documents that are being iterated on as part of the BPF
+standardization effort with the IETF. See the `IETF BPF Working Group`_ page
+for the working group charter, documents, and more.
+
+.. toctree::
+ :maxdepth: 1
+
+ instruction-set
+ linux-notes
+
+.. Links:
+.. _IETF BPF Working Group: https://datatracker.ietf.org/wg/bpf/about/
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
index 6644842cd3ea..751e657973f0 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -165,7 +165,7 @@ BPF_OR 0x40 dst \|= src
BPF_AND 0x50 dst &= src
BPF_LSH 0x60 dst <<= (src & mask)
BPF_RSH 0x70 dst >>= (src & mask)
-BPF_NEG 0x80 dst = ~src
+BPF_NEG 0x80 dst = -src
BPF_MOD 0x90 dst = (src != 0) ? (dst % src) : dst
BPF_XOR 0xa0 dst ^= src
BPF_MOV 0xb0 dst = src
diff --git a/Documentation/bpf/linux-notes.rst b/Documentation/bpf/standardization/linux-notes.rst
index 508d009d3bed..00d2693de025 100644
--- a/Documentation/bpf/linux-notes.rst
+++ b/Documentation/bpf/standardization/linux-notes.rst
@@ -45,7 +45,8 @@ On Linux, this integer is a BTF ID.
Legacy BPF Packet access instructions
=====================================
-As mentioned in the `ISA standard documentation <instruction-set.rst#legacy-bpf-packet-access-instructions>`_,
+As mentioned in the `ISA standard documentation
+<instruction-set.html#legacy-bpf-packet-access-instructions>`_,
Linux has special eBPF instructions for access to packet data that have been
carried over from classic BPF to retain the performance of legacy socket
filters running in the eBPF interpreter.
diff --git a/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml b/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml
new file mode 100644
index 000000000000..aa3162c74833
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml
@@ -0,0 +1,155 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/brcm,asp-v2.0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom ASP 2.0 Ethernet controller
+
+maintainers:
+ - Justin Chen <justin.chen@broadcom.com>
+ - Florian Fainelli <florian.fainelli@broadcom.com>
+
+description: Broadcom Ethernet controller first introduced with 72165
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - brcm,bcm74165-asp
+ - const: brcm,asp-v2.1
+ - items:
+ - enum:
+ - brcm,bcm72165-asp
+ - const: brcm,asp-v2.0
+
+ "#address-cells":
+ const: 1
+ "#size-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+ ranges: true
+
+ interrupts:
+ minItems: 1
+ items:
+ - description: RX/TX interrupt
+ - description: Port 0 Wake-on-LAN
+ - description: Port 1 Wake-on-LAN
+
+ clocks:
+ maxItems: 1
+
+ ethernet-ports:
+ type: object
+ properties:
+ "#address-cells":
+ const: 1
+ "#size-cells":
+ const: 0
+
+ patternProperties:
+ "^port@[0-9]+$":
+ type: object
+
+ $ref: ethernet-controller.yaml#
+
+ unevaluatedProperties: false
+
+ properties:
+ reg:
+ maxItems: 1
+ description: Port number
+
+ brcm,channel:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ ASP Channel Number
+
+ The depacketizer channel that consumes packets from
+ the unimac/port.
+
+ required:
+ - reg
+ - brcm,channel
+
+ additionalProperties: false
+
+patternProperties:
+ "^mdio@[0-9a-f]+$":
+ type: object
+ $ref: brcm,unimac-mdio.yaml
+
+ description:
+ ASP internal UniMAC MDIO bus
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ ethernet@9c00000 {
+ compatible = "brcm,bcm72165-asp", "brcm,asp-v2.0";
+ reg = <0x9c00000 0x1fff14>;
+ interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>;
+ ranges = <0x0 0x9c00000 0x1fff14>;
+ clocks = <&scmi 14>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ mdio@c614 {
+ compatible = "brcm,asp-v2.0-mdio";
+ reg = <0xc614 0x8>;
+ reg-names = "mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy0: ethernet-phy@1 {
+ reg = <1>;
+ };
+ };
+
+ mdio@ce14 {
+ compatible = "brcm,asp-v2.0-mdio";
+ reg = <0xce14 0x8>;
+ reg-names = "mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy1: ethernet-phy@1 {
+ reg = <1>;
+ };
+ };
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ brcm,channel = <8>;
+ phy-mode = "rgmii";
+ phy-handle = <&phy0>;
+ };
+
+ port@1 {
+ reg = <1>;
+ brcm,channel = <9>;
+ phy-mode = "rgmii";
+ phy-handle = <&phy1>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml
index 0be426ee1e44..6684810fcbf0 100644
--- a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml
@@ -22,6 +22,8 @@ properties:
- brcm,genet-mdio-v3
- brcm,genet-mdio-v4
- brcm,genet-mdio-v5
+ - brcm,asp-v2.0-mdio
+ - brcm,asp-v2.1-mdio
- brcm,unimac-mdio
reg:
diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
index 67879aab623b..bb518c831f7b 100644
--- a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
+++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
@@ -122,8 +122,6 @@ required:
- compatible
- reg
- reg-names
- - interrupts
- - interrupt-names
- clocks
- clock-names
- bosch,mram-cfg
@@ -132,6 +130,7 @@ additionalProperties: false
examples:
- |
+ // Example with interrupts
#include <dt-bindings/clock/imx6sx-clock.h>
can@20e8000 {
compatible = "bosch,m_can";
@@ -149,4 +148,21 @@ examples:
};
};
+ - |
+ // Example with timer polling
+ #include <dt-bindings/clock/imx6sx-clock.h>
+ can@20e8000 {
+ compatible = "bosch,m_can";
+ reg = <0x020e8000 0x4000>, <0x02298000 0x4000>;
+ reg-names = "m_can", "message_ram";
+ clocks = <&clks IMX6SX_CLK_CANFD>,
+ <&clks IMX6SX_CLK_CANFD>;
+ clock-names = "hclk", "cclk";
+ bosch,mram-cfg = <0x0 0 0 32 0 0 0 1>;
+
+ can-transceiver {
+ max-bitrate = <5000000>;
+ };
+ };
+
...
diff --git a/Documentation/devicetree/bindings/net/can/xilinx,can.yaml b/Documentation/devicetree/bindings/net/can/xilinx,can.yaml
index 897d2cbda45b..64d57c343e6f 100644
--- a/Documentation/devicetree/bindings/net/can/xilinx,can.yaml
+++ b/Documentation/devicetree/bindings/net/can/xilinx,can.yaml
@@ -46,6 +46,9 @@ properties:
$ref: /schemas/types.yaml#/definitions/uint32
description: CAN Tx mailbox buffer count (CAN FD)
+ resets:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml
index 8d971813bab6..ec74a660beda 100644
--- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml
@@ -36,7 +36,7 @@ additionalProperties: true
$defs:
ethernet-ports:
description: A DSA switch without any extra port properties
- $ref: '#/'
+ $ref: '#'
patternProperties:
"^(ethernet-)?ports$":
diff --git a/Documentation/devicetree/bindings/net/mediatek,net.yaml b/Documentation/devicetree/bindings/net/mediatek,net.yaml
index acb2b2ac4fe1..8d3554818c37 100644
--- a/Documentation/devicetree/bindings/net/mediatek,net.yaml
+++ b/Documentation/devicetree/bindings/net/mediatek,net.yaml
@@ -19,10 +19,12 @@ properties:
enum:
- mediatek,mt2701-eth
- mediatek,mt7623-eth
+ - mediatek,mt7621-eth
- mediatek,mt7622-eth
- mediatek,mt7629-eth
- mediatek,mt7981-eth
- mediatek,mt7986-eth
+ - mediatek,mt7988-eth
- ralink,rt5350-eth
reg:
@@ -32,7 +34,7 @@ properties:
clock-names: true
interrupts:
- minItems: 3
+ minItems: 1
maxItems: 4
power-domains:
@@ -60,6 +62,12 @@ properties:
Phandle to the mediatek hifsys controller used to provide various clocks
and reset to the system.
+ mediatek,infracfg:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon node that handles the path from GMAC to
+ PHY variants.
+
mediatek,sgmiisys:
$ref: /schemas/types.yaml#/definitions/phandle-array
minItems: 1
@@ -121,6 +129,8 @@ allOf:
- const: gp1
- const: gp2
+ mediatek,infracfg: false
+
mediatek,pctl:
$ref: /schemas/types.yaml#/definitions/phandle
description:
@@ -135,6 +145,32 @@ allOf:
properties:
compatible:
contains:
+ enum:
+ - mediatek,mt7621-eth
+ then:
+ properties:
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 2
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: ethif
+ - const: fe
+
+ mediatek,infracfg: false
+
+ mediatek,wed: false
+
+ mediatek,wed-pcie: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
const: mediatek,mt7622-eth
then:
properties:
@@ -159,6 +195,8 @@ allOf:
- const: sgmii_ck
- const: eth2pll
+ mediatek,infracfg: false
+
mediatek,sgmiisys:
minItems: 1
maxItems: 1
@@ -204,12 +242,6 @@ allOf:
- const: sgmii_ck
- const: eth2pll
- mediatek,infracfg:
- $ref: /schemas/types.yaml#/definitions/phandle
- description:
- Phandle to the syscon node that handles the path from GMAC to
- PHY variants.
-
mediatek,sgmiisys:
minItems: 2
maxItems: 2
@@ -250,6 +282,8 @@ allOf:
- const: netsys0
- const: netsys1
+ mediatek,infracfg: false
+
mediatek,sgmiisys:
minItems: 2
maxItems: 2
@@ -286,6 +320,67 @@ allOf:
- const: netsys0
- const: netsys1
+ mediatek,infracfg: false
+
+ mediatek,sgmiisys:
+ minItems: 2
+ maxItems: 2
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: mediatek,mt7988-eth
+ then:
+ properties:
+ interrupts:
+ minItems: 4
+
+ clocks:
+ minItems: 34
+ maxItems: 34
+
+ clock-names:
+ items:
+ - const: crypto
+ - const: fe
+ - const: gp2
+ - const: gp1
+ - const: gp3
+ - const: ethwarp_wocpu2
+ - const: ethwarp_wocpu1
+ - const: ethwarp_wocpu0
+ - const: esw
+ - const: netsys0
+ - const: netsys1
+ - const: sgmii_tx250m
+ - const: sgmii_rx250m
+ - const: sgmii2_tx250m
+ - const: sgmii2_rx250m
+ - const: top_usxgmii0_sel
+ - const: top_usxgmii1_sel
+ - const: top_sgm0_sel
+ - const: top_sgm1_sel
+ - const: top_xfi_phy0_xtal_sel
+ - const: top_xfi_phy1_xtal_sel
+ - const: top_eth_gmii_sel
+ - const: top_eth_refck_50m_sel
+ - const: top_eth_sys_200m_sel
+ - const: top_eth_sys_sel
+ - const: top_eth_xgmii_sel
+ - const: top_eth_mii_sel
+ - const: top_netsys_sel
+ - const: top_netsys_500m_sel
+ - const: top_netsys_pao_2x_sel
+ - const: top_netsys_sync_250m_sel
+ - const: top_netsys_ppefb_250m_sel
+ - const: top_netsys_warp_sel
+ - const: wocpu1
+ - const: wocpu0
+ - const: xgp1
+ - const: xgp2
+ - const: xgp3
+
mediatek,sgmiisys:
minItems: 2
maxItems: 2
diff --git a/Documentation/devicetree/bindings/net/motorcomm,yt8xxx.yaml b/Documentation/devicetree/bindings/net/motorcomm,yt8xxx.yaml
index 157e3bbcaf6f..26688e2302ea 100644
--- a/Documentation/devicetree/bindings/net/motorcomm,yt8xxx.yaml
+++ b/Documentation/devicetree/bindings/net/motorcomm,yt8xxx.yaml
@@ -52,6 +52,40 @@ properties:
for a timer.
type: boolean
+ motorcomm,rx-clk-drv-microamp:
+ description: |
+ drive strength of rx_clk rgmii pad.
+ The YT8531 RGMII LDO voltage supports 1.8V/3.3V, and the LDO voltage can
+ be configured with hardware pull-up resistors to match the SOC voltage
+ (usually 1.8V).
+ The software can read the registers to obtain the LDO voltage and configure
+ the legal drive strength(curren).
+ =====================================================
+ | voltage | current Available (uA) |
+ | 1.8v | 1200 2100 2700 2910 3110 3600 3970 4350 |
+ | 3.3v | 3070 4080 4370 4680 5020 5450 5740 6140 |
+ =====================================================
+ enum: [ 1200, 2100, 2700, 2910, 3070, 3110, 3600, 3970,
+ 4080, 4350, 4370, 4680, 5020, 5450, 5740, 6140 ]
+ default: 2910
+
+ motorcomm,rx-data-drv-microamp:
+ description: |
+ drive strength of rx_data/rx_ctl rgmii pad.
+ The YT8531 RGMII LDO voltage supports 1.8V/3.3V, and the LDO voltage can
+ be configured with hardware pull-up resistors to match the SOC voltage
+ (usually 1.8V).
+ The software can read the registers to obtain the LDO voltage and configure
+ the legal drive strength(curren).
+ =====================================================
+ | voltage | current Available (uA) |
+ | 1.8v | 1200 2100 2700 2910 3110 3600 3970 4350 |
+ | 3.3v | 3070 4080 4370 4680 5020 5450 5740 6140 |
+ =====================================================
+ enum: [ 1200, 2100, 2700, 2910, 3070, 3110, 3600, 3970,
+ 4080, 4350, 4370, 4680, 5020, 5450, 5740, 6140 ]
+ default: 2910
+
motorcomm,tx-clk-adj-enabled:
description: |
This configuration is mainly to adapt to VF2 with JH7110 SoC.
diff --git a/Documentation/devicetree/bindings/net/qca,ar803x.yaml b/Documentation/devicetree/bindings/net/qca,ar803x.yaml
index 161d28919316..3acd09f0da86 100644
--- a/Documentation/devicetree/bindings/net/qca,ar803x.yaml
+++ b/Documentation/devicetree/bindings/net/qca,ar803x.yaml
@@ -75,6 +75,7 @@ properties:
description:
Initial data for the VDDIO regulator. Set this to 1.5V or 1.8V.
$ref: /schemas/regulator/regulator.yaml
+ unevaluatedProperties: false
vddh-regulator:
type: object
@@ -82,6 +83,7 @@ properties:
Dummy subnode to model the external connection of the PHY VDDH
regulator to VDDIO.
$ref: /schemas/regulator/regulator.yaml
+ unevaluatedProperties: false
unevaluatedProperties: false
diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
index 176ea5f90251..bb943c96c196 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
@@ -80,6 +80,7 @@ properties:
"output" means GMAC provides the reference clock.
$ref: /schemas/types.yaml#/definitions/string
enum: [input, output]
+ default: input
rockchip,grf:
description: The phandle of the syscon node for the general register file.
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index b99e7ffef7a1..e41015310a6e 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -62,6 +62,12 @@ attribute-sets:
type: u64
enum: xdp-act
enum-as-flags: true
+ -
+ name: xdp_zc_max_segs
+ doc: max fragment count supported by ZC driver
+ type: u32
+ checks:
+ min: 1
operations:
list:
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index 1cc35de336a4..dceeb0d763aa 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -462,8 +462,92 @@ XDP_OPTIONS getsockopt
Gets options from an XDP socket. The only one supported so far is
XDP_OPTIONS_ZEROCOPY which tells you if zero-copy is on or not.
+Multi-Buffer Support
+====================
+
+With multi-buffer support, programs using AF_XDP sockets can receive
+and transmit packets consisting of multiple buffers both in copy and
+zero-copy mode. For example, a packet can consist of two
+frames/buffers, one with the header and the other one with the data,
+or a 9K Ethernet jumbo frame can be constructed by chaining together
+three 4K frames.
+
+Some definitions:
+
+* A packet consists of one or more frames
+
+* A descriptor in one of the AF_XDP rings always refers to a single
+ frame. In the case the packet consists of a single frame, the
+ descriptor refers to the whole packet.
+
+To enable multi-buffer support for an AF_XDP socket, use the new bind
+flag XDP_USE_SG. If this is not provided, all multi-buffer packets
+will be dropped just as before. Note that the XDP program loaded also
+needs to be in multi-buffer mode. This can be accomplished by using
+"xdp.frags" as the section name of the XDP program used.
+
+To represent a packet consisting of multiple frames, a new flag called
+XDP_PKT_CONTD is introduced in the options field of the Rx and Tx
+descriptors. If it is true (1) the packet continues with the next
+descriptor and if it is false (0) it means this is the last descriptor
+of the packet. Why the reverse logic of end-of-packet (eop) flag found
+in many NICs? Just to preserve compatibility with non-multi-buffer
+applications that have this bit set to false for all packets on Rx,
+and the apps set the options field to zero for Tx, as anything else
+will be treated as an invalid descriptor.
+
+These are the semantics for producing packets onto AF_XDP Tx ring
+consisting of multiple frames:
+
+* When an invalid descriptor is found, all the other
+ descriptors/frames of this packet are marked as invalid and not
+ completed. The next descriptor is treated as the start of a new
+ packet, even if this was not the intent (because we cannot guess
+ the intent). As before, if your program is producing invalid
+ descriptors you have a bug that must be fixed.
+
+* Zero length descriptors are treated as invalid descriptors.
+
+* For copy mode, the maximum supported number of frames in a packet is
+ equal to CONFIG_MAX_SKB_FRAGS + 1. If it is exceeded, all
+ descriptors accumulated so far are dropped and treated as
+ invalid. To produce an application that will work on any system
+ regardless of this config setting, limit the number of frags to 18,
+ as the minimum value of the config is 17.
+
+* For zero-copy mode, the limit is up to what the NIC HW
+ supports. Usually at least five on the NICs we have checked. We
+ consciously chose to not enforce a rigid limit (such as
+ CONFIG_MAX_SKB_FRAGS + 1) for zero-copy mode, as it would have
+ resulted in copy actions under the hood to fit into what limit the
+ NIC supports. Kind of defeats the purpose of zero-copy mode. How to
+ probe for this limit is explained in the "probe for multi-buffer
+ support" section.
+
+On the Rx path in copy-mode, the xsk core copies the XDP data into
+multiple descriptors, if needed, and sets the XDP_PKT_CONTD flag as
+detailed before. Zero-copy mode works the same, though the data is not
+copied. When the application gets a descriptor with the XDP_PKT_CONTD
+flag set to one, it means that the packet consists of multiple buffers
+and it continues with the next buffer in the following
+descriptor. When a descriptor with XDP_PKT_CONTD == 0 is received, it
+means that this is the last buffer of the packet. AF_XDP guarantees
+that only a complete packet (all frames in the packet) is sent to the
+application. If there is not enough space in the AF_XDP Rx ring, all
+frames of the packet will be dropped.
+
+If application reads a batch of descriptors, using for example the libxdp
+interfaces, it is not guaranteed that the batch will end with a full
+packet. It might end in the middle of a packet and the rest of the
+buffers of that packet will arrive at the beginning of the next batch,
+since the libxdp interface does not read the whole ring (unless you
+have an enormous batch size or a very small ring size).
+
+An example program each for Rx and Tx multi-buffer support can be found
+later in this document.
+
Usage
-=====
+-----
In order to use AF_XDP sockets two parts are needed. The
user-space application and the XDP program. For a complete setup and
@@ -541,6 +625,131 @@ like this:
But please use the libbpf functions as they are optimized and ready to
use. Will make your life easier.
+Usage Multi-Buffer Rx
+---------------------
+
+Here is a simple Rx path pseudo-code example (using libxdp interfaces
+for simplicity). Error paths have been excluded to keep it short:
+
+.. code-block:: c
+
+ void rx_packets(struct xsk_socket_info *xsk)
+ {
+ static bool new_packet = true;
+ u32 idx_rx = 0, idx_fq = 0;
+ static char *pkt;
+
+ int rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
+
+ xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+
+ for (int i = 0; i < rcvd; i++) {
+ struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+ char *frag = xsk_umem__get_data(xsk->umem->buffer, desc->addr);
+ bool eop = !(desc->options & XDP_PKT_CONTD);
+
+ if (new_packet)
+ pkt = frag;
+ else
+ add_frag_to_pkt(pkt, frag);
+
+ if (eop)
+ process_pkt(pkt);
+
+ new_packet = eop;
+
+ *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = desc->addr;
+ }
+
+ xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
+ xsk_ring_cons__release(&xsk->rx, rcvd);
+ }
+
+Usage Multi-Buffer Tx
+---------------------
+
+Here is an example Tx path pseudo-code (using libxdp interfaces for
+simplicity) ignoring that the umem is finite in size, and that we
+eventually will run out of packets to send. Also assumes pkts.addr
+points to a valid location in the umem.
+
+.. code-block:: c
+
+ void tx_packets(struct xsk_socket_info *xsk, struct pkt *pkts,
+ int batch_size)
+ {
+ u32 idx, i, pkt_nb = 0;
+
+ xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx);
+
+ for (i = 0; i < batch_size;) {
+ u64 addr = pkts[pkt_nb].addr;
+ u32 len = pkts[pkt_nb].size;
+
+ do {
+ struct xdp_desc *tx_desc;
+
+ tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i++);
+ tx_desc->addr = addr;
+
+ if (len > xsk_frame_size) {
+ tx_desc->len = xsk_frame_size;
+ tx_desc->options = XDP_PKT_CONTD;
+ } else {
+ tx_desc->len = len;
+ tx_desc->options = 0;
+ pkt_nb++;
+ }
+ len -= tx_desc->len;
+ addr += xsk_frame_size;
+
+ if (i == batch_size) {
+ /* Remember len, addr, pkt_nb for next iteration.
+ * Skipped for simplicity.
+ */
+ break;
+ }
+ } while (len);
+ }
+
+ xsk_ring_prod__submit(&xsk->tx, i);
+ }
+
+Probing for Multi-Buffer Support
+--------------------------------
+
+To discover if a driver supports multi-buffer AF_XDP in SKB or DRV
+mode, use the XDP_FEATURES feature of netlink in linux/netdev.h to
+query for NETDEV_XDP_ACT_RX_SG support. This is the same flag as for
+querying for XDP multi-buffer support. If XDP supports multi-buffer in
+a driver, then AF_XDP will also support that in SKB and DRV mode.
+
+To discover if a driver supports multi-buffer AF_XDP in zero-copy
+mode, use XDP_FEATURES and first check the NETDEV_XDP_ACT_XSK_ZEROCOPY
+flag. If it is set, it means that at least zero-copy is supported and
+you should go and check the netlink attribute
+NETDEV_A_DEV_XDP_ZC_MAX_SEGS in linux/netdev.h. An unsigned integer
+value will be returned stating the max number of frags that are
+supported by this device in zero-copy mode. These are the possible
+return values:
+
+1: Multi-buffer for zero-copy is not supported by this device, as max
+ one fragment supported means that multi-buffer is not possible.
+
+>=2: Multi-buffer is supported in zero-copy mode for this device. The
+ returned number signifies the max number of frags supported.
+
+For an example on how these are used through libbpf, please take a
+look at tools/testing/selftests/bpf/xskxceiver.c.
+
+Multi-Buffer Support for Zero-Copy Drivers
+------------------------------------------
+
+Zero-copy drivers usually use the batched APIs for Rx and Tx
+processing. Note that the Tx batch API guarantees that it will provide
+a batch of Tx descriptors that ends with full packet at the end. This
+to facilitate extending a zero-copy driver with multi-buffer support.
+
Sample application
==================
diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
index bfd233cfac35..1e196cb9ce25 100644
--- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
+++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
@@ -332,3 +332,11 @@ Setup HTB offload
# tc class add dev <interface> parent 1: classid 1:1 htb rate 10Gbit prio 1
# tc class add dev <interface> parent 1: classid 1:2 htb rate 10Gbit prio 7
+
+4. Create tc classes with same priorities and different quantum::
+
+ # tc class add dev <interface> parent 1: classid 1:1 htb rate 10Gbit prio 2 quantum 409600
+
+ # tc class add dev <interface> parent 1: classid 1:2 htb rate 10Gbit prio 2 quantum 188416
+
+ # tc class add dev <interface> parent 1: classid 1:3 htb rate 10Gbit prio 2 quantum 32768
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 4a010a7cde7f..37603ad6126b 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -321,6 +321,7 @@ tcp_abort_on_overflow - BOOLEAN
option can harm clients of your server.
tcp_adv_win_scale - INTEGER
+ Obsolete since linux-6.6
Count buffering overhead as bytes/2^tcp_adv_win_scale
(if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale),
if it is <= 0.
@@ -2287,6 +2288,14 @@ accept_ra_min_hop_limit - INTEGER
Default: 1
+accept_ra_min_rtr_lft - INTEGER
+ Minimum acceptable router lifetime in Router Advertisement.
+
+ RAs with a router lifetime less than this value shall be
+ ignored. RAs with a router lifetime of 0 are unaffected.
+
+ Default: 0
+
accept_ra_pinfo - BOOLEAN
Learn Prefix Information in Router Advertisement.
diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst
index dd0518e002f6..7a9de0568e84 100644
--- a/Documentation/networking/netconsole.rst
+++ b/Documentation/networking/netconsole.rst
@@ -13,6 +13,8 @@ IPv6 support by Cong Wang <xiyou.wangcong@gmail.com>, Jan 1 2013
Extended console support by Tejun Heo <tj@kernel.org>, May 1 2015
+Release prepend support by Breno Leitao <leitao@debian.org>, Jul 7 2023
+
Please send bug reports to Matt Mackall <mpm@selenic.com>
Satyam Sharma <satyam.sharma@gmail.com>, and Cong Wang <xiyou.wangcong@gmail.com>
@@ -34,10 +36,11 @@ Sender and receiver configuration:
It takes a string configuration parameter "netconsole" in the
following format::
- netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
+ netconsole=[+][r][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
where
+ if present, enable extended console support
+ r if present, prepend kernel version (release) to the message
src-port source for UDP packets (defaults to 6665)
src-ip source IP to use (interface address)
dev network interface (eth0)
@@ -125,6 +128,7 @@ The interface exposes these parameters of a netconsole target to userspace:
============== ================================= ============
enabled Is this target currently enabled? (read-write)
extended Extended mode enabled (read-write)
+ release Prepend kernel release to message (read-write)
dev_name Local network interface name (read-write)
local_port Source UDP port to use (read-write)
remote_port Remote agent's UDP port (read-write)
@@ -165,6 +169,11 @@ following format which is the same as /dev/kmsg::
<level>,<sequnum>,<timestamp>,<contflag>;<message text>
+If 'r' (release) feature is enabled, the kernel release version is
+prepended to the start of the message. Example::
+
+ 6.4.0,6,444,501151268,-;netconsole: network logging started
+
Non printable characters in <message text> are escaped using "\xff"
notation. If the message contains optional dictionary, verbatim
newline is used as the delimiter.
diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst
index 873efd97f822..0aa850cf4447 100644
--- a/Documentation/networking/page_pool.rst
+++ b/Documentation/networking/page_pool.rst
@@ -13,9 +13,9 @@ replacing dev_alloc_pages().
API keeps track of in-flight pages, in order to let API user know
when it is safe to free a page_pool object. Thus, API users
-must run page_pool_release_page() when a page is leaving the page_pool or
-call page_pool_put_page() where appropriate in order to maintain correct
-accounting.
+must call page_pool_put_page() to free the page, or attach
+the page to a page_pool-aware objects like skbs marked with
+skb_mark_for_recycle().
API user must call page_pool_put_page() once on a page, as it
will either recycle the page, or in case of refcnt > 1, it will
@@ -87,9 +87,6 @@ a page will cause no race conditions is enough.
must guarantee safe context (e.g NAPI), since it will recycle the page
directly into the pool fast cache.
-* page_pool_release_page(): Unmap the page (if mapped) and account for it on
- in-flight counters.
-
* page_pool_dev_alloc_pages(): Get a page from the page allocator or page_pool
caches.
@@ -194,7 +191,7 @@ NAPI poller
if XDP_DROP:
page_pool_recycle_direct(page_pool, page);
} else (packet_is_skb) {
- page_pool_release_page(page_pool, page);
+ skb_mark_for_recycle(skb);
new_page = page_pool_dev_alloc_pages(page_pool);
}
}
diff --git a/MAINTAINERS b/MAINTAINERS
index d516295978a4..24eb7ed6211f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3686,6 +3686,7 @@ F: include/linux/filter.h
F: include/linux/tnum.h
F: kernel/bpf/core.c
F: kernel/bpf/dispatcher.c
+F: kernel/bpf/mprog.c
F: kernel/bpf/syscall.c
F: kernel/bpf/tnum.c
F: kernel/bpf/trampoline.c
@@ -3696,7 +3697,7 @@ R: David Vernet <void@manifault.com>
L: bpf@vger.kernel.org
L: bpf@ietf.org
S: Maintained
-F: Documentation/bpf/instruction-set.rst
+F: Documentation/bpf/standardization/
BPF [GENERAL] (Safe Dynamic Programs and Tools)
M: Alexei Starovoitov <ast@kernel.org>
@@ -3779,13 +3780,15 @@ L: netdev@vger.kernel.org
S: Maintained
F: kernel/bpf/bpf_struct*
-BPF [NETWORKING] (tc BPF, sock_addr)
+BPF [NETWORKING] (tcx & tc BPF, sock_addr)
M: Martin KaFai Lau <martin.lau@linux.dev>
M: Daniel Borkmann <daniel@iogearbox.net>
R: John Fastabend <john.fastabend@gmail.com>
L: bpf@vger.kernel.org
L: netdev@vger.kernel.org
S: Maintained
+F: include/net/tcx.h
+F: kernel/bpf/tcx.c
F: net/core/filter.c
F: net/sched/act_bpf.c
F: net/sched/cls_bpf.c
@@ -3837,6 +3840,15 @@ S: Maintained
F: kernel/bpf/stackmap.c
F: kernel/trace/bpf_trace.c
+BROADCOM ASP 2.0 ETHERNET DRIVER
+M: Justin Chen <justin.chen@broadcom.com>
+M: Florian Fainelli <florian.fainelli@broadcom.com>
+L: bcm-kernel-feedback-list@broadcom.com
+L: netdev@vger.kernel.org
+S: Supported
+F: Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml
+F: drivers/net/ethernet/broadcom/asp2/
+
BROADCOM B44 10/100 ETHERNET DRIVER
M: Michael Chan <michael.chan@broadcom.com>
L: netdev@vger.kernel.org
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 438adb695daa..83c4b45dc65f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1857,59 +1857,177 @@ emit_jmp:
return proglen;
}
-static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
- int stack_size)
+static void clean_stack_garbage(const struct btf_func_model *m,
+ u8 **pprog, int nr_stack_slots,
+ int stack_size)
{
- int i, j, arg_size;
- bool next_same_struct = false;
+ int arg_size, off;
+ u8 *prog;
+
+ /* Generally speaking, the compiler will pass the arguments
+ * on-stack with "push" instruction, which will take 8-byte
+ * on the stack. In this case, there won't be garbage values
+ * while we copy the arguments from origin stack frame to current
+ * in BPF_DW.
+ *
+ * However, sometimes the compiler will only allocate 4-byte on
+ * the stack for the arguments. For now, this case will only
+ * happen if there is only one argument on-stack and its size
+ * not more than 4 byte. In this case, there will be garbage
+ * values on the upper 4-byte where we store the argument on
+ * current stack frame.
+ *
+ * arguments on origin stack:
+ *
+ * stack_arg_1(4-byte) xxx(4-byte)
+ *
+ * what we copy:
+ *
+ * stack_arg_1(8-byte): stack_arg_1(origin) xxx
+ *
+ * and the xxx is the garbage values which we should clean here.
+ */
+ if (nr_stack_slots != 1)
+ return;
+
+ /* the size of the last argument */
+ arg_size = m->arg_size[m->nr_args - 1];
+ if (arg_size <= 4) {
+ off = -(stack_size - 4);
+ prog = *pprog;
+ /* mov DWORD PTR [rbp + off], 0 */
+ if (!is_imm8(off))
+ EMIT2_off32(0xC7, 0x85, off);
+ else
+ EMIT3(0xC7, 0x45, off);
+ EMIT(0, 4);
+ *pprog = prog;
+ }
+}
+
+/* get the count of the regs that are used to pass arguments */
+static int get_nr_used_regs(const struct btf_func_model *m)
+{
+ int i, arg_regs, nr_used_regs = 0;
+
+ for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) {
+ arg_regs = (m->arg_size[i] + 7) / 8;
+ if (nr_used_regs + arg_regs <= 6)
+ nr_used_regs += arg_regs;
+
+ if (nr_used_regs >= 6)
+ break;
+ }
+
+ return nr_used_regs;
+}
+
+static void save_args(const struct btf_func_model *m, u8 **prog,
+ int stack_size, bool for_call_origin)
+{
+ int arg_regs, first_off = 0, nr_regs = 0, nr_stack_slots = 0;
+ int i, j;
/* Store function arguments to stack.
* For a function that accepts two pointers the sequence will be:
* mov QWORD PTR [rbp-0x10],rdi
* mov QWORD PTR [rbp-0x8],rsi
*/
- for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
- /* The arg_size is at most 16 bytes, enforced by the verifier. */
- arg_size = m->arg_size[j];
- if (arg_size > 8) {
- arg_size = 8;
- next_same_struct = !next_same_struct;
- }
+ for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) {
+ arg_regs = (m->arg_size[i] + 7) / 8;
- emit_stx(prog, bytes_to_bpf_size(arg_size),
- BPF_REG_FP,
- i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
- -(stack_size - i * 8));
+ /* According to the research of Yonghong, struct members
+ * should be all in register or all on the stack.
+ * Meanwhile, the compiler will pass the argument on regs
+ * if the remaining regs can hold the argument.
+ *
+ * Disorder of the args can happen. For example:
+ *
+ * struct foo_struct {
+ * long a;
+ * int b;
+ * };
+ * int foo(char, char, char, char, char, struct foo_struct,
+ * char);
+ *
+ * the arg1-5,arg7 will be passed by regs, and arg6 will
+ * by stack.
+ */
+ if (nr_regs + arg_regs > 6) {
+ /* copy function arguments from origin stack frame
+ * into current stack frame.
+ *
+ * The starting address of the arguments on-stack
+ * is:
+ * rbp + 8(push rbp) +
+ * 8(return addr of origin call) +
+ * 8(return addr of the caller)
+ * which means: rbp + 24
+ */
+ for (j = 0; j < arg_regs; j++) {
+ emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
+ nr_stack_slots * 8 + 0x18);
+ emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
+ -stack_size);
+
+ if (!nr_stack_slots)
+ first_off = stack_size;
+ stack_size -= 8;
+ nr_stack_slots++;
+ }
+ } else {
+ /* Only copy the arguments on-stack to current
+ * 'stack_size' and ignore the regs, used to
+ * prepare the arguments on-stack for orign call.
+ */
+ if (for_call_origin) {
+ nr_regs += arg_regs;
+ continue;
+ }
- j = next_same_struct ? j : j + 1;
+ /* copy the arguments from regs into stack */
+ for (j = 0; j < arg_regs; j++) {
+ emit_stx(prog, BPF_DW, BPF_REG_FP,
+ nr_regs == 5 ? X86_REG_R9 : BPF_REG_1 + nr_regs,
+ -stack_size);
+ stack_size -= 8;
+ nr_regs++;
+ }
+ }
}
+
+ clean_stack_garbage(m, prog, nr_stack_slots, first_off);
}
-static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
+static void restore_regs(const struct btf_func_model *m, u8 **prog,
int stack_size)
{
- int i, j, arg_size;
- bool next_same_struct = false;
+ int i, j, arg_regs, nr_regs = 0;
/* Restore function arguments from stack.
* For a function that accepts two pointers the sequence will be:
* EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
* EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
+ *
+ * The logic here is similar to what we do in save_args()
*/
- for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
- /* The arg_size is at most 16 bytes, enforced by the verifier. */
- arg_size = m->arg_size[j];
- if (arg_size > 8) {
- arg_size = 8;
- next_same_struct = !next_same_struct;
+ for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) {
+ arg_regs = (m->arg_size[i] + 7) / 8;
+ if (nr_regs + arg_regs <= 6) {
+ for (j = 0; j < arg_regs; j++) {
+ emit_ldx(prog, BPF_DW,
+ nr_regs == 5 ? X86_REG_R9 : BPF_REG_1 + nr_regs,
+ BPF_REG_FP,
+ -stack_size);
+ stack_size -= 8;
+ nr_regs++;
+ }
+ } else {
+ stack_size -= 8 * arg_regs;
}
- emit_ldx(prog, bytes_to_bpf_size(arg_size),
- i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
- BPF_REG_FP,
- -(stack_size - i * 8));
-
- j = next_same_struct ? j : j + 1;
+ if (nr_regs >= 6)
+ break;
}
}
@@ -1938,7 +2056,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
/* arg1: mov rdi, progs[i] */
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
/* arg2: lea rsi, [rbp - ctx_cookie_off] */
- EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
+ if (!is_imm8(-run_ctx_off))
+ EMIT3_off32(0x48, 0x8D, 0xB5, -run_ctx_off);
+ else
+ EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog))
return -EINVAL;
@@ -1954,7 +2075,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
emit_nops(&prog, 2);
/* arg1: lea rdi, [rbp - stack_size] */
- EMIT4(0x48, 0x8D, 0x7D, -stack_size);
+ if (!is_imm8(-stack_size))
+ EMIT3_off32(0x48, 0x8D, 0xBD, -stack_size);
+ else
+ EMIT4(0x48, 0x8D, 0x7D, -stack_size);
/* arg2: progs[i]->insnsi for interpreter */
if (!p->jited)
emit_mov_imm64(&prog, BPF_REG_2,
@@ -1984,7 +2108,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
/* arg2: mov rsi, rbx <- start time in nsec */
emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
/* arg3: lea rdx, [rbp - run_ctx_off] */
- EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
+ if (!is_imm8(-run_ctx_off))
+ EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off);
+ else
+ EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog))
return -EINVAL;
@@ -2136,7 +2263,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
void *func_addr)
{
int i, ret, nr_regs = m->nr_args, stack_size = 0;
- int regs_off, nregs_off, ip_off, run_ctx_off;
+ int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2150,8 +2277,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
nr_regs += (m->arg_size[i] + 7) / 8 - 1;
- /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
- if (nr_regs > 6)
+ /* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6
+ * are passed through regs, the remains are through stack.
+ */
+ if (nr_regs > MAX_BPF_FUNC_ARGS)
return -ENOTSUPP;
/* Generated trampoline stack layout:
@@ -2170,7 +2299,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
*
* RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
*
+ * RBP - rbx_off [ rbx value ] always
+ *
* RBP - run_ctx_off [ bpf_tramp_run_ctx ]
+ *
+ * [ stack_argN ] BPF_TRAMP_F_CALL_ORIG
+ * [ ... ]
+ * [ stack_arg2 ]
+ * RBP - arg_stack_off [ stack_arg1 ]
*/
/* room for return value of orig_call or fentry prog */
@@ -2190,9 +2326,26 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
ip_off = stack_size;
+ stack_size += 8;
+ rbx_off = stack_size;
+
stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
run_ctx_off = stack_size;
+ if (nr_regs > 6 && (flags & BPF_TRAMP_F_CALL_ORIG)) {
+ /* the space that used to pass arguments on-stack */
+ stack_size += (nr_regs - get_nr_used_regs(m)) * 8;
+ /* make sure the stack pointer is 16-byte aligned if we
+ * need pass arguments on stack, which means
+ * [stack_size + 8(rbp) + 8(rip) + 8(origin rip)]
+ * should be 16-byte aligned. Following code depend on
+ * that stack_size is already 8-byte aligned.
+ */
+ stack_size += (stack_size % 16) ? 0 : 8;
+ }
+
+ arg_stack_off = stack_size;
+
if (flags & BPF_TRAMP_F_SKIP_FRAME) {
/* skip patched call instruction and point orig_call to actual
* body of the kernel function.
@@ -2212,8 +2365,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
x86_call_depth_emit_accounting(&prog, NULL);
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
- EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
- EMIT1(0x53); /* push rbx */
+ if (!is_imm8(stack_size))
+ /* sub rsp, stack_size */
+ EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
+ else
+ /* sub rsp, stack_size */
+ EMIT4(0x48, 0x83, 0xEC, stack_size);
+ /* mov QWORD PTR [rbp - rbx_off], rbx */
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off);
/* Store number of argument registers of the traced function:
* mov rax, nr_regs
@@ -2231,7 +2390,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
}
- save_regs(m, &prog, nr_regs, regs_off);
+ save_args(m, &prog, regs_off, false);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* arg1: mov rdi, im */
@@ -2261,7 +2420,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
}
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- restore_regs(m, &prog, nr_regs, regs_off);
+ restore_regs(m, &prog, regs_off);
+ save_args(m, &prog, arg_stack_off, true);
if (flags & BPF_TRAMP_F_ORIG_STACK) {
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
@@ -2302,7 +2462,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
}
if (flags & BPF_TRAMP_F_RESTORE_REGS)
- restore_regs(m, &prog, nr_regs, regs_off);
+ restore_regs(m, &prog, regs_off);
/* This needs to be done regardless. If there were fmod_ret programs,
* the return value is only updated on the stack and still needs to be
@@ -2321,7 +2481,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (save_ret)
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
- EMIT1(0x5B); /* pop rbx */
+ emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
EMIT1(0xC9); /* leave */
if (flags & BPF_TRAMP_F_SKIP_FRAME)
/* skip our return address and return to parent */
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index ccac1c453080..05d562e9c8b1 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -48,8 +48,47 @@ static DEFINE_PER_CPU(struct local_event, local_event) = {
.lock = INIT_LOCAL_LOCK(lock),
};
+static int cn_filter(struct sock *dsk, struct sk_buff *skb, void *data)
+{
+ __u32 what, exit_code, *ptr;
+ enum proc_cn_mcast_op mc_op;
+ uintptr_t val;
+
+ if (!dsk || !data)
+ return 0;
+
+ ptr = (__u32 *)data;
+ what = *ptr++;
+ exit_code = *ptr;
+ val = ((struct proc_input *)(dsk->sk_user_data))->event_type;
+ mc_op = ((struct proc_input *)(dsk->sk_user_data))->mcast_op;
+
+ if (mc_op == PROC_CN_MCAST_IGNORE)
+ return 1;
+
+ if ((__u32)val == PROC_EVENT_ALL)
+ return 0;
+
+ /*
+ * Drop packet if we have to report only non-zero exit status
+ * (PROC_EVENT_NONZERO_EXIT) and exit status is 0
+ */
+ if (((__u32)val & PROC_EVENT_NONZERO_EXIT) &&
+ (what == PROC_EVENT_EXIT)) {
+ if (exit_code)
+ return 0;
+ }
+
+ if ((__u32)val & what)
+ return 0;
+
+ return 1;
+}
+
static inline void send_msg(struct cn_msg *msg)
{
+ __u32 filter_data[2];
+
local_lock(&local_event.lock);
msg->seq = __this_cpu_inc_return(local_event.count) - 1;
@@ -61,7 +100,16 @@ static inline void send_msg(struct cn_msg *msg)
*
* If cn_netlink_send() fails, the data is not sent.
*/
- cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT);
+ filter_data[0] = ((struct proc_event *)msg->data)->what;
+ if (filter_data[0] == PROC_EVENT_EXIT) {
+ filter_data[1] =
+ ((struct proc_event *)msg->data)->event_data.exit.exit_code;
+ } else {
+ filter_data[1] = 0;
+ }
+
+ cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT,
+ cn_filter, (void *)filter_data);
local_unlock(&local_event.lock);
}
@@ -341,16 +389,17 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
/**
* cn_proc_mcast_ctl
- * @data: message sent from userspace via the connector
+ * @msg: message sent from userspace via the connector
+ * @nsp: NETLINK_CB of the client's socket buffer
*/
static void cn_proc_mcast_ctl(struct cn_msg *msg,
struct netlink_skb_parms *nsp)
{
- enum proc_cn_mcast_op *mc_op = NULL;
- int err = 0;
-
- if (msg->len != sizeof(*mc_op))
- return;
+ enum proc_cn_mcast_op mc_op = 0, prev_mc_op = 0;
+ struct proc_input *pinput = NULL;
+ enum proc_cn_event ev_type = 0;
+ int err = 0, initial = 0;
+ struct sock *sk = NULL;
/*
* Events are reported with respect to the initial pid
@@ -361,19 +410,51 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg,
!task_is_in_init_pid_ns(current))
return;
- /* Can only change if privileged. */
- if (!__netlink_ns_capable(nsp, &init_user_ns, CAP_NET_ADMIN)) {
- err = EPERM;
- goto out;
+ if (msg->len == sizeof(*pinput)) {
+ pinput = (struct proc_input *)msg->data;
+ mc_op = pinput->mcast_op;
+ ev_type = pinput->event_type;
+ } else if (msg->len == sizeof(mc_op)) {
+ mc_op = *((enum proc_cn_mcast_op *)msg->data);
+ ev_type = PROC_EVENT_ALL;
+ } else {
+ return;
+ }
+
+ ev_type = valid_event((enum proc_cn_event)ev_type);
+
+ if (ev_type == PROC_EVENT_NONE)
+ ev_type = PROC_EVENT_ALL;
+
+ if (nsp->sk) {
+ sk = nsp->sk;
+ if (sk->sk_user_data == NULL) {
+ sk->sk_user_data = kzalloc(sizeof(struct proc_input),
+ GFP_KERNEL);
+ if (sk->sk_user_data == NULL) {
+ err = ENOMEM;
+ goto out;
+ }
+ initial = 1;
+ } else {
+ prev_mc_op =
+ ((struct proc_input *)(sk->sk_user_data))->mcast_op;
+ }
+ ((struct proc_input *)(sk->sk_user_data))->event_type =
+ ev_type;
+ ((struct proc_input *)(sk->sk_user_data))->mcast_op = mc_op;
}
- mc_op = (enum proc_cn_mcast_op *)msg->data;
- switch (*mc_op) {
+ switch (mc_op) {
case PROC_CN_MCAST_LISTEN:
- atomic_inc(&proc_event_num_listeners);
+ if (initial || (prev_mc_op != PROC_CN_MCAST_LISTEN))
+ atomic_inc(&proc_event_num_listeners);
break;
case PROC_CN_MCAST_IGNORE:
- atomic_dec(&proc_event_num_listeners);
+ if (!initial && (prev_mc_op != PROC_CN_MCAST_IGNORE))
+ atomic_dec(&proc_event_num_listeners);
+ ((struct proc_input *)(sk->sk_user_data))->event_type =
+ PROC_EVENT_NONE;
break;
default:
err = EINVAL;
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 48ec7ce6ecac..7f7b94f616a6 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -59,7 +59,9 @@ static int cn_already_initialized;
* both, or if both are zero then the group is looked up and sent there.
*/
int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group,
- gfp_t gfp_mask)
+ gfp_t gfp_mask,
+ int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
+ void *filter_data)
{
struct cn_callback_entry *__cbq;
unsigned int size;
@@ -110,8 +112,9 @@ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group,
NETLINK_CB(skb).dst_group = group;
if (group)
- return netlink_broadcast(dev->nls, skb, portid, group,
- gfp_mask);
+ return netlink_broadcast_filtered(dev->nls, skb, portid, group,
+ gfp_mask, filter,
+ (void *)filter_data);
return netlink_unicast(dev->nls, skb, portid,
!gfpflags_allow_blocking(gfp_mask));
}
@@ -121,7 +124,8 @@ EXPORT_SYMBOL_GPL(cn_netlink_send_mult);
int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group,
gfp_t gfp_mask)
{
- return cn_netlink_send_mult(msg, msg->len, portid, __group, gfp_mask);
+ return cn_netlink_send_mult(msg, msg->len, portid, __group, gfp_mask,
+ NULL, NULL);
}
EXPORT_SYMBOL_GPL(cn_netlink_send);
@@ -163,6 +167,31 @@ static int cn_call_callback(struct sk_buff *skb)
}
/*
+ * Allow non-root access for NETLINK_CONNECTOR family having CN_IDX_PROC
+ * multicast group.
+ */
+static int cn_bind(struct net *net, int group)
+{
+ unsigned long groups = (unsigned long) group;
+
+ if (ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return 0;
+
+ if (test_bit(CN_IDX_PROC - 1, &groups))
+ return 0;
+
+ return -EPERM;
+}
+
+static void cn_release(struct sock *sk, unsigned long *groups)
+{
+ if (groups && test_bit(CN_IDX_PROC - 1, groups)) {
+ kfree(sk->sk_user_data);
+ sk->sk_user_data = NULL;
+ }
+}
+
+/*
* Main netlink receiving function.
*
* It checks skb, netlink header and msg sizes, and calls callback helper.
@@ -249,6 +278,9 @@ static int cn_init(void)
struct netlink_kernel_cfg cfg = {
.groups = CN_NETLINK_USERS + 0xf,
.input = cn_rx_skb,
+ .flags = NL_CFG_F_NONROOT_RECV,
+ .bind = cn_bind,
+ .release = cn_release,
};
dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, &cfg);
diff --git a/drivers/hid/bpf/entrypoints/Makefile b/drivers/hid/bpf/entrypoints/Makefile
index a12edcfa4fe3..43b99b5575cf 100644
--- a/drivers/hid/bpf/entrypoints/Makefile
+++ b/drivers/hid/bpf/entrypoints/Makefile
@@ -58,7 +58,7 @@ entrypoints.lskel.h: $(OUTPUT)/entrypoints.bpf.o | $(BPFTOOL)
$(OUTPUT)/entrypoints.bpf.o: entrypoints.bpf.c $(OUTPUT)/vmlinux.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BPF,$@)
- $(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES) \
+ $(Q)$(CLANG) -g -O2 --target=bpf $(INCLUDES) \
-c $(filter %.c,$^) -o $@ && \
$(LLVM_STRIP) -g $@
diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index a5c5036dfb94..18e4a6e94ba9 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -160,8 +160,13 @@ config CAN_KVASER_PCIEFD
Kvaser PCIEcan 4xHS
Kvaser PCIEcan 2xHS v2
Kvaser PCIEcan HS v2
+ Kvaser PCIEcan 1xCAN v3
+ Kvaser PCIEcan 2xCAN v3
+ Kvaser PCIEcan 4xCAN v2
Kvaser Mini PCI Express HS v2
Kvaser Mini PCI Express 2xHS v2
+ Kvaser Mini PCI Express 1xCAN v3
+ Kvaser Mini PCI Express 2xCAN v3
config CAN_SLCAN
tristate "Serial / USB serial CAN Adaptors (slcan)"
diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c
index 3174efdae271..6d3ba71a6a73 100644
--- a/drivers/net/can/grcan.c
+++ b/drivers/net/can/grcan.c
@@ -30,8 +30,9 @@
#include <linux/ethtool.h>
#include <linux/io.h>
#include <linux/can/dev.h>
+#include <linux/platform_device.h>
#include <linux/spinlock.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/dma-mapping.h>
diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c
index db6256f2b1b3..a57005faa04f 100644
--- a/drivers/net/can/kvaser_pciefd.c
+++ b/drivers/net/can/kvaser_pciefd.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/* Copyright (C) 2018 KVASER AB, Sweden. All rights reserved.
* Parts of this driver are based on the following:
- * - Kvaser linux pciefd driver (version 5.25)
+ * - Kvaser linux pciefd driver (version 5.42)
* - PEAK linux canfd driver
*/
@@ -33,37 +33,27 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices");
#define KVASER_PCIEFD_DMA_SIZE (4U * 1024U)
#define KVASER_PCIEFD_VENDOR 0x1a07
+/* Altera based devices */
#define KVASER_PCIEFD_4HS_DEVICE_ID 0x000d
#define KVASER_PCIEFD_2HS_V2_DEVICE_ID 0x000e
#define KVASER_PCIEFD_HS_V2_DEVICE_ID 0x000f
#define KVASER_PCIEFD_MINIPCIE_HS_V2_DEVICE_ID 0x0010
#define KVASER_PCIEFD_MINIPCIE_2HS_V2_DEVICE_ID 0x0011
-/* PCIe IRQ registers */
-#define KVASER_PCIEFD_IRQ_REG 0x40
-#define KVASER_PCIEFD_IEN_REG 0x50
-/* DMA address translation map register base */
-#define KVASER_PCIEFD_DMA_MAP_BASE 0x1000
-/* Loopback control register */
-#define KVASER_PCIEFD_LOOP_REG 0x1f000
-/* System identification and information registers */
-#define KVASER_PCIEFD_SYSID_BASE 0x1f020
-#define KVASER_PCIEFD_SYSID_VERSION_REG (KVASER_PCIEFD_SYSID_BASE + 0x8)
-#define KVASER_PCIEFD_SYSID_CANFREQ_REG (KVASER_PCIEFD_SYSID_BASE + 0xc)
-#define KVASER_PCIEFD_SYSID_BUSFREQ_REG (KVASER_PCIEFD_SYSID_BASE + 0x10)
-#define KVASER_PCIEFD_SYSID_BUILD_REG (KVASER_PCIEFD_SYSID_BASE + 0x14)
-/* Shared receive buffer registers */
-#define KVASER_PCIEFD_SRB_BASE 0x1f200
-#define KVASER_PCIEFD_SRB_FIFO_LAST_REG (KVASER_PCIEFD_SRB_BASE + 0x1f4)
-#define KVASER_PCIEFD_SRB_CMD_REG (KVASER_PCIEFD_SRB_BASE + 0x200)
-#define KVASER_PCIEFD_SRB_IEN_REG (KVASER_PCIEFD_SRB_BASE + 0x204)
-#define KVASER_PCIEFD_SRB_IRQ_REG (KVASER_PCIEFD_SRB_BASE + 0x20c)
-#define KVASER_PCIEFD_SRB_STAT_REG (KVASER_PCIEFD_SRB_BASE + 0x210)
-#define KVASER_PCIEFD_SRB_RX_NR_PACKETS_REG (KVASER_PCIEFD_SRB_BASE + 0x214)
-#define KVASER_PCIEFD_SRB_CTRL_REG (KVASER_PCIEFD_SRB_BASE + 0x218)
+/* SmartFusion2 based devices */
+#define KVASER_PCIEFD_2CAN_V3_DEVICE_ID 0x0012
+#define KVASER_PCIEFD_1CAN_V3_DEVICE_ID 0x0013
+#define KVASER_PCIEFD_4CAN_V2_DEVICE_ID 0x0014
+#define KVASER_PCIEFD_MINIPCIE_2CAN_V3_DEVICE_ID 0x0015
+#define KVASER_PCIEFD_MINIPCIE_1CAN_V3_DEVICE_ID 0x0016
+
+/* Altera SerDes Enable 64-bit DMA address translation */
+#define KVASER_PCIEFD_ALTERA_DMA_64BIT BIT(0)
+
+/* SmartFusion2 SerDes LSB address translation mask */
+#define KVASER_PCIEFD_SF2_DMA_LSB_MASK GENMASK(31, 12)
+
/* Kvaser KCAN CAN controller registers */
-#define KVASER_PCIEFD_KCAN0_BASE 0x10000
-#define KVASER_PCIEFD_KCAN_BASE_OFFSET 0x1000
#define KVASER_PCIEFD_KCAN_FIFO_REG 0x100
#define KVASER_PCIEFD_KCAN_FIFO_LAST_REG 0x180
#define KVASER_PCIEFD_KCAN_CTRL_REG 0x2c0
@@ -77,13 +67,20 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices");
#define KVASER_PCIEFD_KCAN_BUS_LOAD_REG 0x424
#define KVASER_PCIEFD_KCAN_BTRD_REG 0x428
#define KVASER_PCIEFD_KCAN_PWM_REG 0x430
-
-/* PCI interrupt fields */
-#define KVASER_PCIEFD_IRQ_SRB BIT(4)
-#define KVASER_PCIEFD_IRQ_ALL_MASK GENMASK(4, 0)
-
-/* Enable 64-bit DMA address translation */
-#define KVASER_PCIEFD_64BIT_DMA_BIT BIT(0)
+/* System identification and information registers */
+#define KVASER_PCIEFD_SYSID_VERSION_REG 0x8
+#define KVASER_PCIEFD_SYSID_CANFREQ_REG 0xc
+#define KVASER_PCIEFD_SYSID_BUSFREQ_REG 0x10
+#define KVASER_PCIEFD_SYSID_BUILD_REG 0x14
+/* Shared receive buffer FIFO registers */
+#define KVASER_PCIEFD_SRB_FIFO_LAST_REG 0x1f4
+/* Shared receive buffer registers */
+#define KVASER_PCIEFD_SRB_CMD_REG 0x0
+#define KVASER_PCIEFD_SRB_IEN_REG 0x04
+#define KVASER_PCIEFD_SRB_IRQ_REG 0x0c
+#define KVASER_PCIEFD_SRB_STAT_REG 0x10
+#define KVASER_PCIEFD_SRB_RX_NR_PACKETS_REG 0x14
+#define KVASER_PCIEFD_SRB_CTRL_REG 0x18
/* System build information fields */
#define KVASER_PCIEFD_SYSID_VERSION_NR_CHAN_MASK GENMASK(31, 24)
@@ -253,7 +250,122 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices");
/* KCAN Error detected packet, second word */
#define KVASER_PCIEFD_EPACK_DIR_TX BIT(0)
+/* Macros for calculating addresses of registers */
+#define KVASER_PCIEFD_GET_BLOCK_ADDR(pcie, block) \
+ ((pcie)->reg_base + (pcie)->driver_data->address_offset->block)
+#define KVASER_PCIEFD_PCI_IEN_ADDR(pcie) \
+ (KVASER_PCIEFD_GET_BLOCK_ADDR((pcie), pci_ien))
+#define KVASER_PCIEFD_PCI_IRQ_ADDR(pcie) \
+ (KVASER_PCIEFD_GET_BLOCK_ADDR((pcie), pci_irq))
+#define KVASER_PCIEFD_SERDES_ADDR(pcie) \
+ (KVASER_PCIEFD_GET_BLOCK_ADDR((pcie), serdes))
+#define KVASER_PCIEFD_SYSID_ADDR(pcie) \
+ (KVASER_PCIEFD_GET_BLOCK_ADDR((pcie), sysid))
+#define KVASER_PCIEFD_LOOPBACK_ADDR(pcie) \
+ (KVASER_PCIEFD_GET_BLOCK_ADDR((pcie), loopback))
+#define KVASER_PCIEFD_SRB_FIFO_ADDR(pcie) \
+ (KVASER_PCIEFD_GET_BLOCK_ADDR((pcie), kcan_srb_fifo))
+#define KVASER_PCIEFD_SRB_ADDR(pcie) \
+ (KVASER_PCIEFD_GET_BLOCK_ADDR((pcie), kcan_srb))
+#define KVASER_PCIEFD_KCAN_CH0_ADDR(pcie) \
+ (KVASER_PCIEFD_GET_BLOCK_ADDR((pcie), kcan_ch0))
+#define KVASER_PCIEFD_KCAN_CH1_ADDR(pcie) \
+ (KVASER_PCIEFD_GET_BLOCK_ADDR((pcie), kcan_ch1))
+#define KVASER_PCIEFD_KCAN_CHANNEL_SPAN(pcie) \
+ (KVASER_PCIEFD_KCAN_CH1_ADDR((pcie)) - KVASER_PCIEFD_KCAN_CH0_ADDR((pcie)))
+#define KVASER_PCIEFD_KCAN_CHX_ADDR(pcie, i) \
+ (KVASER_PCIEFD_KCAN_CH0_ADDR((pcie)) + (i) * KVASER_PCIEFD_KCAN_CHANNEL_SPAN((pcie)))
+
struct kvaser_pciefd;
+static void kvaser_pciefd_write_dma_map_altera(struct kvaser_pciefd *pcie,
+ dma_addr_t addr, int index);
+static void kvaser_pciefd_write_dma_map_sf2(struct kvaser_pciefd *pcie,
+ dma_addr_t addr, int index);
+
+struct kvaser_pciefd_address_offset {
+ u32 serdes;
+ u32 pci_ien;
+ u32 pci_irq;
+ u32 sysid;
+ u32 loopback;
+ u32 kcan_srb_fifo;
+ u32 kcan_srb;
+ u32 kcan_ch0;
+ u32 kcan_ch1;
+};
+
+struct kvaser_pciefd_dev_ops {
+ void (*kvaser_pciefd_write_dma_map)(struct kvaser_pciefd *pcie,
+ dma_addr_t addr, int index);
+};
+
+struct kvaser_pciefd_irq_mask {
+ u32 kcan_rx0;
+ u32 kcan_tx[KVASER_PCIEFD_MAX_CAN_CHANNELS];
+ u32 all;
+};
+
+struct kvaser_pciefd_driver_data {
+ const struct kvaser_pciefd_address_offset *address_offset;
+ const struct kvaser_pciefd_irq_mask *irq_mask;
+ const struct kvaser_pciefd_dev_ops *ops;
+};
+
+static const struct kvaser_pciefd_address_offset kvaser_pciefd_altera_address_offset = {
+ .serdes = 0x1000,
+ .pci_ien = 0x50,
+ .pci_irq = 0x40,
+ .sysid = 0x1f020,
+ .loopback = 0x1f000,
+ .kcan_srb_fifo = 0x1f200,
+ .kcan_srb = 0x1f400,
+ .kcan_ch0 = 0x10000,
+ .kcan_ch1 = 0x11000,
+};
+
+static const struct kvaser_pciefd_address_offset kvaser_pciefd_sf2_address_offset = {
+ .serdes = 0x280c8,
+ .pci_ien = 0x102004,
+ .pci_irq = 0x102008,
+ .sysid = 0x100000,
+ .loopback = 0x103000,
+ .kcan_srb_fifo = 0x120000,
+ .kcan_srb = 0x121000,
+ .kcan_ch0 = 0x140000,
+ .kcan_ch1 = 0x142000,
+};
+
+static const struct kvaser_pciefd_irq_mask kvaser_pciefd_altera_irq_mask = {
+ .kcan_rx0 = BIT(4),
+ .kcan_tx = { BIT(0), BIT(1), BIT(2), BIT(3) },
+ .all = GENMASK(4, 0),
+};
+
+static const struct kvaser_pciefd_irq_mask kvaser_pciefd_sf2_irq_mask = {
+ .kcan_rx0 = BIT(4),
+ .kcan_tx = { BIT(16), BIT(17), BIT(18), BIT(19) },
+ .all = GENMASK(19, 16) | BIT(4),
+};
+
+static const struct kvaser_pciefd_dev_ops kvaser_pciefd_altera_dev_ops = {
+ .kvaser_pciefd_write_dma_map = kvaser_pciefd_write_dma_map_altera,
+};
+
+static const struct kvaser_pciefd_dev_ops kvaser_pciefd_sf2_dev_ops = {
+ .kvaser_pciefd_write_dma_map = kvaser_pciefd_write_dma_map_sf2,
+};
+
+static const struct kvaser_pciefd_driver_data kvaser_pciefd_altera_driver_data = {
+ .address_offset = &kvaser_pciefd_altera_address_offset,
+ .irq_mask = &kvaser_pciefd_altera_irq_mask,
+ .ops = &kvaser_pciefd_altera_dev_ops,
+};
+
+static const struct kvaser_pciefd_driver_data kvaser_pciefd_sf2_driver_data = {
+ .address_offset = &kvaser_pciefd_sf2_address_offset,
+ .irq_mask = &kvaser_pciefd_sf2_irq_mask,
+ .ops = &kvaser_pciefd_sf2_dev_ops,
+};
struct kvaser_pciefd_can {
struct can_priv can;
@@ -273,6 +385,7 @@ struct kvaser_pciefd {
struct pci_dev *pci;
void __iomem *reg_base;
struct kvaser_pciefd_can *can[KVASER_PCIEFD_MAX_CAN_CHANNELS];
+ const struct kvaser_pciefd_driver_data *driver_data;
void *dma_data[KVASER_PCIEFD_DMA_COUNT];
u8 nr_channels;
u32 bus_freq;
@@ -305,18 +418,43 @@ static const struct can_bittiming_const kvaser_pciefd_bittiming_const = {
static struct pci_device_id kvaser_pciefd_id_table[] = {
{
PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_4HS_DEVICE_ID),
+ .driver_data = (kernel_ulong_t)&kvaser_pciefd_altera_driver_data,
},
{
PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_2HS_V2_DEVICE_ID),
+ .driver_data = (kernel_ulong_t)&kvaser_pciefd_altera_driver_data,
},
{
PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_HS_V2_DEVICE_ID),
+ .driver_data = (kernel_ulong_t)&kvaser_pciefd_altera_driver_data,
},
{
PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_MINIPCIE_HS_V2_DEVICE_ID),
+ .driver_data = (kernel_ulong_t)&kvaser_pciefd_altera_driver_data,
},
{
PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_MINIPCIE_2HS_V2_DEVICE_ID),
+ .driver_data = (kernel_ulong_t)&kvaser_pciefd_altera_driver_data,
+ },
+ {
+ PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_2CAN_V3_DEVICE_ID),
+ .driver_data = (kernel_ulong_t)&kvaser_pciefd_sf2_driver_data,
+ },
+ {
+ PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_1CAN_V3_DEVICE_ID),
+ .driver_data = (kernel_ulong_t)&kvaser_pciefd_sf2_driver_data,
+ },
+ {
+ PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_4CAN_V2_DEVICE_ID),
+ .driver_data = (kernel_ulong_t)&kvaser_pciefd_sf2_driver_data,
+ },
+ {
+ PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_MINIPCIE_2CAN_V3_DEVICE_ID),
+ .driver_data = (kernel_ulong_t)&kvaser_pciefd_sf2_driver_data,
+ },
+ {
+ PCI_DEVICE(KVASER_PCIEFD_VENDOR, KVASER_PCIEFD_MINIPCIE_1CAN_V3_DEVICE_ID),
+ .driver_data = (kernel_ulong_t)&kvaser_pciefd_sf2_driver_data,
},
{
0,
@@ -783,8 +921,7 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie)
can = netdev_priv(netdev);
netdev->netdev_ops = &kvaser_pciefd_netdev_ops;
netdev->ethtool_ops = &kvaser_pciefd_ethtool_ops;
- can->reg_base = pcie->reg_base + KVASER_PCIEFD_KCAN0_BASE +
- i * KVASER_PCIEFD_KCAN_BASE_OFFSET;
+ can->reg_base = KVASER_PCIEFD_KCAN_CHX_ADDR(pcie, i);
can->kv_pcie = pcie;
can->cmd_seq = 0;
can->err_rep_cnt = 0;
@@ -865,20 +1002,37 @@ static int kvaser_pciefd_reg_candev(struct kvaser_pciefd *pcie)
return 0;
}
-static void kvaser_pciefd_write_dma_map(struct kvaser_pciefd *pcie,
- dma_addr_t addr, int offset)
+static void kvaser_pciefd_write_dma_map_altera(struct kvaser_pciefd *pcie,
+ dma_addr_t addr, int index)
{
+ void __iomem *serdes_base;
u32 word1, word2;
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- word1 = addr | KVASER_PCIEFD_64BIT_DMA_BIT;
+ word1 = addr | KVASER_PCIEFD_ALTERA_DMA_64BIT;
word2 = addr >> 32;
#else
word1 = addr;
word2 = 0;
#endif
- iowrite32(word1, pcie->reg_base + offset);
- iowrite32(word2, pcie->reg_base + offset + 4);
+ serdes_base = KVASER_PCIEFD_SERDES_ADDR(pcie) + 0x8 * index;
+ iowrite32(word1, serdes_base);
+ iowrite32(word2, serdes_base + 0x4);
+}
+
+static void kvaser_pciefd_write_dma_map_sf2(struct kvaser_pciefd *pcie,
+ dma_addr_t addr, int index)
+{
+ void __iomem *serdes_base;
+ u32 lsb = addr & KVASER_PCIEFD_SF2_DMA_LSB_MASK;
+ u32 msb = 0x0;
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ msb = addr >> 32;
+#endif
+ serdes_base = KVASER_PCIEFD_SERDES_ADDR(pcie) + 0x10 * index;
+ iowrite32(lsb, serdes_base);
+ iowrite32(msb, serdes_base + 0x4);
}
static int kvaser_pciefd_setup_dma(struct kvaser_pciefd *pcie)
@@ -889,10 +1043,8 @@ static int kvaser_pciefd_setup_dma(struct kvaser_pciefd *pcie)
dma_addr_t dma_addr[KVASER_PCIEFD_DMA_COUNT];
/* Disable the DMA */
- iowrite32(0, pcie->reg_base + KVASER_PCIEFD_SRB_CTRL_REG);
+ iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CTRL_REG);
for (i = 0; i < KVASER_PCIEFD_DMA_COUNT; i++) {
- unsigned int offset = KVASER_PCIEFD_DMA_MAP_BASE + 8 * i;
-
pcie->dma_data[i] = dmam_alloc_coherent(&pcie->pci->dev,
KVASER_PCIEFD_DMA_SIZE,
&dma_addr[i],
@@ -903,24 +1055,25 @@ static int kvaser_pciefd_setup_dma(struct kvaser_pciefd *pcie)
KVASER_PCIEFD_DMA_SIZE);
return -ENOMEM;
}
- kvaser_pciefd_write_dma_map(pcie, dma_addr[i], offset);
+ pcie->driver_data->ops->kvaser_pciefd_write_dma_map(pcie, dma_addr[i], i);
}
/* Reset Rx FIFO, and both DMA buffers */
iowrite32(KVASER_PCIEFD_SRB_CMD_FOR | KVASER_PCIEFD_SRB_CMD_RDB0 |
KVASER_PCIEFD_SRB_CMD_RDB1,
- pcie->reg_base + KVASER_PCIEFD_SRB_CMD_REG);
+ KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG);
/* Empty Rx FIFO */
srb_packet_count =
FIELD_GET(KVASER_PCIEFD_SRB_RX_NR_PACKETS_MASK,
- ioread32(pcie->reg_base + KVASER_PCIEFD_SRB_RX_NR_PACKETS_REG));
+ ioread32(KVASER_PCIEFD_SRB_ADDR(pcie) +
+ KVASER_PCIEFD_SRB_RX_NR_PACKETS_REG));
while (srb_packet_count) {
/* Drop current packet in FIFO */
- ioread32(pcie->reg_base + KVASER_PCIEFD_SRB_FIFO_LAST_REG);
+ ioread32(KVASER_PCIEFD_SRB_FIFO_ADDR(pcie) + KVASER_PCIEFD_SRB_FIFO_LAST_REG);
srb_packet_count--;
}
- srb_status = ioread32(pcie->reg_base + KVASER_PCIEFD_SRB_STAT_REG);
+ srb_status = ioread32(KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_STAT_REG);
if (!(srb_status & KVASER_PCIEFD_SRB_STAT_DI)) {
dev_err(&pcie->pci->dev, "DMA not idle before enabling\n");
return -EIO;
@@ -928,7 +1081,7 @@ static int kvaser_pciefd_setup_dma(struct kvaser_pciefd *pcie)
/* Enable the DMA */
iowrite32(KVASER_PCIEFD_SRB_CTRL_DMA_ENABLE,
- pcie->reg_base + KVASER_PCIEFD_SRB_CTRL_REG);
+ KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CTRL_REG);
return 0;
}
@@ -937,30 +1090,29 @@ static int kvaser_pciefd_setup_board(struct kvaser_pciefd *pcie)
{
u32 version, srb_status, build;
- version = ioread32(pcie->reg_base + KVASER_PCIEFD_SYSID_VERSION_REG);
+ version = ioread32(KVASER_PCIEFD_SYSID_ADDR(pcie) + KVASER_PCIEFD_SYSID_VERSION_REG);
pcie->nr_channels = min(KVASER_PCIEFD_MAX_CAN_CHANNELS,
FIELD_GET(KVASER_PCIEFD_SYSID_VERSION_NR_CHAN_MASK, version));
- build = ioread32(pcie->reg_base + KVASER_PCIEFD_SYSID_BUILD_REG);
+ build = ioread32(KVASER_PCIEFD_SYSID_ADDR(pcie) + KVASER_PCIEFD_SYSID_BUILD_REG);
dev_dbg(&pcie->pci->dev, "Version %lu.%lu.%lu\n",
FIELD_GET(KVASER_PCIEFD_SYSID_VERSION_MAJOR_MASK, version),
FIELD_GET(KVASER_PCIEFD_SYSID_VERSION_MINOR_MASK, version),
FIELD_GET(KVASER_PCIEFD_SYSID_BUILD_SEQ_MASK, build));
- srb_status = ioread32(pcie->reg_base + KVASER_PCIEFD_SRB_STAT_REG);
+ srb_status = ioread32(KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_STAT_REG);
if (!(srb_status & KVASER_PCIEFD_SRB_STAT_DMA)) {
dev_err(&pcie->pci->dev, "Hardware without DMA is not supported\n");
return -ENODEV;
}
- pcie->bus_freq = ioread32(pcie->reg_base +
- KVASER_PCIEFD_SYSID_BUSFREQ_REG);
- pcie->freq = ioread32(pcie->reg_base + KVASER_PCIEFD_SYSID_CANFREQ_REG);
+ pcie->bus_freq = ioread32(KVASER_PCIEFD_SYSID_ADDR(pcie) + KVASER_PCIEFD_SYSID_BUSFREQ_REG);
+ pcie->freq = ioread32(KVASER_PCIEFD_SYSID_ADDR(pcie) + KVASER_PCIEFD_SYSID_CANFREQ_REG);
pcie->freq_to_ticks_div = pcie->freq / 1000000;
if (pcie->freq_to_ticks_div == 0)
pcie->freq_to_ticks_div = 1;
/* Turn off all loopback functionality */
- iowrite32(0, pcie->reg_base + KVASER_PCIEFD_LOOP_REG);
+ iowrite32(0, KVASER_PCIEFD_LOOPBACK_ADDR(pcie));
return 0;
}
@@ -1430,21 +1582,20 @@ static int kvaser_pciefd_read_buffer(struct kvaser_pciefd *pcie, int dma_buf)
static void kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie)
{
- u32 irq;
+ u32 irq = ioread32(KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG);
- irq = ioread32(pcie->reg_base + KVASER_PCIEFD_SRB_IRQ_REG);
if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) {
kvaser_pciefd_read_buffer(pcie, 0);
/* Reset DMA buffer 0 */
iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0,
- pcie->reg_base + KVASER_PCIEFD_SRB_CMD_REG);
+ KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG);
}
if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) {
kvaser_pciefd_read_buffer(pcie, 1);
/* Reset DMA buffer 1 */
iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1,
- pcie->reg_base + KVASER_PCIEFD_SRB_CMD_REG);
+ KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG);
}
if (irq & KVASER_PCIEFD_SRB_IRQ_DOF0 ||
@@ -1453,7 +1604,7 @@ static void kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie)
irq & KVASER_PCIEFD_SRB_IRQ_DUF1)
dev_err(&pcie->pci->dev, "DMA IRQ error 0x%08X\n", irq);
- iowrite32(irq, pcie->reg_base + KVASER_PCIEFD_SRB_IRQ_REG);
+ iowrite32(irq, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG);
}
static void kvaser_pciefd_transmit_irq(struct kvaser_pciefd_can *can)
@@ -1479,15 +1630,14 @@ static void kvaser_pciefd_transmit_irq(struct kvaser_pciefd_can *can)
static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev)
{
struct kvaser_pciefd *pcie = (struct kvaser_pciefd *)dev;
- u32 board_irq;
+ const struct kvaser_pciefd_irq_mask *irq_mask = pcie->driver_data->irq_mask;
+ u32 board_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie));
int i;
- board_irq = ioread32(pcie->reg_base + KVASER_PCIEFD_IRQ_REG);
-
- if (!(board_irq & KVASER_PCIEFD_IRQ_ALL_MASK))
+ if (!(board_irq & irq_mask->all))
return IRQ_NONE;
- if (board_irq & KVASER_PCIEFD_IRQ_SRB)
+ if (board_irq & irq_mask->kcan_rx0)
kvaser_pciefd_receive_irq(pcie);
for (i = 0; i < pcie->nr_channels; i++) {
@@ -1498,7 +1648,7 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev)
}
/* Check that mask matches channel (i) IRQ mask */
- if (board_irq & (1 << i))
+ if (board_irq & irq_mask->kcan_tx[i])
kvaser_pciefd_transmit_irq(pcie->can[i]);
}
@@ -1525,6 +1675,8 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev,
{
int err;
struct kvaser_pciefd *pcie;
+ const struct kvaser_pciefd_irq_mask *irq_mask;
+ void __iomem *irq_en_base;
pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
if (!pcie)
@@ -1532,6 +1684,8 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, pcie);
pcie->pci = pdev;
+ pcie->driver_data = (const struct kvaser_pciefd_driver_data *)id->driver_data;
+ irq_mask = pcie->driver_data->irq_mask;
err = pci_enable_device(pdev);
if (err)
@@ -1567,22 +1721,21 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev,
goto err_teardown_can_ctrls;
iowrite32(KVASER_PCIEFD_SRB_IRQ_DPD0 | KVASER_PCIEFD_SRB_IRQ_DPD1,
- pcie->reg_base + KVASER_PCIEFD_SRB_IRQ_REG);
+ KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG);
iowrite32(KVASER_PCIEFD_SRB_IRQ_DPD0 | KVASER_PCIEFD_SRB_IRQ_DPD1 |
KVASER_PCIEFD_SRB_IRQ_DOF0 | KVASER_PCIEFD_SRB_IRQ_DOF1 |
KVASER_PCIEFD_SRB_IRQ_DUF0 | KVASER_PCIEFD_SRB_IRQ_DUF1,
- pcie->reg_base + KVASER_PCIEFD_SRB_IEN_REG);
+ KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG);
/* Enable PCI interrupts */
- iowrite32(KVASER_PCIEFD_IRQ_ALL_MASK,
- pcie->reg_base + KVASER_PCIEFD_IEN_REG);
-
+ irq_en_base = KVASER_PCIEFD_PCI_IEN_ADDR(pcie);
+ iowrite32(irq_mask->all, irq_en_base);
/* Ready the DMA buffers */
iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0,
- pcie->reg_base + KVASER_PCIEFD_SRB_CMD_REG);
+ KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG);
iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1,
- pcie->reg_base + KVASER_PCIEFD_SRB_CMD_REG);
+ KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG);
err = kvaser_pciefd_reg_candev(pcie);
if (err)
@@ -1592,12 +1745,12 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev,
err_free_irq:
/* Disable PCI interrupts */
- iowrite32(0, pcie->reg_base + KVASER_PCIEFD_IEN_REG);
+ iowrite32(0, irq_en_base);
free_irq(pcie->pci->irq, pcie);
err_teardown_can_ctrls:
kvaser_pciefd_teardown_can_ctrls(pcie);
- iowrite32(0, pcie->reg_base + KVASER_PCIEFD_SRB_CTRL_REG);
+ iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CTRL_REG);
pci_clear_master(pdev);
err_pci_iounmap:
@@ -1636,8 +1789,8 @@ static void kvaser_pciefd_remove(struct pci_dev *pdev)
kvaser_pciefd_remove_all_ctrls(pcie);
/* Disable interrupts */
- iowrite32(0, pcie->reg_base + KVASER_PCIEFD_SRB_CTRL_REG);
- iowrite32(0, pcie->reg_base + KVASER_PCIEFD_IEN_REG);
+ iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CTRL_REG);
+ iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie));
free_irq(pcie->pci->irq, pcie);
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index c5af92bcc9c9..13fd84b2e2dd 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -11,6 +11,7 @@
#include <linux/bitfield.h>
#include <linux/can/dev.h>
#include <linux/ethtool.h>
+#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
@@ -308,6 +309,9 @@ enum m_can_reg {
#define TX_EVENT_MM_MASK GENMASK(31, 24)
#define TX_EVENT_TXTS_MASK GENMASK(15, 0)
+/* Hrtimer polling interval */
+#define HRTIMER_POLL_INTERVAL_MS 1
+
/* The ID and DLC registers are adjacent in M_CAN FIFO memory,
* and we can save a (potentially slow) bus round trip by combining
* reads and writes to them.
@@ -1414,6 +1418,12 @@ static int m_can_start(struct net_device *dev)
m_can_enable_all_interrupts(cdev);
+ if (!dev->irq) {
+ dev_dbg(cdev->dev, "Start hrtimer\n");
+ hrtimer_start(&cdev->hrtimer, ms_to_ktime(HRTIMER_POLL_INTERVAL_MS),
+ HRTIMER_MODE_REL_PINNED);
+ }
+
return 0;
}
@@ -1568,6 +1578,11 @@ static void m_can_stop(struct net_device *dev)
{
struct m_can_classdev *cdev = netdev_priv(dev);
+ if (!dev->irq) {
+ dev_dbg(cdev->dev, "Stop hrtimer\n");
+ hrtimer_cancel(&cdev->hrtimer);
+ }
+
/* disable all interrupts */
m_can_disable_all_interrupts(cdev);
@@ -1793,6 +1808,18 @@ static netdev_tx_t m_can_start_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
}
+static enum hrtimer_restart hrtimer_callback(struct hrtimer *timer)
+{
+ struct m_can_classdev *cdev = container_of(timer, struct
+ m_can_classdev, hrtimer);
+
+ m_can_isr(0, cdev->net);
+
+ hrtimer_forward_now(timer, ms_to_ktime(HRTIMER_POLL_INTERVAL_MS));
+
+ return HRTIMER_RESTART;
+}
+
static int m_can_open(struct net_device *dev)
{
struct m_can_classdev *cdev = netdev_priv(dev);
@@ -1831,7 +1858,7 @@ static int m_can_open(struct net_device *dev)
err = request_threaded_irq(dev->irq, NULL, m_can_isr,
IRQF_ONESHOT,
dev->name, dev);
- } else {
+ } else if (dev->irq) {
err = request_irq(dev->irq, m_can_isr, IRQF_SHARED, dev->name,
dev);
}
@@ -2027,6 +2054,9 @@ int m_can_class_register(struct m_can_classdev *cdev)
goto clk_disable;
}
+ if (!cdev->net->irq)
+ cdev->hrtimer.function = &hrtimer_callback;
+
ret = m_can_dev_setup(cdev);
if (ret)
goto rx_offload_del;
diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h
index a839dc71dc9b..2ac18ac867a4 100644
--- a/drivers/net/can/m_can/m_can.h
+++ b/drivers/net/can/m_can/m_can.h
@@ -15,6 +15,7 @@
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/freezer.h>
+#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
@@ -93,6 +94,8 @@ struct m_can_classdev {
int is_peripheral;
struct mram_cfg mcfg[MRAM_CFG_NUM];
+
+ struct hrtimer hrtimer;
};
struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, int sizeof_priv);
diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c
index 94dc82644113..cdb28d6a092c 100644
--- a/drivers/net/can/m_can/m_can_platform.c
+++ b/drivers/net/can/m_can/m_can_platform.c
@@ -5,6 +5,7 @@
//
// Copyright (C) 2018-19 Texas Instruments Incorporated - http://www.ti.com/
+#include <linux/hrtimer.h>
#include <linux/phy/phy.h>
#include <linux/platform_device.h>
@@ -82,7 +83,7 @@ static int m_can_plat_probe(struct platform_device *pdev)
void __iomem *addr;
void __iomem *mram_addr;
struct phy *transceiver;
- int irq, ret = 0;
+ int irq = 0, ret = 0;
mcan_class = m_can_class_allocate_dev(&pdev->dev,
sizeof(struct m_can_plat_priv));
@@ -96,12 +97,24 @@ static int m_can_plat_probe(struct platform_device *pdev)
goto probe_fail;
addr = devm_platform_ioremap_resource_byname(pdev, "m_can");
- irq = platform_get_irq_byname(pdev, "int0");
- if (IS_ERR(addr) || irq < 0) {
- ret = -EINVAL;
+ if (IS_ERR(addr)) {
+ ret = PTR_ERR(addr);
goto probe_fail;
}
+ if (device_property_present(mcan_class->dev, "interrupts") ||
+ device_property_present(mcan_class->dev, "interrupt-names")) {
+ irq = platform_get_irq_byname(pdev, "int0");
+ if (irq < 0) {
+ ret = irq;
+ goto probe_fail;
+ }
+ } else {
+ dev_dbg(mcan_class->dev, "Polling enabled, initialize hrtimer");
+ hrtimer_init(&mcan_class->hrtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_PINNED);
+ }
+
/* message ram could be shared */
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "message_ram");
if (!res) {
diff --git a/drivers/net/can/sja1000/ems_pci.c b/drivers/net/can/sja1000/ems_pci.c
index c56e27223e5f..ac86640998a8 100644
--- a/drivers/net/can/sja1000/ems_pci.c
+++ b/drivers/net/can/sja1000/ems_pci.c
@@ -148,7 +148,7 @@ static void ems_pci_v1_write_reg(const struct sja1000_priv *priv,
static void ems_pci_v1_post_irq(const struct sja1000_priv *priv)
{
- struct ems_pci_card *card = (struct ems_pci_card *)priv->priv;
+ struct ems_pci_card *card = priv->priv;
/* reset int flag of pita */
writel(PITA2_ICR_INT0_EN | PITA2_ICR_INT0,
@@ -168,7 +168,7 @@ static void ems_pci_v2_write_reg(const struct sja1000_priv *priv,
static void ems_pci_v2_post_irq(const struct sja1000_priv *priv)
{
- struct ems_pci_card *card = (struct ems_pci_card *)priv->priv;
+ struct ems_pci_card *card = priv->priv;
writel(PLX_ICSR_ENA_CLR, card->conf_addr + PLX_ICSR);
}
@@ -186,7 +186,7 @@ static void ems_pci_v3_write_reg(const struct sja1000_priv *priv,
static void ems_pci_v3_post_irq(const struct sja1000_priv *priv)
{
- struct ems_pci_card *card = (struct ems_pci_card *)priv->priv;
+ struct ems_pci_card *card = priv->priv;
writel(ASIX_LINTSR_INT0AC, card->conf_addr + ASIX_LINTSR);
}
diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c
index a0f7bcec719c..39a63b7313a4 100644
--- a/drivers/net/can/usb/ucan.c
+++ b/drivers/net/can/usb/ucan.c
@@ -284,7 +284,7 @@ struct ucan_priv {
*/
spinlock_t echo_skb_lock;
- /* usb device information information */
+ /* usb device information */
u8 intf_index;
u8 in_ep_addr;
u8 out_ep_addr;
diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 4d3283db3a13..abe58f103043 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -30,6 +30,7 @@
#include <linux/can/error.h>
#include <linux/phy/phy.h>
#include <linux/pm_runtime.h>
+#include <linux/reset.h>
#define DRIVER_NAME "xilinx_can"
@@ -200,6 +201,7 @@ struct xcan_devtype_data {
* @can_clk: Pointer to struct clk
* @devtype: Device type specific constants
* @transceiver: Optional pointer to associated CAN transceiver
+ * @rstc: Pointer to reset control
*/
struct xcan_priv {
struct can_priv can;
@@ -218,6 +220,7 @@ struct xcan_priv {
struct clk *can_clk;
struct xcan_devtype_data devtype;
struct phy *transceiver;
+ struct reset_control *rstc;
};
/* CAN Bittiming constants as per Xilinx CAN specs */
@@ -1799,6 +1802,16 @@ static int xcan_probe(struct platform_device *pdev)
priv->can.do_get_berr_counter = xcan_get_berr_counter;
priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK |
CAN_CTRLMODE_BERR_REPORTING;
+ priv->rstc = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
+ if (IS_ERR(priv->rstc)) {
+ dev_err(&pdev->dev, "Cannot get CAN reset.\n");
+ ret = PTR_ERR(priv->rstc);
+ goto err_free;
+ }
+
+ ret = reset_control_reset(priv->rstc);
+ if (ret)
+ goto err_free;
if (devtype->cantype == XAXI_CANFD) {
priv->can.data_bittiming_const =
@@ -1827,7 +1840,7 @@ static int xcan_probe(struct platform_device *pdev)
/* Get IRQ for the device */
ret = platform_get_irq(pdev, 0);
if (ret < 0)
- goto err_free;
+ goto err_reset;
ndev->irq = ret;
@@ -1843,21 +1856,21 @@ static int xcan_probe(struct platform_device *pdev)
if (IS_ERR(priv->can_clk)) {
ret = dev_err_probe(&pdev->dev, PTR_ERR(priv->can_clk),
"device clock not found\n");
- goto err_free;
+ goto err_reset;
}
priv->bus_clk = devm_clk_get(&pdev->dev, devtype->bus_clk_name);
if (IS_ERR(priv->bus_clk)) {
ret = dev_err_probe(&pdev->dev, PTR_ERR(priv->bus_clk),
"bus clock not found\n");
- goto err_free;
+ goto err_reset;
}
transceiver = devm_phy_optional_get(&pdev->dev, NULL);
if (IS_ERR(transceiver)) {
ret = PTR_ERR(transceiver);
dev_err_probe(&pdev->dev, ret, "failed to get phy\n");
- goto err_free;
+ goto err_reset;
}
priv->transceiver = transceiver;
@@ -1904,6 +1917,8 @@ static int xcan_probe(struct platform_device *pdev)
err_disableclks:
pm_runtime_put(priv->dev);
pm_runtime_disable(&pdev->dev);
+err_reset:
+ reset_control_assert(priv->rstc);
err_free:
free_candev(ndev);
err:
@@ -1920,9 +1935,11 @@ err:
static void xcan_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
+ struct xcan_priv *priv = netdev_priv(ndev);
unregister_candev(ndev);
pm_runtime_disable(&pdev->dev);
+ reset_control_assert(priv->rstc);
free_candev(ndev);
}
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 3464ce5e7470..4e27dc913cf7 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1393,12 +1393,6 @@ static void b53_phylink_get_caps(struct dsa_switch *ds, int port,
/* Get the implementation specific capabilities */
if (dev->ops->phylink_get_caps)
dev->ops->phylink_get_caps(dev, port, config);
-
- /* This driver does not make use of the speed, duplex, pause or the
- * advertisement in its mac_config, so it is safe to mark this driver
- * as non-legacy.
- */
- config->legacy_pre_march2020 = false;
}
static struct phylink_pcs *b53_phylink_mac_select_pcs(struct dsa_switch *ds,
diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c
index 8b422b298cd5..4d55d8d18376 100644
--- a/drivers/net/dsa/b53/b53_mdio.c
+++ b/drivers/net/dsa/b53/b53_mdio.c
@@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <linux/phy.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/delay.h>
#include <linux/brcmphy.h>
#include <linux/rtnetlink.h>
diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c
index 5db1ed26f03a..5e39641ea887 100644
--- a/drivers/net/dsa/b53/b53_mmap.c
+++ b/drivers/net/dsa/b53/b53_mmap.c
@@ -19,6 +19,7 @@
#include <linux/bits.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/io.h>
#include <linux/platform_device.h>
#include <linux/platform_data/b53.h>
diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index af50001ccdd4..720f4e4ed0b0 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -11,7 +11,6 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/of_mdio.h>
#include <linux/platform_device.h>
#include <linux/bitops.h>
diff --git a/drivers/net/dsa/hirschmann/hellcreek_ptp.c b/drivers/net/dsa/hirschmann/hellcreek_ptp.c
index 3e44ccb7db84..5249a1c2a80b 100644
--- a/drivers/net/dsa/hirschmann/hellcreek_ptp.c
+++ b/drivers/net/dsa/hirschmann/hellcreek_ptp.c
@@ -9,6 +9,7 @@
* Kurt Kanzenbach <kurt@linutronix.de>
*/
+#include <linux/of.h>
#include <linux/ptp_clock_kernel.h>
#include "hellcreek.h"
#include "hellcreek_ptp.h"
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index ff76444057d2..ee67adeb2cdb 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -8,6 +8,7 @@
#include <linux/regmap.h>
#include <linux/mutex.h>
#include <linux/mii.h>
+#include <linux/of.h>
#include <linux/phy.h>
#include <linux/if_bridge.h>
#include <linux/if_vlan.h>
@@ -1290,12 +1291,6 @@ static void lan9303_phylink_get_caps(struct dsa_switch *ds, int port,
__set_bit(PHY_INTERFACE_MODE_GMII,
config->supported_interfaces);
}
-
- /* This driver does not make use of the speed, duplex, pause or the
- * advertisement in its mac_config, so it is safe to mark this driver
- * as non-legacy.
- */
- config->legacy_pre_march2020 = false;
}
static void lan9303_phylink_mac_link_up(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/microchip/ksz8863_smi.c b/drivers/net/dsa/microchip/ksz8863_smi.c
index fd6e2e69a42a..5711a59e2ac9 100644
--- a/drivers/net/dsa/microchip/ksz8863_smi.c
+++ b/drivers/net/dsa/microchip/ksz8863_smi.c
@@ -5,6 +5,9 @@
* Copyright (C) 2019 Pengutronix, Michael Grzeschik <kernel@pengutronix.de>
*/
+#include <linux/mod_devicetable.h>
+#include <linux/property.h>
+
#include "ksz8.h"
#include "ksz_common.h"
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index b18cd170ec06..2414c1f8bc2d 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -18,8 +18,8 @@
#include <linux/if_vlan.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
+#include <linux/of.h>
#include <linux/of_mdio.h>
-#include <linux/of_device.h>
#include <linux/of_net.h>
#include <linux/micrel_phy.h>
#include <net/dsa.h>
@@ -1624,8 +1624,6 @@ static void ksz_phylink_get_caps(struct dsa_switch *ds, int port,
{
struct ksz_device *dev = ds->priv;
- config->legacy_pre_march2020 = false;
-
if (dev->info->supports_mii[port])
__set_bit(PHY_INTERFACE_MODE_MII, config->supported_interfaces);
diff --git a/drivers/net/dsa/mt7530-mmio.c b/drivers/net/dsa/mt7530-mmio.c
index 1a3d4b692f34..0a6a2fe34e64 100644
--- a/drivers/net/dsa/mt7530-mmio.c
+++ b/drivers/net/dsa/mt7530-mmio.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/reset.h>
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 38b3c6dda386..8fbda739c1b3 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -2949,12 +2949,6 @@ static void mt753x_phylink_get_caps(struct dsa_switch *ds, int port,
config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
MAC_10 | MAC_100 | MAC_1000FD;
- /* This driver does not make use of the speed, duplex, pause or the
- * advertisement in its mac_config, so it is safe to mark this driver
- * as non-legacy.
- */
- config->legacy_pre_march2020 = false;
-
priv->info->mac_port_get_caps(ds, port, config);
}
diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
index 1409e691ab77..a9a9651187db 100644
--- a/drivers/net/dsa/mv88e6xxx/Makefile
+++ b/drivers/net/dsa/mv88e6xxx/Makefile
@@ -9,6 +9,9 @@ mv88e6xxx-objs += global2.o
mv88e6xxx-objs += global2_avb.o
mv88e6xxx-objs += global2_scratch.o
mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += hwtstamp.o
+mv88e6xxx-objs += pcs-6185.o
+mv88e6xxx-objs += pcs-6352.o
+mv88e6xxx-objs += pcs-639x.o
mv88e6xxx-objs += phy.o
mv88e6xxx-objs += port.o
mv88e6xxx-objs += port_hidden.o
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index c7d51a539451..bce9c9e43752 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -23,7 +23,7 @@
#include <linux/list.h>
#include <linux/mdio.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/of_mdio.h>
#include <linux/platform_data/mv88e6xxx.h>
@@ -499,81 +499,6 @@ static int mv88e6xxx_port_ppu_updates(struct mv88e6xxx_chip *chip, int port)
return !!(reg & MV88E6XXX_PORT_STS_PHY_DETECT);
}
-static int mv88e6xxx_serdes_pcs_get_state(struct dsa_switch *ds, int port,
- struct phylink_link_state *state)
-{
- struct mv88e6xxx_chip *chip = ds->priv;
- int lane;
- int err;
-
- mv88e6xxx_reg_lock(chip);
- lane = mv88e6xxx_serdes_get_lane(chip, port);
- if (lane >= 0 && chip->info->ops->serdes_pcs_get_state)
- err = chip->info->ops->serdes_pcs_get_state(chip, port, lane,
- state);
- else
- err = -EOPNOTSUPP;
- mv88e6xxx_reg_unlock(chip);
-
- return err;
-}
-
-static int mv88e6xxx_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
- unsigned int mode,
- phy_interface_t interface,
- const unsigned long *advertise)
-{
- const struct mv88e6xxx_ops *ops = chip->info->ops;
- int lane;
-
- if (ops->serdes_pcs_config) {
- lane = mv88e6xxx_serdes_get_lane(chip, port);
- if (lane >= 0)
- return ops->serdes_pcs_config(chip, port, lane, mode,
- interface, advertise);
- }
-
- return 0;
-}
-
-static void mv88e6xxx_serdes_pcs_an_restart(struct dsa_switch *ds, int port)
-{
- struct mv88e6xxx_chip *chip = ds->priv;
- const struct mv88e6xxx_ops *ops;
- int err = 0;
- int lane;
-
- ops = chip->info->ops;
-
- if (ops->serdes_pcs_an_restart) {
- mv88e6xxx_reg_lock(chip);
- lane = mv88e6xxx_serdes_get_lane(chip, port);
- if (lane >= 0)
- err = ops->serdes_pcs_an_restart(chip, port, lane);
- mv88e6xxx_reg_unlock(chip);
-
- if (err)
- dev_err(ds->dev, "p%d: failed to restart AN\n", port);
- }
-}
-
-static int mv88e6xxx_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
- unsigned int mode,
- int speed, int duplex)
-{
- const struct mv88e6xxx_ops *ops = chip->info->ops;
- int lane;
-
- if (!phylink_autoneg_inband(mode) && ops->serdes_pcs_link_up) {
- lane = mv88e6xxx_serdes_get_lane(chip, port);
- if (lane >= 0)
- return ops->serdes_pcs_link_up(chip, port, lane,
- speed, duplex);
- }
-
- return 0;
-}
-
static const u8 mv88e6185_phy_interface_modes[] = {
[MV88E6185_PORT_STS_CMODE_GMII_FD] = PHY_INTERFACE_MODE_GMII,
[MV88E6185_PORT_STS_CMODE_MII_100_FD_PS] = PHY_INTERFACE_MODE_MII,
@@ -853,6 +778,20 @@ static void mv88e6xxx_get_caps(struct dsa_switch *ds, int port,
}
}
+static struct phylink_pcs *mv88e6xxx_mac_select_pcs(struct dsa_switch *ds,
+ int port,
+ phy_interface_t interface)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+ struct phylink_pcs *pcs = ERR_PTR(-EOPNOTSUPP);
+
+ if (chip->info->ops->pcs_ops)
+ pcs = chip->info->ops->pcs_ops->pcs_select(chip, port,
+ interface);
+
+ return pcs;
+}
+
static int mv88e6xxx_mac_prepare(struct dsa_switch *ds, int port,
unsigned int mode, phy_interface_t interface)
{
@@ -889,16 +828,6 @@ static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port,
state->interface);
if (err && err != -EOPNOTSUPP)
goto err_unlock;
-
- err = mv88e6xxx_serdes_pcs_config(chip, port, mode,
- state->interface,
- state->advertising);
- /* FIXME: we should restart negotiation if something changed -
- * which is something we get if we convert to using phylinks
- * PCS operations.
- */
- if (err > 0)
- err = 0;
}
err_unlock:
@@ -982,17 +911,6 @@ static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port,
*/
if (!mv88e6xxx_port_ppu_updates(chip, port) ||
mode == MLO_AN_FIXED) {
- /* FIXME: for an automedia port, should we force the link
- * down here - what if the link comes up due to "other" media
- * while we're bringing the port up, how is the exclusivity
- * handled in the Marvell hardware? E.g. port 2 on 88E6390
- * shared between internal PHY and Serdes.
- */
- err = mv88e6xxx_serdes_pcs_link_up(chip, port, mode, speed,
- duplex);
- if (err)
- goto error;
-
if (ops->port_set_speed_duplex) {
err = ops->port_set_speed_duplex(chip, port,
speed, duplex);
@@ -3163,102 +3081,6 @@ static int mv88e6xxx_setup_egress_floods(struct mv88e6xxx_chip *chip, int port)
return 0;
}
-static irqreturn_t mv88e6xxx_serdes_irq_thread_fn(int irq, void *dev_id)
-{
- struct mv88e6xxx_port *mvp = dev_id;
- struct mv88e6xxx_chip *chip = mvp->chip;
- irqreturn_t ret = IRQ_NONE;
- int port = mvp->port;
- int lane;
-
- mv88e6xxx_reg_lock(chip);
- lane = mv88e6xxx_serdes_get_lane(chip, port);
- if (lane >= 0)
- ret = mv88e6xxx_serdes_irq_status(chip, port, lane);
- mv88e6xxx_reg_unlock(chip);
-
- return ret;
-}
-
-static int mv88e6xxx_serdes_irq_request(struct mv88e6xxx_chip *chip, int port,
- int lane)
-{
- struct mv88e6xxx_port *dev_id = &chip->ports[port];
- unsigned int irq;
- int err;
-
- /* Nothing to request if this SERDES port has no IRQ */
- irq = mv88e6xxx_serdes_irq_mapping(chip, port);
- if (!irq)
- return 0;
-
- snprintf(dev_id->serdes_irq_name, sizeof(dev_id->serdes_irq_name),
- "mv88e6xxx-%s-serdes-%d", dev_name(chip->dev), port);
-
- /* Requesting the IRQ will trigger IRQ callbacks, so release the lock */
- mv88e6xxx_reg_unlock(chip);
- err = request_threaded_irq(irq, NULL, mv88e6xxx_serdes_irq_thread_fn,
- IRQF_ONESHOT, dev_id->serdes_irq_name,
- dev_id);
- mv88e6xxx_reg_lock(chip);
- if (err)
- return err;
-
- dev_id->serdes_irq = irq;
-
- return mv88e6xxx_serdes_irq_enable(chip, port, lane);
-}
-
-static int mv88e6xxx_serdes_irq_free(struct mv88e6xxx_chip *chip, int port,
- int lane)
-{
- struct mv88e6xxx_port *dev_id = &chip->ports[port];
- unsigned int irq = dev_id->serdes_irq;
- int err;
-
- /* Nothing to free if no IRQ has been requested */
- if (!irq)
- return 0;
-
- err = mv88e6xxx_serdes_irq_disable(chip, port, lane);
-
- /* Freeing the IRQ will trigger IRQ callbacks, so release the lock */
- mv88e6xxx_reg_unlock(chip);
- free_irq(irq, dev_id);
- mv88e6xxx_reg_lock(chip);
-
- dev_id->serdes_irq = 0;
-
- return err;
-}
-
-static int mv88e6xxx_serdes_power(struct mv88e6xxx_chip *chip, int port,
- bool on)
-{
- int lane;
- int err;
-
- lane = mv88e6xxx_serdes_get_lane(chip, port);
- if (lane < 0)
- return 0;
-
- if (on) {
- err = mv88e6xxx_serdes_power_up(chip, port, lane);
- if (err)
- return err;
-
- err = mv88e6xxx_serdes_irq_request(chip, port, lane);
- } else {
- err = mv88e6xxx_serdes_irq_free(chip, port, lane);
- if (err)
- return err;
-
- err = mv88e6xxx_serdes_power_down(chip, port, lane);
- }
-
- return err;
-}
-
static int mv88e6xxx_set_egress_port(struct mv88e6xxx_chip *chip,
enum mv88e6xxx_egress_direction direction,
int port)
@@ -3322,56 +3144,17 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
{
struct device_node *phy_handle = NULL;
struct dsa_switch *ds = chip->ds;
- phy_interface_t mode;
struct dsa_port *dp;
- int tx_amp, speed;
+ int tx_amp;
int err;
u16 reg;
chip->ports[port].chip = chip;
chip->ports[port].port = port;
- dp = dsa_to_port(ds, port);
-
- /* MAC Forcing register: don't force link, speed, duplex or flow control
- * state to any particular values on physical ports, but force the CPU
- * port and all DSA ports to their maximum bandwidth and full duplex.
- */
- if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
- struct phylink_config pl_config = {};
- unsigned long caps;
-
- chip->info->ops->phylink_get_caps(chip, port, &pl_config);
-
- caps = pl_config.mac_capabilities;
-
- if (chip->info->ops->port_max_speed_mode)
- mode = chip->info->ops->port_max_speed_mode(chip, port);
- else
- mode = PHY_INTERFACE_MODE_NA;
-
- if (caps & MAC_10000FD)
- speed = SPEED_10000;
- else if (caps & MAC_5000FD)
- speed = SPEED_5000;
- else if (caps & MAC_2500FD)
- speed = SPEED_2500;
- else if (caps & MAC_1000)
- speed = SPEED_1000;
- else if (caps & MAC_100)
- speed = SPEED_100;
- else
- speed = SPEED_10;
-
- err = mv88e6xxx_port_setup_mac(chip, port, LINK_FORCED_UP,
- speed, DUPLEX_FULL,
- PAUSE_OFF, mode);
- } else {
- err = mv88e6xxx_port_setup_mac(chip, port, LINK_UNFORCED,
- SPEED_UNFORCED, DUPLEX_UNFORCED,
- PAUSE_ON,
- PHY_INTERFACE_MODE_NA);
- }
+ err = mv88e6xxx_port_setup_mac(chip, port, LINK_UNFORCED,
+ SPEED_UNFORCED, DUPLEX_UNFORCED,
+ PAUSE_ON, PHY_INTERFACE_MODE_NA);
if (err)
return err;
@@ -3548,6 +3331,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
}
if (chip->info->ops->serdes_set_tx_amplitude) {
+ dp = dsa_to_port(ds, port);
if (dp)
phy_handle = of_parse_phandle(dp->dn, "phy-handle", 0);
@@ -3621,29 +3405,6 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
return ret;
}
-static int mv88e6xxx_port_enable(struct dsa_switch *ds, int port,
- struct phy_device *phydev)
-{
- struct mv88e6xxx_chip *chip = ds->priv;
- int err;
-
- mv88e6xxx_reg_lock(chip);
- err = mv88e6xxx_serdes_power(chip, port, true);
- mv88e6xxx_reg_unlock(chip);
-
- return err;
-}
-
-static void mv88e6xxx_port_disable(struct dsa_switch *ds, int port)
-{
- struct mv88e6xxx_chip *chip = ds->priv;
-
- mv88e6xxx_reg_lock(chip);
- if (mv88e6xxx_serdes_power(chip, port, false))
- dev_err(chip->dev, "failed to power off SERDES\n");
- mv88e6xxx_reg_unlock(chip);
-}
-
static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds,
unsigned int ageing_time)
{
@@ -4106,12 +3867,26 @@ out_mdios:
static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port)
{
+ struct mv88e6xxx_chip *chip = ds->priv;
+ int err;
+
+ if (chip->info->ops->pcs_ops->pcs_init) {
+ err = chip->info->ops->pcs_ops->pcs_init(chip, port);
+ if (err)
+ return err;
+ }
+
return mv88e6xxx_setup_devlink_regions_port(ds, port);
}
static void mv88e6xxx_port_teardown(struct dsa_switch *ds, int port)
{
+ struct mv88e6xxx_chip *chip = ds->priv;
+
mv88e6xxx_teardown_devlink_regions_port(ds, port);
+
+ if (chip->info->ops->pcs_ops->pcs_teardown)
+ chip->info->ops->pcs_ops->pcs_teardown(chip, port);
}
static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds)
@@ -4228,15 +4003,13 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
.stats_get_strings = mv88e6095_stats_get_strings,
.stats_get_stats = mv88e6095_stats_get_stats,
.mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
- .serdes_power = mv88e6185_serdes_power,
- .serdes_get_lane = mv88e6185_serdes_get_lane,
- .serdes_pcs_get_state = mv88e6185_serdes_pcs_get_state,
.ppu_enable = mv88e6185_g1_ppu_enable,
.ppu_disable = mv88e6185_g1_ppu_disable,
.reset = mv88e6185_g1_reset,
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
.phylink_get_caps = mv88e6095_phylink_get_caps,
+ .pcs_ops = &mv88e6185_pcs_ops,
.set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
@@ -4274,18 +4047,14 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
.mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
- .serdes_power = mv88e6185_serdes_power,
- .serdes_get_lane = mv88e6185_serdes_get_lane,
- .serdes_pcs_get_state = mv88e6185_serdes_pcs_get_state,
.serdes_irq_mapping = mv88e6390_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6097_serdes_irq_enable,
- .serdes_irq_status = mv88e6097_serdes_irq_status,
.pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.rmu_disable = mv88e6085_g1_rmu_disable,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.phylink_get_caps = mv88e6095_phylink_get_caps,
+ .pcs_ops = &mv88e6185_pcs_ops,
.stu_getnext = mv88e6352_g1_stu_getnext,
.stu_loadpurge = mv88e6352_g1_stu_loadpurge,
.set_max_frame_size = mv88e6185_g1_set_max_frame_size,
@@ -4421,16 +4190,8 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.stu_getnext = mv88e6352_g1_stu_getnext,
.stu_loadpurge = mv88e6352_g1_stu_loadpurge,
- .serdes_power = mv88e6390_serdes_power,
.serdes_get_lane = mv88e6341_serdes_get_lane,
- /* Check status register pause & lpa register */
- .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6390_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up,
.serdes_irq_mapping = mv88e6390_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6390_serdes_irq_enable,
- .serdes_irq_status = mv88e6390_serdes_irq_status,
.gpio_ops = &mv88e6352_gpio_ops,
.serdes_get_sset_count = mv88e6390_serdes_get_sset_count,
.serdes_get_strings = mv88e6390_serdes_get_strings,
@@ -4438,6 +4199,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
.serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
.serdes_get_regs = mv88e6390_serdes_get_regs,
.phylink_get_caps = mv88e6341_phylink_get_caps,
+ .pcs_ops = &mv88e6390_pcs_ops,
};
static const struct mv88e6xxx_ops mv88e6161_ops = {
@@ -4618,16 +4380,11 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.stu_getnext = mv88e6352_g1_stu_getnext,
.stu_loadpurge = mv88e6352_g1_stu_loadpurge,
- .serdes_get_lane = mv88e6352_serdes_get_lane,
- .serdes_pcs_get_state = mv88e6352_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6352_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6352_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6352_serdes_pcs_link_up,
- .serdes_power = mv88e6352_serdes_power,
.serdes_get_regs_len = mv88e6352_serdes_get_regs_len,
.serdes_get_regs = mv88e6352_serdes_get_regs,
.gpio_ops = &mv88e6352_gpio_ops,
.phylink_get_caps = mv88e6352_phylink_get_caps,
+ .pcs_ops = &mv88e6352_pcs_ops,
};
static const struct mv88e6xxx_ops mv88e6175_ops = {
@@ -4723,20 +4480,13 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.stu_getnext = mv88e6352_g1_stu_getnext,
.stu_loadpurge = mv88e6352_g1_stu_loadpurge,
- .serdes_get_lane = mv88e6352_serdes_get_lane,
- .serdes_pcs_get_state = mv88e6352_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6352_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6352_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6352_serdes_pcs_link_up,
- .serdes_power = mv88e6352_serdes_power,
.serdes_irq_mapping = mv88e6352_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6352_serdes_irq_enable,
- .serdes_irq_status = mv88e6352_serdes_irq_status,
.serdes_get_regs_len = mv88e6352_serdes_get_regs_len,
.serdes_get_regs = mv88e6352_serdes_get_regs,
.serdes_set_tx_amplitude = mv88e6352_serdes_set_tx_amplitude,
.gpio_ops = &mv88e6352_gpio_ops,
.phylink_get_caps = mv88e6352_phylink_get_caps,
+ .pcs_ops = &mv88e6352_pcs_ops,
};
static const struct mv88e6xxx_ops mv88e6185_ops = {
@@ -4766,9 +4516,6 @@ static const struct mv88e6xxx_ops mv88e6185_ops = {
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
.mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
- .serdes_power = mv88e6185_serdes_power,
- .serdes_get_lane = mv88e6185_serdes_get_lane,
- .serdes_pcs_get_state = mv88e6185_serdes_pcs_get_state,
.set_cascade_port = mv88e6185_g1_set_cascade_port,
.ppu_enable = mv88e6185_g1_ppu_enable,
.ppu_disable = mv88e6185_g1_ppu_disable,
@@ -4776,6 +4523,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = {
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
.phylink_get_caps = mv88e6185_phylink_get_caps,
+ .pcs_ops = &mv88e6185_pcs_ops,
.set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
@@ -4826,22 +4574,15 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.stu_getnext = mv88e6390_g1_stu_getnext,
.stu_loadpurge = mv88e6390_g1_stu_loadpurge,
- .serdes_power = mv88e6390_serdes_power,
.serdes_get_lane = mv88e6390_serdes_get_lane,
- /* Check status register pause & lpa register */
- .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6390_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up,
.serdes_irq_mapping = mv88e6390_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6390_serdes_irq_enable,
- .serdes_irq_status = mv88e6390_serdes_irq_status,
.serdes_get_strings = mv88e6390_serdes_get_strings,
.serdes_get_stats = mv88e6390_serdes_get_stats,
.serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
.serdes_get_regs = mv88e6390_serdes_get_regs,
.gpio_ops = &mv88e6352_gpio_ops,
.phylink_get_caps = mv88e6390_phylink_get_caps,
+ .pcs_ops = &mv88e6390_pcs_ops,
};
static const struct mv88e6xxx_ops mv88e6190x_ops = {
@@ -4891,22 +4632,15 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.stu_getnext = mv88e6390_g1_stu_getnext,
.stu_loadpurge = mv88e6390_g1_stu_loadpurge,
- .serdes_power = mv88e6390_serdes_power,
.serdes_get_lane = mv88e6390x_serdes_get_lane,
- /* Check status register pause & lpa register */
- .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6390_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up,
.serdes_irq_mapping = mv88e6390_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6390_serdes_irq_enable,
- .serdes_irq_status = mv88e6390_serdes_irq_status,
.serdes_get_strings = mv88e6390_serdes_get_strings,
.serdes_get_stats = mv88e6390_serdes_get_stats,
.serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
.serdes_get_regs = mv88e6390_serdes_get_regs,
.gpio_ops = &mv88e6352_gpio_ops,
.phylink_get_caps = mv88e6390x_phylink_get_caps,
+ .pcs_ops = &mv88e6390_pcs_ops,
};
static const struct mv88e6xxx_ops mv88e6191_ops = {
@@ -4954,16 +4688,8 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.stu_getnext = mv88e6390_g1_stu_getnext,
.stu_loadpurge = mv88e6390_g1_stu_loadpurge,
- .serdes_power = mv88e6390_serdes_power,
.serdes_get_lane = mv88e6390_serdes_get_lane,
- /* Check status register pause & lpa register */
- .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6390_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up,
.serdes_irq_mapping = mv88e6390_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6390_serdes_irq_enable,
- .serdes_irq_status = mv88e6390_serdes_irq_status,
.serdes_get_strings = mv88e6390_serdes_get_strings,
.serdes_get_stats = mv88e6390_serdes_get_stats,
.serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
@@ -4971,6 +4697,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
.avb_ops = &mv88e6390_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
.phylink_get_caps = mv88e6390_phylink_get_caps,
+ .pcs_ops = &mv88e6390_pcs_ops,
};
static const struct mv88e6xxx_ops mv88e6240_ops = {
@@ -5020,15 +4747,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.stu_getnext = mv88e6352_g1_stu_getnext,
.stu_loadpurge = mv88e6352_g1_stu_loadpurge,
- .serdes_get_lane = mv88e6352_serdes_get_lane,
- .serdes_pcs_get_state = mv88e6352_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6352_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6352_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6352_serdes_pcs_link_up,
- .serdes_power = mv88e6352_serdes_power,
.serdes_irq_mapping = mv88e6352_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6352_serdes_irq_enable,
- .serdes_irq_status = mv88e6352_serdes_irq_status,
.serdes_get_regs_len = mv88e6352_serdes_get_regs_len,
.serdes_get_regs = mv88e6352_serdes_get_regs,
.serdes_set_tx_amplitude = mv88e6352_serdes_set_tx_amplitude,
@@ -5036,6 +4755,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
.avb_ops = &mv88e6352_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
.phylink_get_caps = mv88e6352_phylink_get_caps,
+ .pcs_ops = &mv88e6352_pcs_ops,
};
static const struct mv88e6xxx_ops mv88e6250_ops = {
@@ -5127,16 +4847,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.stu_getnext = mv88e6390_g1_stu_getnext,
.stu_loadpurge = mv88e6390_g1_stu_loadpurge,
- .serdes_power = mv88e6390_serdes_power,
.serdes_get_lane = mv88e6390_serdes_get_lane,
- /* Check status register pause & lpa register */
- .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6390_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up,
.serdes_irq_mapping = mv88e6390_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6390_serdes_irq_enable,
- .serdes_irq_status = mv88e6390_serdes_irq_status,
.serdes_get_strings = mv88e6390_serdes_get_strings,
.serdes_get_stats = mv88e6390_serdes_get_stats,
.serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
@@ -5145,6 +4857,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
.avb_ops = &mv88e6390_avb_ops,
.ptp_ops = &mv88e6390_ptp_ops,
.phylink_get_caps = mv88e6390_phylink_get_caps,
+ .pcs_ops = &mv88e6390_pcs_ops,
};
static const struct mv88e6xxx_ops mv88e6320_ops = {
@@ -5289,16 +5002,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.stu_getnext = mv88e6352_g1_stu_getnext,
.stu_loadpurge = mv88e6352_g1_stu_loadpurge,
- .serdes_power = mv88e6390_serdes_power,
.serdes_get_lane = mv88e6341_serdes_get_lane,
- /* Check status register pause & lpa register */
- .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6390_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up,
.serdes_irq_mapping = mv88e6390_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6390_serdes_irq_enable,
- .serdes_irq_status = mv88e6390_serdes_irq_status,
.gpio_ops = &mv88e6352_gpio_ops,
.avb_ops = &mv88e6390_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
@@ -5308,6 +5013,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
.serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
.serdes_get_regs = mv88e6390_serdes_get_regs,
.phylink_get_caps = mv88e6341_phylink_get_caps,
+ .pcs_ops = &mv88e6390_pcs_ops,
};
static const struct mv88e6xxx_ops mv88e6350_ops = {
@@ -5451,15 +5157,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.stu_getnext = mv88e6352_g1_stu_getnext,
.stu_loadpurge = mv88e6352_g1_stu_loadpurge,
- .serdes_get_lane = mv88e6352_serdes_get_lane,
- .serdes_pcs_get_state = mv88e6352_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6352_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6352_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6352_serdes_pcs_link_up,
- .serdes_power = mv88e6352_serdes_power,
.serdes_irq_mapping = mv88e6352_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6352_serdes_irq_enable,
- .serdes_irq_status = mv88e6352_serdes_irq_status,
.gpio_ops = &mv88e6352_gpio_ops,
.avb_ops = &mv88e6352_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
@@ -5470,6 +5168,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
.serdes_get_regs = mv88e6352_serdes_get_regs,
.serdes_set_tx_amplitude = mv88e6352_serdes_set_tx_amplitude,
.phylink_get_caps = mv88e6352_phylink_get_caps,
+ .pcs_ops = &mv88e6352_pcs_ops,
};
static const struct mv88e6xxx_ops mv88e6390_ops = {
@@ -5520,16 +5219,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.stu_getnext = mv88e6390_g1_stu_getnext,
.stu_loadpurge = mv88e6390_g1_stu_loadpurge,
- .serdes_power = mv88e6390_serdes_power,
.serdes_get_lane = mv88e6390_serdes_get_lane,
- /* Check status register pause & lpa register */
- .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6390_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up,
.serdes_irq_mapping = mv88e6390_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6390_serdes_irq_enable,
- .serdes_irq_status = mv88e6390_serdes_irq_status,
.gpio_ops = &mv88e6352_gpio_ops,
.avb_ops = &mv88e6390_avb_ops,
.ptp_ops = &mv88e6390_ptp_ops,
@@ -5539,6 +5230,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
.serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
.serdes_get_regs = mv88e6390_serdes_get_regs,
.phylink_get_caps = mv88e6390_phylink_get_caps,
+ .pcs_ops = &mv88e6390_pcs_ops,
};
static const struct mv88e6xxx_ops mv88e6390x_ops = {
@@ -5589,15 +5281,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.stu_getnext = mv88e6390_g1_stu_getnext,
.stu_loadpurge = mv88e6390_g1_stu_loadpurge,
- .serdes_power = mv88e6390_serdes_power,
.serdes_get_lane = mv88e6390x_serdes_get_lane,
- .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6390_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up,
.serdes_irq_mapping = mv88e6390_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6390_serdes_irq_enable,
- .serdes_irq_status = mv88e6390_serdes_irq_status,
.serdes_get_sset_count = mv88e6390_serdes_get_sset_count,
.serdes_get_strings = mv88e6390_serdes_get_strings,
.serdes_get_stats = mv88e6390_serdes_get_stats,
@@ -5607,11 +5292,11 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
.avb_ops = &mv88e6390_avb_ops,
.ptp_ops = &mv88e6390_ptp_ops,
.phylink_get_caps = mv88e6390x_phylink_get_caps,
+ .pcs_ops = &mv88e6390_pcs_ops,
};
static const struct mv88e6xxx_ops mv88e6393x_ops = {
/* MV88E6XXX_FAMILY_6393 */
- .setup_errata = mv88e6393x_serdes_setup_errata,
.irl_init_all = mv88e6390_g2_irl_init_all,
.get_eeprom = mv88e6xxx_g2_get_eeprom8,
.set_eeprom = mv88e6xxx_g2_set_eeprom8,
@@ -5661,20 +5346,14 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = {
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.stu_getnext = mv88e6390_g1_stu_getnext,
.stu_loadpurge = mv88e6390_g1_stu_loadpurge,
- .serdes_power = mv88e6393x_serdes_power,
.serdes_get_lane = mv88e6393x_serdes_get_lane,
- .serdes_pcs_get_state = mv88e6393x_serdes_pcs_get_state,
- .serdes_pcs_config = mv88e6390_serdes_pcs_config,
- .serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart,
- .serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up,
.serdes_irq_mapping = mv88e6390_serdes_irq_mapping,
- .serdes_irq_enable = mv88e6393x_serdes_irq_enable,
- .serdes_irq_status = mv88e6393x_serdes_irq_status,
/* TODO: serdes stats */
.gpio_ops = &mv88e6352_gpio_ops,
.avb_ops = &mv88e6390_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
.phylink_get_caps = mv88e6393x_phylink_get_caps,
+ .pcs_ops = &mv88e6393x_pcs_ops,
};
static const struct mv88e6xxx_info mv88e6xxx_table[] = {
@@ -7106,18 +6785,15 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.port_setup = mv88e6xxx_port_setup,
.port_teardown = mv88e6xxx_port_teardown,
.phylink_get_caps = mv88e6xxx_get_caps,
- .phylink_mac_link_state = mv88e6xxx_serdes_pcs_get_state,
+ .phylink_mac_select_pcs = mv88e6xxx_mac_select_pcs,
.phylink_mac_prepare = mv88e6xxx_mac_prepare,
.phylink_mac_config = mv88e6xxx_mac_config,
.phylink_mac_finish = mv88e6xxx_mac_finish,
- .phylink_mac_an_restart = mv88e6xxx_serdes_pcs_an_restart,
.phylink_mac_link_down = mv88e6xxx_mac_link_down,
.phylink_mac_link_up = mv88e6xxx_mac_link_up,
.get_strings = mv88e6xxx_get_strings,
.get_ethtool_stats = mv88e6xxx_get_ethtool_stats,
.get_sset_count = mv88e6xxx_get_sset_count,
- .port_enable = mv88e6xxx_port_enable,
- .port_disable = mv88e6xxx_port_disable,
.port_max_mtu = mv88e6xxx_get_max_mtu,
.port_change_mtu = mv88e6xxx_change_mtu,
.get_mac_eee = mv88e6xxx_get_mac_eee,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 0ad34b2d8913..44383a03ef2f 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -205,6 +205,7 @@ struct mv88e6xxx_irq_ops;
struct mv88e6xxx_gpio_ops;
struct mv88e6xxx_avb_ops;
struct mv88e6xxx_ptp_ops;
+struct mv88e6xxx_pcs_ops;
struct mv88e6xxx_irq {
u16 masked;
@@ -285,9 +286,8 @@ struct mv88e6xxx_port {
u8 cmode;
bool mirror_ingress;
bool mirror_egress;
- unsigned int serdes_irq;
- char serdes_irq_name[64];
struct devlink_region *region;
+ void *pcs_private;
/* MacAuth Bypass control flag */
bool mab;
@@ -590,31 +590,12 @@ struct mv88e6xxx_ops {
int (*mgmt_rsvd2cpu)(struct mv88e6xxx_chip *chip);
- /* Power on/off a SERDES interface */
- int (*serdes_power)(struct mv88e6xxx_chip *chip, int port, int lane,
- bool up);
-
/* SERDES lane mapping */
int (*serdes_get_lane)(struct mv88e6xxx_chip *chip, int port);
- int (*serdes_pcs_get_state)(struct mv88e6xxx_chip *chip, int port,
- int lane, struct phylink_link_state *state);
- int (*serdes_pcs_config)(struct mv88e6xxx_chip *chip, int port,
- int lane, unsigned int mode,
- phy_interface_t interface,
- const unsigned long *advertise);
- int (*serdes_pcs_an_restart)(struct mv88e6xxx_chip *chip, int port,
- int lane);
- int (*serdes_pcs_link_up)(struct mv88e6xxx_chip *chip, int port,
- int lane, int speed, int duplex);
-
/* SERDES interrupt handling */
unsigned int (*serdes_irq_mapping)(struct mv88e6xxx_chip *chip,
int port);
- int (*serdes_irq_enable)(struct mv88e6xxx_chip *chip, int port, int lane,
- bool enable);
- irqreturn_t (*serdes_irq_status)(struct mv88e6xxx_chip *chip, int port,
- int lane);
/* Statistics from the SERDES interface */
int (*serdes_get_sset_count)(struct mv88e6xxx_chip *chip, int port);
@@ -664,6 +645,8 @@ struct mv88e6xxx_ops {
void (*phylink_get_caps)(struct mv88e6xxx_chip *chip, int port,
struct phylink_config *config);
+ const struct mv88e6xxx_pcs_ops *pcs_ops;
+
/* Max Frame Size */
int (*set_max_frame_size)(struct mv88e6xxx_chip *chip, int mtu);
};
@@ -736,6 +719,14 @@ struct mv88e6xxx_ptp_ops {
u32 cc_mult_dem;
};
+struct mv88e6xxx_pcs_ops {
+ int (*pcs_init)(struct mv88e6xxx_chip *chip, int port);
+ void (*pcs_teardown)(struct mv88e6xxx_chip *chip, int port);
+ struct phylink_pcs *(*pcs_select)(struct mv88e6xxx_chip *chip, int port,
+ phy_interface_t mode);
+
+};
+
#define STATS_TYPE_PORT BIT(0)
#define STATS_TYPE_BANK0 BIT(1)
#define STATS_TYPE_BANK1 BIT(2)
diff --git a/drivers/net/dsa/mv88e6xxx/pcs-6185.c b/drivers/net/dsa/mv88e6xxx/pcs-6185.c
new file mode 100644
index 000000000000..4d677f836807
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/pcs-6185.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Marvell 88E6185 family SERDES PCS support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 Andrew Lunn <andrew@lunn.ch>
+ */
+#include <linux/phylink.h>
+
+#include "global2.h"
+#include "port.h"
+#include "serdes.h"
+
+struct mv88e6185_pcs {
+ struct phylink_pcs phylink_pcs;
+ unsigned int irq;
+ char name[64];
+
+ struct mv88e6xxx_chip *chip;
+ int port;
+};
+
+static struct mv88e6185_pcs *pcs_to_mv88e6185_pcs(struct phylink_pcs *pcs)
+{
+ return container_of(pcs, struct mv88e6185_pcs, phylink_pcs);
+}
+
+static irqreturn_t mv88e6185_pcs_handle_irq(int irq, void *dev_id)
+{
+ struct mv88e6185_pcs *mpcs = dev_id;
+ struct mv88e6xxx_chip *chip;
+ irqreturn_t ret = IRQ_NONE;
+ bool link_up;
+ u16 status;
+ int port;
+ int err;
+
+ chip = mpcs->chip;
+ port = mpcs->port;
+
+ mv88e6xxx_reg_lock(chip);
+ err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &status);
+ mv88e6xxx_reg_unlock(chip);
+
+ if (!err) {
+ link_up = !!(status & MV88E6XXX_PORT_STS_LINK);
+
+ phylink_pcs_change(&mpcs->phylink_pcs, link_up);
+
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
+
+static void mv88e6185_pcs_get_state(struct phylink_pcs *pcs,
+ struct phylink_link_state *state)
+{
+ struct mv88e6185_pcs *mpcs = pcs_to_mv88e6185_pcs(pcs);
+ struct mv88e6xxx_chip *chip = mpcs->chip;
+ int port = mpcs->port;
+ u16 status;
+ int err;
+
+ mv88e6xxx_reg_lock(chip);
+ err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &status);
+ mv88e6xxx_reg_unlock(chip);
+
+ if (err)
+ status = 0;
+
+ state->link = !!(status & MV88E6XXX_PORT_STS_LINK);
+ if (state->link) {
+ state->duplex = status & MV88E6XXX_PORT_STS_DUPLEX ?
+ DUPLEX_FULL : DUPLEX_HALF;
+
+ switch (status & MV88E6XXX_PORT_STS_SPEED_MASK) {
+ case MV88E6XXX_PORT_STS_SPEED_1000:
+ state->speed = SPEED_1000;
+ break;
+
+ case MV88E6XXX_PORT_STS_SPEED_100:
+ state->speed = SPEED_100;
+ break;
+
+ case MV88E6XXX_PORT_STS_SPEED_10:
+ state->speed = SPEED_10;
+ break;
+
+ default:
+ state->link = false;
+ break;
+ }
+ }
+}
+
+static int mv88e6185_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface,
+ const unsigned long *advertising,
+ bool permit_pause_to_mac)
+{
+ return 0;
+}
+
+static void mv88e6185_pcs_an_restart(struct phylink_pcs *pcs)
+{
+}
+
+static const struct phylink_pcs_ops mv88e6185_phylink_pcs_ops = {
+ .pcs_get_state = mv88e6185_pcs_get_state,
+ .pcs_config = mv88e6185_pcs_config,
+ .pcs_an_restart = mv88e6185_pcs_an_restart,
+};
+
+static int mv88e6185_pcs_init(struct mv88e6xxx_chip *chip, int port)
+{
+ struct mv88e6185_pcs *mpcs;
+ struct device *dev;
+ unsigned int irq;
+ int err;
+
+ /* There are no configurable serdes lanes on this switch chip, so
+ * we use the static cmode configuration to determine whether we
+ * have a PCS or not.
+ */
+ if (chip->ports[port].cmode != MV88E6185_PORT_STS_CMODE_SERDES &&
+ chip->ports[port].cmode != MV88E6185_PORT_STS_CMODE_1000BASE_X)
+ return 0;
+
+ dev = chip->dev;
+
+ mpcs = kzalloc(sizeof(*mpcs), GFP_KERNEL);
+ if (!mpcs)
+ return -ENOMEM;
+
+ mpcs->chip = chip;
+ mpcs->port = port;
+ mpcs->phylink_pcs.ops = &mv88e6185_phylink_pcs_ops;
+
+ irq = mv88e6xxx_serdes_irq_mapping(chip, port);
+ if (irq) {
+ snprintf(mpcs->name, sizeof(mpcs->name),
+ "mv88e6xxx-%s-serdes-%d", dev_name(dev), port);
+
+ err = request_threaded_irq(irq, NULL, mv88e6185_pcs_handle_irq,
+ IRQF_ONESHOT, mpcs->name, mpcs);
+ if (err) {
+ kfree(mpcs);
+ return err;
+ }
+
+ mpcs->irq = irq;
+ } else {
+ mpcs->phylink_pcs.poll = true;
+ }
+
+ chip->ports[port].pcs_private = &mpcs->phylink_pcs;
+
+ return 0;
+}
+
+static void mv88e6185_pcs_teardown(struct mv88e6xxx_chip *chip, int port)
+{
+ struct mv88e6185_pcs *mpcs;
+
+ mpcs = chip->ports[port].pcs_private;
+ if (!mpcs)
+ return;
+
+ if (mpcs->irq)
+ free_irq(mpcs->irq, mpcs);
+
+ kfree(mpcs);
+
+ chip->ports[port].pcs_private = NULL;
+}
+
+static struct phylink_pcs *mv88e6185_pcs_select(struct mv88e6xxx_chip *chip,
+ int port,
+ phy_interface_t interface)
+{
+ return chip->ports[port].pcs_private;
+}
+
+const struct mv88e6xxx_pcs_ops mv88e6185_pcs_ops = {
+ .pcs_init = mv88e6185_pcs_init,
+ .pcs_teardown = mv88e6185_pcs_teardown,
+ .pcs_select = mv88e6185_pcs_select,
+};
diff --git a/drivers/net/dsa/mv88e6xxx/pcs-6352.c b/drivers/net/dsa/mv88e6xxx/pcs-6352.c
new file mode 100644
index 000000000000..88f624b65470
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/pcs-6352.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Marvell 88E6352 family SERDES PCS support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 Andrew Lunn <andrew@lunn.ch>
+ */
+#include <linux/phylink.h>
+
+#include "global2.h"
+#include "port.h"
+#include "serdes.h"
+
+/* Definitions from drivers/net/phy/marvell.c, which would be good to reuse. */
+#define MII_M1011_PHY_STATUS 17
+#define MII_M1011_IMASK 18
+#define MII_M1011_IMASK_LINK_CHANGE BIT(10)
+#define MII_M1011_IEVENT 19
+#define MII_M1011_IEVENT_LINK_CHANGE BIT(10)
+#define MII_MARVELL_PHY_PAGE 22
+#define MII_MARVELL_FIBER_PAGE 1
+
+struct marvell_c22_pcs {
+ struct mdio_device mdio;
+ struct phylink_pcs phylink_pcs;
+ unsigned int irq;
+ char name[64];
+ bool (*link_check)(struct marvell_c22_pcs *mpcs);
+ struct mv88e6xxx_port *port;
+};
+
+static struct marvell_c22_pcs *pcs_to_marvell_c22_pcs(struct phylink_pcs *pcs)
+{
+ return container_of(pcs, struct marvell_c22_pcs, phylink_pcs);
+}
+
+static int marvell_c22_pcs_set_fiber_page(struct marvell_c22_pcs *mpcs)
+{
+ u16 page;
+ int err;
+
+ mutex_lock(&mpcs->mdio.bus->mdio_lock);
+
+ err = __mdiodev_read(&mpcs->mdio, MII_MARVELL_PHY_PAGE);
+ if (err < 0) {
+ dev_err(mpcs->mdio.dev.parent,
+ "%s: can't read Serdes page register: %pe\n",
+ mpcs->name, ERR_PTR(err));
+ return err;
+ }
+
+ page = err;
+
+ err = __mdiodev_write(&mpcs->mdio, MII_MARVELL_PHY_PAGE,
+ MII_MARVELL_FIBER_PAGE);
+ if (err) {
+ dev_err(mpcs->mdio.dev.parent,
+ "%s: can't set Serdes page register: %pe\n",
+ mpcs->name, ERR_PTR(err));
+ return err;
+ }
+
+ return page;
+}
+
+static int marvell_c22_pcs_restore_page(struct marvell_c22_pcs *mpcs,
+ int oldpage, int ret)
+{
+ int err;
+
+ if (oldpage >= 0) {
+ err = __mdiodev_write(&mpcs->mdio, MII_MARVELL_PHY_PAGE,
+ oldpage);
+ if (err)
+ dev_err(mpcs->mdio.dev.parent,
+ "%s: can't restore Serdes page register: %pe\n",
+ mpcs->name, ERR_PTR(err));
+ if (!err || ret < 0)
+ err = ret;
+ } else {
+ err = oldpage;
+ }
+ mutex_unlock(&mpcs->mdio.bus->mdio_lock);
+
+ return err;
+}
+
+static irqreturn_t marvell_c22_pcs_handle_irq(int irq, void *dev_id)
+{
+ struct marvell_c22_pcs *mpcs = dev_id;
+ irqreturn_t status = IRQ_NONE;
+ int err, oldpage;
+
+ oldpage = marvell_c22_pcs_set_fiber_page(mpcs);
+ if (oldpage < 0)
+ goto fail;
+
+ err = __mdiodev_read(&mpcs->mdio, MII_M1011_IEVENT);
+ if (err >= 0 && err & MII_M1011_IEVENT_LINK_CHANGE) {
+ phylink_pcs_change(&mpcs->phylink_pcs, true);
+ status = IRQ_HANDLED;
+ }
+
+fail:
+ marvell_c22_pcs_restore_page(mpcs, oldpage, 0);
+
+ return status;
+}
+
+static int marvell_c22_pcs_modify(struct marvell_c22_pcs *mpcs, u8 reg,
+ u16 mask, u16 val)
+{
+ int oldpage, err = 0;
+
+ oldpage = marvell_c22_pcs_set_fiber_page(mpcs);
+ if (oldpage >= 0)
+ err = __mdiodev_modify(&mpcs->mdio, reg, mask, val);
+
+ return marvell_c22_pcs_restore_page(mpcs, oldpage, err);
+}
+
+static int marvell_c22_pcs_power(struct marvell_c22_pcs *mpcs,
+ bool on)
+{
+ u16 val = on ? 0 : BMCR_PDOWN;
+
+ return marvell_c22_pcs_modify(mpcs, MII_BMCR, BMCR_PDOWN, val);
+}
+
+static int marvell_c22_pcs_control_irq(struct marvell_c22_pcs *mpcs,
+ bool enable)
+{
+ u16 val = enable ? MII_M1011_IMASK_LINK_CHANGE : 0;
+
+ return marvell_c22_pcs_modify(mpcs, MII_M1011_IMASK,
+ MII_M1011_IMASK_LINK_CHANGE, val);
+}
+
+static int marvell_c22_pcs_enable(struct phylink_pcs *pcs)
+{
+ struct marvell_c22_pcs *mpcs = pcs_to_marvell_c22_pcs(pcs);
+ int err;
+
+ err = marvell_c22_pcs_power(mpcs, true);
+ if (err)
+ return err;
+
+ return marvell_c22_pcs_control_irq(mpcs, !!mpcs->irq);
+}
+
+static void marvell_c22_pcs_disable(struct phylink_pcs *pcs)
+{
+ struct marvell_c22_pcs *mpcs = pcs_to_marvell_c22_pcs(pcs);
+
+ marvell_c22_pcs_control_irq(mpcs, false);
+ marvell_c22_pcs_power(mpcs, false);
+}
+
+static void marvell_c22_pcs_get_state(struct phylink_pcs *pcs,
+ struct phylink_link_state *state)
+{
+ struct marvell_c22_pcs *mpcs = pcs_to_marvell_c22_pcs(pcs);
+ int oldpage, bmsr, lpa, status;
+
+ state->link = false;
+
+ if (mpcs->link_check && !mpcs->link_check(mpcs))
+ return;
+
+ oldpage = marvell_c22_pcs_set_fiber_page(mpcs);
+ if (oldpage >= 0) {
+ bmsr = __mdiodev_read(&mpcs->mdio, MII_BMSR);
+ lpa = __mdiodev_read(&mpcs->mdio, MII_LPA);
+ status = __mdiodev_read(&mpcs->mdio, MII_M1011_PHY_STATUS);
+ }
+
+ if (marvell_c22_pcs_restore_page(mpcs, oldpage, 0) >= 0 &&
+ bmsr >= 0 && lpa >= 0 && status >= 0)
+ mv88e6xxx_pcs_decode_state(mpcs->mdio.dev.parent, bmsr, lpa,
+ status, state);
+}
+
+static int marvell_c22_pcs_config(struct phylink_pcs *pcs,
+ unsigned int neg_mode,
+ phy_interface_t interface,
+ const unsigned long *advertising,
+ bool permit_pause_to_mac)
+{
+ struct marvell_c22_pcs *mpcs = pcs_to_marvell_c22_pcs(pcs);
+ int oldpage, adv, err, ret = 0;
+ u16 bmcr;
+
+ adv = phylink_mii_c22_pcs_encode_advertisement(interface, advertising);
+ if (adv < 0)
+ return 0;
+
+ bmcr = neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED ? BMCR_ANENABLE : 0;
+
+ oldpage = marvell_c22_pcs_set_fiber_page(mpcs);
+ if (oldpage < 0)
+ goto restore;
+
+ err = __mdiodev_modify_changed(&mpcs->mdio, MII_ADVERTISE, 0xffff, adv);
+ ret = err;
+ if (err < 0)
+ goto restore;
+
+ err = __mdiodev_modify_changed(&mpcs->mdio, MII_BMCR, BMCR_ANENABLE,
+ bmcr);
+ if (err < 0) {
+ ret = err;
+ goto restore;
+ }
+
+ /* If the ANENABLE bit was changed, the PHY will restart negotiation,
+ * so we don't need to flag a change to trigger its own restart.
+ */
+ if (err)
+ ret = 0;
+
+restore:
+ return marvell_c22_pcs_restore_page(mpcs, oldpage, ret);
+}
+
+static void marvell_c22_pcs_an_restart(struct phylink_pcs *pcs)
+{
+ struct marvell_c22_pcs *mpcs = pcs_to_marvell_c22_pcs(pcs);
+
+ marvell_c22_pcs_modify(mpcs, MII_BMCR, BMCR_ANRESTART, BMCR_ANRESTART);
+}
+
+static void marvell_c22_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface, int speed,
+ int duplex)
+{
+ struct marvell_c22_pcs *mpcs = pcs_to_marvell_c22_pcs(pcs);
+ u16 bmcr;
+ int err;
+
+ if (phylink_autoneg_inband(mode))
+ return;
+
+ bmcr = mii_bmcr_encode_fixed(speed, duplex);
+
+ err = marvell_c22_pcs_modify(mpcs, MII_BMCR, BMCR_SPEED100 |
+ BMCR_FULLDPLX | BMCR_SPEED1000, bmcr);
+ if (err)
+ dev_err(mpcs->mdio.dev.parent,
+ "%s: failed to configure mpcs: %pe\n", mpcs->name,
+ ERR_PTR(err));
+}
+
+static const struct phylink_pcs_ops marvell_c22_pcs_ops = {
+ .pcs_enable = marvell_c22_pcs_enable,
+ .pcs_disable = marvell_c22_pcs_disable,
+ .pcs_get_state = marvell_c22_pcs_get_state,
+ .pcs_config = marvell_c22_pcs_config,
+ .pcs_an_restart = marvell_c22_pcs_an_restart,
+ .pcs_link_up = marvell_c22_pcs_link_up,
+};
+
+static struct marvell_c22_pcs *marvell_c22_pcs_alloc(struct device *dev,
+ struct mii_bus *bus,
+ unsigned int addr)
+{
+ struct marvell_c22_pcs *mpcs;
+
+ mpcs = kzalloc(sizeof(*mpcs), GFP_KERNEL);
+ if (!mpcs)
+ return NULL;
+
+ mpcs->mdio.dev.parent = dev;
+ mpcs->mdio.bus = bus;
+ mpcs->mdio.addr = addr;
+ mpcs->phylink_pcs.ops = &marvell_c22_pcs_ops;
+ mpcs->phylink_pcs.neg_mode = true;
+
+ return mpcs;
+}
+
+static int marvell_c22_pcs_setup_irq(struct marvell_c22_pcs *mpcs,
+ unsigned int irq)
+{
+ int err;
+
+ mpcs->phylink_pcs.poll = !irq;
+ mpcs->irq = irq;
+
+ if (irq) {
+ err = request_threaded_irq(irq, NULL,
+ marvell_c22_pcs_handle_irq,
+ IRQF_ONESHOT, mpcs->name, mpcs);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/* mv88e6352 specifics */
+
+static bool mv88e6352_pcs_link_check(struct marvell_c22_pcs *mpcs)
+{
+ struct mv88e6xxx_port *port = mpcs->port;
+ struct mv88e6xxx_chip *chip = port->chip;
+ u8 cmode;
+
+ /* Port 4 can be in auto-media mode. Check that the port is
+ * associated with the mpcs.
+ */
+ mv88e6xxx_reg_lock(chip);
+ chip->info->ops->port_get_cmode(chip, port->port, &cmode);
+ mv88e6xxx_reg_unlock(chip);
+
+ return cmode == MV88E6XXX_PORT_STS_CMODE_100BASEX ||
+ cmode == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
+ cmode == MV88E6XXX_PORT_STS_CMODE_SGMII;
+}
+
+static int mv88e6352_pcs_init(struct mv88e6xxx_chip *chip, int port)
+{
+ struct marvell_c22_pcs *mpcs;
+ struct mii_bus *bus;
+ struct device *dev;
+ unsigned int irq;
+ int err;
+
+ mv88e6xxx_reg_lock(chip);
+ err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+ mv88e6xxx_reg_unlock(chip);
+ if (err <= 0)
+ return err;
+
+ irq = mv88e6xxx_serdes_irq_mapping(chip, port);
+ bus = mv88e6xxx_default_mdio_bus(chip);
+ dev = chip->dev;
+
+ mpcs = marvell_c22_pcs_alloc(dev, bus, MV88E6352_ADDR_SERDES);
+ if (!mpcs)
+ return -ENOMEM;
+
+ snprintf(mpcs->name, sizeof(mpcs->name),
+ "mv88e6xxx-%s-serdes-%d", dev_name(dev), port);
+
+ mpcs->link_check = mv88e6352_pcs_link_check;
+ mpcs->port = &chip->ports[port];
+
+ err = marvell_c22_pcs_setup_irq(mpcs, irq);
+ if (err) {
+ kfree(mpcs);
+ return err;
+ }
+
+ chip->ports[port].pcs_private = &mpcs->phylink_pcs;
+
+ return 0;
+}
+
+static void mv88e6352_pcs_teardown(struct mv88e6xxx_chip *chip, int port)
+{
+ struct marvell_c22_pcs *mpcs;
+ struct phylink_pcs *pcs;
+
+ pcs = chip->ports[port].pcs_private;
+ if (!pcs)
+ return;
+
+ mpcs = pcs_to_marvell_c22_pcs(pcs);
+
+ if (mpcs->irq)
+ free_irq(mpcs->irq, mpcs);
+
+ kfree(mpcs);
+
+ chip->ports[port].pcs_private = NULL;
+}
+
+static struct phylink_pcs *mv88e6352_pcs_select(struct mv88e6xxx_chip *chip,
+ int port,
+ phy_interface_t interface)
+{
+ return chip->ports[port].pcs_private;
+}
+
+const struct mv88e6xxx_pcs_ops mv88e6352_pcs_ops = {
+ .pcs_init = mv88e6352_pcs_init,
+ .pcs_teardown = mv88e6352_pcs_teardown,
+ .pcs_select = mv88e6352_pcs_select,
+};
diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
new file mode 100644
index 000000000000..98dd49dac421
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
@@ -0,0 +1,898 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Marvell 88E6352 family SERDES PCS support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 Andrew Lunn <andrew@lunn.ch>
+ */
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/mii.h>
+
+#include "chip.h"
+#include "global2.h"
+#include "phy.h"
+#include "port.h"
+#include "serdes.h"
+
+struct mv88e639x_pcs {
+ struct mdio_device mdio;
+ struct phylink_pcs sgmii_pcs;
+ struct phylink_pcs xg_pcs;
+ bool supports_5g;
+ phy_interface_t interface;
+ unsigned int irq;
+ char name[64];
+ irqreturn_t (*handle_irq)(struct mv88e639x_pcs *mpcs);
+};
+
+static int mv88e639x_read(struct mv88e639x_pcs *mpcs, u16 regnum, u16 *val)
+{
+ int err;
+
+ err = mdiodev_c45_read(&mpcs->mdio, MDIO_MMD_PHYXS, regnum);
+ if (err < 0)
+ return err;
+
+ *val = err;
+
+ return 0;
+}
+
+static int mv88e639x_write(struct mv88e639x_pcs *mpcs, u16 regnum, u16 val)
+{
+ return mdiodev_c45_write(&mpcs->mdio, MDIO_MMD_PHYXS, regnum, val);
+}
+
+static int mv88e639x_modify(struct mv88e639x_pcs *mpcs, u16 regnum, u16 mask,
+ u16 val)
+{
+ return mdiodev_c45_modify(&mpcs->mdio, MDIO_MMD_PHYXS, regnum, mask,
+ val);
+}
+
+static int mv88e639x_modify_changed(struct mv88e639x_pcs *mpcs, u16 regnum,
+ u16 mask, u16 set)
+{
+ return mdiodev_c45_modify_changed(&mpcs->mdio, MDIO_MMD_PHYXS, regnum,
+ mask, set);
+}
+
+static struct mv88e639x_pcs *
+mv88e639x_pcs_alloc(struct device *dev, struct mii_bus *bus, unsigned int addr,
+ int port)
+{
+ struct mv88e639x_pcs *mpcs;
+
+ mpcs = kzalloc(sizeof(*mpcs), GFP_KERNEL);
+ if (!mpcs)
+ return NULL;
+
+ mpcs->mdio.dev.parent = dev;
+ mpcs->mdio.bus = bus;
+ mpcs->mdio.addr = addr;
+
+ snprintf(mpcs->name, sizeof(mpcs->name),
+ "mv88e6xxx-%s-serdes-%d", dev_name(dev), port);
+
+ return mpcs;
+}
+
+static irqreturn_t mv88e639x_pcs_handle_irq(int irq, void *dev_id)
+{
+ struct mv88e639x_pcs *mpcs = dev_id;
+ irqreturn_t (*handler)(struct mv88e639x_pcs *);
+
+ handler = READ_ONCE(mpcs->handle_irq);
+ if (!handler)
+ return IRQ_NONE;
+
+ return handler(mpcs);
+}
+
+static int mv88e639x_pcs_setup_irq(struct mv88e639x_pcs *mpcs,
+ struct mv88e6xxx_chip *chip, int port)
+{
+ unsigned int irq;
+
+ irq = mv88e6xxx_serdes_irq_mapping(chip, port);
+ if (!irq) {
+ /* Use polling mode */
+ mpcs->sgmii_pcs.poll = true;
+ mpcs->xg_pcs.poll = true;
+ return 0;
+ }
+
+ mpcs->irq = irq;
+
+ return request_threaded_irq(irq, NULL, mv88e639x_pcs_handle_irq,
+ IRQF_ONESHOT, mpcs->name, mpcs);
+}
+
+static void mv88e639x_pcs_teardown(struct mv88e6xxx_chip *chip, int port)
+{
+ struct mv88e639x_pcs *mpcs = chip->ports[port].pcs_private;
+
+ if (!mpcs)
+ return;
+
+ if (mpcs->irq)
+ free_irq(mpcs->irq, mpcs);
+
+ kfree(mpcs);
+
+ chip->ports[port].pcs_private = NULL;
+}
+
+static struct mv88e639x_pcs *sgmii_pcs_to_mv88e639x_pcs(struct phylink_pcs *pcs)
+{
+ return container_of(pcs, struct mv88e639x_pcs, sgmii_pcs);
+}
+
+static irqreturn_t mv88e639x_sgmii_handle_irq(struct mv88e639x_pcs *mpcs)
+{
+ u16 int_status;
+ int err;
+
+ err = mv88e639x_read(mpcs, MV88E6390_SGMII_INT_STATUS, &int_status);
+ if (err)
+ return IRQ_NONE;
+
+ if (int_status & (MV88E6390_SGMII_INT_LINK_DOWN |
+ MV88E6390_SGMII_INT_LINK_UP)) {
+ phylink_pcs_change(&mpcs->sgmii_pcs,
+ int_status & MV88E6390_SGMII_INT_LINK_UP);
+
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+static int mv88e639x_sgmii_pcs_control_irq(struct mv88e639x_pcs *mpcs,
+ bool enable)
+{
+ u16 val = 0;
+
+ if (enable)
+ val |= MV88E6390_SGMII_INT_LINK_DOWN |
+ MV88E6390_SGMII_INT_LINK_UP;
+
+ return mv88e639x_modify(mpcs, MV88E6390_SGMII_INT_ENABLE,
+ MV88E6390_SGMII_INT_LINK_DOWN |
+ MV88E6390_SGMII_INT_LINK_UP, val);
+}
+
+static int mv88e639x_sgmii_pcs_control_pwr(struct mv88e639x_pcs *mpcs,
+ bool enable)
+{
+ u16 mask, val;
+
+ if (enable) {
+ mask = BMCR_RESET | BMCR_LOOPBACK | BMCR_PDOWN;
+ val = 0;
+ } else {
+ mask = val = BMCR_PDOWN;
+ }
+
+ return mv88e639x_modify(mpcs, MV88E6390_SGMII_BMCR, mask, val);
+}
+
+static int mv88e639x_sgmii_pcs_enable(struct phylink_pcs *pcs)
+{
+ struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
+
+ /* power enable done in post_config */
+ mpcs->handle_irq = mv88e639x_sgmii_handle_irq;
+
+ return mv88e639x_sgmii_pcs_control_irq(mpcs, !!mpcs->irq);
+}
+
+static void mv88e639x_sgmii_pcs_disable(struct phylink_pcs *pcs)
+{
+ struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
+
+ mv88e639x_sgmii_pcs_control_irq(mpcs, false);
+ mv88e639x_sgmii_pcs_control_pwr(mpcs, false);
+}
+
+static void mv88e639x_sgmii_pcs_pre_config(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
+
+ mv88e639x_sgmii_pcs_control_pwr(mpcs, false);
+}
+
+static int mv88e639x_sgmii_pcs_post_config(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
+
+ mv88e639x_sgmii_pcs_control_pwr(mpcs, true);
+
+ return 0;
+}
+
+static void mv88e639x_sgmii_pcs_get_state(struct phylink_pcs *pcs,
+ struct phylink_link_state *state)
+{
+ struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
+ u16 bmsr, lpa, status;
+ int err;
+
+ err = mv88e639x_read(mpcs, MV88E6390_SGMII_BMSR, &bmsr);
+ if (err) {
+ dev_err(mpcs->mdio.dev.parent,
+ "can't read Serdes PHY %s: %pe\n",
+ "BMSR", ERR_PTR(err));
+ state->link = false;
+ return;
+ }
+
+ err = mv88e639x_read(mpcs, MV88E6390_SGMII_LPA, &lpa);
+ if (err) {
+ dev_err(mpcs->mdio.dev.parent,
+ "can't read Serdes PHY %s: %pe\n",
+ "LPA", ERR_PTR(err));
+ state->link = false;
+ return;
+ }
+
+ err = mv88e639x_read(mpcs, MV88E6390_SGMII_PHY_STATUS, &status);
+ if (err) {
+ dev_err(mpcs->mdio.dev.parent,
+ "can't read Serdes PHY %s: %pe\n",
+ "status", ERR_PTR(err));
+ state->link = false;
+ return;
+ }
+
+ mv88e6xxx_pcs_decode_state(mpcs->mdio.dev.parent, bmsr, lpa, status,
+ state);
+}
+
+static int mv88e639x_sgmii_pcs_config(struct phylink_pcs *pcs,
+ unsigned int neg_mode,
+ phy_interface_t interface,
+ const unsigned long *advertising,
+ bool permit_pause_to_mac)
+{
+ struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
+ u16 val, bmcr;
+ bool changed;
+ int adv, err;
+
+ adv = phylink_mii_c22_pcs_encode_advertisement(interface, advertising);
+ if (adv < 0)
+ return 0;
+
+ mpcs->interface = interface;
+
+ err = mv88e639x_modify_changed(mpcs, MV88E6390_SGMII_ADVERTISE,
+ 0xffff, adv);
+ if (err < 0)
+ return err;
+
+ changed = err > 0;
+
+ err = mv88e639x_read(mpcs, MV88E6390_SGMII_BMCR, &val);
+ if (err)
+ return err;
+
+ if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
+ bmcr = val | BMCR_ANENABLE;
+ else
+ bmcr = val & ~BMCR_ANENABLE;
+
+ /* setting ANENABLE triggers a restart of negotiation */
+ if (bmcr == val)
+ return changed;
+
+ return mv88e639x_write(mpcs, MV88E6390_SGMII_BMCR, bmcr);
+}
+
+static void mv88e639x_sgmii_pcs_an_restart(struct phylink_pcs *pcs)
+{
+ struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
+
+ mv88e639x_modify(mpcs, MV88E6390_SGMII_BMCR,
+ BMCR_ANRESTART, BMCR_ANRESTART);
+}
+
+static void mv88e639x_sgmii_pcs_link_up(struct phylink_pcs *pcs,
+ unsigned int mode,
+ phy_interface_t interface,
+ int speed, int duplex)
+{
+ struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
+ u16 bmcr;
+ int err;
+
+ if (phylink_autoneg_inband(mode))
+ return;
+
+ bmcr = mii_bmcr_encode_fixed(speed, duplex);
+
+ err = mv88e639x_modify(mpcs, MV88E6390_SGMII_BMCR,
+ BMCR_SPEED1000 | BMCR_SPEED100 | BMCR_FULLDPLX,
+ bmcr);
+ if (err)
+ dev_err(mpcs->mdio.dev.parent,
+ "can't access Serdes PHY %s: %pe\n",
+ "BMCR", ERR_PTR(err));
+}
+
+static const struct phylink_pcs_ops mv88e639x_sgmii_pcs_ops = {
+ .pcs_enable = mv88e639x_sgmii_pcs_enable,
+ .pcs_disable = mv88e639x_sgmii_pcs_disable,
+ .pcs_pre_config = mv88e639x_sgmii_pcs_pre_config,
+ .pcs_post_config = mv88e639x_sgmii_pcs_post_config,
+ .pcs_get_state = mv88e639x_sgmii_pcs_get_state,
+ .pcs_an_restart = mv88e639x_sgmii_pcs_an_restart,
+ .pcs_config = mv88e639x_sgmii_pcs_config,
+ .pcs_link_up = mv88e639x_sgmii_pcs_link_up,
+};
+
+static struct mv88e639x_pcs *xg_pcs_to_mv88e639x_pcs(struct phylink_pcs *pcs)
+{
+ return container_of(pcs, struct mv88e639x_pcs, xg_pcs);
+}
+
+static int mv88e639x_xg_pcs_enable(struct mv88e639x_pcs *mpcs)
+{
+ return mv88e639x_modify(mpcs, MV88E6390_10G_CTRL1,
+ MDIO_CTRL1_RESET | MDIO_PCS_CTRL1_LOOPBACK |
+ MDIO_CTRL1_LPOWER, 0);
+}
+
+static void mv88e639x_xg_pcs_disable(struct mv88e639x_pcs *mpcs)
+{
+ mv88e639x_modify(mpcs, MV88E6390_10G_CTRL1, MDIO_CTRL1_LPOWER,
+ MDIO_CTRL1_LPOWER);
+}
+
+static void mv88e639x_xg_pcs_get_state(struct phylink_pcs *pcs,
+ struct phylink_link_state *state)
+{
+ struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs);
+ u16 status;
+ int err;
+
+ state->link = false;
+
+ err = mv88e639x_read(mpcs, MV88E6390_10G_STAT1, &status);
+ if (err) {
+ dev_err(mpcs->mdio.dev.parent,
+ "can't read Serdes PHY %s: %pe\n",
+ "STAT1", ERR_PTR(err));
+ return;
+ }
+
+ state->link = !!(status & MDIO_STAT1_LSTATUS);
+ if (state->link) {
+ switch (state->interface) {
+ case PHY_INTERFACE_MODE_5GBASER:
+ state->speed = SPEED_5000;
+ break;
+
+ case PHY_INTERFACE_MODE_10GBASER:
+ case PHY_INTERFACE_MODE_RXAUI:
+ case PHY_INTERFACE_MODE_XAUI:
+ state->speed = SPEED_10000;
+ break;
+
+ default:
+ state->link = false;
+ return;
+ }
+
+ state->duplex = DUPLEX_FULL;
+ }
+}
+
+static int mv88e639x_xg_pcs_config(struct phylink_pcs *pcs,
+ unsigned int neg_mode,
+ phy_interface_t interface,
+ const unsigned long *advertising,
+ bool permit_pause_to_mac)
+{
+ return 0;
+}
+
+static struct phylink_pcs *
+mv88e639x_pcs_select(struct mv88e6xxx_chip *chip, int port,
+ phy_interface_t mode)
+{
+ struct mv88e639x_pcs *mpcs;
+
+ mpcs = chip->ports[port].pcs_private;
+ if (!mpcs)
+ return NULL;
+
+ switch (mode) {
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_2500BASEX:
+ return &mpcs->sgmii_pcs;
+
+ case PHY_INTERFACE_MODE_5GBASER:
+ if (!mpcs->supports_5g)
+ return NULL;
+ fallthrough;
+ case PHY_INTERFACE_MODE_10GBASER:
+ case PHY_INTERFACE_MODE_XAUI:
+ case PHY_INTERFACE_MODE_RXAUI:
+ return &mpcs->xg_pcs;
+
+ default:
+ return NULL;
+ }
+}
+
+/* Marvell 88E6390 Specific support */
+
+static irqreturn_t mv88e6390_xg_handle_irq(struct mv88e639x_pcs *mpcs)
+{
+ u16 int_status;
+ int err;
+
+ err = mv88e639x_read(mpcs, MV88E6390_10G_INT_STATUS, &int_status);
+ if (err)
+ return IRQ_NONE;
+
+ if (int_status & (MV88E6390_10G_INT_LINK_DOWN |
+ MV88E6390_10G_INT_LINK_UP)) {
+ phylink_pcs_change(&mpcs->xg_pcs,
+ int_status & MV88E6390_10G_INT_LINK_UP);
+
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+static int mv88e6390_xg_control_irq(struct mv88e639x_pcs *mpcs, bool enable)
+{
+ u16 val = 0;
+
+ if (enable)
+ val = MV88E6390_10G_INT_LINK_DOWN | MV88E6390_10G_INT_LINK_UP;
+
+ return mv88e639x_modify(mpcs, MV88E6390_10G_INT_ENABLE,
+ MV88E6390_10G_INT_LINK_DOWN |
+ MV88E6390_10G_INT_LINK_UP, val);
+}
+
+static int mv88e6390_xg_pcs_enable(struct phylink_pcs *pcs)
+{
+ struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs);
+ int err;
+
+ err = mv88e639x_xg_pcs_enable(mpcs);
+ if (err)
+ return err;
+
+ mpcs->handle_irq = mv88e6390_xg_handle_irq;
+
+ return mv88e6390_xg_control_irq(mpcs, !!mpcs->irq);
+}
+
+static void mv88e6390_xg_pcs_disable(struct phylink_pcs *pcs)
+{
+ struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs);
+
+ mv88e6390_xg_control_irq(mpcs, false);
+ mv88e639x_xg_pcs_disable(mpcs);
+}
+
+static const struct phylink_pcs_ops mv88e6390_xg_pcs_ops = {
+ .pcs_enable = mv88e6390_xg_pcs_enable,
+ .pcs_disable = mv88e6390_xg_pcs_disable,
+ .pcs_get_state = mv88e639x_xg_pcs_get_state,
+ .pcs_config = mv88e639x_xg_pcs_config,
+};
+
+static int mv88e6390_pcs_enable_checker(struct mv88e639x_pcs *mpcs)
+{
+ return mv88e639x_modify(mpcs, MV88E6390_PG_CONTROL,
+ MV88E6390_PG_CONTROL_ENABLE_PC,
+ MV88E6390_PG_CONTROL_ENABLE_PC);
+}
+
+static int mv88e6390_pcs_init(struct mv88e6xxx_chip *chip, int port)
+{
+ struct mv88e639x_pcs *mpcs;
+ struct mii_bus *bus;
+ struct device *dev;
+ int lane, err;
+
+ lane = mv88e6xxx_serdes_get_lane(chip, port);
+ if (lane < 0)
+ return 0;
+
+ bus = mv88e6xxx_default_mdio_bus(chip);
+ dev = chip->dev;
+
+ mpcs = mv88e639x_pcs_alloc(dev, bus, lane, port);
+ if (!mpcs)
+ return -ENOMEM;
+
+ mpcs->sgmii_pcs.ops = &mv88e639x_sgmii_pcs_ops;
+ mpcs->sgmii_pcs.neg_mode = true;
+ mpcs->xg_pcs.ops = &mv88e6390_xg_pcs_ops;
+ mpcs->xg_pcs.neg_mode = true;
+
+ err = mv88e639x_pcs_setup_irq(mpcs, chip, port);
+ if (err)
+ goto err_free;
+
+ /* 6390 and 6390x has the checker, 6393x doesn't appear to? */
+ /* This is to enable gathering the statistics. Maybe this
+ * should call out to a helper? Or we could do this at init time.
+ */
+ err = mv88e6390_pcs_enable_checker(mpcs);
+ if (err)
+ goto err_free;
+
+ chip->ports[port].pcs_private = mpcs;
+
+ return 0;
+
+err_free:
+ kfree(mpcs);
+ return err;
+}
+
+const struct mv88e6xxx_pcs_ops mv88e6390_pcs_ops = {
+ .pcs_init = mv88e6390_pcs_init,
+ .pcs_teardown = mv88e639x_pcs_teardown,
+ .pcs_select = mv88e639x_pcs_select,
+};
+
+/* Marvell 88E6393X Specific support */
+
+static int mv88e6393x_power_lane(struct mv88e639x_pcs *mpcs, bool enable)
+{
+ u16 val = MV88E6393X_SERDES_CTRL1_TX_PDOWN |
+ MV88E6393X_SERDES_CTRL1_RX_PDOWN;
+
+ return mv88e639x_modify(mpcs, MV88E6393X_SERDES_CTRL1, val,
+ enable ? 0 : val);
+}
+
+/* mv88e6393x family errata 4.6:
+ * Cannot clear PwrDn bit on SERDES if device is configured CPU_MGD mode or
+ * P0_mode is configured for [x]MII.
+ * Workaround: Set SERDES register 4.F002 bit 5=0 and bit 15=1.
+ *
+ * It seems that after this workaround the SERDES is automatically powered up
+ * (the bit is cleared), so power it down.
+ */
+static int mv88e6393x_erratum_4_6(struct mv88e639x_pcs *mpcs)
+{
+ int err;
+
+ err = mv88e639x_modify(mpcs, MV88E6393X_SERDES_POC,
+ MV88E6393X_SERDES_POC_PDOWN |
+ MV88E6393X_SERDES_POC_RESET,
+ MV88E6393X_SERDES_POC_RESET);
+ if (err)
+ return err;
+
+ err = mv88e639x_modify(mpcs, MV88E6390_SGMII_BMCR,
+ BMCR_PDOWN, BMCR_PDOWN);
+ if (err)
+ return err;
+
+ err = mv88e639x_sgmii_pcs_control_pwr(mpcs, false);
+ if (err)
+ return err;
+
+ return mv88e6393x_power_lane(mpcs, false);
+}
+
+/* mv88e6393x family errata 4.8:
+ * When a SERDES port is operating in 1000BASE-X or SGMII mode link may not
+ * come up after hardware reset or software reset of SERDES core. Workaround
+ * is to write SERDES register 4.F074.14=1 for only those modes and 0 in all
+ * other modes.
+ */
+static int mv88e6393x_erratum_4_8(struct mv88e639x_pcs *mpcs)
+{
+ u16 reg, poc;
+ int err;
+
+ err = mv88e639x_read(mpcs, MV88E6393X_SERDES_POC, &poc);
+ if (err)
+ return err;
+
+ poc &= MV88E6393X_SERDES_POC_PCS_MASK;
+ if (poc == MV88E6393X_SERDES_POC_PCS_1000BASEX ||
+ poc == MV88E6393X_SERDES_POC_PCS_SGMII_PHY ||
+ poc == MV88E6393X_SERDES_POC_PCS_SGMII_MAC)
+ reg = MV88E6393X_ERRATA_4_8_BIT;
+ else
+ reg = 0;
+
+ return mv88e639x_modify(mpcs, MV88E6393X_ERRATA_4_8_REG,
+ MV88E6393X_ERRATA_4_8_BIT, reg);
+}
+
+/* mv88e6393x family errata 5.2:
+ * For optimal signal integrity the following sequence should be applied to
+ * SERDES operating in 10G mode. These registers only apply to 10G operation
+ * and have no effect on other speeds.
+ */
+static int mv88e6393x_erratum_5_2(struct mv88e639x_pcs *mpcs)
+{
+ static const struct {
+ u16 dev, reg, val, mask;
+ } fixes[] = {
+ { MDIO_MMD_VEND1, 0x8093, 0xcb5a, 0xffff },
+ { MDIO_MMD_VEND1, 0x8171, 0x7088, 0xffff },
+ { MDIO_MMD_VEND1, 0x80c9, 0x311a, 0xffff },
+ { MDIO_MMD_VEND1, 0x80a2, 0x8000, 0xff7f },
+ { MDIO_MMD_VEND1, 0x80a9, 0x0000, 0xfff0 },
+ { MDIO_MMD_VEND1, 0x80a3, 0x0000, 0xf8ff },
+ { MDIO_MMD_PHYXS, MV88E6393X_SERDES_POC,
+ MV88E6393X_SERDES_POC_RESET, MV88E6393X_SERDES_POC_RESET },
+ };
+ int err, i;
+
+ for (i = 0; i < ARRAY_SIZE(fixes); ++i) {
+ err = mdiodev_c45_modify(&mpcs->mdio, fixes[i].dev,
+ fixes[i].reg, fixes[i].mask,
+ fixes[i].val);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/* Inband AN is broken on Amethyst in 2500base-x mode when set by standard
+ * mechanism (via cmode).
+ * We can get around this by configuring the PCS mode to 1000base-x and then
+ * writing value 0x58 to register 1e.8000. (This must be done while SerDes
+ * receiver and transmitter are disabled, which is, when this function is
+ * called.)
+ * It seem that when we do this configuration to 2500base-x mode (by changing
+ * PCS mode to 1000base-x and frequency to 3.125 GHz from 1.25 GHz) and then
+ * configure to sgmii or 1000base-x, the device thinks that it already has
+ * SerDes at 1.25 GHz and does not change the 1e.8000 register, leaving SerDes
+ * at 3.125 GHz.
+ * To avoid this, change PCS mode back to 2500base-x when disabling SerDes from
+ * 2500base-x mode.
+ */
+static int mv88e6393x_fix_2500basex_an(struct mv88e639x_pcs *mpcs, bool on)
+{
+ u16 reg;
+ int err;
+
+ if (on)
+ reg = MV88E6393X_SERDES_POC_PCS_1000BASEX |
+ MV88E6393X_SERDES_POC_AN;
+ else
+ reg = MV88E6393X_SERDES_POC_PCS_2500BASEX;
+
+ reg |= MV88E6393X_SERDES_POC_RESET;
+
+ err = mv88e639x_modify(mpcs, MV88E6393X_SERDES_POC,
+ MV88E6393X_SERDES_POC_PCS_MASK |
+ MV88E6393X_SERDES_POC_AN |
+ MV88E6393X_SERDES_POC_RESET, reg);
+ if (err)
+ return err;
+
+ return mdiodev_c45_write(&mpcs->mdio, MDIO_MMD_VEND1, 0x8000, 0x58);
+}
+
+static int mv88e6393x_sgmii_apply_2500basex_an(struct mv88e639x_pcs *mpcs,
+ phy_interface_t interface,
+ bool enable)
+{
+ int err;
+
+ if (interface != PHY_INTERFACE_MODE_2500BASEX)
+ return 0;
+
+ err = mv88e6393x_fix_2500basex_an(mpcs, enable);
+ if (err)
+ dev_err(mpcs->mdio.dev.parent,
+ "failed to %s 2500basex fix: %pe\n",
+ enable ? "enable" : "disable", ERR_PTR(err));
+
+ return err;
+}
+
+static void mv88e6393x_sgmii_pcs_disable(struct phylink_pcs *pcs)
+{
+ struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
+
+ mv88e639x_sgmii_pcs_disable(pcs);
+ mv88e6393x_power_lane(mpcs, false);
+ mv88e6393x_sgmii_apply_2500basex_an(mpcs, mpcs->interface, false);
+}
+
+static void mv88e6393x_sgmii_pcs_pre_config(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
+
+ mv88e639x_sgmii_pcs_pre_config(pcs, interface);
+ mv88e6393x_power_lane(mpcs, false);
+ mv88e6393x_sgmii_apply_2500basex_an(mpcs, mpcs->interface, false);
+}
+
+static int mv88e6393x_sgmii_pcs_post_config(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ struct mv88e639x_pcs *mpcs = sgmii_pcs_to_mv88e639x_pcs(pcs);
+ int err;
+
+ err = mv88e6393x_erratum_4_8(mpcs);
+ if (err)
+ return err;
+
+ err = mv88e6393x_sgmii_apply_2500basex_an(mpcs, interface, true);
+ if (err)
+ return err;
+
+ err = mv88e6393x_power_lane(mpcs, true);
+ if (err)
+ return err;
+
+ return mv88e639x_sgmii_pcs_post_config(pcs, interface);
+}
+
+static const struct phylink_pcs_ops mv88e6393x_sgmii_pcs_ops = {
+ .pcs_enable = mv88e639x_sgmii_pcs_enable,
+ .pcs_disable = mv88e6393x_sgmii_pcs_disable,
+ .pcs_pre_config = mv88e6393x_sgmii_pcs_pre_config,
+ .pcs_post_config = mv88e6393x_sgmii_pcs_post_config,
+ .pcs_get_state = mv88e639x_sgmii_pcs_get_state,
+ .pcs_an_restart = mv88e639x_sgmii_pcs_an_restart,
+ .pcs_config = mv88e639x_sgmii_pcs_config,
+ .pcs_link_up = mv88e639x_sgmii_pcs_link_up,
+};
+
+static irqreturn_t mv88e6393x_xg_handle_irq(struct mv88e639x_pcs *mpcs)
+{
+ u16 int_status, stat1;
+ bool link_down;
+ int err;
+
+ err = mv88e639x_read(mpcs, MV88E6393X_10G_INT_STATUS, &int_status);
+ if (err)
+ return IRQ_NONE;
+
+ if (int_status & MV88E6393X_10G_INT_LINK_CHANGE) {
+ err = mv88e639x_read(mpcs, MV88E6390_10G_STAT1, &stat1);
+ if (err)
+ return IRQ_NONE;
+
+ link_down = !(stat1 & MDIO_STAT1_LSTATUS);
+
+ phylink_pcs_change(&mpcs->xg_pcs, !link_down);
+
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+static int mv88e6393x_xg_control_irq(struct mv88e639x_pcs *mpcs, bool enable)
+{
+ u16 val = 0;
+
+ if (enable)
+ val = MV88E6393X_10G_INT_LINK_CHANGE;
+
+ return mv88e639x_modify(mpcs, MV88E6393X_10G_INT_ENABLE,
+ MV88E6393X_10G_INT_LINK_CHANGE, val);
+}
+
+static int mv88e6393x_xg_pcs_enable(struct phylink_pcs *pcs)
+{
+ struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs);
+
+ mpcs->handle_irq = mv88e6393x_xg_handle_irq;
+
+ return mv88e6393x_xg_control_irq(mpcs, !!mpcs->irq);
+}
+
+static void mv88e6393x_xg_pcs_disable(struct phylink_pcs *pcs)
+{
+ struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs);
+
+ mv88e6393x_xg_control_irq(mpcs, false);
+ mv88e639x_xg_pcs_disable(mpcs);
+ mv88e6393x_power_lane(mpcs, false);
+}
+
+/* The PCS has to be powered down while CMODE is changed */
+static void mv88e6393x_xg_pcs_pre_config(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs);
+
+ mv88e639x_xg_pcs_disable(mpcs);
+ mv88e6393x_power_lane(mpcs, false);
+}
+
+static int mv88e6393x_xg_pcs_post_config(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs);
+ int err;
+
+ if (interface == PHY_INTERFACE_MODE_10GBASER) {
+ err = mv88e6393x_erratum_5_2(mpcs);
+ if (err)
+ return err;
+ }
+
+ err = mv88e6393x_power_lane(mpcs, true);
+ if (err)
+ return err;
+
+ return mv88e639x_xg_pcs_enable(mpcs);
+}
+
+static const struct phylink_pcs_ops mv88e6393x_xg_pcs_ops = {
+ .pcs_enable = mv88e6393x_xg_pcs_enable,
+ .pcs_disable = mv88e6393x_xg_pcs_disable,
+ .pcs_pre_config = mv88e6393x_xg_pcs_pre_config,
+ .pcs_post_config = mv88e6393x_xg_pcs_post_config,
+ .pcs_get_state = mv88e639x_xg_pcs_get_state,
+ .pcs_config = mv88e639x_xg_pcs_config,
+};
+
+static int mv88e6393x_pcs_init(struct mv88e6xxx_chip *chip, int port)
+{
+ struct mv88e639x_pcs *mpcs;
+ struct mii_bus *bus;
+ struct device *dev;
+ int lane, err;
+
+ lane = mv88e6xxx_serdes_get_lane(chip, port);
+ if (lane < 0)
+ return 0;
+
+ bus = mv88e6xxx_default_mdio_bus(chip);
+ dev = chip->dev;
+
+ mpcs = mv88e639x_pcs_alloc(dev, bus, lane, port);
+ if (!mpcs)
+ return -ENOMEM;
+
+ mpcs->sgmii_pcs.ops = &mv88e6393x_sgmii_pcs_ops;
+ mpcs->sgmii_pcs.neg_mode = true;
+ mpcs->xg_pcs.ops = &mv88e6393x_xg_pcs_ops;
+ mpcs->xg_pcs.neg_mode = true;
+ mpcs->supports_5g = true;
+
+ err = mv88e6393x_erratum_4_6(mpcs);
+ if (err)
+ goto err_free;
+
+ err = mv88e639x_pcs_setup_irq(mpcs, chip, port);
+ if (err)
+ goto err_free;
+
+ chip->ports[port].pcs_private = mpcs;
+
+ return 0;
+
+err_free:
+ kfree(mpcs);
+ return err;
+}
+
+const struct mv88e6xxx_pcs_ops mv88e6393x_pcs_ops = {
+ .pcs_init = mv88e6393x_pcs_init,
+ .pcs_teardown = mv88e639x_pcs_teardown,
+ .pcs_select = mv88e639x_pcs_select,
+};
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index dd66ec902d4c..5394a8cf7bf1 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -524,7 +524,6 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
phy_interface_t mode, bool force)
{
u16 cmode;
- int lane;
u16 reg;
int err;
@@ -577,19 +576,6 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
if (cmode == chip->ports[port].cmode && !force)
return 0;
- lane = mv88e6xxx_serdes_get_lane(chip, port);
- if (lane >= 0) {
- if (chip->ports[port].serdes_irq) {
- err = mv88e6xxx_serdes_irq_disable(chip, port, lane);
- if (err)
- return err;
- }
-
- err = mv88e6xxx_serdes_power_down(chip, port, lane);
- if (err)
- return err;
- }
-
chip->ports[port].cmode = 0;
if (cmode) {
@@ -605,22 +591,6 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
return err;
chip->ports[port].cmode = cmode;
-
- lane = mv88e6xxx_serdes_get_lane(chip, port);
- if (lane == -ENODEV)
- return 0;
- if (lane < 0)
- return lane;
-
- err = mv88e6xxx_serdes_power_up(chip, port, lane);
- if (err)
- return err;
-
- if (chip->ports[port].serdes_irq) {
- err = mv88e6xxx_serdes_irq_enable(chip, port, lane);
- if (err)
- return err;
- }
}
return 0;
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 80167d53212f..3b4b42651fa3 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -39,15 +39,8 @@ static int mv88e6390_serdes_read(struct mv88e6xxx_chip *chip,
return mv88e6xxx_phy_read_c45(chip, lane, device, reg, val);
}
-static int mv88e6390_serdes_write(struct mv88e6xxx_chip *chip,
- int lane, int device, int reg, u16 val)
-{
- return mv88e6xxx_phy_write_c45(chip, lane, device, reg, val);
-}
-
-static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip,
- u16 bmsr, u16 lpa, u16 status,
- struct phylink_link_state *state)
+int mv88e6xxx_pcs_decode_state(struct device *dev, u16 bmsr, u16 lpa,
+ u16 status, struct phylink_link_state *state)
{
state->link = false;
@@ -88,7 +81,7 @@ static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip,
state->speed = SPEED_10;
break;
default:
- dev_err(chip->dev, "invalid PHY speed\n");
+ dev_err(dev, "invalid PHY speed\n");
return -EINVAL;
}
} else if (state->link &&
@@ -117,160 +110,6 @@ static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip,
return 0;
}
-int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
- bool up)
-{
- u16 val, new_val;
- int err;
-
- err = mv88e6352_serdes_read(chip, MII_BMCR, &val);
- if (err)
- return err;
-
- if (up)
- new_val = val & ~BMCR_PDOWN;
- else
- new_val = val | BMCR_PDOWN;
-
- if (val != new_val)
- err = mv88e6352_serdes_write(chip, MII_BMCR, new_val);
-
- return err;
-}
-
-int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
- int lane, unsigned int mode,
- phy_interface_t interface,
- const unsigned long *advertise)
-{
- u16 adv, bmcr, val;
- bool changed;
- int err;
-
- switch (interface) {
- case PHY_INTERFACE_MODE_SGMII:
- adv = 0x0001;
- break;
-
- case PHY_INTERFACE_MODE_1000BASEX:
- adv = linkmode_adv_to_mii_adv_x(advertise,
- ETHTOOL_LINK_MODE_1000baseX_Full_BIT);
- break;
-
- default:
- return 0;
- }
-
- err = mv88e6352_serdes_read(chip, MII_ADVERTISE, &val);
- if (err)
- return err;
-
- changed = val != adv;
- if (changed) {
- err = mv88e6352_serdes_write(chip, MII_ADVERTISE, adv);
- if (err)
- return err;
- }
-
- err = mv88e6352_serdes_read(chip, MII_BMCR, &val);
- if (err)
- return err;
-
- if (phylink_autoneg_inband(mode))
- bmcr = val | BMCR_ANENABLE;
- else
- bmcr = val & ~BMCR_ANENABLE;
-
- if (bmcr == val)
- return changed;
-
- return mv88e6352_serdes_write(chip, MII_BMCR, bmcr);
-}
-
-int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
- int lane, struct phylink_link_state *state)
-{
- u16 bmsr, lpa, status;
- int err;
-
- err = mv88e6352_serdes_read(chip, MII_BMSR, &bmsr);
- if (err) {
- dev_err(chip->dev, "can't read Serdes PHY BMSR: %d\n", err);
- return err;
- }
-
- err = mv88e6352_serdes_read(chip, 0x11, &status);
- if (err) {
- dev_err(chip->dev, "can't read Serdes PHY status: %d\n", err);
- return err;
- }
-
- err = mv88e6352_serdes_read(chip, MII_LPA, &lpa);
- if (err) {
- dev_err(chip->dev, "can't read Serdes PHY LPA: %d\n", err);
- return err;
- }
-
- return mv88e6xxx_serdes_pcs_get_state(chip, bmsr, lpa, status, state);
-}
-
-int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
- int lane)
-{
- u16 bmcr;
- int err;
-
- err = mv88e6352_serdes_read(chip, MII_BMCR, &bmcr);
- if (err)
- return err;
-
- return mv88e6352_serdes_write(chip, MII_BMCR, bmcr | BMCR_ANRESTART);
-}
-
-int mv88e6352_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
- int lane, int speed, int duplex)
-{
- u16 val, bmcr;
- int err;
-
- err = mv88e6352_serdes_read(chip, MII_BMCR, &val);
- if (err)
- return err;
-
- bmcr = val & ~(BMCR_SPEED100 | BMCR_FULLDPLX | BMCR_SPEED1000);
- switch (speed) {
- case SPEED_1000:
- bmcr |= BMCR_SPEED1000;
- break;
- case SPEED_100:
- bmcr |= BMCR_SPEED100;
- break;
- case SPEED_10:
- break;
- }
-
- if (duplex == DUPLEX_FULL)
- bmcr |= BMCR_FULLDPLX;
-
- if (bmcr == val)
- return 0;
-
- return mv88e6352_serdes_write(chip, MII_BMCR, bmcr);
-}
-
-int mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
-{
- u8 cmode = chip->ports[port].cmode;
- int lane = -ENODEV;
-
- if ((cmode == MV88E6XXX_PORT_STS_CMODE_100BASEX) ||
- (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASEX) ||
- (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII))
- lane = 0xff; /* Unused */
-
- return lane;
-}
-
struct mv88e6352_serdes_hw_stat {
char string[ETH_GSTRING_LEN];
int sizeof_stat;
@@ -363,51 +202,6 @@ int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
}
-static void mv88e6352_serdes_irq_link(struct mv88e6xxx_chip *chip, int port)
-{
- u16 bmsr;
- int err;
-
- /* If the link has dropped, we want to know about it. */
- err = mv88e6352_serdes_read(chip, MII_BMSR, &bmsr);
- if (err) {
- dev_err(chip->dev, "can't read Serdes BMSR: %d\n", err);
- return;
- }
-
- dsa_port_phylink_mac_change(chip->ds, port, !!(bmsr & BMSR_LSTATUS));
-}
-
-irqreturn_t mv88e6352_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
- int lane)
-{
- irqreturn_t ret = IRQ_NONE;
- u16 status;
- int err;
-
- err = mv88e6352_serdes_read(chip, MV88E6352_SERDES_INT_STATUS, &status);
- if (err)
- return ret;
-
- if (status & MV88E6352_SERDES_INT_LINK_CHANGE) {
- ret = IRQ_HANDLED;
- mv88e6352_serdes_irq_link(chip, port);
- }
-
- return ret;
-}
-
-int mv88e6352_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
- bool enable)
-{
- u16 val = 0;
-
- if (enable)
- val |= MV88E6352_SERDES_INT_LINK_CHANGE;
-
- return mv88e6352_serdes_write(chip, MV88E6352_SERDES_INT_ENABLE, val);
-}
-
unsigned int mv88e6352_serdes_irq_mapping(struct mv88e6xxx_chip *chip, int port)
{
return irq_find_mapping(chip->g2_irq.domain, MV88E6352_SERDES_IRQ);
@@ -461,115 +255,6 @@ int mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
return lane;
}
-int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
- bool up)
-{
- /* The serdes power can't be controlled on this switch chip but we need
- * to supply this function to avoid returning -EOPNOTSUPP in
- * mv88e6xxx_serdes_power_up/mv88e6xxx_serdes_power_down
- */
- return 0;
-}
-
-int mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
-{
- /* There are no configurable serdes lanes on this switch chip but we
- * need to return a non-negative lane number so that callers of
- * mv88e6xxx_serdes_get_lane() know this is a serdes port.
- */
- switch (chip->ports[port].cmode) {
- case MV88E6185_PORT_STS_CMODE_SERDES:
- case MV88E6185_PORT_STS_CMODE_1000BASE_X:
- return 0;
- default:
- return -ENODEV;
- }
-}
-
-int mv88e6185_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
- int lane, struct phylink_link_state *state)
-{
- int err;
- u16 status;
-
- err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &status);
- if (err)
- return err;
-
- state->link = !!(status & MV88E6XXX_PORT_STS_LINK);
-
- if (state->link) {
- state->duplex = status & MV88E6XXX_PORT_STS_DUPLEX ? DUPLEX_FULL : DUPLEX_HALF;
-
- switch (status & MV88E6XXX_PORT_STS_SPEED_MASK) {
- case MV88E6XXX_PORT_STS_SPEED_1000:
- state->speed = SPEED_1000;
- break;
- case MV88E6XXX_PORT_STS_SPEED_100:
- state->speed = SPEED_100;
- break;
- case MV88E6XXX_PORT_STS_SPEED_10:
- state->speed = SPEED_10;
- break;
- default:
- dev_err(chip->dev, "invalid PHY speed\n");
- return -EINVAL;
- }
- } else {
- state->duplex = DUPLEX_UNKNOWN;
- state->speed = SPEED_UNKNOWN;
- }
-
- return 0;
-}
-
-int mv88e6097_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
- bool enable)
-{
- u8 cmode = chip->ports[port].cmode;
-
- /* The serdes interrupts are enabled in the G2_INT_MASK register. We
- * need to return 0 to avoid returning -EOPNOTSUPP in
- * mv88e6xxx_serdes_irq_enable/mv88e6xxx_serdes_irq_disable
- */
- switch (cmode) {
- case MV88E6185_PORT_STS_CMODE_SERDES:
- case MV88E6185_PORT_STS_CMODE_1000BASE_X:
- return 0;
- }
-
- return -EOPNOTSUPP;
-}
-
-static void mv88e6097_serdes_irq_link(struct mv88e6xxx_chip *chip, int port)
-{
- u16 status;
- int err;
-
- err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &status);
- if (err) {
- dev_err(chip->dev, "can't read port status: %d\n", err);
- return;
- }
-
- dsa_port_phylink_mac_change(chip->ds, port, !!(status & MV88E6XXX_PORT_STS_LINK));
-}
-
-irqreturn_t mv88e6097_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
- int lane)
-{
- u8 cmode = chip->ports[port].cmode;
-
- switch (cmode) {
- case MV88E6185_PORT_STS_CMODE_SERDES:
- case MV88E6185_PORT_STS_CMODE_1000BASE_X:
- mv88e6097_serdes_irq_link(chip, port);
- return IRQ_HANDLED;
- }
-
- return IRQ_NONE;
-}
-
int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
{
u8 cmode = chip->ports[port].cmode;
@@ -690,57 +375,6 @@ int mv88e6393x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
return lane;
}
-/* Set power up/down for 10GBASE-R and 10GBASE-X4/X2 */
-static int mv88e6390_serdes_power_10g(struct mv88e6xxx_chip *chip, int lane,
- bool up)
-{
- u16 val, new_val;
- int err;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_10G_CTRL1, &val);
-
- if (err)
- return err;
-
- if (up)
- new_val = val & ~(MDIO_CTRL1_RESET |
- MDIO_PCS_CTRL1_LOOPBACK |
- MDIO_CTRL1_LPOWER);
- else
- new_val = val | MDIO_CTRL1_LPOWER;
-
- if (val != new_val)
- err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_10G_CTRL1, new_val);
-
- return err;
-}
-
-/* Set power up/down for SGMII and 1000Base-X */
-static int mv88e6390_serdes_power_sgmii(struct mv88e6xxx_chip *chip, int lane,
- bool up)
-{
- u16 val, new_val;
- int err;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_BMCR, &val);
- if (err)
- return err;
-
- if (up)
- new_val = val & ~(BMCR_RESET | BMCR_LOOPBACK | BMCR_PDOWN);
- else
- new_val = val | BMCR_PDOWN;
-
- if (val != new_val)
- err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_BMCR, new_val);
-
- return err;
-}
-
struct mv88e6390_serdes_hw_stat {
char string[ETH_GSTRING_LEN];
int reg;
@@ -814,484 +448,6 @@ int mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
return ARRAY_SIZE(mv88e6390_serdes_hw_stats);
}
-static int mv88e6390_serdes_enable_checker(struct mv88e6xxx_chip *chip, int lane)
-{
- u16 reg;
- int err;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_PG_CONTROL, &reg);
- if (err)
- return err;
-
- reg |= MV88E6390_PG_CONTROL_ENABLE_PC;
- return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_PG_CONTROL, reg);
-}
-
-int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
- bool up)
-{
- u8 cmode = chip->ports[port].cmode;
- int err;
-
- switch (cmode) {
- case MV88E6XXX_PORT_STS_CMODE_SGMII:
- case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
- case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
- err = mv88e6390_serdes_power_sgmii(chip, lane, up);
- break;
- case MV88E6XXX_PORT_STS_CMODE_XAUI:
- case MV88E6XXX_PORT_STS_CMODE_RXAUI:
- err = mv88e6390_serdes_power_10g(chip, lane, up);
- break;
- default:
- err = -EINVAL;
- break;
- }
-
- if (!err && up)
- err = mv88e6390_serdes_enable_checker(chip, lane);
-
- return err;
-}
-
-int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
- int lane, unsigned int mode,
- phy_interface_t interface,
- const unsigned long *advertise)
-{
- u16 val, bmcr, adv;
- bool changed;
- int err;
-
- switch (interface) {
- case PHY_INTERFACE_MODE_SGMII:
- adv = 0x0001;
- break;
-
- case PHY_INTERFACE_MODE_1000BASEX:
- adv = linkmode_adv_to_mii_adv_x(advertise,
- ETHTOOL_LINK_MODE_1000baseX_Full_BIT);
- break;
-
- case PHY_INTERFACE_MODE_2500BASEX:
- adv = linkmode_adv_to_mii_adv_x(advertise,
- ETHTOOL_LINK_MODE_2500baseX_Full_BIT);
- break;
-
- default:
- return 0;
- }
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_ADVERTISE, &val);
- if (err)
- return err;
-
- changed = val != adv;
- if (changed) {
- err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_ADVERTISE, adv);
- if (err)
- return err;
- }
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_BMCR, &val);
- if (err)
- return err;
-
- if (phylink_autoneg_inband(mode))
- bmcr = val | BMCR_ANENABLE;
- else
- bmcr = val & ~BMCR_ANENABLE;
-
- /* setting ANENABLE triggers a restart of negotiation */
- if (bmcr == val)
- return changed;
-
- return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_BMCR, bmcr);
-}
-
-static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip,
- int port, int lane, struct phylink_link_state *state)
-{
- u16 bmsr, lpa, status;
- int err;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_BMSR, &bmsr);
- if (err) {
- dev_err(chip->dev, "can't read Serdes PHY BMSR: %d\n", err);
- return err;
- }
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_PHY_STATUS, &status);
- if (err) {
- dev_err(chip->dev, "can't read Serdes PHY status: %d\n", err);
- return err;
- }
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_LPA, &lpa);
- if (err) {
- dev_err(chip->dev, "can't read Serdes PHY LPA: %d\n", err);
- return err;
- }
-
- return mv88e6xxx_serdes_pcs_get_state(chip, bmsr, lpa, status, state);
-}
-
-static int mv88e6390_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
- int port, int lane, struct phylink_link_state *state)
-{
- u16 status;
- int err;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_10G_STAT1, &status);
- if (err)
- return err;
-
- state->link = !!(status & MDIO_STAT1_LSTATUS);
- if (state->link) {
- state->speed = SPEED_10000;
- state->duplex = DUPLEX_FULL;
- }
-
- return 0;
-}
-
-static int mv88e6393x_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
- int port, int lane,
- struct phylink_link_state *state)
-{
- u16 status;
- int err;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_10G_STAT1, &status);
- if (err)
- return err;
-
- state->link = !!(status & MDIO_STAT1_LSTATUS);
- if (state->link) {
- if (state->interface == PHY_INTERFACE_MODE_5GBASER)
- state->speed = SPEED_5000;
- else
- state->speed = SPEED_10000;
- state->duplex = DUPLEX_FULL;
- }
- return 0;
-}
-
-/* USXGMII registers for Marvell switch 88e639x are undocumented and this function is based
- * on some educated guesses. It appears that there are no status bits related to
- * autonegotiation complete or flow control.
- */
-static int mv88e639x_serdes_pcs_get_state_usxgmii(struct mv88e6xxx_chip *chip,
- int port, int lane,
- struct phylink_link_state *state)
-{
- u16 status, lp_status;
- int err;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_USXGMII_PHY_STATUS, &status);
- if (err) {
- dev_err(chip->dev, "can't read Serdes USXGMII PHY status: %d\n", err);
- return err;
- }
- dev_dbg(chip->dev, "USXGMII PHY status: 0x%x\n", status);
-
- state->link = !!(status & MDIO_USXGMII_LINK);
- state->an_complete = state->link;
-
- if (state->link) {
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_USXGMII_LP_STATUS, &lp_status);
- if (err) {
- dev_err(chip->dev, "can't read Serdes USXGMII LP status: %d\n", err);
- return err;
- }
- dev_dbg(chip->dev, "USXGMII LP status: 0x%x\n", lp_status);
- /* lp_status appears to include the "link" bit as per USXGMII spec. */
- phylink_decode_usxgmii_word(state, lp_status);
- }
- return 0;
-}
-
-int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
- int lane, struct phylink_link_state *state)
-{
- switch (state->interface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- case PHY_INTERFACE_MODE_2500BASEX:
- return mv88e6390_serdes_pcs_get_state_sgmii(chip, port, lane,
- state);
- case PHY_INTERFACE_MODE_XAUI:
- case PHY_INTERFACE_MODE_RXAUI:
- return mv88e6390_serdes_pcs_get_state_10g(chip, port, lane,
- state);
-
- default:
- return -EOPNOTSUPP;
- }
-}
-
-int mv88e6393x_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
- int lane, struct phylink_link_state *state)
-{
- switch (state->interface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- case PHY_INTERFACE_MODE_2500BASEX:
- return mv88e6390_serdes_pcs_get_state_sgmii(chip, port, lane,
- state);
- case PHY_INTERFACE_MODE_5GBASER:
- case PHY_INTERFACE_MODE_10GBASER:
- return mv88e6393x_serdes_pcs_get_state_10g(chip, port, lane,
- state);
- case PHY_INTERFACE_MODE_USXGMII:
- return mv88e639x_serdes_pcs_get_state_usxgmii(chip, port, lane,
- state);
-
- default:
- return -EOPNOTSUPP;
- }
-}
-
-int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
- int lane)
-{
- u16 bmcr;
- int err;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_BMCR, &bmcr);
- if (err)
- return err;
-
- return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_BMCR,
- bmcr | BMCR_ANRESTART);
-}
-
-int mv88e6390_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
- int lane, int speed, int duplex)
-{
- u16 val, bmcr;
- int err;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_BMCR, &val);
- if (err)
- return err;
-
- bmcr = val & ~(BMCR_SPEED100 | BMCR_FULLDPLX | BMCR_SPEED1000);
- switch (speed) {
- case SPEED_2500:
- case SPEED_1000:
- bmcr |= BMCR_SPEED1000;
- break;
- case SPEED_100:
- bmcr |= BMCR_SPEED100;
- break;
- case SPEED_10:
- break;
- }
-
- if (duplex == DUPLEX_FULL)
- bmcr |= BMCR_FULLDPLX;
-
- if (bmcr == val)
- return 0;
-
- return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_BMCR, bmcr);
-}
-
-static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip,
- int port, int lane)
-{
- u16 bmsr;
- int err;
-
- /* If the link has dropped, we want to know about it. */
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_BMSR, &bmsr);
- if (err) {
- dev_err(chip->dev, "can't read Serdes BMSR: %d\n", err);
- return;
- }
-
- dsa_port_phylink_mac_change(chip->ds, port, !!(bmsr & BMSR_LSTATUS));
-}
-
-static void mv88e6393x_serdes_irq_link_10g(struct mv88e6xxx_chip *chip,
- int port, u8 lane)
-{
- u16 status;
- int err;
-
- /* If the link has dropped, we want to know about it. */
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_10G_STAT1, &status);
- if (err) {
- dev_err(chip->dev, "can't read Serdes STAT1: %d\n", err);
- return;
- }
-
- dsa_port_phylink_mac_change(chip->ds, port, !!(status & MDIO_STAT1_LSTATUS));
-}
-
-static int mv88e6390_serdes_irq_enable_sgmii(struct mv88e6xxx_chip *chip,
- int lane, bool enable)
-{
- u16 val = 0;
-
- if (enable)
- val |= MV88E6390_SGMII_INT_LINK_DOWN |
- MV88E6390_SGMII_INT_LINK_UP;
-
- return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_INT_ENABLE, val);
-}
-
-int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
- bool enable)
-{
- u8 cmode = chip->ports[port].cmode;
-
- switch (cmode) {
- case MV88E6XXX_PORT_STS_CMODE_SGMII:
- case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
- case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
- return mv88e6390_serdes_irq_enable_sgmii(chip, lane, enable);
- }
-
- return 0;
-}
-
-static int mv88e6390_serdes_irq_status_sgmii(struct mv88e6xxx_chip *chip,
- int lane, u16 *status)
-{
- int err;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_INT_STATUS, status);
-
- return err;
-}
-
-static int mv88e6393x_serdes_irq_enable_10g(struct mv88e6xxx_chip *chip,
- u8 lane, bool enable)
-{
- u16 val = 0;
-
- if (enable)
- val |= MV88E6393X_10G_INT_LINK_CHANGE;
-
- return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6393X_10G_INT_ENABLE, val);
-}
-
-int mv88e6393x_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port,
- int lane, bool enable)
-{
- u8 cmode = chip->ports[port].cmode;
-
- switch (cmode) {
- case MV88E6XXX_PORT_STS_CMODE_SGMII:
- case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
- case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
- return mv88e6390_serdes_irq_enable_sgmii(chip, lane, enable);
- case MV88E6393X_PORT_STS_CMODE_5GBASER:
- case MV88E6393X_PORT_STS_CMODE_10GBASER:
- case MV88E6393X_PORT_STS_CMODE_USXGMII:
- return mv88e6393x_serdes_irq_enable_10g(chip, lane, enable);
- }
-
- return 0;
-}
-
-static int mv88e6393x_serdes_irq_status_10g(struct mv88e6xxx_chip *chip,
- u8 lane, u16 *status)
-{
- int err;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6393X_10G_INT_STATUS, status);
-
- return err;
-}
-
-irqreturn_t mv88e6393x_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
- int lane)
-{
- u8 cmode = chip->ports[port].cmode;
- irqreturn_t ret = IRQ_NONE;
- u16 status;
- int err;
-
- switch (cmode) {
- case MV88E6XXX_PORT_STS_CMODE_SGMII:
- case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
- case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
- err = mv88e6390_serdes_irq_status_sgmii(chip, lane, &status);
- if (err)
- return ret;
- if (status & (MV88E6390_SGMII_INT_LINK_DOWN |
- MV88E6390_SGMII_INT_LINK_UP)) {
- ret = IRQ_HANDLED;
- mv88e6390_serdes_irq_link_sgmii(chip, port, lane);
- }
- break;
- case MV88E6393X_PORT_STS_CMODE_5GBASER:
- case MV88E6393X_PORT_STS_CMODE_10GBASER:
- case MV88E6393X_PORT_STS_CMODE_USXGMII:
- err = mv88e6393x_serdes_irq_status_10g(chip, lane, &status);
- if (err)
- return err;
- if (status & MV88E6393X_10G_INT_LINK_CHANGE) {
- ret = IRQ_HANDLED;
- mv88e6393x_serdes_irq_link_10g(chip, port, lane);
- }
- break;
- }
-
- return ret;
-}
-
-irqreturn_t mv88e6390_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
- int lane)
-{
- u8 cmode = chip->ports[port].cmode;
- irqreturn_t ret = IRQ_NONE;
- u16 status;
- int err;
-
- switch (cmode) {
- case MV88E6XXX_PORT_STS_CMODE_SGMII:
- case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
- case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
- err = mv88e6390_serdes_irq_status_sgmii(chip, lane, &status);
- if (err)
- return ret;
- if (status & (MV88E6390_SGMII_INT_LINK_DOWN |
- MV88E6390_SGMII_INT_LINK_UP)) {
- ret = IRQ_HANDLED;
- mv88e6390_serdes_irq_link_sgmii(chip, port, lane);
- }
- }
-
- return ret;
-}
-
unsigned int mv88e6390_serdes_irq_mapping(struct mv88e6xxx_chip *chip, int port)
{
return irq_find_mapping(chip->g2_irq.domain, port);
@@ -1390,259 +546,3 @@ int mv88e6352_serdes_set_tx_amplitude(struct mv88e6xxx_chip *chip, int port,
return mv88e6352_serdes_write(chip, MV88E6352_SERDES_SPEC_CTRL2, ctrl);
}
-
-static int mv88e6393x_serdes_power_lane(struct mv88e6xxx_chip *chip, int lane,
- bool on)
-{
- u16 reg;
- int err;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6393X_SERDES_CTRL1, &reg);
- if (err)
- return err;
-
- if (on)
- reg &= ~(MV88E6393X_SERDES_CTRL1_TX_PDOWN |
- MV88E6393X_SERDES_CTRL1_RX_PDOWN);
- else
- reg |= MV88E6393X_SERDES_CTRL1_TX_PDOWN |
- MV88E6393X_SERDES_CTRL1_RX_PDOWN;
-
- return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6393X_SERDES_CTRL1, reg);
-}
-
-static int mv88e6393x_serdes_erratum_4_6(struct mv88e6xxx_chip *chip, int lane)
-{
- u16 reg;
- int err;
-
- /* mv88e6393x family errata 4.6:
- * Cannot clear PwrDn bit on SERDES if device is configured CPU_MGD
- * mode or P0_mode is configured for [x]MII.
- * Workaround: Set SERDES register 4.F002 bit 5=0 and bit 15=1.
- *
- * It seems that after this workaround the SERDES is automatically
- * powered up (the bit is cleared), so power it down.
- */
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6393X_SERDES_POC, &reg);
- if (err)
- return err;
-
- reg &= ~MV88E6393X_SERDES_POC_PDOWN;
- reg |= MV88E6393X_SERDES_POC_RESET;
-
- err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6393X_SERDES_POC, reg);
- if (err)
- return err;
-
- err = mv88e6390_serdes_power_sgmii(chip, lane, false);
- if (err)
- return err;
-
- return mv88e6393x_serdes_power_lane(chip, lane, false);
-}
-
-int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip)
-{
- int err;
-
- err = mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT0_LANE);
- if (err)
- return err;
-
- err = mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT9_LANE);
- if (err)
- return err;
-
- return mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT10_LANE);
-}
-
-static int mv88e6393x_serdes_erratum_4_8(struct mv88e6xxx_chip *chip, int lane)
-{
- u16 reg, pcs;
- int err;
-
- /* mv88e6393x family errata 4.8:
- * When a SERDES port is operating in 1000BASE-X or SGMII mode link may
- * not come up after hardware reset or software reset of SERDES core.
- * Workaround is to write SERDES register 4.F074.14=1 for only those
- * modes and 0 in all other modes.
- */
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6393X_SERDES_POC, &pcs);
- if (err)
- return err;
-
- pcs &= MV88E6393X_SERDES_POC_PCS_MASK;
-
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6393X_ERRATA_4_8_REG, &reg);
- if (err)
- return err;
-
- if (pcs == MV88E6393X_SERDES_POC_PCS_1000BASEX ||
- pcs == MV88E6393X_SERDES_POC_PCS_SGMII_PHY ||
- pcs == MV88E6393X_SERDES_POC_PCS_SGMII_MAC)
- reg |= MV88E6393X_ERRATA_4_8_BIT;
- else
- reg &= ~MV88E6393X_ERRATA_4_8_BIT;
-
- return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6393X_ERRATA_4_8_REG, reg);
-}
-
-static int mv88e6393x_serdes_erratum_5_2(struct mv88e6xxx_chip *chip, int lane,
- u8 cmode)
-{
- static const struct {
- u16 dev, reg, val, mask;
- } fixes[] = {
- { MDIO_MMD_VEND1, 0x8093, 0xcb5a, 0xffff },
- { MDIO_MMD_VEND1, 0x8171, 0x7088, 0xffff },
- { MDIO_MMD_VEND1, 0x80c9, 0x311a, 0xffff },
- { MDIO_MMD_VEND1, 0x80a2, 0x8000, 0xff7f },
- { MDIO_MMD_VEND1, 0x80a9, 0x0000, 0xfff0 },
- { MDIO_MMD_VEND1, 0x80a3, 0x0000, 0xf8ff },
- { MDIO_MMD_PHYXS, MV88E6393X_SERDES_POC,
- MV88E6393X_SERDES_POC_RESET, MV88E6393X_SERDES_POC_RESET },
- };
- int err, i;
- u16 reg;
-
- /* mv88e6393x family errata 5.2:
- * For optimal signal integrity the following sequence should be applied
- * to SERDES operating in 10G mode. These registers only apply to 10G
- * operation and have no effect on other speeds.
- */
- if (cmode != MV88E6393X_PORT_STS_CMODE_10GBASER &&
- cmode != MV88E6393X_PORT_STS_CMODE_USXGMII)
- return 0;
-
- for (i = 0; i < ARRAY_SIZE(fixes); ++i) {
- err = mv88e6390_serdes_read(chip, lane, fixes[i].dev,
- fixes[i].reg, &reg);
- if (err)
- return err;
-
- reg &= ~fixes[i].mask;
- reg |= fixes[i].val;
-
- err = mv88e6390_serdes_write(chip, lane, fixes[i].dev,
- fixes[i].reg, reg);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int mv88e6393x_serdes_fix_2500basex_an(struct mv88e6xxx_chip *chip,
- int lane, u8 cmode, bool on)
-{
- u16 reg;
- int err;
-
- if (cmode != MV88E6XXX_PORT_STS_CMODE_2500BASEX)
- return 0;
-
- /* Inband AN is broken on Amethyst in 2500base-x mode when set by
- * standard mechanism (via cmode).
- * We can get around this by configuring the PCS mode to 1000base-x
- * and then writing value 0x58 to register 1e.8000. (This must be done
- * while SerDes receiver and transmitter are disabled, which is, when
- * this function is called.)
- * It seem that when we do this configuration to 2500base-x mode (by
- * changing PCS mode to 1000base-x and frequency to 3.125 GHz from
- * 1.25 GHz) and then configure to sgmii or 1000base-x, the device
- * thinks that it already has SerDes at 1.25 GHz and does not change
- * the 1e.8000 register, leaving SerDes at 3.125 GHz.
- * To avoid this, change PCS mode back to 2500base-x when disabling
- * SerDes from 2500base-x mode.
- */
- err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6393X_SERDES_POC, &reg);
- if (err)
- return err;
-
- reg &= ~(MV88E6393X_SERDES_POC_PCS_MASK | MV88E6393X_SERDES_POC_AN);
- if (on)
- reg |= MV88E6393X_SERDES_POC_PCS_1000BASEX |
- MV88E6393X_SERDES_POC_AN;
- else
- reg |= MV88E6393X_SERDES_POC_PCS_2500BASEX;
- reg |= MV88E6393X_SERDES_POC_RESET;
-
- err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
- MV88E6393X_SERDES_POC, reg);
- if (err)
- return err;
-
- err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_VEND1, 0x8000, 0x58);
- if (err)
- return err;
-
- return 0;
-}
-
-int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
- bool on)
-{
- u8 cmode = chip->ports[port].cmode;
- int err;
-
- if (port != 0 && port != 9 && port != 10)
- return -EOPNOTSUPP;
-
- if (on) {
- err = mv88e6393x_serdes_erratum_4_8(chip, lane);
- if (err)
- return err;
-
- err = mv88e6393x_serdes_erratum_5_2(chip, lane, cmode);
- if (err)
- return err;
-
- err = mv88e6393x_serdes_fix_2500basex_an(chip, lane, cmode,
- true);
- if (err)
- return err;
-
- err = mv88e6393x_serdes_power_lane(chip, lane, true);
- if (err)
- return err;
- }
-
- switch (cmode) {
- case MV88E6XXX_PORT_STS_CMODE_SGMII:
- case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
- case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
- err = mv88e6390_serdes_power_sgmii(chip, lane, on);
- break;
- case MV88E6393X_PORT_STS_CMODE_5GBASER:
- case MV88E6393X_PORT_STS_CMODE_10GBASER:
- case MV88E6393X_PORT_STS_CMODE_USXGMII:
- err = mv88e6390_serdes_power_10g(chip, lane, on);
- break;
- default:
- err = -EINVAL;
- break;
- }
-
- if (err)
- return err;
-
- if (!on) {
- err = mv88e6393x_serdes_power_lane(chip, lane, false);
- if (err)
- return err;
-
- err = mv88e6393x_serdes_fix_2500basex_an(chip, lane, cmode,
- false);
- }
-
- return err;
-}
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index e245687ddb1d..aac95cab46e3 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -12,6 +12,8 @@
#include "chip.h"
+struct phylink_link_state;
+
#define MV88E6352_ADDR_SERDES 0x0f
#define MV88E6352_SERDES_PAGE_FIBER 0x01
#define MV88E6352_SERDES_IRQ 0x0b
@@ -44,6 +46,10 @@
/* 10GBASE-R and 10GBASE-X4/X2 */
#define MV88E6390_10G_CTRL1 (0x1000 + MDIO_CTRL1)
#define MV88E6390_10G_STAT1 (0x1000 + MDIO_STAT1)
+#define MV88E6390_10G_INT_ENABLE 0x9001
+#define MV88E6390_10G_INT_LINK_DOWN BIT(3)
+#define MV88E6390_10G_INT_LINK_UP BIT(2)
+#define MV88E6390_10G_INT_STATUS 0x9003
#define MV88E6393X_10G_INT_ENABLE 0x9000
#define MV88E6393X_10G_INT_LINK_CHANGE BIT(2)
#define MV88E6393X_10G_INT_STATUS 0x9001
@@ -107,65 +113,17 @@
#define MV88E6393X_ERRATA_4_8_REG 0xF074
#define MV88E6393X_ERRATA_4_8_BIT BIT(14)
-int mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6xxx_pcs_decode_state(struct device *dev, u16 bmsr, u16 lpa,
+ u16 status, struct phylink_link_state *state);
+
int mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
-int mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
int mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
int mv88e6393x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
-int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
- int lane, unsigned int mode,
- phy_interface_t interface,
- const unsigned long *advertise);
-int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
- int lane, unsigned int mode,
- phy_interface_t interface,
- const unsigned long *advertise);
-int mv88e6185_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
- int lane, struct phylink_link_state *state);
-int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
- int lane, struct phylink_link_state *state);
-int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
- int lane, struct phylink_link_state *state);
-int mv88e6393x_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
- int lane, struct phylink_link_state *state);
-int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
- int lane);
-int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
- int lane);
-int mv88e6352_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
- int lane, int speed, int duplex);
-int mv88e6390_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
- int lane, int speed, int duplex);
unsigned int mv88e6352_serdes_irq_mapping(struct mv88e6xxx_chip *chip,
int port);
unsigned int mv88e6390_serdes_irq_mapping(struct mv88e6xxx_chip *chip,
int port);
-int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
- bool up);
-int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
- bool on);
-int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
- bool on);
-int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
- bool on);
-int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip);
-int mv88e6097_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
- bool enable);
-int mv88e6352_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
- bool enable);
-int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
- bool enable);
-int mv88e6393x_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port,
- int lane, bool enable);
-irqreturn_t mv88e6097_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
- int lane);
-irqreturn_t mv88e6352_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
- int lane);
-irqreturn_t mv88e6390_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
- int lane);
-irqreturn_t mv88e6393x_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
- int lane);
int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
int mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
int port, uint8_t *data);
@@ -195,24 +153,6 @@ static inline int mv88e6xxx_serdes_get_lane(struct mv88e6xxx_chip *chip,
return chip->info->ops->serdes_get_lane(chip, port);
}
-static inline int mv88e6xxx_serdes_power_up(struct mv88e6xxx_chip *chip,
- int port, int lane)
-{
- if (!chip->info->ops->serdes_power)
- return -EOPNOTSUPP;
-
- return chip->info->ops->serdes_power(chip, port, lane, true);
-}
-
-static inline int mv88e6xxx_serdes_power_down(struct mv88e6xxx_chip *chip,
- int port, int lane)
-{
- if (!chip->info->ops->serdes_power)
- return -EOPNOTSUPP;
-
- return chip->info->ops->serdes_power(chip, port, lane, false);
-}
-
static inline unsigned int
mv88e6xxx_serdes_irq_mapping(struct mv88e6xxx_chip *chip, int port)
{
@@ -222,31 +162,9 @@ mv88e6xxx_serdes_irq_mapping(struct mv88e6xxx_chip *chip, int port)
return chip->info->ops->serdes_irq_mapping(chip, port);
}
-static inline int mv88e6xxx_serdes_irq_enable(struct mv88e6xxx_chip *chip,
- int port, int lane)
-{
- if (!chip->info->ops->serdes_irq_enable)
- return -EOPNOTSUPP;
-
- return chip->info->ops->serdes_irq_enable(chip, port, lane, true);
-}
-
-static inline int mv88e6xxx_serdes_irq_disable(struct mv88e6xxx_chip *chip,
- int port, int lane)
-{
- if (!chip->info->ops->serdes_irq_enable)
- return -EOPNOTSUPP;
-
- return chip->info->ops->serdes_irq_enable(chip, port, lane, false);
-}
-
-static inline irqreturn_t
-mv88e6xxx_serdes_irq_status(struct mv88e6xxx_chip *chip, int port, int lane)
-{
- if (!chip->info->ops->serdes_irq_status)
- return IRQ_NONE;
-
- return chip->info->ops->serdes_irq_status(chip, port, lane);
-}
+extern const struct mv88e6xxx_pcs_ops mv88e6185_pcs_ops;
+extern const struct mv88e6xxx_pcs_ops mv88e6352_pcs_ops;
+extern const struct mv88e6xxx_pcs_ops mv88e6390_pcs_ops;
+extern const struct mv88e6xxx_pcs_ops mv88e6393x_pcs_ops;
#endif
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 8da46d284e35..fd7eb4a52918 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1042,12 +1042,6 @@ static void felix_phylink_get_caps(struct dsa_switch *ds, int port,
{
struct ocelot *ocelot = ds->priv;
- /* This driver does not make use of the speed, duplex, pause or the
- * advertisement in its mac_config, so it is safe to mark this driver
- * as non-legacy.
- */
- config->legacy_pre_march2020 = false;
-
config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
MAC_10 | MAC_100 | MAC_1000FD |
MAC_2500FD;
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 1c113957fcf4..dce3548dcd05 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -16,6 +16,7 @@
#include <net/pkt_sched.h>
#include <linux/iopoll.h>
#include <linux/mdio.h>
+#include <linux/of.h>
#include <linux/pci.h>
#include <linux/time.h>
#include "felix.h"
diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c
index 15003b2af264..8f912bda120b 100644
--- a/drivers/net/dsa/ocelot/seville_vsc9953.c
+++ b/drivers/net/dsa/ocelot/seville_vsc9953.c
@@ -2,13 +2,14 @@
/* Distributed Switch Architecture VSC9953 driver
* Copyright (C) 2020, Maxim Kochetkov <fido_max@inbox.ru>
*/
+#include <linux/platform_device.h>
#include <linux/types.h>
#include <soc/mscc/ocelot_vcap.h>
#include <soc/mscc/ocelot_sys.h>
#include <soc/mscc/ocelot.h>
#include <linux/mdio/mdio-mscc-miim.h>
+#include <linux/mod_devicetable.h>
#include <linux/of_mdio.h>
-#include <linux/of_platform.h>
#include <linux/pcs-lynx.h>
#include <linux/dsa/ocelot.h>
#include <linux/iopoll.h>
diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c
index 3b0937031499..8d9d271ac3af 100644
--- a/drivers/net/dsa/qca/ar9331.c
+++ b/drivers/net/dsa/qca/ar9331.c
@@ -1012,7 +1012,7 @@ static const struct regmap_config ar9331_mdio_regmap_config = {
.wr_table = &ar9331_register_set,
.rd_table = &ar9331_register_set,
- .cache_type = REGCACHE_RBTREE,
+ .cache_type = REGCACHE_MAPLE,
};
static struct regmap_bus ar9331_sw_bus = {
diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c
index efe9380d4a15..47b9f5bf98fb 100644
--- a/drivers/net/dsa/qca/qca8k-8xxx.c
+++ b/drivers/net/dsa/qca/qca8k-8xxx.c
@@ -1400,8 +1400,6 @@ static void qca8k_phylink_get_caps(struct dsa_switch *ds, int port,
config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
MAC_10 | MAC_100 | MAC_1000FD;
-
- config->legacy_pre_march2020 = false;
}
static void
diff --git a/drivers/net/dsa/qca/qca8k-leds.c b/drivers/net/dsa/qca/qca8k-leds.c
index 1261e0bb21ef..e8c16e76e34b 100644
--- a/drivers/net/dsa/qca/qca8k-leds.c
+++ b/drivers/net/dsa/qca/qca8k-leds.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/property.h>
#include <linux/regmap.h>
#include <net/dsa.h>
diff --git a/drivers/net/dsa/realtek/realtek-mdio.c b/drivers/net/dsa/realtek/realtek-mdio.c
index 5a8fe707ca25..4310e7793e58 100644
--- a/drivers/net/dsa/realtek/realtek-mdio.c
+++ b/drivers/net/dsa/realtek/realtek-mdio.c
@@ -20,7 +20,7 @@
*/
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/overflow.h>
#include <linux/regmap.h>
diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c
index 1b447d96b9c4..c2bd8bb6c9c2 100644
--- a/drivers/net/dsa/realtek/realtek-smi.c
+++ b/drivers/net/dsa/realtek/realtek-smi.c
@@ -31,7 +31,6 @@
#include <linux/spinlock.h>
#include <linux/skbuff.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/of_mdio.h>
#include <linux/delay.h>
#include <linux/gpio/consumer.h>
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 3529a565b4aa..331bb1c6676a 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -15,7 +15,6 @@
#include <linux/of.h>
#include <linux/of_net.h>
#include <linux/of_mdio.h>
-#include <linux/of_device.h>
#include <linux/pcs/pcs-xpcs.h>
#include <linux/netdev_features.h>
#include <linux/netdevice.h>
@@ -1396,12 +1395,6 @@ static void sja1105_phylink_get_caps(struct dsa_switch *ds, int port,
struct sja1105_xmii_params_entry *mii;
phy_interface_t phy_mode;
- /* This driver does not make use of the speed, duplex, pause or the
- * advertisement in its mac_config, so it is safe to mark this driver
- * as non-legacy.
- */
- config->legacy_pre_march2020 = false;
-
phy_mode = priv->phy_mode[port];
if (phy_mode == PHY_INTERFACE_MODE_SGMII ||
phy_mode == PHY_INTERFACE_MODE_2500BASEX) {
diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c
index ef1a4a7c47b2..4f09e7438f3b 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-core.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-core.c
@@ -18,7 +18,6 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/of_mdio.h>
#include <linux/bitops.h>
#include <linux/if_bridge.h>
diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c
index fa622639d640..753fef757f11 100644
--- a/drivers/net/dsa/xrs700x/xrs700x.c
+++ b/drivers/net/dsa/xrs700x/xrs700x.c
@@ -7,7 +7,7 @@
#include <net/dsa.h>
#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/netdev_features.h>
#include <linux/if_hsr.h>
#include "xrs700x.h"
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index 5dfc751572ed..220400a633f5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -93,7 +93,7 @@ static u32 hw_atl2_sem_act_rslvr_get(struct aq_hw_s *self)
static int hw_atl2_hw_reset(struct aq_hw_s *self)
{
- struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+ struct hw_atl2_priv *priv = self->priv;
int err;
err = hw_atl2_utils_soft_reset(self);
@@ -378,8 +378,8 @@ static int hw_atl2_hw_init_tx_path(struct aq_hw_s *self)
static void hw_atl2_hw_init_new_rx_filters(struct aq_hw_s *self)
{
- struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
u8 *prio_tc_map = self->aq_nic_cfg->prio_tc_map;
+ struct hw_atl2_priv *priv = self->priv;
u16 action;
u8 index;
int i;
@@ -433,7 +433,7 @@ static void hw_atl2_hw_new_rx_filter_vlan_promisc(struct aq_hw_s *self,
u16 off_action = (!promisc &&
!hw_atl_rpfl2promiscuous_mode_en_get(self)) ?
HW_ATL2_ACTION_DROP : HW_ATL2_ACTION_DISABLE;
- struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+ struct hw_atl2_priv *priv = self->priv;
u8 index;
index = priv->art_base_index + HW_ATL2_RPF_VLAN_PROMISC_OFF_INDEX;
@@ -445,7 +445,7 @@ static void hw_atl2_hw_new_rx_filter_vlan_promisc(struct aq_hw_s *self,
static void hw_atl2_hw_new_rx_filter_promisc(struct aq_hw_s *self, bool promisc)
{
u16 off_action = promisc ? HW_ATL2_ACTION_DISABLE : HW_ATL2_ACTION_DROP;
- struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+ struct hw_atl2_priv *priv = self->priv;
bool vlan_promisc_enable;
u8 index;
@@ -539,8 +539,8 @@ static int hw_atl2_hw_init(struct aq_hw_s *self, const u8 *mac_addr)
[AQ_HW_IRQ_MSIX] = { 0x20000022U, 0x20000026U },
};
- struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
struct aq_nic_cfg_s *aq_nic_cfg = self->aq_nic_cfg;
+ struct hw_atl2_priv *priv = self->priv;
u8 base_index, count;
int err;
@@ -770,7 +770,7 @@ static struct aq_stats_s *hw_atl2_utils_get_hw_stats(struct aq_hw_s *self)
static int hw_atl2_hw_vlan_set(struct aq_hw_s *self,
struct aq_rx_filter_vlan *aq_vlans)
{
- struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+ struct hw_atl2_priv *priv = self->priv;
u32 queue;
u8 index;
int i;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
index 674683b54304..52e2070a4a2f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
@@ -413,8 +413,8 @@ do { \
static int aq_a2_fw_update_stats(struct aq_hw_s *self)
{
- struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
struct aq_stats_s *cs = &self->curr_stats;
+ struct hw_atl2_priv *priv = self->priv;
struct statistics_s stats;
struct version_s version;
int err;
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 948586bf1b5b..75ca3ddda1f5 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -255,4 +255,16 @@ config BNXT_HWMON
Say Y if you want to expose the thermal sensor data on NetXtreme-C/E
devices, via the hwmon sysfs interface.
+config BCMASP
+ tristate "Broadcom ASP 2.0 Ethernet support"
+ depends on ARCH_BRCMSTB || COMPILE_TEST
+ default ARCH_BRCMSTB
+ depends on OF
+ select MII
+ select PHYLIB
+ select MDIO_BCM_UNIMAC
+ help
+ This configuration enables the Broadcom ASP 2.0 Ethernet controller
+ driver which is present in Broadcom STB SoCs such as 72165.
+
endif # NET_VENDOR_BROADCOM
diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile
index 0ddfb5b5d53c..bac5cb6ad0cd 100644
--- a/drivers/net/ethernet/broadcom/Makefile
+++ b/drivers/net/ethernet/broadcom/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_BGMAC_BCMA) += bgmac-bcma.o bgmac-bcma-mdio.o
obj-$(CONFIG_BGMAC_PLATFORM) += bgmac-platform.o
obj-$(CONFIG_SYSTEMPORT) += bcmsysport.o
obj-$(CONFIG_BNXT) += bnxt/
+obj-$(CONFIG_BCMASP) += asp2/
diff --git a/drivers/net/ethernet/broadcom/asp2/Makefile b/drivers/net/ethernet/broadcom/asp2/Makefile
new file mode 100644
index 000000000000..e07550315f83
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/asp2/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_BCMASP) += bcm-asp.o
+bcm-asp-objs := bcmasp.o bcmasp_intf.o bcmasp_ethtool.o
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
new file mode 100644
index 000000000000..a9984efff6d1
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -0,0 +1,1437 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Broadcom STB ASP 2.0 Driver
+ *
+ * Copyright (c) 2023 Broadcom
+ */
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/clk.h>
+
+#include "bcmasp.h"
+#include "bcmasp_intf_defs.h"
+
+static void _intr2_mask_clear(struct bcmasp_priv *priv, u32 mask)
+{
+ intr2_core_wl(priv, mask, ASP_INTR2_MASK_CLEAR);
+ priv->irq_mask &= ~mask;
+}
+
+static void _intr2_mask_set(struct bcmasp_priv *priv, u32 mask)
+{
+ intr2_core_wl(priv, mask, ASP_INTR2_MASK_SET);
+ priv->irq_mask |= mask;
+}
+
+void bcmasp_enable_tx_irq(struct bcmasp_intf *intf, int en)
+{
+ struct bcmasp_priv *priv = intf->parent;
+
+ if (en)
+ _intr2_mask_clear(priv, ASP_INTR2_TX_DESC(intf->channel));
+ else
+ _intr2_mask_set(priv, ASP_INTR2_TX_DESC(intf->channel));
+}
+EXPORT_SYMBOL_GPL(bcmasp_enable_tx_irq);
+
+void bcmasp_enable_rx_irq(struct bcmasp_intf *intf, int en)
+{
+ struct bcmasp_priv *priv = intf->parent;
+
+ if (en)
+ _intr2_mask_clear(priv, ASP_INTR2_RX_ECH(intf->channel));
+ else
+ _intr2_mask_set(priv, ASP_INTR2_RX_ECH(intf->channel));
+}
+EXPORT_SYMBOL_GPL(bcmasp_enable_rx_irq);
+
+static void bcmasp_intr2_mask_set_all(struct bcmasp_priv *priv)
+{
+ _intr2_mask_set(priv, 0xffffffff);
+ priv->irq_mask = 0xffffffff;
+}
+
+static void bcmasp_intr2_clear_all(struct bcmasp_priv *priv)
+{
+ intr2_core_wl(priv, 0xffffffff, ASP_INTR2_CLEAR);
+}
+
+static void bcmasp_intr2_handling(struct bcmasp_intf *intf, u32 status)
+{
+ if (status & ASP_INTR2_RX_ECH(intf->channel)) {
+ if (likely(napi_schedule_prep(&intf->rx_napi))) {
+ bcmasp_enable_rx_irq(intf, 0);
+ __napi_schedule_irqoff(&intf->rx_napi);
+ }
+ }
+
+ if (status & ASP_INTR2_TX_DESC(intf->channel)) {
+ if (likely(napi_schedule_prep(&intf->tx_napi))) {
+ bcmasp_enable_tx_irq(intf, 0);
+ __napi_schedule_irqoff(&intf->tx_napi);
+ }
+ }
+}
+
+static irqreturn_t bcmasp_isr(int irq, void *data)
+{
+ struct bcmasp_priv *priv = data;
+ struct bcmasp_intf *intf;
+ u32 status;
+
+ status = intr2_core_rl(priv, ASP_INTR2_STATUS) &
+ ~intr2_core_rl(priv, ASP_INTR2_MASK_STATUS);
+
+ intr2_core_wl(priv, status, ASP_INTR2_CLEAR);
+
+ if (unlikely(status == 0)) {
+ dev_warn(&priv->pdev->dev, "l2 spurious interrupt\n");
+ return IRQ_NONE;
+ }
+
+ /* Handle intferfaces */
+ list_for_each_entry(intf, &priv->intfs, list)
+ bcmasp_intr2_handling(intf, status);
+
+ return IRQ_HANDLED;
+}
+
+void bcmasp_flush_rx_port(struct bcmasp_intf *intf)
+{
+ struct bcmasp_priv *priv = intf->parent;
+ u32 mask;
+
+ switch (intf->port) {
+ case 0:
+ mask = ASP_CTRL_UMAC0_FLUSH_MASK;
+ break;
+ case 1:
+ mask = ASP_CTRL_UMAC1_FLUSH_MASK;
+ break;
+ case 2:
+ mask = ASP_CTRL_SPB_FLUSH_MASK;
+ break;
+ default:
+ /* Not valid port */
+ return;
+ }
+
+ rx_ctrl_core_wl(priv, mask, priv->hw_info->rx_ctrl_flush);
+}
+
+static void bcmasp_netfilt_hw_en_wake(struct bcmasp_priv *priv,
+ struct bcmasp_net_filter *nfilt)
+{
+ rx_filter_core_wl(priv, ASP_RX_FILTER_NET_OFFSET_L3_1(64),
+ ASP_RX_FILTER_NET_OFFSET(nfilt->hw_index));
+
+ rx_filter_core_wl(priv, ASP_RX_FILTER_NET_OFFSET_L2(32) |
+ ASP_RX_FILTER_NET_OFFSET_L3_0(32) |
+ ASP_RX_FILTER_NET_OFFSET_L3_1(96) |
+ ASP_RX_FILTER_NET_OFFSET_L4(32),
+ ASP_RX_FILTER_NET_OFFSET(nfilt->hw_index + 1));
+
+ rx_filter_core_wl(priv, ASP_RX_FILTER_NET_CFG_CH(nfilt->port + 8) |
+ ASP_RX_FILTER_NET_CFG_EN |
+ ASP_RX_FILTER_NET_CFG_L2_EN |
+ ASP_RX_FILTER_NET_CFG_L3_EN |
+ ASP_RX_FILTER_NET_CFG_L4_EN |
+ ASP_RX_FILTER_NET_CFG_L3_FRM(2) |
+ ASP_RX_FILTER_NET_CFG_L4_FRM(2) |
+ ASP_RX_FILTER_NET_CFG_UMC(nfilt->port),
+ ASP_RX_FILTER_NET_CFG(nfilt->hw_index));
+
+ rx_filter_core_wl(priv, ASP_RX_FILTER_NET_CFG_CH(nfilt->port + 8) |
+ ASP_RX_FILTER_NET_CFG_EN |
+ ASP_RX_FILTER_NET_CFG_L2_EN |
+ ASP_RX_FILTER_NET_CFG_L3_EN |
+ ASP_RX_FILTER_NET_CFG_L4_EN |
+ ASP_RX_FILTER_NET_CFG_L3_FRM(2) |
+ ASP_RX_FILTER_NET_CFG_L4_FRM(2) |
+ ASP_RX_FILTER_NET_CFG_UMC(nfilt->port),
+ ASP_RX_FILTER_NET_CFG(nfilt->hw_index + 1));
+}
+
+#define MAX_WAKE_FILTER_SIZE 256
+enum asp_netfilt_reg_type {
+ ASP_NETFILT_MATCH = 0,
+ ASP_NETFILT_MASK,
+ ASP_NETFILT_MAX
+};
+
+static int bcmasp_netfilt_get_reg_offset(struct bcmasp_priv *priv,
+ struct bcmasp_net_filter *nfilt,
+ enum asp_netfilt_reg_type reg_type,
+ u32 offset)
+{
+ u32 block_index, filter_sel;
+
+ if (offset < 32) {
+ block_index = ASP_RX_FILTER_NET_L2;
+ filter_sel = nfilt->hw_index;
+ } else if (offset < 64) {
+ block_index = ASP_RX_FILTER_NET_L2;
+ filter_sel = nfilt->hw_index + 1;
+ } else if (offset < 96) {
+ block_index = ASP_RX_FILTER_NET_L3_0;
+ filter_sel = nfilt->hw_index;
+ } else if (offset < 128) {
+ block_index = ASP_RX_FILTER_NET_L3_0;
+ filter_sel = nfilt->hw_index + 1;
+ } else if (offset < 160) {
+ block_index = ASP_RX_FILTER_NET_L3_1;
+ filter_sel = nfilt->hw_index;
+ } else if (offset < 192) {
+ block_index = ASP_RX_FILTER_NET_L3_1;
+ filter_sel = nfilt->hw_index + 1;
+ } else if (offset < 224) {
+ block_index = ASP_RX_FILTER_NET_L4;
+ filter_sel = nfilt->hw_index;
+ } else if (offset < 256) {
+ block_index = ASP_RX_FILTER_NET_L4;
+ filter_sel = nfilt->hw_index + 1;
+ } else {
+ return -EINVAL;
+ }
+
+ switch (reg_type) {
+ case ASP_NETFILT_MATCH:
+ return ASP_RX_FILTER_NET_PAT(filter_sel, block_index,
+ (offset % 32));
+ case ASP_NETFILT_MASK:
+ return ASP_RX_FILTER_NET_MASK(filter_sel, block_index,
+ (offset % 32));
+ default:
+ return -EINVAL;
+ }
+}
+
+static void bcmasp_netfilt_wr(struct bcmasp_priv *priv,
+ struct bcmasp_net_filter *nfilt,
+ enum asp_netfilt_reg_type reg_type,
+ u32 val, u32 offset)
+{
+ int reg_offset;
+
+ /* HW only accepts 4 byte aligned writes */
+ if (!IS_ALIGNED(offset, 4) || offset > MAX_WAKE_FILTER_SIZE)
+ return;
+
+ reg_offset = bcmasp_netfilt_get_reg_offset(priv, nfilt, reg_type,
+ offset);
+
+ rx_filter_core_wl(priv, val, reg_offset);
+}
+
+static u32 bcmasp_netfilt_rd(struct bcmasp_priv *priv,
+ struct bcmasp_net_filter *nfilt,
+ enum asp_netfilt_reg_type reg_type,
+ u32 offset)
+{
+ int reg_offset;
+
+ /* HW only accepts 4 byte aligned writes */
+ if (!IS_ALIGNED(offset, 4) || offset > MAX_WAKE_FILTER_SIZE)
+ return 0;
+
+ reg_offset = bcmasp_netfilt_get_reg_offset(priv, nfilt, reg_type,
+ offset);
+
+ return rx_filter_core_rl(priv, reg_offset);
+}
+
+static int bcmasp_netfilt_wr_m_wake(struct bcmasp_priv *priv,
+ struct bcmasp_net_filter *nfilt,
+ u32 offset, void *match, void *mask,
+ size_t size)
+{
+ u32 shift, mask_val = 0, match_val = 0;
+ bool first_byte = true;
+
+ if ((offset + size) > MAX_WAKE_FILTER_SIZE)
+ return -EINVAL;
+
+ while (size--) {
+ /* The HW only accepts 4 byte aligned writes, so if we
+ * begin unaligned or if remaining bytes less than 4,
+ * we need to read then write to avoid losing current
+ * register state
+ */
+ if (first_byte && (!IS_ALIGNED(offset, 4) || size < 3)) {
+ match_val = bcmasp_netfilt_rd(priv, nfilt,
+ ASP_NETFILT_MATCH,
+ ALIGN_DOWN(offset, 4));
+ mask_val = bcmasp_netfilt_rd(priv, nfilt,
+ ASP_NETFILT_MASK,
+ ALIGN_DOWN(offset, 4));
+ }
+
+ shift = (3 - (offset % 4)) * 8;
+ match_val &= ~GENMASK(shift + 7, shift);
+ mask_val &= ~GENMASK(shift + 7, shift);
+ match_val |= (u32)(*((u8 *)match) << shift);
+ mask_val |= (u32)(*((u8 *)mask) << shift);
+
+ /* If last byte or last byte of word, write to reg */
+ if (!size || ((offset % 4) == 3)) {
+ bcmasp_netfilt_wr(priv, nfilt, ASP_NETFILT_MATCH,
+ match_val, ALIGN_DOWN(offset, 4));
+ bcmasp_netfilt_wr(priv, nfilt, ASP_NETFILT_MASK,
+ mask_val, ALIGN_DOWN(offset, 4));
+ first_byte = true;
+ } else {
+ first_byte = false;
+ }
+
+ offset++;
+ match++;
+ mask++;
+ }
+
+ return 0;
+}
+
+static void bcmasp_netfilt_reset_hw(struct bcmasp_priv *priv,
+ struct bcmasp_net_filter *nfilt)
+{
+ int i;
+
+ for (i = 0; i < MAX_WAKE_FILTER_SIZE; i += 4) {
+ bcmasp_netfilt_wr(priv, nfilt, ASP_NETFILT_MATCH, 0, i);
+ bcmasp_netfilt_wr(priv, nfilt, ASP_NETFILT_MASK, 0, i);
+ }
+}
+
+static void bcmasp_netfilt_tcpip4_wr(struct bcmasp_priv *priv,
+ struct bcmasp_net_filter *nfilt,
+ struct ethtool_tcpip4_spec *match,
+ struct ethtool_tcpip4_spec *mask,
+ u32 offset)
+{
+ __be16 val_16, mask_16;
+
+ val_16 = htons(ETH_P_IP);
+ mask_16 = htons(0xFFFF);
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, (ETH_ALEN * 2) + offset,
+ &val_16, &mask_16, sizeof(val_16));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 1,
+ &match->tos, &mask->tos,
+ sizeof(match->tos));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 12,
+ &match->ip4src, &mask->ip4src,
+ sizeof(match->ip4src));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 16,
+ &match->ip4dst, &mask->ip4dst,
+ sizeof(match->ip4dst));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 20,
+ &match->psrc, &mask->psrc,
+ sizeof(match->psrc));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 22,
+ &match->pdst, &mask->pdst,
+ sizeof(match->pdst));
+}
+
+static void bcmasp_netfilt_tcpip6_wr(struct bcmasp_priv *priv,
+ struct bcmasp_net_filter *nfilt,
+ struct ethtool_tcpip6_spec *match,
+ struct ethtool_tcpip6_spec *mask,
+ u32 offset)
+{
+ __be16 val_16, mask_16;
+
+ val_16 = htons(ETH_P_IPV6);
+ mask_16 = htons(0xFFFF);
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, (ETH_ALEN * 2) + offset,
+ &val_16, &mask_16, sizeof(val_16));
+ val_16 = htons(match->tclass << 4);
+ mask_16 = htons(mask->tclass << 4);
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset,
+ &val_16, &mask_16, sizeof(val_16));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 8,
+ &match->ip6src, &mask->ip6src,
+ sizeof(match->ip6src));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 24,
+ &match->ip6dst, &mask->ip6dst,
+ sizeof(match->ip6dst));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 40,
+ &match->psrc, &mask->psrc,
+ sizeof(match->psrc));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 42,
+ &match->pdst, &mask->pdst,
+ sizeof(match->pdst));
+}
+
+static int bcmasp_netfilt_wr_to_hw(struct bcmasp_priv *priv,
+ struct bcmasp_net_filter *nfilt)
+{
+ struct ethtool_rx_flow_spec *fs = &nfilt->fs;
+ unsigned int offset = 0;
+ __be16 val_16, mask_16;
+ u8 val_8, mask_8;
+
+ /* Currently only supports wake filters */
+ if (!nfilt->wake_filter)
+ return -EINVAL;
+
+ bcmasp_netfilt_reset_hw(priv, nfilt);
+
+ if (fs->flow_type & FLOW_MAC_EXT) {
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, 0, &fs->h_ext.h_dest,
+ &fs->m_ext.h_dest,
+ sizeof(fs->h_ext.h_dest));
+ }
+
+ if ((fs->flow_type & FLOW_EXT) &&
+ (fs->m_ext.vlan_etype || fs->m_ext.vlan_tci)) {
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, (ETH_ALEN * 2),
+ &fs->h_ext.vlan_etype,
+ &fs->m_ext.vlan_etype,
+ sizeof(fs->h_ext.vlan_etype));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ((ETH_ALEN * 2) + 2),
+ &fs->h_ext.vlan_tci,
+ &fs->m_ext.vlan_tci,
+ sizeof(fs->h_ext.vlan_tci));
+ offset += VLAN_HLEN;
+ }
+
+ switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ case ETHER_FLOW:
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, 0,
+ &fs->h_u.ether_spec.h_dest,
+ &fs->m_u.ether_spec.h_dest,
+ sizeof(fs->h_u.ether_spec.h_dest));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_ALEN,
+ &fs->h_u.ether_spec.h_source,
+ &fs->m_u.ether_spec.h_source,
+ sizeof(fs->h_u.ether_spec.h_source));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, (ETH_ALEN * 2) + offset,
+ &fs->h_u.ether_spec.h_proto,
+ &fs->m_u.ether_spec.h_proto,
+ sizeof(fs->h_u.ether_spec.h_proto));
+
+ break;
+ case IP_USER_FLOW:
+ val_16 = htons(ETH_P_IP);
+ mask_16 = htons(0xFFFF);
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, (ETH_ALEN * 2) + offset,
+ &val_16, &mask_16, sizeof(val_16));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 1,
+ &fs->h_u.usr_ip4_spec.tos,
+ &fs->m_u.usr_ip4_spec.tos,
+ sizeof(fs->h_u.usr_ip4_spec.tos));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 9,
+ &fs->h_u.usr_ip4_spec.proto,
+ &fs->m_u.usr_ip4_spec.proto,
+ sizeof(fs->h_u.usr_ip4_spec.proto));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 12,
+ &fs->h_u.usr_ip4_spec.ip4src,
+ &fs->m_u.usr_ip4_spec.ip4src,
+ sizeof(fs->h_u.usr_ip4_spec.ip4src));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 16,
+ &fs->h_u.usr_ip4_spec.ip4dst,
+ &fs->m_u.usr_ip4_spec.ip4dst,
+ sizeof(fs->h_u.usr_ip4_spec.ip4dst));
+ if (!fs->m_u.usr_ip4_spec.l4_4_bytes)
+ break;
+
+ /* Only supports 20 byte IPv4 header */
+ val_8 = 0x45;
+ mask_8 = 0xFF;
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset,
+ &val_8, &mask_8, sizeof(val_8));
+ bcmasp_netfilt_wr_m_wake(priv, nfilt,
+ ETH_HLEN + 20 + offset,
+ &fs->h_u.usr_ip4_spec.l4_4_bytes,
+ &fs->m_u.usr_ip4_spec.l4_4_bytes,
+ sizeof(fs->h_u.usr_ip4_spec.l4_4_bytes)
+ );
+ break;
+ case TCP_V4_FLOW:
+ val_8 = IPPROTO_TCP;
+ mask_8 = 0xFF;
+ bcmasp_netfilt_tcpip4_wr(priv, nfilt, &fs->h_u.tcp_ip4_spec,
+ &fs->m_u.tcp_ip4_spec, offset);
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 9,
+ &val_8, &mask_8, sizeof(val_8));
+ break;
+ case UDP_V4_FLOW:
+ val_8 = IPPROTO_UDP;
+ mask_8 = 0xFF;
+ bcmasp_netfilt_tcpip4_wr(priv, nfilt, &fs->h_u.udp_ip4_spec,
+ &fs->m_u.udp_ip4_spec, offset);
+
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 9,
+ &val_8, &mask_8, sizeof(val_8));
+ break;
+ case TCP_V6_FLOW:
+ val_8 = IPPROTO_TCP;
+ mask_8 = 0xFF;
+ bcmasp_netfilt_tcpip6_wr(priv, nfilt, &fs->h_u.tcp_ip6_spec,
+ &fs->m_u.tcp_ip6_spec, offset);
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 6,
+ &val_8, &mask_8, sizeof(val_8));
+ break;
+ case UDP_V6_FLOW:
+ val_8 = IPPROTO_UDP;
+ mask_8 = 0xFF;
+ bcmasp_netfilt_tcpip6_wr(priv, nfilt, &fs->h_u.udp_ip6_spec,
+ &fs->m_u.udp_ip6_spec, offset);
+ bcmasp_netfilt_wr_m_wake(priv, nfilt, ETH_HLEN + offset + 6,
+ &val_8, &mask_8, sizeof(val_8));
+ break;
+ }
+
+ bcmasp_netfilt_hw_en_wake(priv, nfilt);
+
+ return 0;
+}
+
+void bcmasp_netfilt_suspend(struct bcmasp_intf *intf)
+{
+ struct bcmasp_priv *priv = intf->parent;
+ bool write = false;
+ int ret, i;
+
+ /* Write all filters to HW */
+ for (i = 0; i < NUM_NET_FILTERS; i++) {
+ /* If the filter does not match the port, skip programming. */
+ if (!priv->net_filters[i].claimed ||
+ priv->net_filters[i].port != intf->port)
+ continue;
+
+ if (i > 0 && (i % 2) &&
+ priv->net_filters[i].wake_filter &&
+ priv->net_filters[i - 1].wake_filter)
+ continue;
+
+ ret = bcmasp_netfilt_wr_to_hw(priv, &priv->net_filters[i]);
+ if (!ret)
+ write = true;
+ }
+
+ /* Successfully programmed at least one wake filter
+ * so enable top level wake config
+ */
+ if (write)
+ rx_filter_core_wl(priv, (ASP_RX_FILTER_OPUT_EN |
+ ASP_RX_FILTER_LNR_MD |
+ ASP_RX_FILTER_GEN_WK_EN |
+ ASP_RX_FILTER_NT_FLT_EN),
+ ASP_RX_FILTER_BLK_CTRL);
+}
+
+void bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs,
+ u32 *rule_cnt)
+{
+ struct bcmasp_priv *priv = intf->parent;
+ int j = 0, i;
+
+ for (i = 0; i < NUM_NET_FILTERS; i++) {
+ if (!priv->net_filters[i].claimed ||
+ priv->net_filters[i].port != intf->port)
+ continue;
+
+ if (i > 0 && (i % 2) &&
+ priv->net_filters[i].wake_filter &&
+ priv->net_filters[i - 1].wake_filter)
+ continue;
+
+ rule_locs[j++] = priv->net_filters[i].fs.location;
+ }
+
+ *rule_cnt = j;
+}
+
+int bcmasp_netfilt_get_active(struct bcmasp_intf *intf)
+{
+ struct bcmasp_priv *priv = intf->parent;
+ int cnt = 0, i;
+
+ for (i = 0; i < NUM_NET_FILTERS; i++) {
+ if (!priv->net_filters[i].claimed ||
+ priv->net_filters[i].port != intf->port)
+ continue;
+
+ /* Skip over a wake filter pair */
+ if (i > 0 && (i % 2) &&
+ priv->net_filters[i].wake_filter &&
+ priv->net_filters[i - 1].wake_filter)
+ continue;
+
+ cnt++;
+ }
+
+ return cnt;
+}
+
+bool bcmasp_netfilt_check_dup(struct bcmasp_intf *intf,
+ struct ethtool_rx_flow_spec *fs)
+{
+ struct bcmasp_priv *priv = intf->parent;
+ struct ethtool_rx_flow_spec *cur;
+ size_t fs_size = 0;
+ int i;
+
+ for (i = 0; i < NUM_NET_FILTERS; i++) {
+ if (!priv->net_filters[i].claimed ||
+ priv->net_filters[i].port != intf->port)
+ continue;
+
+ cur = &priv->net_filters[i].fs;
+
+ if (cur->flow_type != fs->flow_type ||
+ cur->ring_cookie != fs->ring_cookie)
+ continue;
+
+ switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ case ETHER_FLOW:
+ fs_size = sizeof(struct ethhdr);
+ break;
+ case IP_USER_FLOW:
+ fs_size = sizeof(struct ethtool_usrip4_spec);
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ fs_size = sizeof(struct ethtool_tcpip6_spec);
+ break;
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ fs_size = sizeof(struct ethtool_tcpip4_spec);
+ break;
+ default:
+ continue;
+ }
+
+ if (memcmp(&cur->h_u, &fs->h_u, fs_size) ||
+ memcmp(&cur->m_u, &fs->m_u, fs_size))
+ continue;
+
+ if (cur->flow_type & FLOW_EXT) {
+ if (cur->h_ext.vlan_etype != fs->h_ext.vlan_etype ||
+ cur->m_ext.vlan_etype != fs->m_ext.vlan_etype ||
+ cur->h_ext.vlan_tci != fs->h_ext.vlan_tci ||
+ cur->m_ext.vlan_tci != fs->m_ext.vlan_tci ||
+ cur->h_ext.data[0] != fs->h_ext.data[0])
+ continue;
+ }
+ if (cur->flow_type & FLOW_MAC_EXT) {
+ if (memcmp(&cur->h_ext.h_dest,
+ &fs->h_ext.h_dest, ETH_ALEN) ||
+ memcmp(&cur->m_ext.h_dest,
+ &fs->m_ext.h_dest, ETH_ALEN))
+ continue;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+/* If no network filter found, return open filter.
+ * If no more open filters return NULL
+ */
+struct bcmasp_net_filter *bcmasp_netfilt_get_init(struct bcmasp_intf *intf,
+ int loc, bool wake_filter,
+ bool init)
+{
+ struct bcmasp_net_filter *nfilter = NULL;
+ struct bcmasp_priv *priv = intf->parent;
+ int i, open_index = -1;
+
+ /* Check whether we exceed the filter table capacity */
+ if (loc != RX_CLS_LOC_ANY && loc >= NUM_NET_FILTERS)
+ return ERR_PTR(-EINVAL);
+
+ /* If the filter location is busy (already claimed) and we are initializing
+ * the filter (insertion), return a busy error code.
+ */
+ if (loc != RX_CLS_LOC_ANY && init && priv->net_filters[loc].claimed)
+ return ERR_PTR(-EBUSY);
+
+ /* We need two filters for wake-up, so we cannot use an odd filter */
+ if (wake_filter && loc != RX_CLS_LOC_ANY && (loc % 2))
+ return ERR_PTR(-EINVAL);
+
+ /* Initialize the loop index based on the desired location or from 0 */
+ i = loc == RX_CLS_LOC_ANY ? 0 : loc;
+
+ for ( ; i < NUM_NET_FILTERS; i++) {
+ /* Found matching network filter */
+ if (!init &&
+ priv->net_filters[i].claimed &&
+ priv->net_filters[i].hw_index == i &&
+ priv->net_filters[i].port == intf->port)
+ return &priv->net_filters[i];
+
+ /* If we don't need a new filter or new filter already found */
+ if (!init || open_index >= 0)
+ continue;
+
+ /* Wake filter conslidates two filters to cover more bytes
+ * Wake filter is open if...
+ * 1. It is an even filter
+ * 2. The current and next filter is not claimed
+ */
+ if (wake_filter && !(i % 2) && !priv->net_filters[i].claimed &&
+ !priv->net_filters[i + 1].claimed)
+ open_index = i;
+ else if (!priv->net_filters[i].claimed)
+ open_index = i;
+ }
+
+ if (open_index >= 0) {
+ nfilter = &priv->net_filters[open_index];
+ nfilter->claimed = true;
+ nfilter->port = intf->port;
+ nfilter->hw_index = open_index;
+ }
+
+ if (wake_filter && open_index >= 0) {
+ /* Claim next filter */
+ priv->net_filters[open_index + 1].claimed = true;
+ priv->net_filters[open_index + 1].wake_filter = true;
+ nfilter->wake_filter = true;
+ }
+
+ return nfilter ? nfilter : ERR_PTR(-EINVAL);
+}
+
+void bcmasp_netfilt_release(struct bcmasp_intf *intf,
+ struct bcmasp_net_filter *nfilt)
+{
+ struct bcmasp_priv *priv = intf->parent;
+
+ if (nfilt->wake_filter) {
+ memset(&priv->net_filters[nfilt->hw_index + 1], 0,
+ sizeof(struct bcmasp_net_filter));
+ }
+
+ memset(nfilt, 0, sizeof(struct bcmasp_net_filter));
+}
+
+static void bcmasp_addr_to_uint(unsigned char *addr, u32 *high, u32 *low)
+{
+ *high = (u32)(addr[0] << 8 | addr[1]);
+ *low = (u32)(addr[2] << 24 | addr[3] << 16 | addr[4] << 8 |
+ addr[5]);
+}
+
+static void bcmasp_set_mda_filter(struct bcmasp_intf *intf,
+ const unsigned char *addr,
+ unsigned char *mask,
+ unsigned int i)
+{
+ struct bcmasp_priv *priv = intf->parent;
+ u32 addr_h, addr_l, mask_h, mask_l;
+
+ /* Set local copy */
+ ether_addr_copy(priv->mda_filters[i].mask, mask);
+ ether_addr_copy(priv->mda_filters[i].addr, addr);
+
+ /* Write to HW */
+ bcmasp_addr_to_uint(priv->mda_filters[i].mask, &mask_h, &mask_l);
+ bcmasp_addr_to_uint(priv->mda_filters[i].addr, &addr_h, &addr_l);
+ rx_filter_core_wl(priv, addr_h, ASP_RX_FILTER_MDA_PAT_H(i));
+ rx_filter_core_wl(priv, addr_l, ASP_RX_FILTER_MDA_PAT_L(i));
+ rx_filter_core_wl(priv, mask_h, ASP_RX_FILTER_MDA_MSK_H(i));
+ rx_filter_core_wl(priv, mask_l, ASP_RX_FILTER_MDA_MSK_L(i));
+}
+
+static void bcmasp_en_mda_filter(struct bcmasp_intf *intf, bool en,
+ unsigned int i)
+{
+ struct bcmasp_priv *priv = intf->parent;
+
+ if (priv->mda_filters[i].en == en)
+ return;
+
+ priv->mda_filters[i].en = en;
+ priv->mda_filters[i].port = intf->port;
+
+ rx_filter_core_wl(priv, ((intf->channel + 8) |
+ (en << ASP_RX_FILTER_MDA_CFG_EN_SHIFT) |
+ ASP_RX_FILTER_MDA_CFG_UMC_SEL(intf->port)),
+ ASP_RX_FILTER_MDA_CFG(i));
+}
+
+/* There are 32 MDA filters shared between all ports, we reserve 4 filters per
+ * port for the following.
+ * - Promisc: Filter to allow all packets when promisc is enabled
+ * - All Multicast
+ * - Broadcast
+ * - Own address
+ *
+ * The reserved filters are identified as so.
+ * - Promisc: (index * 4) + 0
+ * - All Multicast: (index * 4) + 1
+ * - Broadcast: (index * 4) + 2
+ * - Own address: (index * 4) + 3
+ */
+enum asp_rx_filter_id {
+ ASP_RX_FILTER_MDA_PROMISC = 0,
+ ASP_RX_FILTER_MDA_ALLMULTI,
+ ASP_RX_FILTER_MDA_BROADCAST,
+ ASP_RX_FILTER_MDA_OWN_ADDR,
+ ASP_RX_FILTER_MDA_RES_MAX,
+};
+
+#define ASP_RX_FILT_MDA(intf, name) (((intf)->index * \
+ ASP_RX_FILTER_MDA_RES_MAX) \
+ + ASP_RX_FILTER_MDA_##name)
+
+static int bcmasp_total_res_mda_cnt(struct bcmasp_priv *priv)
+{
+ return list_count_nodes(&priv->intfs) * ASP_RX_FILTER_MDA_RES_MAX;
+}
+
+void bcmasp_set_promisc(struct bcmasp_intf *intf, bool en)
+{
+ unsigned int i = ASP_RX_FILT_MDA(intf, PROMISC);
+ unsigned char promisc[ETH_ALEN];
+
+ eth_zero_addr(promisc);
+ /* Set mask to 00:00:00:00:00:00 to match all packets */
+ bcmasp_set_mda_filter(intf, promisc, promisc, i);
+ bcmasp_en_mda_filter(intf, en, i);
+}
+
+void bcmasp_set_allmulti(struct bcmasp_intf *intf, bool en)
+{
+ unsigned char allmulti[] = {0x01, 0x00, 0x00, 0x00, 0x00, 0x00};
+ unsigned int i = ASP_RX_FILT_MDA(intf, ALLMULTI);
+
+ /* Set mask to 01:00:00:00:00:00 to match all multicast */
+ bcmasp_set_mda_filter(intf, allmulti, allmulti, i);
+ bcmasp_en_mda_filter(intf, en, i);
+}
+
+void bcmasp_set_broad(struct bcmasp_intf *intf, bool en)
+{
+ unsigned int i = ASP_RX_FILT_MDA(intf, BROADCAST);
+ unsigned char addr[ETH_ALEN];
+
+ eth_broadcast_addr(addr);
+ bcmasp_set_mda_filter(intf, addr, addr, i);
+ bcmasp_en_mda_filter(intf, en, i);
+}
+
+void bcmasp_set_oaddr(struct bcmasp_intf *intf, const unsigned char *addr,
+ bool en)
+{
+ unsigned char mask[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ unsigned int i = ASP_RX_FILT_MDA(intf, OWN_ADDR);
+
+ bcmasp_set_mda_filter(intf, addr, mask, i);
+ bcmasp_en_mda_filter(intf, en, i);
+}
+
+void bcmasp_disable_all_filters(struct bcmasp_intf *intf)
+{
+ struct bcmasp_priv *priv = intf->parent;
+ unsigned int i;
+ int res_count;
+
+ res_count = bcmasp_total_res_mda_cnt(intf->parent);
+
+ /* Disable all filters held by this port */
+ for (i = res_count; i < NUM_MDA_FILTERS; i++) {
+ if (priv->mda_filters[i].en &&
+ priv->mda_filters[i].port == intf->port)
+ bcmasp_en_mda_filter(intf, 0, i);
+ }
+}
+
+static int bcmasp_combine_set_filter(struct bcmasp_intf *intf,
+ unsigned char *addr, unsigned char *mask,
+ int i)
+{
+ struct bcmasp_priv *priv = intf->parent;
+ u64 addr1, addr2, mask1, mask2, mask3;
+
+ /* Switch to u64 to help with the calculations */
+ addr1 = ether_addr_to_u64(priv->mda_filters[i].addr);
+ mask1 = ether_addr_to_u64(priv->mda_filters[i].mask);
+ addr2 = ether_addr_to_u64(addr);
+ mask2 = ether_addr_to_u64(mask);
+
+ /* Check if one filter resides within the other */
+ mask3 = mask1 & mask2;
+ if (mask3 == mask1 && ((addr1 & mask1) == (addr2 & mask1))) {
+ /* Filter 2 resides within filter 1, so everything is good */
+ return 0;
+ } else if (mask3 == mask2 && ((addr1 & mask2) == (addr2 & mask2))) {
+ /* Filter 1 resides within filter 2, so swap filters */
+ bcmasp_set_mda_filter(intf, addr, mask, i);
+ return 0;
+ }
+
+ /* Unable to combine */
+ return -EINVAL;
+}
+
+int bcmasp_set_en_mda_filter(struct bcmasp_intf *intf, unsigned char *addr,
+ unsigned char *mask)
+{
+ struct bcmasp_priv *priv = intf->parent;
+ int ret, res_count;
+ unsigned int i;
+
+ res_count = bcmasp_total_res_mda_cnt(intf->parent);
+
+ for (i = res_count; i < NUM_MDA_FILTERS; i++) {
+ /* If filter not enabled or belongs to another port skip */
+ if (!priv->mda_filters[i].en ||
+ priv->mda_filters[i].port != intf->port)
+ continue;
+
+ /* Attempt to combine filters */
+ ret = bcmasp_combine_set_filter(intf, addr, mask, i);
+ if (!ret) {
+ intf->mib.filters_combine_cnt++;
+ return 0;
+ }
+ }
+
+ /* Create new filter if possible */
+ for (i = res_count; i < NUM_MDA_FILTERS; i++) {
+ if (priv->mda_filters[i].en)
+ continue;
+
+ bcmasp_set_mda_filter(intf, addr, mask, i);
+ bcmasp_en_mda_filter(intf, 1, i);
+ return 0;
+ }
+
+ /* No room for new filter */
+ return -EINVAL;
+}
+
+static void bcmasp_core_init_filters(struct bcmasp_priv *priv)
+{
+ unsigned int i;
+
+ /* Disable all filters and reset software view since the HW
+ * can lose context while in deep sleep suspend states
+ */
+ for (i = 0; i < NUM_MDA_FILTERS; i++) {
+ rx_filter_core_wl(priv, 0x0, ASP_RX_FILTER_MDA_CFG(i));
+ priv->mda_filters[i].en = 0;
+ }
+
+ for (i = 0; i < NUM_NET_FILTERS; i++)
+ rx_filter_core_wl(priv, 0x0, ASP_RX_FILTER_NET_CFG(i));
+
+ /* Top level filter enable bit should be enabled at all times, set
+ * GEN_WAKE_CLEAR to clear the network filter wake-up which would
+ * otherwise be sticky
+ */
+ rx_filter_core_wl(priv, (ASP_RX_FILTER_OPUT_EN |
+ ASP_RX_FILTER_MDA_EN |
+ ASP_RX_FILTER_GEN_WK_CLR |
+ ASP_RX_FILTER_NT_FLT_EN),
+ ASP_RX_FILTER_BLK_CTRL);
+}
+
+/* ASP core initialization */
+static void bcmasp_core_init(struct bcmasp_priv *priv)
+{
+ tx_analytics_core_wl(priv, 0x0, ASP_TX_ANALYTICS_CTRL);
+ rx_analytics_core_wl(priv, 0x4, ASP_RX_ANALYTICS_CTRL);
+
+ rx_edpkt_core_wl(priv, (ASP_EDPKT_HDR_SZ_128 << ASP_EDPKT_HDR_SZ_SHIFT),
+ ASP_EDPKT_HDR_CFG);
+ rx_edpkt_core_wl(priv,
+ (ASP_EDPKT_ENDI_BT_SWP_WD << ASP_EDPKT_ENDI_DESC_SHIFT),
+ ASP_EDPKT_ENDI);
+
+ rx_edpkt_core_wl(priv, 0x1b, ASP_EDPKT_BURST_BUF_PSCAL_TOUT);
+ rx_edpkt_core_wl(priv, 0x3e8, ASP_EDPKT_BURST_BUF_WRITE_TOUT);
+ rx_edpkt_core_wl(priv, 0x3e8, ASP_EDPKT_BURST_BUF_READ_TOUT);
+
+ rx_edpkt_core_wl(priv, ASP_EDPKT_ENABLE_EN, ASP_EDPKT_ENABLE);
+
+ /* Disable and clear both UniMAC's wake-up interrupts to avoid
+ * sticky interrupts.
+ */
+ _intr2_mask_set(priv, ASP_INTR2_UMC0_WAKE | ASP_INTR2_UMC1_WAKE);
+ intr2_core_wl(priv, ASP_INTR2_UMC0_WAKE | ASP_INTR2_UMC1_WAKE,
+ ASP_INTR2_CLEAR);
+}
+
+static void bcmasp_core_clock_select(struct bcmasp_priv *priv, bool slow)
+{
+ u32 reg;
+
+ reg = ctrl_core_rl(priv, ASP_CTRL_CORE_CLOCK_SELECT);
+ if (slow)
+ reg &= ~ASP_CTRL_CORE_CLOCK_SELECT_MAIN;
+ else
+ reg |= ASP_CTRL_CORE_CLOCK_SELECT_MAIN;
+ ctrl_core_wl(priv, reg, ASP_CTRL_CORE_CLOCK_SELECT);
+}
+
+static void bcmasp_core_clock_set_ll(struct bcmasp_priv *priv, u32 clr, u32 set)
+{
+ u32 reg;
+
+ reg = ctrl_core_rl(priv, ASP_CTRL_CLOCK_CTRL);
+ reg &= ~clr;
+ reg |= set;
+ ctrl_core_wl(priv, reg, ASP_CTRL_CLOCK_CTRL);
+
+ reg = ctrl_core_rl(priv, ASP_CTRL_SCRATCH_0);
+ reg &= ~clr;
+ reg |= set;
+ ctrl_core_wl(priv, reg, ASP_CTRL_SCRATCH_0);
+}
+
+static void bcmasp_core_clock_set(struct bcmasp_priv *priv, u32 clr, u32 set)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->clk_lock, flags);
+ bcmasp_core_clock_set_ll(priv, clr, set);
+ spin_unlock_irqrestore(&priv->clk_lock, flags);
+}
+
+void bcmasp_core_clock_set_intf(struct bcmasp_intf *intf, bool en)
+{
+ u32 intf_mask = ASP_CTRL_CLOCK_CTRL_ASP_RGMII_DIS(intf->port);
+ struct bcmasp_priv *priv = intf->parent;
+ unsigned long flags;
+ u32 reg;
+
+ /* When enabling an interface, if the RX or TX clocks were not enabled,
+ * enable them. Conversely, while disabling an interface, if this is
+ * the last one enabled, we can turn off the shared RX and TX clocks as
+ * well. We control enable bits which is why we test for equality on
+ * the RGMII clock bit mask.
+ */
+ spin_lock_irqsave(&priv->clk_lock, flags);
+ if (en) {
+ intf_mask |= ASP_CTRL_CLOCK_CTRL_ASP_TX_DISABLE |
+ ASP_CTRL_CLOCK_CTRL_ASP_RX_DISABLE;
+ bcmasp_core_clock_set_ll(priv, intf_mask, 0);
+ } else {
+ reg = ctrl_core_rl(priv, ASP_CTRL_SCRATCH_0) | intf_mask;
+ if ((reg & ASP_CTRL_CLOCK_CTRL_ASP_RGMII_MASK) ==
+ ASP_CTRL_CLOCK_CTRL_ASP_RGMII_MASK)
+ intf_mask |= ASP_CTRL_CLOCK_CTRL_ASP_TX_DISABLE |
+ ASP_CTRL_CLOCK_CTRL_ASP_RX_DISABLE;
+ bcmasp_core_clock_set_ll(priv, 0, intf_mask);
+ }
+ spin_unlock_irqrestore(&priv->clk_lock, flags);
+}
+
+static irqreturn_t bcmasp_isr_wol(int irq, void *data)
+{
+ struct bcmasp_priv *priv = data;
+ u32 status;
+
+ /* No L3 IRQ, so we good */
+ if (priv->wol_irq <= 0)
+ goto irq_handled;
+
+ status = wakeup_intr2_core_rl(priv, ASP_WAKEUP_INTR2_STATUS) &
+ ~wakeup_intr2_core_rl(priv, ASP_WAKEUP_INTR2_MASK_STATUS);
+ wakeup_intr2_core_wl(priv, status, ASP_WAKEUP_INTR2_CLEAR);
+
+irq_handled:
+ pm_wakeup_event(&priv->pdev->dev, 0);
+ return IRQ_HANDLED;
+}
+
+static int bcmasp_get_and_request_irq(struct bcmasp_priv *priv, int i)
+{
+ struct platform_device *pdev = priv->pdev;
+ int irq, ret;
+
+ irq = platform_get_irq_optional(pdev, i);
+ if (irq < 0)
+ return irq;
+
+ ret = devm_request_irq(&pdev->dev, irq, bcmasp_isr_wol, 0,
+ pdev->name, priv);
+ if (ret)
+ return ret;
+
+ return irq;
+}
+
+static void bcmasp_init_wol_shared(struct bcmasp_priv *priv)
+{
+ struct platform_device *pdev = priv->pdev;
+ struct device *dev = &pdev->dev;
+ int irq;
+
+ irq = bcmasp_get_and_request_irq(priv, 1);
+ if (irq < 0) {
+ dev_warn(dev, "Failed to init WoL irq: %d\n", irq);
+ return;
+ }
+
+ priv->wol_irq = irq;
+ priv->wol_irq_enabled_mask = 0;
+ device_set_wakeup_capable(&pdev->dev, 1);
+}
+
+static void bcmasp_enable_wol_shared(struct bcmasp_intf *intf, bool en)
+{
+ struct bcmasp_priv *priv = intf->parent;
+ struct device *dev = &priv->pdev->dev;
+
+ if (en) {
+ if (priv->wol_irq_enabled_mask) {
+ set_bit(intf->port, &priv->wol_irq_enabled_mask);
+ return;
+ }
+
+ /* First enable */
+ set_bit(intf->port, &priv->wol_irq_enabled_mask);
+ enable_irq_wake(priv->wol_irq);
+ device_set_wakeup_enable(dev, 1);
+ } else {
+ if (!priv->wol_irq_enabled_mask)
+ return;
+
+ clear_bit(intf->port, &priv->wol_irq_enabled_mask);
+ if (priv->wol_irq_enabled_mask)
+ return;
+
+ /* Last disable */
+ disable_irq_wake(priv->wol_irq);
+ device_set_wakeup_enable(dev, 0);
+ }
+}
+
+static void bcmasp_wol_irq_destroy_shared(struct bcmasp_priv *priv)
+{
+ if (priv->wol_irq > 0)
+ free_irq(priv->wol_irq, priv);
+}
+
+static void bcmasp_init_wol_per_intf(struct bcmasp_priv *priv)
+{
+ struct platform_device *pdev = priv->pdev;
+ struct device *dev = &pdev->dev;
+ struct bcmasp_intf *intf;
+ int irq;
+
+ list_for_each_entry(intf, &priv->intfs, list) {
+ irq = bcmasp_get_and_request_irq(priv, intf->port + 1);
+ if (irq < 0) {
+ dev_warn(dev, "Failed to init WoL irq(port %d): %d\n",
+ intf->port, irq);
+ continue;
+ }
+
+ intf->wol_irq = irq;
+ intf->wol_irq_enabled = false;
+ device_set_wakeup_capable(&pdev->dev, 1);
+ }
+}
+
+static void bcmasp_enable_wol_per_intf(struct bcmasp_intf *intf, bool en)
+{
+ struct device *dev = &intf->parent->pdev->dev;
+
+ if (en ^ intf->wol_irq_enabled)
+ irq_set_irq_wake(intf->wol_irq, en);
+
+ intf->wol_irq_enabled = en;
+ device_set_wakeup_enable(dev, en);
+}
+
+static void bcmasp_wol_irq_destroy_per_intf(struct bcmasp_priv *priv)
+{
+ struct bcmasp_intf *intf;
+
+ list_for_each_entry(intf, &priv->intfs, list) {
+ if (intf->wol_irq > 0)
+ free_irq(intf->wol_irq, priv);
+ }
+}
+
+static struct bcmasp_hw_info v20_hw_info = {
+ .rx_ctrl_flush = ASP_RX_CTRL_FLUSH,
+ .umac2fb = UMAC2FB_OFFSET,
+ .rx_ctrl_fb_out_frame_count = ASP_RX_CTRL_FB_OUT_FRAME_COUNT,
+ .rx_ctrl_fb_filt_out_frame_count = ASP_RX_CTRL_FB_FILT_OUT_FRAME_COUNT,
+ .rx_ctrl_fb_rx_fifo_depth = ASP_RX_CTRL_FB_RX_FIFO_DEPTH,
+};
+
+static const struct bcmasp_plat_data v20_plat_data = {
+ .init_wol = bcmasp_init_wol_per_intf,
+ .enable_wol = bcmasp_enable_wol_per_intf,
+ .destroy_wol = bcmasp_wol_irq_destroy_per_intf,
+ .hw_info = &v20_hw_info,
+};
+
+static struct bcmasp_hw_info v21_hw_info = {
+ .rx_ctrl_flush = ASP_RX_CTRL_FLUSH_2_1,
+ .umac2fb = UMAC2FB_OFFSET_2_1,
+ .rx_ctrl_fb_out_frame_count = ASP_RX_CTRL_FB_OUT_FRAME_COUNT_2_1,
+ .rx_ctrl_fb_filt_out_frame_count =
+ ASP_RX_CTRL_FB_FILT_OUT_FRAME_COUNT_2_1,
+ .rx_ctrl_fb_rx_fifo_depth = ASP_RX_CTRL_FB_RX_FIFO_DEPTH_2_1,
+};
+
+static const struct bcmasp_plat_data v21_plat_data = {
+ .init_wol = bcmasp_init_wol_shared,
+ .enable_wol = bcmasp_enable_wol_shared,
+ .destroy_wol = bcmasp_wol_irq_destroy_shared,
+ .hw_info = &v21_hw_info,
+};
+
+static const struct of_device_id bcmasp_of_match[] = {
+ { .compatible = "brcm,asp-v2.0", .data = &v20_plat_data },
+ { .compatible = "brcm,asp-v2.1", .data = &v21_plat_data },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, bcmasp_of_match);
+
+static const struct of_device_id bcmasp_mdio_of_match[] = {
+ { .compatible = "brcm,asp-v2.1-mdio", },
+ { .compatible = "brcm,asp-v2.0-mdio", },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, bcmasp_mdio_of_match);
+
+static void bcmasp_remove_intfs(struct bcmasp_priv *priv)
+{
+ struct bcmasp_intf *intf, *n;
+
+ list_for_each_entry_safe(intf, n, &priv->intfs, list) {
+ list_del(&intf->list);
+ bcmasp_interface_destroy(intf);
+ }
+}
+
+static int bcmasp_probe(struct platform_device *pdev)
+{
+ struct device_node *ports_node, *intf_node;
+ const struct bcmasp_plat_data *pdata;
+ struct device *dev = &pdev->dev;
+ struct bcmasp_priv *priv;
+ struct bcmasp_intf *intf;
+ int ret = 0, count = 0;
+ unsigned int i;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->irq = platform_get_irq(pdev, 0);
+ if (priv->irq <= 0)
+ return dev_err_probe(dev, -EINVAL, "invalid interrupt\n");
+
+ priv->clk = devm_clk_get_optional_enabled(dev, "sw_asp");
+ if (IS_ERR(priv->clk))
+ return dev_err_probe(dev, PTR_ERR(priv->clk),
+ "failed to request clock\n");
+
+ /* Base from parent node */
+ priv->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(priv->base))
+ return dev_err_probe(dev, PTR_ERR(priv->base), "failed to iomap\n");
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
+ if (ret)
+ return dev_err_probe(dev, ret, "unable to set DMA mask: %d\n", ret);
+
+ dev_set_drvdata(&pdev->dev, priv);
+ priv->pdev = pdev;
+ spin_lock_init(&priv->mda_lock);
+ spin_lock_init(&priv->clk_lock);
+ mutex_init(&priv->wol_lock);
+ mutex_init(&priv->net_lock);
+ INIT_LIST_HEAD(&priv->intfs);
+
+ pdata = device_get_match_data(&pdev->dev);
+ if (!pdata)
+ return dev_err_probe(dev, -EINVAL, "unable to find platform data\n");
+
+ priv->init_wol = pdata->init_wol;
+ priv->enable_wol = pdata->enable_wol;
+ priv->destroy_wol = pdata->destroy_wol;
+ priv->hw_info = pdata->hw_info;
+
+ /* Enable all clocks to ensure successful probing */
+ bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0);
+
+ /* Switch to the main clock */
+ bcmasp_core_clock_select(priv, false);
+
+ bcmasp_intr2_mask_set_all(priv);
+ bcmasp_intr2_clear_all(priv);
+
+ ret = devm_request_irq(&pdev->dev, priv->irq, bcmasp_isr, 0,
+ pdev->name, priv);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to request ASP interrupt: %d", ret);
+
+ /* Register mdio child nodes */
+ of_platform_populate(dev->of_node, bcmasp_mdio_of_match, NULL, dev);
+
+ /* ASP specific initialization, Needs to be done regardless of
+ * how many interfaces come up.
+ */
+ bcmasp_core_init(priv);
+ bcmasp_core_init_filters(priv);
+
+ ports_node = of_find_node_by_name(dev->of_node, "ethernet-ports");
+ if (!ports_node) {
+ dev_warn(dev, "No ports found\n");
+ return -EINVAL;
+ }
+
+ i = 0;
+ for_each_available_child_of_node(ports_node, intf_node) {
+ intf = bcmasp_interface_create(priv, intf_node, i);
+ if (!intf) {
+ dev_err(dev, "Cannot create eth interface %d\n", i);
+ bcmasp_remove_intfs(priv);
+ goto of_put_exit;
+ }
+ list_add_tail(&intf->list, &priv->intfs);
+ i++;
+ }
+
+ /* Check and enable WoL */
+ priv->init_wol(priv);
+
+ /* Drop the clock reference count now and let ndo_open()/ndo_close()
+ * manage it for us from now on.
+ */
+ bcmasp_core_clock_set(priv, 0, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE);
+
+ clk_disable_unprepare(priv->clk);
+
+ /* Now do the registration of the network ports which will take care
+ * of managing the clock properly.
+ */
+ list_for_each_entry(intf, &priv->intfs, list) {
+ ret = register_netdev(intf->ndev);
+ if (ret) {
+ netdev_err(intf->ndev,
+ "failed to register net_device: %d\n", ret);
+ priv->destroy_wol(priv);
+ bcmasp_remove_intfs(priv);
+ goto of_put_exit;
+ }
+ count++;
+ }
+
+ dev_info(dev, "Initialized %d port(s)\n", count);
+
+of_put_exit:
+ of_node_put(ports_node);
+ return ret;
+}
+
+static int bcmasp_remove(struct platform_device *pdev)
+{
+ struct bcmasp_priv *priv = dev_get_drvdata(&pdev->dev);
+
+ if (!priv)
+ return 0;
+
+ priv->destroy_wol(priv);
+ bcmasp_remove_intfs(priv);
+
+ return 0;
+}
+
+static void bcmasp_shutdown(struct platform_device *pdev)
+{
+ bcmasp_remove(pdev);
+}
+
+static int __maybe_unused bcmasp_suspend(struct device *d)
+{
+ struct bcmasp_priv *priv = dev_get_drvdata(d);
+ struct bcmasp_intf *intf;
+ int ret;
+
+ list_for_each_entry(intf, &priv->intfs, list) {
+ ret = bcmasp_interface_suspend(intf);
+ if (ret)
+ break;
+ }
+
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ return ret;
+
+ /* Whether Wake-on-LAN is enabled or not, we can always disable
+ * the shared TX clock
+ */
+ bcmasp_core_clock_set(priv, 0, ASP_CTRL_CLOCK_CTRL_ASP_TX_DISABLE);
+
+ bcmasp_core_clock_select(priv, true);
+
+ clk_disable_unprepare(priv->clk);
+
+ return ret;
+}
+
+static int __maybe_unused bcmasp_resume(struct device *d)
+{
+ struct bcmasp_priv *priv = dev_get_drvdata(d);
+ struct bcmasp_intf *intf;
+ int ret;
+
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ return ret;
+
+ /* Switch to the main clock domain */
+ bcmasp_core_clock_select(priv, false);
+
+ /* Re-enable all clocks for re-initialization */
+ bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0);
+
+ bcmasp_core_init(priv);
+ bcmasp_core_init_filters(priv);
+
+ /* And disable them to let the network devices take care of them */
+ bcmasp_core_clock_set(priv, 0, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE);
+
+ clk_disable_unprepare(priv->clk);
+
+ list_for_each_entry(intf, &priv->intfs, list) {
+ ret = bcmasp_interface_resume(intf);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(bcmasp_pm_ops,
+ bcmasp_suspend, bcmasp_resume);
+
+static struct platform_driver bcmasp_driver = {
+ .probe = bcmasp_probe,
+ .remove = bcmasp_remove,
+ .shutdown = bcmasp_shutdown,
+ .driver = {
+ .name = "brcm,asp-v2",
+ .of_match_table = bcmasp_of_match,
+ .pm = &bcmasp_pm_ops,
+ },
+};
+module_platform_driver(bcmasp_driver);
+
+MODULE_DESCRIPTION("Broadcom ASP 2.0 Ethernet controller driver");
+MODULE_ALIAS("platform:brcm,asp-v2");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.h b/drivers/net/ethernet/broadcom/asp2/bcmasp.h
new file mode 100644
index 000000000000..6bfcaa7f95a8
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.h
@@ -0,0 +1,586 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BCMASP_H
+#define __BCMASP_H
+
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <uapi/linux/ethtool.h>
+
+#define ASP_INTR2_OFFSET 0x1000
+#define ASP_INTR2_STATUS 0x0
+#define ASP_INTR2_SET 0x4
+#define ASP_INTR2_CLEAR 0x8
+#define ASP_INTR2_MASK_STATUS 0xc
+#define ASP_INTR2_MASK_SET 0x10
+#define ASP_INTR2_MASK_CLEAR 0x14
+
+#define ASP_INTR2_RX_ECH(intr) BIT(intr)
+#define ASP_INTR2_TX_DESC(intr) BIT((intr) + 14)
+#define ASP_INTR2_UMC0_WAKE BIT(22)
+#define ASP_INTR2_UMC1_WAKE BIT(28)
+
+#define ASP_WAKEUP_INTR2_OFFSET 0x1200
+#define ASP_WAKEUP_INTR2_STATUS 0x0
+#define ASP_WAKEUP_INTR2_SET 0x4
+#define ASP_WAKEUP_INTR2_CLEAR 0x8
+#define ASP_WAKEUP_INTR2_MASK_STATUS 0xc
+#define ASP_WAKEUP_INTR2_MASK_SET 0x10
+#define ASP_WAKEUP_INTR2_MASK_CLEAR 0x14
+#define ASP_WAKEUP_INTR2_MPD_0 BIT(0)
+#define ASP_WAKEUP_INTR2_MPD_1 BIT(1)
+#define ASP_WAKEUP_INTR2_FILT_0 BIT(2)
+#define ASP_WAKEUP_INTR2_FILT_1 BIT(3)
+#define ASP_WAKEUP_INTR2_FW BIT(4)
+
+#define ASP_TX_ANALYTICS_OFFSET 0x4c000
+#define ASP_TX_ANALYTICS_CTRL 0x0
+
+#define ASP_RX_ANALYTICS_OFFSET 0x98000
+#define ASP_RX_ANALYTICS_CTRL 0x0
+
+#define ASP_RX_CTRL_OFFSET 0x9f000
+#define ASP_RX_CTRL_UMAC_0_FRAME_COUNT 0x8
+#define ASP_RX_CTRL_UMAC_1_FRAME_COUNT 0xc
+#define ASP_RX_CTRL_FB_0_FRAME_COUNT 0x14
+#define ASP_RX_CTRL_FB_1_FRAME_COUNT 0x18
+#define ASP_RX_CTRL_FB_8_FRAME_COUNT 0x1c
+/* asp2.1 diverges offsets here */
+/* ASP2.0 */
+#define ASP_RX_CTRL_FB_OUT_FRAME_COUNT 0x20
+#define ASP_RX_CTRL_FB_FILT_OUT_FRAME_COUNT 0x24
+#define ASP_RX_CTRL_FLUSH 0x28
+#define ASP_CTRL_UMAC0_FLUSH_MASK (BIT(0) | BIT(12))
+#define ASP_CTRL_UMAC1_FLUSH_MASK (BIT(1) | BIT(13))
+#define ASP_CTRL_SPB_FLUSH_MASK (BIT(8) | BIT(20))
+#define ASP_RX_CTRL_FB_RX_FIFO_DEPTH 0x30
+/* ASP2.1 */
+#define ASP_RX_CTRL_FB_9_FRAME_COUNT_2_1 0x20
+#define ASP_RX_CTRL_FB_10_FRAME_COUNT_2_1 0x24
+#define ASP_RX_CTRL_FB_OUT_FRAME_COUNT_2_1 0x28
+#define ASP_RX_CTRL_FB_FILT_OUT_FRAME_COUNT_2_1 0x2c
+#define ASP_RX_CTRL_FLUSH_2_1 0x30
+#define ASP_RX_CTRL_FB_RX_FIFO_DEPTH_2_1 0x38
+
+#define ASP_RX_FILTER_OFFSET 0x80000
+#define ASP_RX_FILTER_BLK_CTRL 0x0
+#define ASP_RX_FILTER_OPUT_EN BIT(0)
+#define ASP_RX_FILTER_MDA_EN BIT(1)
+#define ASP_RX_FILTER_LNR_MD BIT(2)
+#define ASP_RX_FILTER_GEN_WK_EN BIT(3)
+#define ASP_RX_FILTER_GEN_WK_CLR BIT(4)
+#define ASP_RX_FILTER_NT_FLT_EN BIT(5)
+#define ASP_RX_FILTER_MDA_CFG(sel) (((sel) * 0x14) + 0x100)
+#define ASP_RX_FILTER_MDA_CFG_EN_SHIFT 8
+#define ASP_RX_FILTER_MDA_CFG_UMC_SEL(sel) ((sel) > 1 ? BIT(17) : \
+ BIT((sel) + 9))
+#define ASP_RX_FILTER_MDA_PAT_H(sel) (((sel) * 0x14) + 0x104)
+#define ASP_RX_FILTER_MDA_PAT_L(sel) (((sel) * 0x14) + 0x108)
+#define ASP_RX_FILTER_MDA_MSK_H(sel) (((sel) * 0x14) + 0x10c)
+#define ASP_RX_FILTER_MDA_MSK_L(sel) (((sel) * 0x14) + 0x110)
+#define ASP_RX_FILTER_MDA_CFG(sel) (((sel) * 0x14) + 0x100)
+#define ASP_RX_FILTER_MDA_PAT_H(sel) (((sel) * 0x14) + 0x104)
+#define ASP_RX_FILTER_MDA_PAT_L(sel) (((sel) * 0x14) + 0x108)
+#define ASP_RX_FILTER_MDA_MSK_H(sel) (((sel) * 0x14) + 0x10c)
+#define ASP_RX_FILTER_MDA_MSK_L(sel) (((sel) * 0x14) + 0x110)
+#define ASP_RX_FILTER_NET_CFG(sel) (((sel) * 0xa04) + 0x400)
+#define ASP_RX_FILTER_NET_CFG_CH(sel) ((sel) << 0)
+#define ASP_RX_FILTER_NET_CFG_EN BIT(9)
+#define ASP_RX_FILTER_NET_CFG_L2_EN BIT(10)
+#define ASP_RX_FILTER_NET_CFG_L3_EN BIT(11)
+#define ASP_RX_FILTER_NET_CFG_L4_EN BIT(12)
+#define ASP_RX_FILTER_NET_CFG_L3_FRM(sel) ((sel) << 13)
+#define ASP_RX_FILTER_NET_CFG_L4_FRM(sel) ((sel) << 15)
+#define ASP_RX_FILTER_NET_CFG_UMC(sel) BIT((sel) + 19)
+#define ASP_RX_FILTER_NET_CFG_DMA_EN BIT(27)
+
+#define ASP_RX_FILTER_NET_OFFSET_MAX 32
+#define ASP_RX_FILTER_NET_PAT(sel, block, off) \
+ (((sel) * 0xa04) + ((block) * 0x200) + (off) + 0x600)
+#define ASP_RX_FILTER_NET_MASK(sel, block, off) \
+ (((sel) * 0xa04) + ((block) * 0x200) + (off) + 0x700)
+
+#define ASP_RX_FILTER_NET_OFFSET(sel) (((sel) * 0xa04) + 0xe00)
+#define ASP_RX_FILTER_NET_OFFSET_L2(val) ((val) << 0)
+#define ASP_RX_FILTER_NET_OFFSET_L3_0(val) ((val) << 8)
+#define ASP_RX_FILTER_NET_OFFSET_L3_1(val) ((val) << 16)
+#define ASP_RX_FILTER_NET_OFFSET_L4(val) ((val) << 24)
+
+enum asp_rx_net_filter_block {
+ ASP_RX_FILTER_NET_L2 = 0,
+ ASP_RX_FILTER_NET_L3_0,
+ ASP_RX_FILTER_NET_L3_1,
+ ASP_RX_FILTER_NET_L4,
+ ASP_RX_FILTER_NET_BLOCK_MAX
+};
+
+#define ASP_EDPKT_OFFSET 0x9c000
+#define ASP_EDPKT_ENABLE 0x4
+#define ASP_EDPKT_ENABLE_EN BIT(0)
+#define ASP_EDPKT_HDR_CFG 0xc
+#define ASP_EDPKT_HDR_SZ_SHIFT 2
+#define ASP_EDPKT_HDR_SZ_32 0
+#define ASP_EDPKT_HDR_SZ_64 1
+#define ASP_EDPKT_HDR_SZ_96 2
+#define ASP_EDPKT_HDR_SZ_128 3
+#define ASP_EDPKT_BURST_BUF_PSCAL_TOUT 0x10
+#define ASP_EDPKT_BURST_BUF_WRITE_TOUT 0x14
+#define ASP_EDPKT_BURST_BUF_READ_TOUT 0x18
+#define ASP_EDPKT_RX_TS_COUNTER 0x38
+#define ASP_EDPKT_ENDI 0x48
+#define ASP_EDPKT_ENDI_DESC_SHIFT 8
+#define ASP_EDPKT_ENDI_NO_BT_SWP 0
+#define ASP_EDPKT_ENDI_BT_SWP_WD 1
+#define ASP_EDPKT_RX_PKT_CNT 0x138
+#define ASP_EDPKT_HDR_EXTR_CNT 0x13c
+#define ASP_EDPKT_HDR_OUT_CNT 0x140
+
+#define ASP_CTRL 0x101000
+#define ASP_CTRL_ASP_SW_INIT 0x04
+#define ASP_CTRL_ASP_SW_INIT_ACPUSS_CORE BIT(0)
+#define ASP_CTRL_ASP_SW_INIT_ASP_TX BIT(1)
+#define ASP_CTRL_ASP_SW_INIT_AS_RX BIT(2)
+#define ASP_CTRL_ASP_SW_INIT_ASP_RGMII_UMAC0 BIT(3)
+#define ASP_CTRL_ASP_SW_INIT_ASP_RGMII_UMAC1 BIT(4)
+#define ASP_CTRL_ASP_SW_INIT_ASP_XMEMIF BIT(5)
+#define ASP_CTRL_CLOCK_CTRL 0x04
+#define ASP_CTRL_CLOCK_CTRL_ASP_TX_DISABLE BIT(0)
+#define ASP_CTRL_CLOCK_CTRL_ASP_RX_DISABLE BIT(1)
+#define ASP_CTRL_CLOCK_CTRL_ASP_RGMII_SHIFT 2
+#define ASP_CTRL_CLOCK_CTRL_ASP_RGMII_MASK (0x7 << ASP_CTRL_CLOCK_CTRL_ASP_RGMII_SHIFT)
+#define ASP_CTRL_CLOCK_CTRL_ASP_RGMII_DIS(x) BIT(ASP_CTRL_CLOCK_CTRL_ASP_RGMII_SHIFT + (x))
+#define ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE GENMASK(4, 0)
+#define ASP_CTRL_CORE_CLOCK_SELECT 0x08
+#define ASP_CTRL_CORE_CLOCK_SELECT_MAIN BIT(0)
+#define ASP_CTRL_SCRATCH_0 0x0c
+
+struct bcmasp_tx_cb {
+ struct sk_buff *skb;
+ unsigned int bytes_sent;
+ bool last;
+
+ DEFINE_DMA_UNMAP_ADDR(dma_addr);
+ DEFINE_DMA_UNMAP_LEN(dma_len);
+};
+
+struct bcmasp_res {
+ /* Per interface resources */
+ /* Port */
+ void __iomem *umac;
+ void __iomem *umac2fb;
+ void __iomem *rgmii;
+
+ /* TX slowpath/configuration */
+ void __iomem *tx_spb_ctrl;
+ void __iomem *tx_spb_top;
+ void __iomem *tx_epkt_core;
+ void __iomem *tx_pause_ctrl;
+};
+
+#define DESC_ADDR(x) ((x) & GENMASK_ULL(39, 0))
+#define DESC_FLAGS(x) ((x) & GENMASK_ULL(63, 40))
+
+struct bcmasp_desc {
+ u64 buf;
+ #define DESC_CHKSUM BIT_ULL(40)
+ #define DESC_CRC_ERR BIT_ULL(41)
+ #define DESC_RX_SYM_ERR BIT_ULL(42)
+ #define DESC_NO_OCT_ALN BIT_ULL(43)
+ #define DESC_PKT_TRUC BIT_ULL(44)
+ /* 39:0 (TX/RX) bits 0-39 of buf addr
+ * 40 (RX) checksum
+ * 41 (RX) crc_error
+ * 42 (RX) rx_symbol_error
+ * 43 (RX) non_octet_aligned
+ * 44 (RX) pkt_truncated
+ * 45 Reserved
+ * 56:46 (RX) mac_filter_id
+ * 60:57 (RX) rx_port_num (0-unicmac0, 1-unimac1)
+ * 61 Reserved
+ * 63:62 (TX) forward CRC, overwrite CRC
+ */
+ u32 size;
+ u32 flags;
+ #define DESC_INT_EN BIT(0)
+ #define DESC_SOF BIT(1)
+ #define DESC_EOF BIT(2)
+ #define DESC_EPKT_CMD BIT(3)
+ #define DESC_SCRAM_ST BIT(8)
+ #define DESC_SCRAM_END BIT(9)
+ #define DESC_PCPP BIT(10)
+ #define DESC_PPPP BIT(11)
+ /* 0 (TX) tx_int_en
+ * 1 (TX/RX) SOF
+ * 2 (TX/RX) EOF
+ * 3 (TX) epkt_command
+ * 6:4 (TX) PA
+ * 7 (TX) pause at desc end
+ * 8 (TX) scram_start
+ * 9 (TX) scram_end
+ * 10 (TX) PCPP
+ * 11 (TX) PPPP
+ * 14:12 Reserved
+ * 15 (TX) pid ch Valid
+ * 19:16 (TX) data_pkt_type
+ * 32:20 (TX) pid_channel (RX) nw_filter_id
+ */
+};
+
+struct bcmasp_intf;
+
+struct bcmasp_intf_stats64 {
+ /* Rx Stats */
+ u64_stats_t rx_packets;
+ u64_stats_t rx_bytes;
+ u64_stats_t rx_errors;
+ u64_stats_t rx_dropped;
+ u64_stats_t rx_crc_errs;
+ u64_stats_t rx_sym_errs;
+
+ /* Tx Stats*/
+ u64_stats_t tx_packets;
+ u64_stats_t tx_bytes;
+
+ struct u64_stats_sync syncp;
+};
+
+struct bcmasp_mib_counters {
+ u32 edpkt_ts;
+ u32 edpkt_rx_pkt_cnt;
+ u32 edpkt_hdr_ext_cnt;
+ u32 edpkt_hdr_out_cnt;
+ u32 umac_frm_cnt;
+ u32 fb_frm_cnt;
+ u32 fb_rx_fifo_depth;
+ u32 fb_out_frm_cnt;
+ u32 fb_filt_out_frm_cnt;
+ u32 alloc_rx_skb_failed;
+ u32 tx_dma_failed;
+ u32 mc_filters_full_cnt;
+ u32 uc_filters_full_cnt;
+ u32 filters_combine_cnt;
+ u32 promisc_filters_cnt;
+ u32 tx_realloc_offload_failed;
+ u32 tx_timeout_cnt;
+};
+
+struct bcmasp_intf_ops {
+ unsigned long (*rx_desc_read)(struct bcmasp_intf *intf);
+ void (*rx_buffer_write)(struct bcmasp_intf *intf, dma_addr_t addr);
+ void (*rx_desc_write)(struct bcmasp_intf *intf, dma_addr_t addr);
+ unsigned long (*tx_read)(struct bcmasp_intf *intf);
+ void (*tx_write)(struct bcmasp_intf *intf, dma_addr_t addr);
+};
+
+struct bcmasp_priv;
+
+struct bcmasp_intf {
+ struct list_head list;
+ struct net_device *ndev;
+ struct bcmasp_priv *parent;
+
+ /* ASP Ch */
+ int channel;
+ int port;
+ const struct bcmasp_intf_ops *ops;
+
+ /* Used for splitting shared resources */
+ int index;
+
+ struct napi_struct tx_napi;
+ /* TX ring, starts on a new cacheline boundary */
+ void __iomem *tx_spb_dma;
+ int tx_spb_index;
+ int tx_spb_clean_index;
+ struct bcmasp_desc *tx_spb_cpu;
+ dma_addr_t tx_spb_dma_addr;
+ dma_addr_t tx_spb_dma_valid;
+ dma_addr_t tx_spb_dma_read;
+ struct bcmasp_tx_cb *tx_cbs;
+
+ /* RX ring, starts on a new cacheline boundary */
+ void __iomem *rx_edpkt_cfg;
+ void __iomem *rx_edpkt_dma;
+ int rx_edpkt_index;
+ int rx_buf_order;
+ struct bcmasp_desc *rx_edpkt_cpu;
+ dma_addr_t rx_edpkt_dma_addr;
+ dma_addr_t rx_edpkt_dma_read;
+
+ /* RX buffer prefetcher ring*/
+ void *rx_ring_cpu;
+ dma_addr_t rx_ring_dma;
+ dma_addr_t rx_ring_dma_valid;
+ struct napi_struct rx_napi;
+
+ struct bcmasp_res res;
+ unsigned int crc_fwd;
+
+ /* PHY device */
+ struct device_node *phy_dn;
+ struct device_node *ndev_dn;
+ phy_interface_t phy_interface;
+ bool internal_phy;
+ int old_pause;
+ int old_link;
+ int old_duplex;
+
+ u32 msg_enable;
+
+ /* Statistics */
+ struct bcmasp_intf_stats64 stats64;
+ struct bcmasp_mib_counters mib;
+
+ u32 wolopts;
+ u8 sopass[SOPASS_MAX];
+ /* Used if per intf wol irq */
+ int wol_irq;
+ unsigned int wol_irq_enabled:1;
+
+ struct ethtool_eee eee;
+};
+
+#define NUM_NET_FILTERS 32
+struct bcmasp_net_filter {
+ struct ethtool_rx_flow_spec fs;
+
+ bool claimed;
+ bool wake_filter;
+
+ int port;
+ unsigned int hw_index;
+};
+
+#define NUM_MDA_FILTERS 32
+struct bcmasp_mda_filter {
+ /* Current owner of this filter */
+ int port;
+ bool en;
+ u8 addr[ETH_ALEN];
+ u8 mask[ETH_ALEN];
+};
+
+struct bcmasp_hw_info {
+ u32 rx_ctrl_flush;
+ u32 umac2fb;
+ u32 rx_ctrl_fb_out_frame_count;
+ u32 rx_ctrl_fb_filt_out_frame_count;
+ u32 rx_ctrl_fb_rx_fifo_depth;
+};
+
+struct bcmasp_plat_data {
+ void (*init_wol)(struct bcmasp_priv *priv);
+ void (*enable_wol)(struct bcmasp_intf *intf, bool en);
+ void (*destroy_wol)(struct bcmasp_priv *priv);
+ struct bcmasp_hw_info *hw_info;
+};
+
+struct bcmasp_priv {
+ struct platform_device *pdev;
+ struct clk *clk;
+
+ int irq;
+ u32 irq_mask;
+
+ /* Used if shared wol irq */
+ struct mutex wol_lock;
+ int wol_irq;
+ unsigned long wol_irq_enabled_mask;
+
+ void (*init_wol)(struct bcmasp_priv *priv);
+ void (*enable_wol)(struct bcmasp_intf *intf, bool en);
+ void (*destroy_wol)(struct bcmasp_priv *priv);
+
+ void __iomem *base;
+ struct bcmasp_hw_info *hw_info;
+
+ struct list_head intfs;
+
+ struct bcmasp_mda_filter mda_filters[NUM_MDA_FILTERS];
+
+ /* MAC destination address filters lock */
+ spinlock_t mda_lock;
+
+ /* Protects accesses to ASP_CTRL_CLOCK_CTRL */
+ spinlock_t clk_lock;
+
+ struct bcmasp_net_filter net_filters[NUM_NET_FILTERS];
+
+ /* Network filter lock */
+ struct mutex net_lock;
+};
+
+static inline unsigned long bcmasp_intf_rx_desc_read(struct bcmasp_intf *intf)
+{
+ return intf->ops->rx_desc_read(intf);
+}
+
+static inline void bcmasp_intf_rx_buffer_write(struct bcmasp_intf *intf,
+ dma_addr_t addr)
+{
+ intf->ops->rx_buffer_write(intf, addr);
+}
+
+static inline void bcmasp_intf_rx_desc_write(struct bcmasp_intf *intf,
+ dma_addr_t addr)
+{
+ intf->ops->rx_desc_write(intf, addr);
+}
+
+static inline unsigned long bcmasp_intf_tx_read(struct bcmasp_intf *intf)
+{
+ return intf->ops->tx_read(intf);
+}
+
+static inline void bcmasp_intf_tx_write(struct bcmasp_intf *intf,
+ dma_addr_t addr)
+{
+ intf->ops->tx_write(intf, addr);
+}
+
+#define __BCMASP_IO_MACRO(name, m) \
+static inline u32 name##_rl(struct bcmasp_intf *intf, u32 off) \
+{ \
+ u32 reg = readl_relaxed(intf->m + off); \
+ return reg; \
+} \
+static inline void name##_wl(struct bcmasp_intf *intf, u32 val, u32 off)\
+{ \
+ writel_relaxed(val, intf->m + off); \
+}
+
+#define BCMASP_IO_MACRO(name) __BCMASP_IO_MACRO(name, res.name)
+#define BCMASP_FP_IO_MACRO(name) __BCMASP_IO_MACRO(name, name)
+
+BCMASP_IO_MACRO(umac);
+BCMASP_IO_MACRO(umac2fb);
+BCMASP_IO_MACRO(rgmii);
+BCMASP_FP_IO_MACRO(tx_spb_dma);
+BCMASP_IO_MACRO(tx_spb_ctrl);
+BCMASP_IO_MACRO(tx_spb_top);
+BCMASP_IO_MACRO(tx_epkt_core);
+BCMASP_IO_MACRO(tx_pause_ctrl);
+BCMASP_FP_IO_MACRO(rx_edpkt_dma);
+BCMASP_FP_IO_MACRO(rx_edpkt_cfg);
+
+#define __BCMASP_FP_IO_MACRO_Q(name, m) \
+static inline u64 name##_rq(struct bcmasp_intf *intf, u32 off) \
+{ \
+ u64 reg = readq_relaxed(intf->m + off); \
+ return reg; \
+} \
+static inline void name##_wq(struct bcmasp_intf *intf, u64 val, u32 off)\
+{ \
+ writeq_relaxed(val, intf->m + off); \
+}
+
+#define BCMASP_FP_IO_MACRO_Q(name) __BCMASP_FP_IO_MACRO_Q(name, name)
+
+BCMASP_FP_IO_MACRO_Q(tx_spb_dma);
+BCMASP_FP_IO_MACRO_Q(rx_edpkt_dma);
+BCMASP_FP_IO_MACRO_Q(rx_edpkt_cfg);
+
+#define PKT_OFFLOAD_NOP (0 << 28)
+#define PKT_OFFLOAD_HDR_OP (1 << 28)
+#define PKT_OFFLOAD_HDR_WRBACK BIT(19)
+#define PKT_OFFLOAD_HDR_COUNT(x) ((x) << 16)
+#define PKT_OFFLOAD_HDR_SIZE_1(x) ((x) << 4)
+#define PKT_OFFLOAD_HDR_SIZE_2(x) (x)
+#define PKT_OFFLOAD_HDR2_SIZE_2(x) ((x) << 24)
+#define PKT_OFFLOAD_HDR2_SIZE_3(x) ((x) << 12)
+#define PKT_OFFLOAD_HDR2_SIZE_4(x) (x)
+#define PKT_OFFLOAD_EPKT_OP (2 << 28)
+#define PKT_OFFLOAD_EPKT_WRBACK BIT(23)
+#define PKT_OFFLOAD_EPKT_IP(x) ((x) << 21)
+#define PKT_OFFLOAD_EPKT_TP(x) ((x) << 19)
+#define PKT_OFFLOAD_EPKT_LEN(x) ((x) << 16)
+#define PKT_OFFLOAD_EPKT_CSUM_L3 BIT(15)
+#define PKT_OFFLOAD_EPKT_CSUM_L2 BIT(14)
+#define PKT_OFFLOAD_EPKT_ID(x) ((x) << 12)
+#define PKT_OFFLOAD_EPKT_SEQ(x) ((x) << 10)
+#define PKT_OFFLOAD_EPKT_TS(x) ((x) << 8)
+#define PKT_OFFLOAD_EPKT_BLOC(x) (x)
+#define PKT_OFFLOAD_END_OP (7 << 28)
+
+struct bcmasp_pkt_offload {
+ __be32 nop;
+ __be32 header;
+ __be32 header2;
+ __be32 epkt;
+ __be32 end;
+};
+
+#define BCMASP_CORE_IO_MACRO(name, offset) \
+static inline u32 name##_core_rl(struct bcmasp_priv *priv, \
+ u32 off) \
+{ \
+ u32 reg = readl_relaxed(priv->base + (offset) + off); \
+ return reg; \
+} \
+static inline void name##_core_wl(struct bcmasp_priv *priv, \
+ u32 val, u32 off) \
+{ \
+ writel_relaxed(val, priv->base + (offset) + off); \
+}
+
+BCMASP_CORE_IO_MACRO(intr2, ASP_INTR2_OFFSET);
+BCMASP_CORE_IO_MACRO(wakeup_intr2, ASP_WAKEUP_INTR2_OFFSET);
+BCMASP_CORE_IO_MACRO(tx_analytics, ASP_TX_ANALYTICS_OFFSET);
+BCMASP_CORE_IO_MACRO(rx_analytics, ASP_RX_ANALYTICS_OFFSET);
+BCMASP_CORE_IO_MACRO(rx_ctrl, ASP_RX_CTRL_OFFSET);
+BCMASP_CORE_IO_MACRO(rx_filter, ASP_RX_FILTER_OFFSET);
+BCMASP_CORE_IO_MACRO(rx_edpkt, ASP_EDPKT_OFFSET);
+BCMASP_CORE_IO_MACRO(ctrl, ASP_CTRL);
+
+struct bcmasp_intf *bcmasp_interface_create(struct bcmasp_priv *priv,
+ struct device_node *ndev_dn, int i);
+
+void bcmasp_interface_destroy(struct bcmasp_intf *intf);
+
+void bcmasp_enable_tx_irq(struct bcmasp_intf *intf, int en);
+
+void bcmasp_enable_rx_irq(struct bcmasp_intf *intf, int en);
+
+void bcmasp_flush_rx_port(struct bcmasp_intf *intf);
+
+extern const struct ethtool_ops bcmasp_ethtool_ops;
+
+int bcmasp_interface_suspend(struct bcmasp_intf *intf);
+
+int bcmasp_interface_resume(struct bcmasp_intf *intf);
+
+void bcmasp_set_promisc(struct bcmasp_intf *intf, bool en);
+
+void bcmasp_set_allmulti(struct bcmasp_intf *intf, bool en);
+
+void bcmasp_set_broad(struct bcmasp_intf *intf, bool en);
+
+void bcmasp_set_oaddr(struct bcmasp_intf *intf, const unsigned char *addr,
+ bool en);
+
+int bcmasp_set_en_mda_filter(struct bcmasp_intf *intf, unsigned char *addr,
+ unsigned char *mask);
+
+void bcmasp_disable_all_filters(struct bcmasp_intf *intf);
+
+void bcmasp_core_clock_set_intf(struct bcmasp_intf *intf, bool en);
+
+struct bcmasp_net_filter *bcmasp_netfilt_get_init(struct bcmasp_intf *intf,
+ int loc, bool wake_filter,
+ bool init);
+
+bool bcmasp_netfilt_check_dup(struct bcmasp_intf *intf,
+ struct ethtool_rx_flow_spec *fs);
+
+void bcmasp_netfilt_release(struct bcmasp_intf *intf,
+ struct bcmasp_net_filter *nfilt);
+
+int bcmasp_netfilt_get_active(struct bcmasp_intf *intf);
+
+void bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs,
+ u32 *rule_cnt);
+
+void bcmasp_netfilt_suspend(struct bcmasp_intf *intf);
+
+void bcmasp_eee_enable_set(struct bcmasp_intf *intf, bool enable);
+#endif
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c
new file mode 100644
index 000000000000..c4f1604d5ab3
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c
@@ -0,0 +1,503 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) "bcmasp_ethtool: " fmt
+
+#include <asm-generic/unaligned.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+
+#include "bcmasp.h"
+#include "bcmasp_intf_defs.h"
+
+enum bcmasp_stat_type {
+ BCMASP_STAT_RX_EDPKT,
+ BCMASP_STAT_RX_CTRL,
+ BCMASP_STAT_RX_CTRL_PER_INTF,
+ BCMASP_STAT_SOFT,
+};
+
+struct bcmasp_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ enum bcmasp_stat_type type;
+ u32 reg_offset;
+};
+
+#define STAT_BCMASP_SOFT_MIB(str) { \
+ .stat_string = str, \
+ .type = BCMASP_STAT_SOFT, \
+}
+
+#define STAT_BCMASP_OFFSET(str, _type, offset) { \
+ .stat_string = str, \
+ .type = _type, \
+ .reg_offset = offset, \
+}
+
+#define STAT_BCMASP_RX_EDPKT(str, offset) \
+ STAT_BCMASP_OFFSET(str, BCMASP_STAT_RX_EDPKT, offset)
+#define STAT_BCMASP_RX_CTRL(str, offset) \
+ STAT_BCMASP_OFFSET(str, BCMASP_STAT_RX_CTRL, offset)
+#define STAT_BCMASP_RX_CTRL_PER_INTF(str, offset) \
+ STAT_BCMASP_OFFSET(str, BCMASP_STAT_RX_CTRL_PER_INTF, offset)
+
+/* Must match the order of struct bcmasp_mib_counters */
+static const struct bcmasp_stats bcmasp_gstrings_stats[] = {
+ /* EDPKT counters */
+ STAT_BCMASP_RX_EDPKT("RX Time Stamp", ASP_EDPKT_RX_TS_COUNTER),
+ STAT_BCMASP_RX_EDPKT("RX PKT Count", ASP_EDPKT_RX_PKT_CNT),
+ STAT_BCMASP_RX_EDPKT("RX PKT Buffered", ASP_EDPKT_HDR_EXTR_CNT),
+ STAT_BCMASP_RX_EDPKT("RX PKT Pushed to DRAM", ASP_EDPKT_HDR_OUT_CNT),
+ /* ASP RX control */
+ STAT_BCMASP_RX_CTRL_PER_INTF("Frames From Unimac",
+ ASP_RX_CTRL_UMAC_0_FRAME_COUNT),
+ STAT_BCMASP_RX_CTRL_PER_INTF("Frames From Port",
+ ASP_RX_CTRL_FB_0_FRAME_COUNT),
+ STAT_BCMASP_RX_CTRL_PER_INTF("RX Buffer FIFO Depth",
+ ASP_RX_CTRL_FB_RX_FIFO_DEPTH),
+ STAT_BCMASP_RX_CTRL("Frames Out(Buffer)",
+ ASP_RX_CTRL_FB_OUT_FRAME_COUNT),
+ STAT_BCMASP_RX_CTRL("Frames Out(Filters)",
+ ASP_RX_CTRL_FB_FILT_OUT_FRAME_COUNT),
+ /* Software maintained statistics */
+ STAT_BCMASP_SOFT_MIB("RX SKB Alloc Failed"),
+ STAT_BCMASP_SOFT_MIB("TX DMA Failed"),
+ STAT_BCMASP_SOFT_MIB("Multicast Filters Full"),
+ STAT_BCMASP_SOFT_MIB("Unicast Filters Full"),
+ STAT_BCMASP_SOFT_MIB("MDA Filters Combined"),
+ STAT_BCMASP_SOFT_MIB("Promisc Filter Set"),
+ STAT_BCMASP_SOFT_MIB("TX Realloc For Offload Failed"),
+ STAT_BCMASP_SOFT_MIB("Tx Timeout Count"),
+};
+
+#define BCMASP_STATS_LEN ARRAY_SIZE(bcmasp_gstrings_stats)
+
+static u16 bcmasp_stat_fixup_offset(struct bcmasp_intf *intf,
+ const struct bcmasp_stats *s)
+{
+ struct bcmasp_priv *priv = intf->parent;
+
+ if (!strcmp("Frames Out(Buffer)", s->stat_string))
+ return priv->hw_info->rx_ctrl_fb_out_frame_count;
+
+ if (!strcmp("Frames Out(Filters)", s->stat_string))
+ return priv->hw_info->rx_ctrl_fb_filt_out_frame_count;
+
+ if (!strcmp("RX Buffer FIFO Depth", s->stat_string))
+ return priv->hw_info->rx_ctrl_fb_rx_fifo_depth;
+
+ return s->reg_offset;
+}
+
+static int bcmasp_get_sset_count(struct net_device *dev, int string_set)
+{
+ switch (string_set) {
+ case ETH_SS_STATS:
+ return BCMASP_STATS_LEN;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void bcmasp_get_strings(struct net_device *dev, u32 stringset,
+ u8 *data)
+{
+ unsigned int i;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < BCMASP_STATS_LEN; i++) {
+ memcpy(data + i * ETH_GSTRING_LEN,
+ bcmasp_gstrings_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+ }
+ break;
+ default:
+ return;
+ }
+}
+
+static void bcmasp_update_mib_counters(struct bcmasp_intf *intf)
+{
+ unsigned int i;
+
+ for (i = 0; i < BCMASP_STATS_LEN; i++) {
+ const struct bcmasp_stats *s;
+ u32 offset, val;
+ char *p;
+
+ s = &bcmasp_gstrings_stats[i];
+ offset = bcmasp_stat_fixup_offset(intf, s);
+ switch (s->type) {
+ case BCMASP_STAT_SOFT:
+ continue;
+ case BCMASP_STAT_RX_EDPKT:
+ val = rx_edpkt_core_rl(intf->parent, offset);
+ break;
+ case BCMASP_STAT_RX_CTRL:
+ val = rx_ctrl_core_rl(intf->parent, offset);
+ break;
+ case BCMASP_STAT_RX_CTRL_PER_INTF:
+ offset += sizeof(u32) * intf->port;
+ val = rx_ctrl_core_rl(intf->parent, offset);
+ break;
+ default:
+ continue;
+ }
+ p = (char *)(&intf->mib) + (i * sizeof(u32));
+ put_unaligned(val, (u32 *)p);
+ }
+}
+
+static void bcmasp_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ unsigned int i;
+ char *p;
+
+ if (netif_running(dev))
+ bcmasp_update_mib_counters(intf);
+
+ for (i = 0; i < BCMASP_STATS_LEN; i++) {
+ p = (char *)(&intf->mib) + (i * sizeof(u32));
+ data[i] = *(u32 *)p;
+ }
+}
+
+static void bcmasp_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strscpy(info->driver, "bcmasp", sizeof(info->driver));
+ strscpy(info->bus_info, dev_name(dev->dev.parent),
+ sizeof(info->bus_info));
+}
+
+static u32 bcmasp_get_msglevel(struct net_device *dev)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+
+ return intf->msg_enable;
+}
+
+static void bcmasp_set_msglevel(struct net_device *dev, u32 level)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+
+ intf->msg_enable = level;
+}
+
+#define BCMASP_SUPPORTED_WAKE (WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER)
+static void bcmasp_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+
+ wol->supported = BCMASP_SUPPORTED_WAKE;
+ wol->wolopts = intf->wolopts;
+ memset(wol->sopass, 0, sizeof(wol->sopass));
+
+ if (wol->wolopts & WAKE_MAGICSECURE)
+ memcpy(wol->sopass, intf->sopass, sizeof(intf->sopass));
+}
+
+static int bcmasp_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ struct bcmasp_priv *priv = intf->parent;
+ struct device *kdev = &priv->pdev->dev;
+
+ if (!device_can_wakeup(kdev))
+ return -EOPNOTSUPP;
+
+ /* Interface Specific */
+ intf->wolopts = wol->wolopts;
+ if (intf->wolopts & WAKE_MAGICSECURE)
+ memcpy(intf->sopass, wol->sopass, sizeof(wol->sopass));
+
+ mutex_lock(&priv->wol_lock);
+ priv->enable_wol(intf, !!intf->wolopts);
+ mutex_unlock(&priv->wol_lock);
+
+ return 0;
+}
+
+static int bcmasp_flow_insert(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ struct bcmasp_net_filter *nfilter;
+ u32 loc = cmd->fs.location;
+ bool wake = false;
+
+ if (cmd->fs.ring_cookie == RX_CLS_FLOW_WAKE)
+ wake = true;
+
+ /* Currently only supports WAKE filters */
+ if (!wake)
+ return -EOPNOTSUPP;
+
+ switch (cmd->fs.flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ case ETHER_FLOW:
+ case IP_USER_FLOW:
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* Check if filter already exists */
+ if (bcmasp_netfilt_check_dup(intf, &cmd->fs))
+ return -EINVAL;
+
+ nfilter = bcmasp_netfilt_get_init(intf, loc, wake, true);
+ if (IS_ERR(nfilter))
+ return PTR_ERR(nfilter);
+
+ /* Return the location where we did insert the filter */
+ cmd->fs.location = nfilter->hw_index;
+ memcpy(&nfilter->fs, &cmd->fs, sizeof(struct ethtool_rx_flow_spec));
+
+ /* Since we only support wake filters, defer register programming till
+ * suspend time.
+ */
+ return 0;
+}
+
+static int bcmasp_flow_delete(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ struct bcmasp_net_filter *nfilter;
+
+ nfilter = bcmasp_netfilt_get_init(intf, cmd->fs.location, false, false);
+ if (IS_ERR(nfilter))
+ return PTR_ERR(nfilter);
+
+ bcmasp_netfilt_release(intf, nfilter);
+
+ return 0;
+}
+
+static int bcmasp_flow_get(struct bcmasp_intf *intf, struct ethtool_rxnfc *cmd)
+{
+ struct bcmasp_net_filter *nfilter;
+
+ nfilter = bcmasp_netfilt_get_init(intf, cmd->fs.location, false, false);
+ if (IS_ERR(nfilter))
+ return PTR_ERR(nfilter);
+
+ memcpy(&cmd->fs, &nfilter->fs, sizeof(nfilter->fs));
+
+ cmd->data = NUM_NET_FILTERS;
+
+ return 0;
+}
+
+static int bcmasp_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ int ret = -EOPNOTSUPP;
+
+ mutex_lock(&intf->parent->net_lock);
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ ret = bcmasp_flow_insert(dev, cmd);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ ret = bcmasp_flow_delete(dev, cmd);
+ break;
+ default:
+ break;
+ }
+
+ mutex_unlock(&intf->parent->net_lock);
+
+ return ret;
+}
+
+static int bcmasp_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+ u32 *rule_locs)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ int err = 0;
+
+ mutex_lock(&intf->parent->net_lock);
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXCLSRLCNT:
+ cmd->rule_cnt = bcmasp_netfilt_get_active(intf);
+ /* We support specifying rule locations */
+ cmd->data |= RX_CLS_LOC_SPECIAL;
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ err = bcmasp_flow_get(intf, cmd);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ bcmasp_netfilt_get_all_active(intf, rule_locs, &cmd->rule_cnt);
+ cmd->data = NUM_NET_FILTERS;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ mutex_unlock(&intf->parent->net_lock);
+
+ return err;
+}
+
+void bcmasp_eee_enable_set(struct bcmasp_intf *intf, bool enable)
+{
+ u32 reg;
+
+ reg = umac_rl(intf, UMC_EEE_CTRL);
+ if (enable)
+ reg |= EEE_EN;
+ else
+ reg &= ~EEE_EN;
+ umac_wl(intf, reg, UMC_EEE_CTRL);
+
+ intf->eee.eee_enabled = enable;
+ intf->eee.eee_active = enable;
+}
+
+static int bcmasp_get_eee(struct net_device *dev, struct ethtool_eee *e)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ struct ethtool_eee *p = &intf->eee;
+
+ if (!dev->phydev)
+ return -ENODEV;
+
+ e->eee_enabled = p->eee_enabled;
+ e->eee_active = p->eee_active;
+ e->tx_lpi_enabled = p->tx_lpi_enabled;
+ e->tx_lpi_timer = umac_rl(intf, UMC_EEE_LPI_TIMER);
+
+ return phy_ethtool_get_eee(dev->phydev, e);
+}
+
+static int bcmasp_set_eee(struct net_device *dev, struct ethtool_eee *e)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ struct ethtool_eee *p = &intf->eee;
+ int ret;
+
+ if (!dev->phydev)
+ return -ENODEV;
+
+ if (!p->eee_enabled) {
+ bcmasp_eee_enable_set(intf, false);
+ } else {
+ ret = phy_init_eee(dev->phydev, 0);
+ if (ret) {
+ netif_err(intf, hw, dev,
+ "EEE initialization failed: %d\n", ret);
+ return ret;
+ }
+
+ umac_wl(intf, e->tx_lpi_timer, UMC_EEE_LPI_TIMER);
+ intf->eee.eee_active = ret >= 0;
+ intf->eee.tx_lpi_enabled = e->tx_lpi_enabled;
+ bcmasp_eee_enable_set(intf, true);
+ }
+
+ return phy_ethtool_set_eee(dev->phydev, e);
+}
+
+static void bcmasp_get_eth_mac_stats(struct net_device *dev,
+ struct ethtool_eth_mac_stats *mac_stats)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+
+ mac_stats->FramesTransmittedOK = umac_rl(intf, UMC_GTPOK);
+ mac_stats->SingleCollisionFrames = umac_rl(intf, UMC_GTSCL);
+ mac_stats->MultipleCollisionFrames = umac_rl(intf, UMC_GTMCL);
+ mac_stats->FramesReceivedOK = umac_rl(intf, UMC_GRPOK);
+ mac_stats->FrameCheckSequenceErrors = umac_rl(intf, UMC_GRFCS);
+ mac_stats->AlignmentErrors = umac_rl(intf, UMC_GRALN);
+ mac_stats->OctetsTransmittedOK = umac_rl(intf, UMC_GTBYT);
+ mac_stats->FramesWithDeferredXmissions = umac_rl(intf, UMC_GTDRF);
+ mac_stats->LateCollisions = umac_rl(intf, UMC_GTLCL);
+ mac_stats->FramesAbortedDueToXSColls = umac_rl(intf, UMC_GTXCL);
+ mac_stats->OctetsReceivedOK = umac_rl(intf, UMC_GRBYT);
+ mac_stats->MulticastFramesXmittedOK = umac_rl(intf, UMC_GTMCA);
+ mac_stats->BroadcastFramesXmittedOK = umac_rl(intf, UMC_GTBCA);
+ mac_stats->FramesWithExcessiveDeferral = umac_rl(intf, UMC_GTEDF);
+ mac_stats->MulticastFramesReceivedOK = umac_rl(intf, UMC_GRMCA);
+ mac_stats->BroadcastFramesReceivedOK = umac_rl(intf, UMC_GRBCA);
+}
+
+static const struct ethtool_rmon_hist_range bcmasp_rmon_ranges[] = {
+ { 0, 64},
+ { 65, 127},
+ { 128, 255},
+ { 256, 511},
+ { 512, 1023},
+ { 1024, 1518},
+ { 1519, 1522},
+ {}
+};
+
+static void bcmasp_get_rmon_stats(struct net_device *dev,
+ struct ethtool_rmon_stats *rmon_stats,
+ const struct ethtool_rmon_hist_range **ranges)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+
+ *ranges = bcmasp_rmon_ranges;
+
+ rmon_stats->undersize_pkts = umac_rl(intf, UMC_RRUND);
+ rmon_stats->oversize_pkts = umac_rl(intf, UMC_GROVR);
+ rmon_stats->fragments = umac_rl(intf, UMC_RRFRG);
+ rmon_stats->jabbers = umac_rl(intf, UMC_GRJBR);
+
+ rmon_stats->hist[0] = umac_rl(intf, UMC_GR64);
+ rmon_stats->hist[1] = umac_rl(intf, UMC_GR127);
+ rmon_stats->hist[2] = umac_rl(intf, UMC_GR255);
+ rmon_stats->hist[3] = umac_rl(intf, UMC_GR511);
+ rmon_stats->hist[4] = umac_rl(intf, UMC_GR1023);
+ rmon_stats->hist[5] = umac_rl(intf, UMC_GR1518);
+ rmon_stats->hist[6] = umac_rl(intf, UMC_GRMGV);
+
+ rmon_stats->hist_tx[0] = umac_rl(intf, UMC_TR64);
+ rmon_stats->hist_tx[1] = umac_rl(intf, UMC_TR127);
+ rmon_stats->hist_tx[2] = umac_rl(intf, UMC_TR255);
+ rmon_stats->hist_tx[3] = umac_rl(intf, UMC_TR511);
+ rmon_stats->hist_tx[4] = umac_rl(intf, UMC_TR1023);
+ rmon_stats->hist_tx[5] = umac_rl(intf, UMC_TR1518);
+ rmon_stats->hist_tx[6] = umac_rl(intf, UMC_TRMGV);
+}
+
+static void bcmasp_get_eth_ctrl_stats(struct net_device *dev,
+ struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+
+ ctrl_stats->MACControlFramesTransmitted = umac_rl(intf, UMC_GTXCF);
+ ctrl_stats->MACControlFramesReceived = umac_rl(intf, UMC_GRXCF);
+ ctrl_stats->UnsupportedOpcodesReceived = umac_rl(intf, UMC_GRXUO);
+}
+
+const struct ethtool_ops bcmasp_ethtool_ops = {
+ .get_drvinfo = bcmasp_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_link_ksettings = phy_ethtool_get_link_ksettings,
+ .set_link_ksettings = phy_ethtool_set_link_ksettings,
+ .get_msglevel = bcmasp_get_msglevel,
+ .set_msglevel = bcmasp_set_msglevel,
+ .get_wol = bcmasp_get_wol,
+ .set_wol = bcmasp_set_wol,
+ .get_rxnfc = bcmasp_get_rxnfc,
+ .set_rxnfc = bcmasp_set_rxnfc,
+ .set_eee = bcmasp_set_eee,
+ .get_eee = bcmasp_get_eee,
+ .get_eth_mac_stats = bcmasp_get_eth_mac_stats,
+ .get_rmon_stats = bcmasp_get_rmon_stats,
+ .get_eth_ctrl_stats = bcmasp_get_eth_ctrl_stats,
+ .get_strings = bcmasp_get_strings,
+ .get_ethtool_stats = bcmasp_get_ethtool_stats,
+ .get_sset_count = bcmasp_get_sset_count,
+};
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
new file mode 100644
index 000000000000..53e542881255
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
@@ -0,0 +1,1415 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) "bcmasp_intf: " fmt
+
+#include <asm/byteorder.h>
+#include <linux/brcmphy.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/of_net.h>
+#include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/phy_fixed.h>
+#include <linux/ptp_classify.h>
+#include <linux/platform_device.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+
+#include "bcmasp.h"
+#include "bcmasp_intf_defs.h"
+
+static int incr_ring(int index, int ring_count)
+{
+ index++;
+ if (index == ring_count)
+ return 0;
+
+ return index;
+}
+
+/* Points to last byte of descriptor */
+static dma_addr_t incr_last_byte(dma_addr_t addr, dma_addr_t beg,
+ int ring_count)
+{
+ dma_addr_t end = beg + (ring_count * DESC_SIZE);
+
+ addr += DESC_SIZE;
+ if (addr > end)
+ return beg + DESC_SIZE - 1;
+
+ return addr;
+}
+
+/* Points to first byte of descriptor */
+static dma_addr_t incr_first_byte(dma_addr_t addr, dma_addr_t beg,
+ int ring_count)
+{
+ dma_addr_t end = beg + (ring_count * DESC_SIZE);
+
+ addr += DESC_SIZE;
+ if (addr >= end)
+ return beg;
+
+ return addr;
+}
+
+static void bcmasp_enable_tx(struct bcmasp_intf *intf, int en)
+{
+ if (en) {
+ tx_spb_ctrl_wl(intf, TX_SPB_CTRL_ENABLE_EN, TX_SPB_CTRL_ENABLE);
+ tx_epkt_core_wl(intf, (TX_EPKT_C_CFG_MISC_EN |
+ TX_EPKT_C_CFG_MISC_PT |
+ (intf->port << TX_EPKT_C_CFG_MISC_PS_SHIFT)),
+ TX_EPKT_C_CFG_MISC);
+ } else {
+ tx_spb_ctrl_wl(intf, 0x0, TX_SPB_CTRL_ENABLE);
+ tx_epkt_core_wl(intf, 0x0, TX_EPKT_C_CFG_MISC);
+ }
+}
+
+static void bcmasp_enable_rx(struct bcmasp_intf *intf, int en)
+{
+ if (en)
+ rx_edpkt_cfg_wl(intf, RX_EDPKT_CFG_ENABLE_EN,
+ RX_EDPKT_CFG_ENABLE);
+ else
+ rx_edpkt_cfg_wl(intf, 0x0, RX_EDPKT_CFG_ENABLE);
+}
+
+static void bcmasp_set_rx_mode(struct net_device *dev)
+{
+ unsigned char mask[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ struct netdev_hw_addr *ha;
+ int ret;
+
+ spin_lock_bh(&intf->parent->mda_lock);
+
+ bcmasp_disable_all_filters(intf);
+
+ if (dev->flags & IFF_PROMISC)
+ goto set_promisc;
+
+ bcmasp_set_promisc(intf, 0);
+
+ bcmasp_set_broad(intf, 1);
+
+ bcmasp_set_oaddr(intf, dev->dev_addr, 1);
+
+ if (dev->flags & IFF_ALLMULTI) {
+ bcmasp_set_allmulti(intf, 1);
+ } else {
+ bcmasp_set_allmulti(intf, 0);
+
+ netdev_for_each_mc_addr(ha, dev) {
+ ret = bcmasp_set_en_mda_filter(intf, ha->addr, mask);
+ if (ret) {
+ intf->mib.mc_filters_full_cnt++;
+ goto set_promisc;
+ }
+ }
+ }
+
+ netdev_for_each_uc_addr(ha, dev) {
+ ret = bcmasp_set_en_mda_filter(intf, ha->addr, mask);
+ if (ret) {
+ intf->mib.uc_filters_full_cnt++;
+ goto set_promisc;
+ }
+ }
+
+ spin_unlock_bh(&intf->parent->mda_lock);
+ return;
+
+set_promisc:
+ bcmasp_set_promisc(intf, 1);
+ intf->mib.promisc_filters_cnt++;
+
+ /* disable all filters used by this port */
+ bcmasp_disable_all_filters(intf);
+
+ spin_unlock_bh(&intf->parent->mda_lock);
+}
+
+static void bcmasp_clean_txcb(struct bcmasp_intf *intf, int index)
+{
+ struct bcmasp_tx_cb *txcb = &intf->tx_cbs[index];
+
+ txcb->skb = NULL;
+ dma_unmap_addr_set(txcb, dma_addr, 0);
+ dma_unmap_len_set(txcb, dma_len, 0);
+ txcb->last = false;
+}
+
+static int tx_spb_ring_full(struct bcmasp_intf *intf, int cnt)
+{
+ int next_index, i;
+
+ /* Check if we have enough room for cnt descriptors */
+ for (i = 0; i < cnt; i++) {
+ next_index = incr_ring(intf->tx_spb_index, DESC_RING_COUNT);
+ if (next_index == intf->tx_spb_clean_index)
+ return 1;
+ }
+
+ return 0;
+}
+
+static struct sk_buff *bcmasp_csum_offload(struct net_device *dev,
+ struct sk_buff *skb,
+ bool *csum_hw)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ u32 header = 0, header2 = 0, epkt = 0;
+ struct bcmasp_pkt_offload *offload;
+ unsigned int header_cnt = 0;
+ u8 ip_proto;
+ int ret;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return skb;
+
+ ret = skb_cow_head(skb, sizeof(*offload));
+ if (ret < 0) {
+ intf->mib.tx_realloc_offload_failed++;
+ goto help;
+ }
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ header |= PKT_OFFLOAD_HDR_SIZE_2((ip_hdrlen(skb) >> 8) & 0xf);
+ header2 |= PKT_OFFLOAD_HDR2_SIZE_2(ip_hdrlen(skb) & 0xff);
+ epkt |= PKT_OFFLOAD_EPKT_IP(0) | PKT_OFFLOAD_EPKT_CSUM_L2;
+ ip_proto = ip_hdr(skb)->protocol;
+ header_cnt += 2;
+ break;
+ case htons(ETH_P_IPV6):
+ header |= PKT_OFFLOAD_HDR_SIZE_2((IP6_HLEN >> 8) & 0xf);
+ header2 |= PKT_OFFLOAD_HDR2_SIZE_2(IP6_HLEN & 0xff);
+ epkt |= PKT_OFFLOAD_EPKT_IP(1) | PKT_OFFLOAD_EPKT_CSUM_L2;
+ ip_proto = ipv6_hdr(skb)->nexthdr;
+ header_cnt += 2;
+ break;
+ default:
+ goto help;
+ }
+
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ header2 |= PKT_OFFLOAD_HDR2_SIZE_3(tcp_hdrlen(skb));
+ epkt |= PKT_OFFLOAD_EPKT_TP(0) | PKT_OFFLOAD_EPKT_CSUM_L3;
+ header_cnt++;
+ break;
+ case IPPROTO_UDP:
+ header2 |= PKT_OFFLOAD_HDR2_SIZE_3(UDP_HLEN);
+ epkt |= PKT_OFFLOAD_EPKT_TP(1) | PKT_OFFLOAD_EPKT_CSUM_L3;
+ header_cnt++;
+ break;
+ default:
+ goto help;
+ }
+
+ offload = (struct bcmasp_pkt_offload *)skb_push(skb, sizeof(*offload));
+
+ header |= PKT_OFFLOAD_HDR_OP | PKT_OFFLOAD_HDR_COUNT(header_cnt) |
+ PKT_OFFLOAD_HDR_SIZE_1(ETH_HLEN);
+ epkt |= PKT_OFFLOAD_EPKT_OP;
+
+ offload->nop = htonl(PKT_OFFLOAD_NOP);
+ offload->header = htonl(header);
+ offload->header2 = htonl(header2);
+ offload->epkt = htonl(epkt);
+ offload->end = htonl(PKT_OFFLOAD_END_OP);
+ *csum_hw = true;
+
+ return skb;
+
+help:
+ skb_checksum_help(skb);
+
+ return skb;
+}
+
+static unsigned long bcmasp_rx_edpkt_dma_rq(struct bcmasp_intf *intf)
+{
+ return rx_edpkt_dma_rq(intf, RX_EDPKT_DMA_VALID);
+}
+
+static void bcmasp_rx_edpkt_cfg_wq(struct bcmasp_intf *intf, dma_addr_t addr)
+{
+ rx_edpkt_cfg_wq(intf, addr, RX_EDPKT_RING_BUFFER_READ);
+}
+
+static void bcmasp_rx_edpkt_dma_wq(struct bcmasp_intf *intf, dma_addr_t addr)
+{
+ rx_edpkt_dma_wq(intf, addr, RX_EDPKT_DMA_READ);
+}
+
+static unsigned long bcmasp_tx_spb_dma_rq(struct bcmasp_intf *intf)
+{
+ return tx_spb_dma_rq(intf, TX_SPB_DMA_READ);
+}
+
+static void bcmasp_tx_spb_dma_wq(struct bcmasp_intf *intf, dma_addr_t addr)
+{
+ tx_spb_dma_wq(intf, addr, TX_SPB_DMA_VALID);
+}
+
+static const struct bcmasp_intf_ops bcmasp_intf_ops = {
+ .rx_desc_read = bcmasp_rx_edpkt_dma_rq,
+ .rx_buffer_write = bcmasp_rx_edpkt_cfg_wq,
+ .rx_desc_write = bcmasp_rx_edpkt_dma_wq,
+ .tx_read = bcmasp_tx_spb_dma_rq,
+ .tx_write = bcmasp_tx_spb_dma_wq,
+};
+
+static netdev_tx_t bcmasp_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ unsigned int total_bytes, size;
+ int spb_index, nr_frags, i, j;
+ struct bcmasp_tx_cb *txcb;
+ dma_addr_t mapping, valid;
+ struct bcmasp_desc *desc;
+ bool csum_hw = false;
+ struct device *kdev;
+ skb_frag_t *frag;
+
+ kdev = &intf->parent->pdev->dev;
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+
+ if (tx_spb_ring_full(intf, nr_frags + 1)) {
+ netif_stop_queue(dev);
+ if (net_ratelimit())
+ netdev_err(dev, "Tx Ring Full!\n");
+ return NETDEV_TX_BUSY;
+ }
+
+ /* Save skb len before adding csum offload header */
+ total_bytes = skb->len;
+ skb = bcmasp_csum_offload(dev, skb, &csum_hw);
+ if (!skb)
+ return NETDEV_TX_OK;
+
+ spb_index = intf->tx_spb_index;
+ valid = intf->tx_spb_dma_valid;
+ for (i = 0; i <= nr_frags; i++) {
+ if (!i) {
+ size = skb_headlen(skb);
+ if (!nr_frags && size < (ETH_ZLEN + ETH_FCS_LEN)) {
+ if (skb_put_padto(skb, ETH_ZLEN + ETH_FCS_LEN))
+ return NETDEV_TX_OK;
+ size = skb->len;
+ }
+ mapping = dma_map_single(kdev, skb->data, size,
+ DMA_TO_DEVICE);
+ } else {
+ frag = &skb_shinfo(skb)->frags[i - 1];
+ size = skb_frag_size(frag);
+ mapping = skb_frag_dma_map(kdev, frag, 0, size,
+ DMA_TO_DEVICE);
+ }
+
+ if (dma_mapping_error(kdev, mapping)) {
+ intf->mib.tx_dma_failed++;
+ spb_index = intf->tx_spb_index;
+ for (j = 0; j < i; j++) {
+ bcmasp_clean_txcb(intf, spb_index);
+ spb_index = incr_ring(spb_index,
+ DESC_RING_COUNT);
+ }
+ /* Rewind so we do not have a hole */
+ spb_index = intf->tx_spb_index;
+ return NETDEV_TX_OK;
+ }
+
+ txcb = &intf->tx_cbs[spb_index];
+ desc = &intf->tx_spb_cpu[spb_index];
+ memset(desc, 0, sizeof(*desc));
+ txcb->skb = skb;
+ txcb->bytes_sent = total_bytes;
+ dma_unmap_addr_set(txcb, dma_addr, mapping);
+ dma_unmap_len_set(txcb, dma_len, size);
+ if (!i) {
+ desc->flags |= DESC_SOF;
+ if (csum_hw)
+ desc->flags |= DESC_EPKT_CMD;
+ }
+
+ if (i == nr_frags) {
+ desc->flags |= DESC_EOF;
+ txcb->last = true;
+ }
+
+ desc->buf = mapping;
+ desc->size = size;
+ desc->flags |= DESC_INT_EN;
+
+ netif_dbg(intf, tx_queued, dev,
+ "%s dma_buf=%pad dma_len=0x%x flags=0x%x index=0x%x\n",
+ __func__, &mapping, desc->size, desc->flags,
+ spb_index);
+
+ spb_index = incr_ring(spb_index, DESC_RING_COUNT);
+ valid = incr_last_byte(valid, intf->tx_spb_dma_addr,
+ DESC_RING_COUNT);
+ }
+
+ /* Ensure all descriptors have been written to DRAM for the
+ * hardware to see up-to-date contents.
+ */
+ wmb();
+
+ intf->tx_spb_index = spb_index;
+ intf->tx_spb_dma_valid = valid;
+ bcmasp_intf_tx_write(intf, intf->tx_spb_dma_valid);
+
+ if (tx_spb_ring_full(intf, MAX_SKB_FRAGS + 1))
+ netif_stop_queue(dev);
+
+ return NETDEV_TX_OK;
+}
+
+static void bcmasp_netif_start(struct net_device *dev)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+
+ bcmasp_set_rx_mode(dev);
+ napi_enable(&intf->tx_napi);
+ napi_enable(&intf->rx_napi);
+
+ bcmasp_enable_rx_irq(intf, 1);
+ bcmasp_enable_tx_irq(intf, 1);
+
+ phy_start(dev->phydev);
+}
+
+static void umac_reset(struct bcmasp_intf *intf)
+{
+ umac_wl(intf, 0x0, UMC_CMD);
+ umac_wl(intf, UMC_CMD_SW_RESET, UMC_CMD);
+ usleep_range(10, 100);
+ umac_wl(intf, 0x0, UMC_CMD);
+}
+
+static void umac_set_hw_addr(struct bcmasp_intf *intf,
+ const unsigned char *addr)
+{
+ u32 mac0 = (addr[0] << 24) | (addr[1] << 16) | (addr[2] << 8) |
+ addr[3];
+ u32 mac1 = (addr[4] << 8) | addr[5];
+
+ umac_wl(intf, mac0, UMC_MAC0);
+ umac_wl(intf, mac1, UMC_MAC1);
+}
+
+static void umac_enable_set(struct bcmasp_intf *intf, u32 mask,
+ unsigned int enable)
+{
+ u32 reg;
+
+ reg = umac_rl(intf, UMC_CMD);
+ if (enable)
+ reg |= mask;
+ else
+ reg &= ~mask;
+ umac_wl(intf, reg, UMC_CMD);
+
+ /* UniMAC stops on a packet boundary, wait for a full-sized packet
+ * to be processed (1 msec).
+ */
+ if (enable == 0)
+ usleep_range(1000, 2000);
+}
+
+static void umac_init(struct bcmasp_intf *intf)
+{
+ umac_wl(intf, 0x800, UMC_FRM_LEN);
+ umac_wl(intf, 0xffff, UMC_PAUSE_CNTRL);
+ umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ);
+ umac_enable_set(intf, UMC_CMD_PROMISC, 1);
+}
+
+static int bcmasp_tx_poll(struct napi_struct *napi, int budget)
+{
+ struct bcmasp_intf *intf =
+ container_of(napi, struct bcmasp_intf, tx_napi);
+ struct bcmasp_intf_stats64 *stats = &intf->stats64;
+ struct device *kdev = &intf->parent->pdev->dev;
+ unsigned long read, released = 0;
+ struct bcmasp_tx_cb *txcb;
+ struct bcmasp_desc *desc;
+ dma_addr_t mapping;
+
+ read = bcmasp_intf_tx_read(intf);
+ while (intf->tx_spb_dma_read != read) {
+ txcb = &intf->tx_cbs[intf->tx_spb_clean_index];
+ mapping = dma_unmap_addr(txcb, dma_addr);
+
+ dma_unmap_single(kdev, mapping,
+ dma_unmap_len(txcb, dma_len),
+ DMA_TO_DEVICE);
+
+ if (txcb->last) {
+ dev_consume_skb_any(txcb->skb);
+
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->tx_packets);
+ u64_stats_add(&stats->tx_bytes, txcb->bytes_sent);
+ u64_stats_update_end(&stats->syncp);
+ }
+
+ desc = &intf->tx_spb_cpu[intf->tx_spb_clean_index];
+
+ netif_dbg(intf, tx_done, intf->ndev,
+ "%s dma_buf=%pad dma_len=0x%x flags=0x%x c_index=0x%x\n",
+ __func__, &mapping, desc->size, desc->flags,
+ intf->tx_spb_clean_index);
+
+ bcmasp_clean_txcb(intf, intf->tx_spb_clean_index);
+ released++;
+
+ intf->tx_spb_clean_index = incr_ring(intf->tx_spb_clean_index,
+ DESC_RING_COUNT);
+ intf->tx_spb_dma_read = incr_first_byte(intf->tx_spb_dma_read,
+ intf->tx_spb_dma_addr,
+ DESC_RING_COUNT);
+ }
+
+ /* Ensure all descriptors have been written to DRAM for the hardware
+ * to see updated contents.
+ */
+ wmb();
+
+ napi_complete(&intf->tx_napi);
+
+ bcmasp_enable_tx_irq(intf, 1);
+
+ if (released)
+ netif_wake_queue(intf->ndev);
+
+ return 0;
+}
+
+static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
+{
+ struct bcmasp_intf *intf =
+ container_of(napi, struct bcmasp_intf, rx_napi);
+ struct bcmasp_intf_stats64 *stats = &intf->stats64;
+ struct device *kdev = &intf->parent->pdev->dev;
+ unsigned long processed = 0;
+ struct bcmasp_desc *desc;
+ struct sk_buff *skb;
+ dma_addr_t valid;
+ void *data;
+ u64 flags;
+ u32 len;
+
+ valid = bcmasp_intf_rx_desc_read(intf) + 1;
+ if (valid == intf->rx_edpkt_dma_addr + DESC_RING_SIZE)
+ valid = intf->rx_edpkt_dma_addr;
+
+ while ((processed < budget) && (valid != intf->rx_edpkt_dma_read)) {
+ desc = &intf->rx_edpkt_cpu[intf->rx_edpkt_index];
+
+ /* Ensure that descriptor has been fully written to DRAM by
+ * hardware before reading by the CPU
+ */
+ rmb();
+
+ /* Calculate virt addr by offsetting from physical addr */
+ data = intf->rx_ring_cpu +
+ (DESC_ADDR(desc->buf) - intf->rx_ring_dma);
+
+ flags = DESC_FLAGS(desc->buf);
+ if (unlikely(flags & (DESC_CRC_ERR | DESC_RX_SYM_ERR))) {
+ if (net_ratelimit()) {
+ netif_err(intf, rx_status, intf->ndev,
+ "flags=0x%llx\n", flags);
+ }
+
+ u64_stats_update_begin(&stats->syncp);
+ if (flags & DESC_CRC_ERR)
+ u64_stats_inc(&stats->rx_crc_errs);
+ if (flags & DESC_RX_SYM_ERR)
+ u64_stats_inc(&stats->rx_sym_errs);
+ u64_stats_update_end(&stats->syncp);
+
+ goto next;
+ }
+
+ dma_sync_single_for_cpu(kdev, DESC_ADDR(desc->buf), desc->size,
+ DMA_FROM_DEVICE);
+
+ len = desc->size;
+
+ skb = napi_alloc_skb(napi, len);
+ if (!skb) {
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_dropped);
+ u64_stats_update_end(&stats->syncp);
+ intf->mib.alloc_rx_skb_failed++;
+
+ goto next;
+ }
+
+ skb_put(skb, len);
+ memcpy(skb->data, data, len);
+
+ skb_pull(skb, 2);
+ len -= 2;
+ if (likely(intf->crc_fwd)) {
+ skb_trim(skb, len - ETH_FCS_LEN);
+ len -= ETH_FCS_LEN;
+ }
+
+ if ((intf->ndev->features & NETIF_F_RXCSUM) &&
+ (desc->buf & DESC_CHKSUM))
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ skb->protocol = eth_type_trans(skb, intf->ndev);
+
+ napi_gro_receive(napi, skb);
+
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_packets);
+ u64_stats_add(&stats->rx_bytes, len);
+ u64_stats_update_end(&stats->syncp);
+
+next:
+ bcmasp_intf_rx_buffer_write(intf, (DESC_ADDR(desc->buf) +
+ desc->size));
+
+ processed++;
+ intf->rx_edpkt_dma_read =
+ incr_first_byte(intf->rx_edpkt_dma_read,
+ intf->rx_edpkt_dma_addr,
+ DESC_RING_COUNT);
+ intf->rx_edpkt_index = incr_ring(intf->rx_edpkt_index,
+ DESC_RING_COUNT);
+ }
+
+ bcmasp_intf_rx_desc_write(intf, intf->rx_edpkt_dma_read);
+
+ if (processed < budget) {
+ napi_complete_done(&intf->rx_napi, processed);
+ bcmasp_enable_rx_irq(intf, 1);
+ }
+
+ return processed;
+}
+
+static void bcmasp_adj_link(struct net_device *dev)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ struct phy_device *phydev = dev->phydev;
+ u32 cmd_bits = 0, reg;
+ int changed = 0;
+
+ if (intf->old_link != phydev->link) {
+ changed = 1;
+ intf->old_link = phydev->link;
+ }
+
+ if (intf->old_duplex != phydev->duplex) {
+ changed = 1;
+ intf->old_duplex = phydev->duplex;
+ }
+
+ switch (phydev->speed) {
+ case SPEED_2500:
+ cmd_bits = UMC_CMD_SPEED_2500;
+ break;
+ case SPEED_1000:
+ cmd_bits = UMC_CMD_SPEED_1000;
+ break;
+ case SPEED_100:
+ cmd_bits = UMC_CMD_SPEED_100;
+ break;
+ case SPEED_10:
+ cmd_bits = UMC_CMD_SPEED_10;
+ break;
+ default:
+ break;
+ }
+ cmd_bits <<= UMC_CMD_SPEED_SHIFT;
+
+ if (phydev->duplex == DUPLEX_HALF)
+ cmd_bits |= UMC_CMD_HD_EN;
+
+ if (intf->old_pause != phydev->pause) {
+ changed = 1;
+ intf->old_pause = phydev->pause;
+ }
+
+ if (!phydev->pause)
+ cmd_bits |= UMC_CMD_RX_PAUSE_IGNORE | UMC_CMD_TX_PAUSE_IGNORE;
+
+ if (!changed)
+ return;
+
+ if (phydev->link) {
+ reg = umac_rl(intf, UMC_CMD);
+ reg &= ~((UMC_CMD_SPEED_MASK << UMC_CMD_SPEED_SHIFT) |
+ UMC_CMD_HD_EN | UMC_CMD_RX_PAUSE_IGNORE |
+ UMC_CMD_TX_PAUSE_IGNORE);
+ reg |= cmd_bits;
+ umac_wl(intf, reg, UMC_CMD);
+
+ intf->eee.eee_active = phy_init_eee(phydev, 0) >= 0;
+ bcmasp_eee_enable_set(intf, intf->eee.eee_active);
+ }
+
+ reg = rgmii_rl(intf, RGMII_OOB_CNTRL);
+ if (phydev->link)
+ reg |= RGMII_LINK;
+ else
+ reg &= ~RGMII_LINK;
+ rgmii_wl(intf, reg, RGMII_OOB_CNTRL);
+
+ if (changed)
+ phy_print_status(phydev);
+}
+
+static int bcmasp_init_rx(struct bcmasp_intf *intf)
+{
+ struct device *kdev = &intf->parent->pdev->dev;
+ struct page *buffer_pg;
+ dma_addr_t dma;
+ void *p;
+ u32 reg;
+ int ret;
+
+ intf->rx_buf_order = get_order(RING_BUFFER_SIZE);
+ buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order);
+
+ dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(kdev, dma)) {
+ __free_pages(buffer_pg, intf->rx_buf_order);
+ return -ENOMEM;
+ }
+ intf->rx_ring_cpu = page_to_virt(buffer_pg);
+ intf->rx_ring_dma = dma;
+ intf->rx_ring_dma_valid = intf->rx_ring_dma + RING_BUFFER_SIZE - 1;
+
+ p = dma_alloc_coherent(kdev, DESC_RING_SIZE, &intf->rx_edpkt_dma_addr,
+ GFP_KERNEL);
+ if (!p) {
+ ret = -ENOMEM;
+ goto free_rx_ring;
+ }
+ intf->rx_edpkt_cpu = p;
+
+ netif_napi_add(intf->ndev, &intf->rx_napi, bcmasp_rx_poll);
+
+ intf->rx_edpkt_dma_read = intf->rx_edpkt_dma_addr;
+ intf->rx_edpkt_index = 0;
+
+ /* Make sure channels are disabled */
+ rx_edpkt_cfg_wl(intf, 0x0, RX_EDPKT_CFG_ENABLE);
+
+ /* Rx SPB */
+ rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_READ);
+ rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_WRITE);
+ rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_BASE);
+ rx_edpkt_cfg_wq(intf, intf->rx_ring_dma_valid,
+ RX_EDPKT_RING_BUFFER_END);
+ rx_edpkt_cfg_wq(intf, intf->rx_ring_dma_valid,
+ RX_EDPKT_RING_BUFFER_VALID);
+
+ /* EDPKT */
+ rx_edpkt_cfg_wl(intf, (RX_EDPKT_CFG_CFG0_RBUF_4K <<
+ RX_EDPKT_CFG_CFG0_DBUF_SHIFT) |
+ (RX_EDPKT_CFG_CFG0_64_ALN <<
+ RX_EDPKT_CFG_CFG0_BALN_SHIFT) |
+ (RX_EDPKT_CFG_CFG0_EFRM_STUF),
+ RX_EDPKT_CFG_CFG0);
+ rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr, RX_EDPKT_DMA_WRITE);
+ rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr, RX_EDPKT_DMA_READ);
+ rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr, RX_EDPKT_DMA_BASE);
+ rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr + (DESC_RING_SIZE - 1),
+ RX_EDPKT_DMA_END);
+ rx_edpkt_dma_wq(intf, intf->rx_edpkt_dma_addr + (DESC_RING_SIZE - 1),
+ RX_EDPKT_DMA_VALID);
+
+ reg = UMAC2FB_CFG_DEFAULT_EN |
+ ((intf->channel + 11) << UMAC2FB_CFG_CHID_SHIFT);
+ reg |= (0xd << UMAC2FB_CFG_OK_SEND_SHIFT);
+ umac2fb_wl(intf, reg, UMAC2FB_CFG);
+
+ return 0;
+
+free_rx_ring:
+ dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
+ DMA_FROM_DEVICE);
+ __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
+
+ return ret;
+}
+
+static void bcmasp_reclaim_free_all_rx(struct bcmasp_intf *intf)
+{
+ struct device *kdev = &intf->parent->pdev->dev;
+
+ dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
+ intf->rx_edpkt_dma_addr);
+ dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
+ DMA_FROM_DEVICE);
+ __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
+}
+
+static int bcmasp_init_tx(struct bcmasp_intf *intf)
+{
+ struct device *kdev = &intf->parent->pdev->dev;
+ void *p;
+ int ret;
+
+ p = dma_alloc_coherent(kdev, DESC_RING_SIZE, &intf->tx_spb_dma_addr,
+ GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ intf->tx_spb_cpu = p;
+ intf->tx_spb_dma_valid = intf->tx_spb_dma_addr + DESC_RING_SIZE - 1;
+ intf->tx_spb_dma_read = intf->tx_spb_dma_addr;
+
+ intf->tx_cbs = kcalloc(DESC_RING_COUNT, sizeof(struct bcmasp_tx_cb),
+ GFP_KERNEL);
+ if (!intf->tx_cbs) {
+ ret = -ENOMEM;
+ goto free_tx_spb;
+ }
+
+ intf->tx_spb_index = 0;
+ intf->tx_spb_clean_index = 0;
+
+ netif_napi_add_tx(intf->ndev, &intf->tx_napi, bcmasp_tx_poll);
+
+ /* Make sure channels are disabled */
+ tx_spb_ctrl_wl(intf, 0x0, TX_SPB_CTRL_ENABLE);
+ tx_epkt_core_wl(intf, 0x0, TX_EPKT_C_CFG_MISC);
+
+ /* Tx SPB */
+ tx_spb_ctrl_wl(intf, ((intf->channel + 8) << TX_SPB_CTRL_XF_BID_SHIFT),
+ TX_SPB_CTRL_XF_CTRL2);
+ tx_pause_ctrl_wl(intf, (1 << (intf->channel + 8)), TX_PAUSE_MAP_VECTOR);
+ tx_spb_top_wl(intf, 0x1e, TX_SPB_TOP_BLKOUT);
+ tx_spb_top_wl(intf, 0x0, TX_SPB_TOP_SPRE_BW_CTRL);
+
+ tx_spb_dma_wq(intf, intf->tx_spb_dma_addr, TX_SPB_DMA_READ);
+ tx_spb_dma_wq(intf, intf->tx_spb_dma_addr, TX_SPB_DMA_BASE);
+ tx_spb_dma_wq(intf, intf->tx_spb_dma_valid, TX_SPB_DMA_END);
+ tx_spb_dma_wq(intf, intf->tx_spb_dma_valid, TX_SPB_DMA_VALID);
+
+ return 0;
+
+free_tx_spb:
+ dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu,
+ intf->tx_spb_dma_addr);
+
+ return ret;
+}
+
+static void bcmasp_reclaim_free_all_tx(struct bcmasp_intf *intf)
+{
+ struct device *kdev = &intf->parent->pdev->dev;
+
+ /* Free descriptors */
+ dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu,
+ intf->tx_spb_dma_addr);
+
+ /* Free cbs */
+ kfree(intf->tx_cbs);
+}
+
+static void bcmasp_ephy_enable_set(struct bcmasp_intf *intf, bool enable)
+{
+ u32 mask = RGMII_EPHY_CFG_IDDQ_BIAS | RGMII_EPHY_CFG_EXT_PWRDOWN |
+ RGMII_EPHY_CFG_IDDQ_GLOBAL;
+ u32 reg;
+
+ reg = rgmii_rl(intf, RGMII_EPHY_CNTRL);
+ if (enable) {
+ reg &= ~RGMII_EPHY_CK25_DIS;
+ rgmii_wl(intf, reg, RGMII_EPHY_CNTRL);
+ mdelay(1);
+
+ reg &= ~mask;
+ reg |= RGMII_EPHY_RESET;
+ rgmii_wl(intf, reg, RGMII_EPHY_CNTRL);
+ mdelay(1);
+
+ reg &= ~RGMII_EPHY_RESET;
+ } else {
+ reg |= mask | RGMII_EPHY_RESET;
+ rgmii_wl(intf, reg, RGMII_EPHY_CNTRL);
+ mdelay(1);
+ reg |= RGMII_EPHY_CK25_DIS;
+ }
+ rgmii_wl(intf, reg, RGMII_EPHY_CNTRL);
+ mdelay(1);
+
+ /* Set or clear the LED control override to avoid lighting up LEDs
+ * while the EPHY is powered off and drawing unnecessary current.
+ */
+ reg = rgmii_rl(intf, RGMII_SYS_LED_CNTRL);
+ if (enable)
+ reg &= ~RGMII_SYS_LED_CNTRL_LINK_OVRD;
+ else
+ reg |= RGMII_SYS_LED_CNTRL_LINK_OVRD;
+ rgmii_wl(intf, reg, RGMII_SYS_LED_CNTRL);
+}
+
+static void bcmasp_rgmii_mode_en_set(struct bcmasp_intf *intf, bool enable)
+{
+ u32 reg;
+
+ reg = rgmii_rl(intf, RGMII_OOB_CNTRL);
+ reg &= ~RGMII_OOB_DIS;
+ if (enable)
+ reg |= RGMII_MODE_EN;
+ else
+ reg &= ~RGMII_MODE_EN;
+ rgmii_wl(intf, reg, RGMII_OOB_CNTRL);
+}
+
+static void bcmasp_netif_deinit(struct net_device *dev)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ u32 reg, timeout = 1000;
+
+ napi_disable(&intf->tx_napi);
+
+ bcmasp_enable_tx(intf, 0);
+
+ /* Flush any TX packets in the pipe */
+ tx_spb_dma_wl(intf, TX_SPB_DMA_FIFO_FLUSH, TX_SPB_DMA_FIFO_CTRL);
+ do {
+ reg = tx_spb_dma_rl(intf, TX_SPB_DMA_FIFO_STATUS);
+ if (!(reg & TX_SPB_DMA_FIFO_FLUSH))
+ break;
+ usleep_range(1000, 2000);
+ } while (timeout-- > 0);
+ tx_spb_dma_wl(intf, 0x0, TX_SPB_DMA_FIFO_CTRL);
+
+ umac_enable_set(intf, UMC_CMD_TX_EN, 0);
+
+ phy_stop(dev->phydev);
+
+ umac_enable_set(intf, UMC_CMD_RX_EN, 0);
+
+ bcmasp_flush_rx_port(intf);
+ usleep_range(1000, 2000);
+ bcmasp_enable_rx(intf, 0);
+
+ napi_disable(&intf->rx_napi);
+
+ /* Disable interrupts */
+ bcmasp_enable_tx_irq(intf, 0);
+ bcmasp_enable_rx_irq(intf, 0);
+
+ netif_napi_del(&intf->tx_napi);
+ bcmasp_reclaim_free_all_tx(intf);
+
+ netif_napi_del(&intf->rx_napi);
+ bcmasp_reclaim_free_all_rx(intf);
+}
+
+static int bcmasp_stop(struct net_device *dev)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+
+ netif_dbg(intf, ifdown, dev, "bcmasp stop\n");
+
+ /* Stop tx from updating HW */
+ netif_tx_disable(dev);
+
+ bcmasp_netif_deinit(dev);
+
+ phy_disconnect(dev->phydev);
+
+ /* Disable internal EPHY or external PHY */
+ if (intf->internal_phy)
+ bcmasp_ephy_enable_set(intf, false);
+ else
+ bcmasp_rgmii_mode_en_set(intf, false);
+
+ /* Disable the interface clocks */
+ bcmasp_core_clock_set_intf(intf, false);
+
+ clk_disable_unprepare(intf->parent->clk);
+
+ return 0;
+}
+
+static void bcmasp_configure_port(struct bcmasp_intf *intf)
+{
+ u32 reg, id_mode_dis = 0;
+
+ reg = rgmii_rl(intf, RGMII_PORT_CNTRL);
+ reg &= ~RGMII_PORT_MODE_MASK;
+
+ switch (intf->phy_interface) {
+ case PHY_INTERFACE_MODE_RGMII:
+ /* RGMII_NO_ID: TXC transitions at the same time as TXD
+ * (requires PCB or receiver-side delay)
+ * RGMII: Add 2ns delay on TXC (90 degree shift)
+ *
+ * ID is implicitly disabled for 100Mbps (RG)MII operation.
+ */
+ id_mode_dis = RGMII_ID_MODE_DIS;
+ fallthrough;
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+ reg |= RGMII_PORT_MODE_EXT_GPHY;
+ break;
+ case PHY_INTERFACE_MODE_MII:
+ reg |= RGMII_PORT_MODE_EXT_EPHY;
+ break;
+ default:
+ break;
+ }
+
+ if (intf->internal_phy)
+ reg |= RGMII_PORT_MODE_EPHY;
+
+ rgmii_wl(intf, reg, RGMII_PORT_CNTRL);
+
+ reg = rgmii_rl(intf, RGMII_OOB_CNTRL);
+ reg &= ~RGMII_ID_MODE_DIS;
+ reg |= id_mode_dis;
+ rgmii_wl(intf, reg, RGMII_OOB_CNTRL);
+}
+
+static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ phy_interface_t phy_iface = intf->phy_interface;
+ u32 phy_flags = PHY_BRCM_AUTO_PWRDWN_ENABLE |
+ PHY_BRCM_DIS_TXCRXC_NOENRGY |
+ PHY_BRCM_IDDQ_SUSPEND;
+ struct phy_device *phydev = NULL;
+ int ret;
+
+ /* Always enable interface clocks */
+ bcmasp_core_clock_set_intf(intf, true);
+
+ /* Enable internal PHY or external PHY before any MAC activity */
+ if (intf->internal_phy)
+ bcmasp_ephy_enable_set(intf, true);
+ else
+ bcmasp_rgmii_mode_en_set(intf, true);
+ bcmasp_configure_port(intf);
+
+ /* This is an ugly quirk but we have not been correctly
+ * interpreting the phy_interface values and we have done that
+ * across different drivers, so at least we are consistent in
+ * our mistakes.
+ *
+ * When the Generic PHY driver is in use either the PHY has
+ * been strapped or programmed correctly by the boot loader so
+ * we should stick to our incorrect interpretation since we
+ * have validated it.
+ *
+ * Now when a dedicated PHY driver is in use, we need to
+ * reverse the meaning of the phy_interface_mode values to
+ * something that the PHY driver will interpret and act on such
+ * that we have two mistakes canceling themselves so to speak.
+ * We only do this for the two modes that GENET driver
+ * officially supports on Broadcom STB chips:
+ * PHY_INTERFACE_MODE_RGMII and PHY_INTERFACE_MODE_RGMII_TXID.
+ * Other modes are not *officially* supported with the boot
+ * loader and the scripted environment generating Device Tree
+ * blobs for those platforms.
+ *
+ * Note that internal PHY and fixed-link configurations are not
+ * affected because they use different phy_interface_t values
+ * or the Generic PHY driver.
+ */
+ switch (phy_iface) {
+ case PHY_INTERFACE_MODE_RGMII:
+ phy_iface = PHY_INTERFACE_MODE_RGMII_ID;
+ break;
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+ phy_iface = PHY_INTERFACE_MODE_RGMII_RXID;
+ break;
+ default:
+ break;
+ }
+
+ if (phy_connect) {
+ phydev = of_phy_connect(dev, intf->phy_dn,
+ bcmasp_adj_link, phy_flags,
+ phy_iface);
+ if (!phydev) {
+ ret = -ENODEV;
+ netdev_err(dev, "could not attach to PHY\n");
+ goto err_phy_disable;
+ }
+ } else if (!intf->wolopts) {
+ ret = phy_resume(dev->phydev);
+ if (ret)
+ goto err_phy_disable;
+ }
+
+ umac_reset(intf);
+
+ umac_init(intf);
+
+ /* Disable the UniMAC RX/TX */
+ umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 0);
+
+ umac_set_hw_addr(intf, dev->dev_addr);
+
+ intf->old_duplex = -1;
+ intf->old_link = -1;
+ intf->old_pause = -1;
+
+ ret = bcmasp_init_tx(intf);
+ if (ret)
+ goto err_phy_disconnect;
+
+ /* Turn on asp */
+ bcmasp_enable_tx(intf, 1);
+
+ ret = bcmasp_init_rx(intf);
+ if (ret)
+ goto err_reclaim_tx;
+
+ bcmasp_enable_rx(intf, 1);
+
+ /* Turn on UniMAC TX/RX */
+ umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 1);
+
+ intf->crc_fwd = !!(umac_rl(intf, UMC_CMD) & UMC_CMD_CRC_FWD);
+
+ bcmasp_netif_start(dev);
+
+ netif_start_queue(dev);
+
+ return 0;
+
+err_reclaim_tx:
+ bcmasp_reclaim_free_all_tx(intf);
+err_phy_disconnect:
+ if (phydev)
+ phy_disconnect(phydev);
+err_phy_disable:
+ if (intf->internal_phy)
+ bcmasp_ephy_enable_set(intf, false);
+ else
+ bcmasp_rgmii_mode_en_set(intf, false);
+ return ret;
+}
+
+static int bcmasp_open(struct net_device *dev)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ int ret;
+
+ netif_dbg(intf, ifup, dev, "bcmasp open\n");
+
+ ret = clk_prepare_enable(intf->parent->clk);
+ if (ret)
+ return ret;
+
+ ret = bcmasp_netif_init(dev, true);
+ if (ret)
+ clk_disable_unprepare(intf->parent->clk);
+
+ return ret;
+}
+
+static void bcmasp_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+
+ netif_dbg(intf, tx_err, dev, "transmit timeout!\n");
+ intf->mib.tx_timeout_cnt++;
+}
+
+static int bcmasp_get_phys_port_name(struct net_device *dev,
+ char *name, size_t len)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+
+ if (snprintf(name, len, "p%d", intf->port) >= len)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void bcmasp_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct bcmasp_intf *intf = netdev_priv(dev);
+ struct bcmasp_intf_stats64 *lstats;
+ unsigned int start;
+
+ lstats = &intf->stats64;
+
+ do {
+ start = u64_stats_fetch_begin(&lstats->syncp);
+ stats->rx_packets = u64_stats_read(&lstats->rx_packets);
+ stats->rx_bytes = u64_stats_read(&lstats->rx_bytes);
+ stats->rx_dropped = u64_stats_read(&lstats->rx_dropped);
+ stats->rx_crc_errors = u64_stats_read(&lstats->rx_crc_errs);
+ stats->rx_frame_errors = u64_stats_read(&lstats->rx_sym_errs);
+ stats->rx_errors = stats->rx_crc_errors + stats->rx_frame_errors;
+
+ stats->tx_packets = u64_stats_read(&lstats->tx_packets);
+ stats->tx_bytes = u64_stats_read(&lstats->tx_bytes);
+ } while (u64_stats_fetch_retry(&lstats->syncp, start));
+}
+
+static const struct net_device_ops bcmasp_netdev_ops = {
+ .ndo_open = bcmasp_open,
+ .ndo_stop = bcmasp_stop,
+ .ndo_start_xmit = bcmasp_xmit,
+ .ndo_tx_timeout = bcmasp_tx_timeout,
+ .ndo_set_rx_mode = bcmasp_set_rx_mode,
+ .ndo_get_phys_port_name = bcmasp_get_phys_port_name,
+ .ndo_eth_ioctl = phy_do_ioctl_running,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_get_stats64 = bcmasp_get_stats64,
+};
+
+static void bcmasp_map_res(struct bcmasp_priv *priv, struct bcmasp_intf *intf)
+{
+ /* Per port */
+ intf->res.umac = priv->base + UMC_OFFSET(intf);
+ intf->res.umac2fb = priv->base + (priv->hw_info->umac2fb +
+ (intf->port * 0x4));
+ intf->res.rgmii = priv->base + RGMII_OFFSET(intf);
+
+ /* Per ch */
+ intf->tx_spb_dma = priv->base + TX_SPB_DMA_OFFSET(intf);
+ intf->res.tx_spb_ctrl = priv->base + TX_SPB_CTRL_OFFSET(intf);
+ intf->res.tx_spb_top = priv->base + TX_SPB_TOP_OFFSET(intf);
+ intf->res.tx_epkt_core = priv->base + TX_EPKT_C_OFFSET(intf);
+ intf->res.tx_pause_ctrl = priv->base + TX_PAUSE_CTRL_OFFSET(intf);
+
+ intf->rx_edpkt_dma = priv->base + RX_EDPKT_DMA_OFFSET(intf);
+ intf->rx_edpkt_cfg = priv->base + RX_EDPKT_CFG_OFFSET(intf);
+}
+
+#define MAX_IRQ_STR_LEN 64
+struct bcmasp_intf *bcmasp_interface_create(struct bcmasp_priv *priv,
+ struct device_node *ndev_dn, int i)
+{
+ struct device *dev = &priv->pdev->dev;
+ struct bcmasp_intf *intf;
+ struct net_device *ndev;
+ int ch, port, ret;
+
+ if (of_property_read_u32(ndev_dn, "reg", &port)) {
+ dev_warn(dev, "%s: invalid port number\n", ndev_dn->name);
+ goto err;
+ }
+
+ if (of_property_read_u32(ndev_dn, "brcm,channel", &ch)) {
+ dev_warn(dev, "%s: invalid ch number\n", ndev_dn->name);
+ goto err;
+ }
+
+ ndev = alloc_etherdev(sizeof(struct bcmasp_intf));
+ if (!ndev) {
+ dev_warn(dev, "%s: unable to alloc ndev\n", ndev_dn->name);
+ goto err;
+ }
+ intf = netdev_priv(ndev);
+
+ intf->parent = priv;
+ intf->ndev = ndev;
+ intf->channel = ch;
+ intf->port = port;
+ intf->ndev_dn = ndev_dn;
+ intf->index = i;
+
+ ret = of_get_phy_mode(ndev_dn, &intf->phy_interface);
+ if (ret < 0) {
+ dev_err(dev, "invalid PHY mode property\n");
+ goto err_free_netdev;
+ }
+
+ if (intf->phy_interface == PHY_INTERFACE_MODE_INTERNAL)
+ intf->internal_phy = true;
+
+ intf->phy_dn = of_parse_phandle(ndev_dn, "phy-handle", 0);
+ if (!intf->phy_dn && of_phy_is_fixed_link(ndev_dn)) {
+ ret = of_phy_register_fixed_link(ndev_dn);
+ if (ret) {
+ dev_warn(dev, "%s: failed to register fixed PHY\n",
+ ndev_dn->name);
+ goto err_free_netdev;
+ }
+ intf->phy_dn = ndev_dn;
+ }
+
+ /* Map resource */
+ bcmasp_map_res(priv, intf);
+
+ if ((!phy_interface_mode_is_rgmii(intf->phy_interface) &&
+ intf->phy_interface != PHY_INTERFACE_MODE_MII &&
+ intf->phy_interface != PHY_INTERFACE_MODE_INTERNAL) ||
+ (intf->port != 1 && intf->internal_phy)) {
+ netdev_err(intf->ndev, "invalid PHY mode: %s for port %d\n",
+ phy_modes(intf->phy_interface), intf->port);
+ ret = -EINVAL;
+ goto err_free_netdev;
+ }
+
+ ret = of_get_ethdev_address(ndev_dn, ndev);
+ if (ret) {
+ netdev_warn(ndev, "using random Ethernet MAC\n");
+ eth_hw_addr_random(ndev);
+ }
+
+ SET_NETDEV_DEV(ndev, dev);
+ intf->ops = &bcmasp_intf_ops;
+ ndev->netdev_ops = &bcmasp_netdev_ops;
+ ndev->ethtool_ops = &bcmasp_ethtool_ops;
+ intf->msg_enable = netif_msg_init(-1, NETIF_MSG_DRV |
+ NETIF_MSG_PROBE |
+ NETIF_MSG_LINK);
+ ndev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
+ NETIF_F_RXCSUM;
+ ndev->hw_features |= ndev->features;
+ ndev->needed_headroom += sizeof(struct bcmasp_pkt_offload);
+
+ return intf;
+
+err_free_netdev:
+ free_netdev(ndev);
+err:
+ return NULL;
+}
+
+void bcmasp_interface_destroy(struct bcmasp_intf *intf)
+{
+ if (intf->ndev->reg_state == NETREG_REGISTERED)
+ unregister_netdev(intf->ndev);
+ if (of_phy_is_fixed_link(intf->ndev_dn))
+ of_phy_deregister_fixed_link(intf->ndev_dn);
+ free_netdev(intf->ndev);
+}
+
+static void bcmasp_suspend_to_wol(struct bcmasp_intf *intf)
+{
+ struct net_device *ndev = intf->ndev;
+ u32 reg;
+
+ reg = umac_rl(intf, UMC_MPD_CTRL);
+ if (intf->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE))
+ reg |= UMC_MPD_CTRL_MPD_EN;
+ reg &= ~UMC_MPD_CTRL_PSW_EN;
+ if (intf->wolopts & WAKE_MAGICSECURE) {
+ /* Program the SecureOn password */
+ umac_wl(intf, get_unaligned_be16(&intf->sopass[0]),
+ UMC_PSW_MS);
+ umac_wl(intf, get_unaligned_be32(&intf->sopass[2]),
+ UMC_PSW_LS);
+ reg |= UMC_MPD_CTRL_PSW_EN;
+ }
+ umac_wl(intf, reg, UMC_MPD_CTRL);
+
+ if (intf->wolopts & WAKE_FILTER)
+ bcmasp_netfilt_suspend(intf);
+
+ /* UniMAC receive needs to be turned on */
+ umac_enable_set(intf, UMC_CMD_RX_EN, 1);
+
+ if (intf->parent->wol_irq > 0) {
+ wakeup_intr2_core_wl(intf->parent, 0xffffffff,
+ ASP_WAKEUP_INTR2_MASK_CLEAR);
+ }
+
+ netif_dbg(intf, wol, ndev, "entered WOL mode\n");
+}
+
+int bcmasp_interface_suspend(struct bcmasp_intf *intf)
+{
+ struct device *kdev = &intf->parent->pdev->dev;
+ struct net_device *dev = intf->ndev;
+ int ret = 0;
+
+ if (!netif_running(dev))
+ return 0;
+
+ netif_device_detach(dev);
+
+ bcmasp_netif_deinit(dev);
+
+ if (!intf->wolopts) {
+ ret = phy_suspend(dev->phydev);
+ if (ret)
+ goto out;
+
+ if (intf->internal_phy)
+ bcmasp_ephy_enable_set(intf, false);
+ else
+ bcmasp_rgmii_mode_en_set(intf, false);
+
+ /* If Wake-on-LAN is disabled, we can safely
+ * disable the network interface clocks.
+ */
+ bcmasp_core_clock_set_intf(intf, false);
+ }
+
+ if (device_may_wakeup(kdev) && intf->wolopts)
+ bcmasp_suspend_to_wol(intf);
+
+ clk_disable_unprepare(intf->parent->clk);
+
+ return ret;
+
+out:
+ bcmasp_netif_init(dev, false);
+ return ret;
+}
+
+static void bcmasp_resume_from_wol(struct bcmasp_intf *intf)
+{
+ u32 reg;
+
+ reg = umac_rl(intf, UMC_MPD_CTRL);
+ reg &= ~UMC_MPD_CTRL_MPD_EN;
+ umac_wl(intf, reg, UMC_MPD_CTRL);
+
+ if (intf->parent->wol_irq > 0) {
+ wakeup_intr2_core_wl(intf->parent, 0xffffffff,
+ ASP_WAKEUP_INTR2_MASK_SET);
+ }
+}
+
+int bcmasp_interface_resume(struct bcmasp_intf *intf)
+{
+ struct net_device *dev = intf->ndev;
+ int ret;
+
+ if (!netif_running(dev))
+ return 0;
+
+ ret = clk_prepare_enable(intf->parent->clk);
+ if (ret)
+ return ret;
+
+ ret = bcmasp_netif_init(dev, false);
+ if (ret)
+ goto out;
+
+ bcmasp_resume_from_wol(intf);
+
+ if (intf->eee.eee_enabled)
+ bcmasp_eee_enable_set(intf, true);
+
+ netif_device_attach(dev);
+
+ return 0;
+
+out:
+ clk_disable_unprepare(intf->parent->clk);
+ return ret;
+}
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h
new file mode 100644
index 000000000000..ad742612895f
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h
@@ -0,0 +1,257 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BCMASP_INTF_DEFS_H
+#define __BCMASP_INTF_DEFS_H
+
+#define UMC_OFFSET(intf) \
+ ((((intf)->port) * 0x800) + 0xc000)
+#define UMC_CMD 0x008
+#define UMC_CMD_TX_EN BIT(0)
+#define UMC_CMD_RX_EN BIT(1)
+#define UMC_CMD_SPEED_SHIFT 0x2
+#define UMC_CMD_SPEED_MASK 0x3
+#define UMC_CMD_SPEED_10 0x0
+#define UMC_CMD_SPEED_100 0x1
+#define UMC_CMD_SPEED_1000 0x2
+#define UMC_CMD_SPEED_2500 0x3
+#define UMC_CMD_PROMISC BIT(4)
+#define UMC_CMD_PAD_EN BIT(5)
+#define UMC_CMD_CRC_FWD BIT(6)
+#define UMC_CMD_PAUSE_FWD BIT(7)
+#define UMC_CMD_RX_PAUSE_IGNORE BIT(8)
+#define UMC_CMD_TX_ADDR_INS BIT(9)
+#define UMC_CMD_HD_EN BIT(10)
+#define UMC_CMD_SW_RESET BIT(13)
+#define UMC_CMD_LCL_LOOP_EN BIT(15)
+#define UMC_CMD_AUTO_CONFIG BIT(22)
+#define UMC_CMD_CNTL_FRM_EN BIT(23)
+#define UMC_CMD_NO_LEN_CHK BIT(24)
+#define UMC_CMD_RMT_LOOP_EN BIT(25)
+#define UMC_CMD_PRBL_EN BIT(27)
+#define UMC_CMD_TX_PAUSE_IGNORE BIT(28)
+#define UMC_CMD_TX_RX_EN BIT(29)
+#define UMC_CMD_RUNT_FILTER_DIS BIT(30)
+#define UMC_MAC0 0x0c
+#define UMC_MAC1 0x10
+#define UMC_FRM_LEN 0x14
+#define UMC_EEE_CTRL 0x64
+#define EN_LPI_RX_PAUSE BIT(0)
+#define EN_LPI_TX_PFC BIT(1)
+#define EN_LPI_TX_PAUSE BIT(2)
+#define EEE_EN BIT(3)
+#define RX_FIFO_CHECK BIT(4)
+#define EEE_TX_CLK_DIS BIT(5)
+#define DIS_EEE_10M BIT(6)
+#define LP_IDLE_PREDICTION_MODE BIT(7)
+#define UMC_EEE_LPI_TIMER 0x68
+#define UMC_PAUSE_CNTRL 0x330
+#define UMC_TX_FLUSH 0x334
+#define UMC_GR64 0x400
+#define UMC_GR127 0x404
+#define UMC_GR255 0x408
+#define UMC_GR511 0x40c
+#define UMC_GR1023 0x410
+#define UMC_GR1518 0x414
+#define UMC_GRMGV 0x418
+#define UMC_GR2047 0x41c
+#define UMC_GR4095 0x420
+#define UMC_GR9216 0x424
+#define UMC_GRPKT 0x428
+#define UMC_GRBYT 0x42c
+#define UMC_GRMCA 0x430
+#define UMC_GRBCA 0x434
+#define UMC_GRFCS 0x438
+#define UMC_GRXCF 0x43c
+#define UMC_GRXPF 0x440
+#define UMC_GRXUO 0x444
+#define UMC_GRALN 0x448
+#define UMC_GRFLR 0x44c
+#define UMC_GRCDE 0x450
+#define UMC_GRFCR 0x454
+#define UMC_GROVR 0x458
+#define UMC_GRJBR 0x45c
+#define UMC_GRMTUE 0x460
+#define UMC_GRPOK 0x464
+#define UMC_GRUC 0x468
+#define UMC_GRPPP 0x46c
+#define UMC_GRMCRC 0x470
+#define UMC_TR64 0x480
+#define UMC_TR127 0x484
+#define UMC_TR255 0x488
+#define UMC_TR511 0x48c
+#define UMC_TR1023 0x490
+#define UMC_TR1518 0x494
+#define UMC_TRMGV 0x498
+#define UMC_TR2047 0x49c
+#define UMC_TR4095 0x4a0
+#define UMC_TR9216 0x4a4
+#define UMC_GTPKT 0x4a8
+#define UMC_GTMCA 0x4ac
+#define UMC_GTBCA 0x4b0
+#define UMC_GTXPF 0x4b4
+#define UMC_GTXCF 0x4b8
+#define UMC_GTFCS 0x4bc
+#define UMC_GTOVR 0x4c0
+#define UMC_GTDRF 0x4c4
+#define UMC_GTEDF 0x4c8
+#define UMC_GTSCL 0x4cc
+#define UMC_GTMCL 0x4d0
+#define UMC_GTLCL 0x4d4
+#define UMC_GTXCL 0x4d8
+#define UMC_GTFRG 0x4dc
+#define UMC_GTNCL 0x4e0
+#define UMC_GTJBR 0x4e4
+#define UMC_GTBYT 0x4e8
+#define UMC_GTPOK 0x4ec
+#define UMC_GTUC 0x4f0
+#define UMC_RRPKT 0x500
+#define UMC_RRUND 0x504
+#define UMC_RRFRG 0x508
+#define UMC_RRBYT 0x50c
+#define UMC_MIB_CNTRL 0x580
+#define UMC_MIB_CNTRL_RX_CNT_RST BIT(0)
+#define UMC_MIB_CNTRL_RUNT_CNT_RST BIT(1)
+#define UMC_MIB_CNTRL_TX_CNT_RST BIT(2)
+#define UMC_RX_MAX_PKT_SZ 0x608
+#define UMC_MPD_CTRL 0x620
+#define UMC_MPD_CTRL_MPD_EN BIT(0)
+#define UMC_MPD_CTRL_PSW_EN BIT(27)
+#define UMC_PSW_MS 0x624
+#define UMC_PSW_LS 0x628
+
+#define UMAC2FB_OFFSET_2_1 0x9f044
+#define UMAC2FB_OFFSET 0x9f03c
+#define UMAC2FB_CFG 0x0
+#define UMAC2FB_CFG_OPUT_EN BIT(0)
+#define UMAC2FB_CFG_VLAN_EN BIT(1)
+#define UMAC2FB_CFG_SNAP_EN BIT(2)
+#define UMAC2FB_CFG_BCM_TG_EN BIT(3)
+#define UMAC2FB_CFG_IPUT_EN BIT(4)
+#define UMAC2FB_CFG_CHID_SHIFT 8
+#define UMAC2FB_CFG_OK_SEND_SHIFT 24
+#define UMAC2FB_CFG_DEFAULT_EN \
+ (UMAC2FB_CFG_OPUT_EN | UMAC2FB_CFG_VLAN_EN \
+ | UMAC2FB_CFG_SNAP_EN | UMAC2FB_CFG_IPUT_EN)
+
+#define RGMII_OFFSET(intf) \
+ ((((intf)->port) * 0x100) + 0xd000)
+#define RGMII_EPHY_CNTRL 0x00
+#define RGMII_EPHY_CFG_IDDQ_BIAS BIT(0)
+#define RGMII_EPHY_CFG_EXT_PWRDOWN BIT(1)
+#define RGMII_EPHY_CFG_FORCE_DLL_EN BIT(2)
+#define RGMII_EPHY_CFG_IDDQ_GLOBAL BIT(3)
+#define RGMII_EPHY_CK25_DIS BIT(4)
+#define RGMII_EPHY_RESET BIT(7)
+#define RGMII_OOB_CNTRL 0x0c
+#define RGMII_LINK BIT(4)
+#define RGMII_OOB_DIS BIT(5)
+#define RGMII_MODE_EN BIT(6)
+#define RGMII_ID_MODE_DIS BIT(16)
+
+#define RGMII_PORT_CNTRL 0x60
+#define RGMII_PORT_MODE_EPHY 0
+#define RGMII_PORT_MODE_GPHY 1
+#define RGMII_PORT_MODE_EXT_EPHY 2
+#define RGMII_PORT_MODE_EXT_GPHY 3
+#define RGMII_PORT_MODE_EXT_RVMII 4
+#define RGMII_PORT_MODE_MASK GENMASK(2, 0)
+
+#define RGMII_SYS_LED_CNTRL 0x74
+#define RGMII_SYS_LED_CNTRL_LINK_OVRD BIT(15)
+
+#define TX_SPB_DMA_OFFSET(intf) \
+ ((((intf)->channel) * 0x30) + 0x48180)
+#define TX_SPB_DMA_READ 0x00
+#define TX_SPB_DMA_BASE 0x08
+#define TX_SPB_DMA_END 0x10
+#define TX_SPB_DMA_VALID 0x18
+#define TX_SPB_DMA_FIFO_CTRL 0x20
+#define TX_SPB_DMA_FIFO_FLUSH BIT(0)
+#define TX_SPB_DMA_FIFO_STATUS 0x24
+
+#define TX_SPB_CTRL_OFFSET(intf) \
+ ((((intf)->channel) * 0x68) + 0x49340)
+#define TX_SPB_CTRL_ENABLE 0x0
+#define TX_SPB_CTRL_ENABLE_EN BIT(0)
+#define TX_SPB_CTRL_XF_CTRL2 0x20
+#define TX_SPB_CTRL_XF_BID_SHIFT 16
+
+#define TX_SPB_TOP_OFFSET(intf) \
+ ((((intf)->channel) * 0x1c) + 0x4a0e0)
+#define TX_SPB_TOP_BLKOUT 0x0
+#define TX_SPB_TOP_SPRE_BW_CTRL 0x4
+
+#define TX_EPKT_C_OFFSET(intf) \
+ ((((intf)->channel) * 0x120) + 0x40900)
+#define TX_EPKT_C_CFG_MISC 0x0
+#define TX_EPKT_C_CFG_MISC_EN BIT(0)
+#define TX_EPKT_C_CFG_MISC_PT BIT(1)
+#define TX_EPKT_C_CFG_MISC_PS_SHIFT 14
+#define TX_EPKT_C_CFG_MISC_FD_SHIFT 20
+
+#define TX_PAUSE_CTRL_OFFSET(intf) \
+ ((((intf)->channel * 0xc) + 0x49a20))
+#define TX_PAUSE_MAP_VECTOR 0x8
+
+#define RX_EDPKT_DMA_OFFSET(intf) \
+ ((((intf)->channel) * 0x38) + 0x9ca00)
+#define RX_EDPKT_DMA_WRITE 0x00
+#define RX_EDPKT_DMA_READ 0x08
+#define RX_EDPKT_DMA_BASE 0x10
+#define RX_EDPKT_DMA_END 0x18
+#define RX_EDPKT_DMA_VALID 0x20
+#define RX_EDPKT_DMA_FULLNESS 0x28
+#define RX_EDPKT_DMA_MIN_THRES 0x2c
+#define RX_EDPKT_DMA_CH_XONOFF 0x30
+
+#define RX_EDPKT_CFG_OFFSET(intf) \
+ ((((intf)->channel) * 0x70) + 0x9c600)
+#define RX_EDPKT_CFG_CFG0 0x0
+#define RX_EDPKT_CFG_CFG0_DBUF_SHIFT 9
+#define RX_EDPKT_CFG_CFG0_RBUF 0x0
+#define RX_EDPKT_CFG_CFG0_RBUF_4K 0x1
+#define RX_EDPKT_CFG_CFG0_BUF_4K 0x2
+/* EFRM STUFF, 0 = no byte stuff, 1 = two byte stuff */
+#define RX_EDPKT_CFG_CFG0_EFRM_STUF BIT(11)
+#define RX_EDPKT_CFG_CFG0_BALN_SHIFT 12
+#define RX_EDPKT_CFG_CFG0_NO_ALN 0
+#define RX_EDPKT_CFG_CFG0_4_ALN 2
+#define RX_EDPKT_CFG_CFG0_64_ALN 6
+#define RX_EDPKT_RING_BUFFER_WRITE 0x38
+#define RX_EDPKT_RING_BUFFER_READ 0x40
+#define RX_EDPKT_RING_BUFFER_BASE 0x48
+#define RX_EDPKT_RING_BUFFER_END 0x50
+#define RX_EDPKT_RING_BUFFER_VALID 0x58
+#define RX_EDPKT_CFG_ENABLE 0x6c
+#define RX_EDPKT_CFG_ENABLE_EN BIT(0)
+
+#define RX_SPB_DMA_OFFSET(intf) \
+ ((((intf)->channel) * 0x30) + 0xa0000)
+#define RX_SPB_DMA_READ 0x00
+#define RX_SPB_DMA_BASE 0x08
+#define RX_SPB_DMA_END 0x10
+#define RX_SPB_DMA_VALID 0x18
+#define RX_SPB_DMA_FIFO_CTRL 0x20
+#define RX_SPB_DMA_FIFO_FLUSH BIT(0)
+#define RX_SPB_DMA_FIFO_STATUS 0x24
+
+#define RX_SPB_CTRL_OFFSET(intf) \
+ ((((intf)->channel - 6) * 0x68) + 0xa1000)
+#define RX_SPB_CTRL_ENABLE 0x00
+#define RX_SPB_CTRL_ENABLE_EN BIT(0)
+
+#define RX_PAUSE_CTRL_OFFSET(intf) \
+ ((((intf)->channel - 6) * 0x4) + 0xa1138)
+#define RX_PAUSE_MAP_VECTOR 0x00
+
+#define RX_SPB_TOP_CTRL_OFFSET(intf) \
+ ((((intf)->channel - 6) * 0x14) + 0xa2000)
+#define RX_SPB_TOP_BLKOUT 0x00
+
+#define NUM_4K_BUFFERS 32
+#define RING_BUFFER_SIZE (PAGE_SIZE * NUM_4K_BUFFERS)
+
+#define DESC_RING_COUNT (64 * NUM_4K_BUFFERS)
+#define DESC_SIZE 16
+#define DESC_RING_SIZE (DESC_RING_COUNT * DESC_SIZE)
+
+#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e5b54e6025be..a3bbd13c070f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -293,6 +293,60 @@ static void bnxt_db_cq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
BNXT_DB_CQ(db, idx);
}
+static void bnxt_queue_fw_reset_work(struct bnxt *bp, unsigned long delay)
+{
+ if (!(test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)))
+ return;
+
+ if (BNXT_PF(bp))
+ queue_delayed_work(bnxt_pf_wq, &bp->fw_reset_task, delay);
+ else
+ schedule_delayed_work(&bp->fw_reset_task, delay);
+}
+
+static void __bnxt_queue_sp_work(struct bnxt *bp)
+{
+ if (BNXT_PF(bp))
+ queue_work(bnxt_pf_wq, &bp->sp_task);
+ else
+ schedule_work(&bp->sp_task);
+}
+
+static void bnxt_queue_sp_work(struct bnxt *bp, unsigned int event)
+{
+ set_bit(event, &bp->sp_event);
+ __bnxt_queue_sp_work(bp);
+}
+
+static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
+{
+ if (!rxr->bnapi->in_reset) {
+ rxr->bnapi->in_reset = true;
+ if (bp->flags & BNXT_FLAG_CHIP_P5)
+ set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event);
+ else
+ set_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event);
+ __bnxt_queue_sp_work(bp);
+ }
+ rxr->rx_next_cons = 0xffff;
+}
+
+void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
+ int idx)
+{
+ struct bnxt_napi *bnapi = txr->bnapi;
+
+ if (bnapi->tx_fault)
+ return;
+
+ netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_pkts:%d cons:%u prod:%u i:%d)",
+ txr->txq_index, bnapi->tx_pkts,
+ txr->tx_cons, txr->tx_prod, idx);
+ WARN_ON_ONCE(1);
+ bnapi->tx_fault = 1;
+ bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT);
+}
+
const u16 bnxt_lhint_arr[] = {
TX_BD_FLAGS_LHINT_512_AND_SMALLER,
TX_BD_FLAGS_LHINT_512_TO_1023,
@@ -652,6 +706,11 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
skb = tx_buf->skb;
tx_buf->skb = NULL;
+ if (unlikely(!skb)) {
+ bnxt_sched_reset_txr(bp, txr, i);
+ return;
+ }
+
tx_bytes += skb->len;
if (tx_buf->is_push) {
@@ -685,7 +744,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
next_tx_int:
cons = NEXT_TX(cons);
- dev_kfree_skb_any(skb);
+ dev_consume_skb_any(skb);
}
WRITE_ONCE(txr->tx_cons, cons);
@@ -1234,38 +1293,6 @@ static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
return 0;
}
-static void bnxt_queue_fw_reset_work(struct bnxt *bp, unsigned long delay)
-{
- if (!(test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)))
- return;
-
- if (BNXT_PF(bp))
- queue_delayed_work(bnxt_pf_wq, &bp->fw_reset_task, delay);
- else
- schedule_delayed_work(&bp->fw_reset_task, delay);
-}
-
-static void bnxt_queue_sp_work(struct bnxt *bp)
-{
- if (BNXT_PF(bp))
- queue_work(bnxt_pf_wq, &bp->sp_task);
- else
- schedule_work(&bp->sp_task);
-}
-
-static void bnxt_sched_reset(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
-{
- if (!rxr->bnapi->in_reset) {
- rxr->bnapi->in_reset = true;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
- set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event);
- else
- set_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event);
- bnxt_queue_sp_work(bp);
- }
- rxr->rx_next_cons = 0xffff;
-}
-
static u16 bnxt_alloc_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id)
{
struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
@@ -1320,7 +1347,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
netdev_warn(bp->dev, "TPA cons %x, expected cons %x, error code %x\n",
cons, rxr->rx_next_cons,
TPA_START_ERROR_CODE(tpa_start1));
- bnxt_sched_reset(bp, rxr);
+ bnxt_sched_reset_rxr(bp, rxr);
return;
}
/* Store cfa_code in tpa_info to use in tpa_end
@@ -1844,7 +1871,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
if (rxr->rx_next_cons != 0xffff)
netdev_warn(bp->dev, "RX cons %x != expected cons %x\n",
cons, rxr->rx_next_cons);
- bnxt_sched_reset(bp, rxr);
+ bnxt_sched_reset_rxr(bp, rxr);
if (rc1)
return rc1;
goto next_rx_no_prod_no_len;
@@ -1882,7 +1909,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
!(bp->fw_cap & BNXT_FW_CAP_RING_MONITOR)) {
netdev_warn_once(bp->dev, "RX buffer error %x\n",
rx_err);
- bnxt_sched_reset(bp, rxr);
+ bnxt_sched_reset_rxr(bp, rxr);
}
}
goto next_rx_no_len;
@@ -2329,7 +2356,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
goto async_event_process_exit;
}
rxr = bp->bnapi[grp_idx]->rx_ring;
- bnxt_sched_reset(bp, rxr);
+ bnxt_sched_reset_rxr(bp, rxr);
goto async_event_process_exit;
}
case ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST: {
@@ -2384,7 +2411,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
default:
goto async_event_process_exit;
}
- bnxt_queue_sp_work(bp);
+ __bnxt_queue_sp_work(bp);
async_event_process_exit:
return 0;
}
@@ -2413,8 +2440,7 @@ static int bnxt_hwrm_handler(struct bnxt *bp, struct tx_cmp *txcmp)
}
set_bit(vf_id - bp->pf.first_vf_id, bp->pf.vf_event_bmap);
- set_bit(BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT, &bp->sp_event);
- bnxt_queue_sp_work(bp);
+ bnxt_queue_sp_work(bp, BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT);
break;
case CMPL_BASE_TYPE_HWRM_ASYNC_EVENT:
@@ -2571,7 +2597,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi)
{
- if (bnapi->tx_pkts) {
+ if (bnapi->tx_pkts && !bnapi->tx_fault) {
bnapi->tx_int(bp, bnapi, bnapi->tx_pkts);
bnapi->tx_pkts = 0;
}
@@ -9424,6 +9450,8 @@ static void bnxt_enable_napi(struct bnxt *bp)
struct bnxt_napi *bnapi = bp->bnapi[i];
struct bnxt_cp_ring_info *cpr;
+ bnapi->tx_fault = 0;
+
cpr = &bnapi->cp_ring;
if (bnapi->in_reset)
cpr->sw_stats.rx.rx_resets++;
@@ -11031,8 +11059,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
if (mask != vnic->rx_mask || uc_update || mc_update) {
vnic->rx_mask = mask;
- set_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event);
- bnxt_queue_sp_work(bp);
+ bnxt_queue_sp_work(bp, BNXT_RX_MASK_SP_EVENT);
}
}
@@ -11597,8 +11624,7 @@ static void bnxt_tx_timeout(struct net_device *dev, unsigned int txqueue)
struct bnxt *bp = netdev_priv(dev);
netdev_err(bp->dev, "TX timeout detected, starting reset task!\n");
- set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event);
- bnxt_queue_sp_work(bp);
+ bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT);
}
static void bnxt_fw_health_check(struct bnxt *bp)
@@ -11635,8 +11661,7 @@ static void bnxt_fw_health_check(struct bnxt *bp)
return;
fw_reset:
- set_bit(BNXT_FW_EXCEPTION_SP_EVENT, &bp->sp_event);
- bnxt_queue_sp_work(bp);
+ bnxt_queue_sp_work(bp, BNXT_FW_EXCEPTION_SP_EVENT);
}
static void bnxt_timer(struct timer_list *t)
@@ -11653,21 +11678,15 @@ static void bnxt_timer(struct timer_list *t)
if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
bnxt_fw_health_check(bp);
- if (BNXT_LINK_IS_UP(bp) && bp->stats_coal_ticks) {
- set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event);
- bnxt_queue_sp_work(bp);
- }
+ if (BNXT_LINK_IS_UP(bp) && bp->stats_coal_ticks)
+ bnxt_queue_sp_work(bp, BNXT_PERIODIC_STATS_SP_EVENT);
- if (bnxt_tc_flower_enabled(bp)) {
- set_bit(BNXT_FLOW_STATS_SP_EVENT, &bp->sp_event);
- bnxt_queue_sp_work(bp);
- }
+ if (bnxt_tc_flower_enabled(bp))
+ bnxt_queue_sp_work(bp, BNXT_FLOW_STATS_SP_EVENT);
#ifdef CONFIG_RFS_ACCEL
- if ((bp->flags & BNXT_FLAG_RFS) && bp->ntp_fltr_count) {
- set_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event);
- bnxt_queue_sp_work(bp);
- }
+ if ((bp->flags & BNXT_FLAG_RFS) && bp->ntp_fltr_count)
+ bnxt_queue_sp_work(bp, BNXT_RX_NTP_FLTR_SP_EVENT);
#endif /*CONFIG_RFS_ACCEL*/
if (bp->link_info.phy_retry) {
@@ -11675,21 +11694,17 @@ static void bnxt_timer(struct timer_list *t)
bp->link_info.phy_retry = false;
netdev_warn(bp->dev, "failed to update phy settings after maximum retries.\n");
} else {
- set_bit(BNXT_UPDATE_PHY_SP_EVENT, &bp->sp_event);
- bnxt_queue_sp_work(bp);
+ bnxt_queue_sp_work(bp, BNXT_UPDATE_PHY_SP_EVENT);
}
}
- if (test_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state)) {
- set_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event);
- bnxt_queue_sp_work(bp);
- }
+ if (test_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state))
+ bnxt_queue_sp_work(bp, BNXT_RX_MASK_SP_EVENT);
if ((bp->flags & BNXT_FLAG_CHIP_P5) && !bp->chip_rev &&
- netif_carrier_ok(dev)) {
- set_bit(BNXT_RING_COAL_NOW_SP_EVENT, &bp->sp_event);
- bnxt_queue_sp_work(bp);
- }
+ netif_carrier_ok(dev))
+ bnxt_queue_sp_work(bp, BNXT_RING_COAL_NOW_SP_EVENT);
+
bnxt_restart_timer:
mod_timer(&bp->timer, jiffies + bp->current_interval);
}
@@ -12968,8 +12983,7 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
bp->ntp_fltr_count++;
spin_unlock_bh(&bp->ntp_fltr_lock);
- set_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event);
- bnxt_queue_sp_work(bp);
+ bnxt_queue_sp_work(bp, BNXT_RX_NTP_FLTR_SP_EVENT);
return new_fltr->sw_id;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 080e73496066..9d16757e27fe 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1008,6 +1008,7 @@ struct bnxt_napi {
int);
int tx_pkts;
u8 events;
+ u8 tx_fault:1;
u32 flags;
#define BNXT_NAPI_FLAG_XDP 0x1
@@ -2329,6 +2330,8 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num);
int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init);
void bnxt_tx_disable(struct bnxt *bp);
void bnxt_tx_enable(struct bnxt *bp);
+void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
+ int idx);
void bnxt_report_link(struct bnxt *bp);
int bnxt_update_link(struct bnxt *bp, bool chng_link_state);
int bnxt_hwrm_set_pause(struct bnxt *);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 4efa5fe6972b..5b6fbdc4dc40 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -149,6 +149,7 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
tx_buf->action = 0;
tx_buf->xdpf = NULL;
} else if (tx_buf->action == XDP_TX) {
+ tx_buf->action = 0;
rx_doorbell_needed = true;
last_tx_cons = tx_cons;
@@ -158,6 +159,9 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
tx_buf = &txr->tx_buf_ring[tx_cons];
page_pool_recycle_direct(rxr->page_pool, tx_buf->page);
}
+ } else {
+ bnxt_sched_reset_txr(bp, txr, i);
+ return;
}
tx_cons = NEXT_TX(tx_cons);
}
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index d6d90f9722a7..31191b520b58 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -1037,8 +1037,7 @@ bnad_cb_ccb_destroy(struct bnad *bnad, struct bna_ccb *ccb)
static void
bnad_cb_tx_stall(struct bnad *bnad, struct bna_tx *tx)
{
- struct bnad_tx_info *tx_info =
- (struct bnad_tx_info *)tx->priv;
+ struct bnad_tx_info *tx_info = tx->priv;
struct bna_tcb *tcb;
u32 txq_id;
int i;
@@ -1056,7 +1055,7 @@ bnad_cb_tx_stall(struct bnad *bnad, struct bna_tx *tx)
static void
bnad_cb_tx_resume(struct bnad *bnad, struct bna_tx *tx)
{
- struct bnad_tx_info *tx_info = (struct bnad_tx_info *)tx->priv;
+ struct bnad_tx_info *tx_info = tx->priv;
struct bna_tcb *tcb;
u32 txq_id;
int i;
@@ -1133,7 +1132,7 @@ bnad_tx_cleanup(struct delayed_work *work)
static void
bnad_cb_tx_cleanup(struct bnad *bnad, struct bna_tx *tx)
{
- struct bnad_tx_info *tx_info = (struct bnad_tx_info *)tx->priv;
+ struct bnad_tx_info *tx_info = tx->priv;
struct bna_tcb *tcb;
int i;
@@ -1149,7 +1148,7 @@ bnad_cb_tx_cleanup(struct bnad *bnad, struct bna_tx *tx)
static void
bnad_cb_rx_stall(struct bnad *bnad, struct bna_rx *rx)
{
- struct bnad_rx_info *rx_info = (struct bnad_rx_info *)rx->priv;
+ struct bnad_rx_info *rx_info = rx->priv;
struct bna_ccb *ccb;
struct bnad_rx_ctrl *rx_ctrl;
int i;
@@ -1208,7 +1207,7 @@ bnad_rx_cleanup(void *work)
static void
bnad_cb_rx_cleanup(struct bnad *bnad, struct bna_rx *rx)
{
- struct bnad_rx_info *rx_info = (struct bnad_rx_info *)rx->priv;
+ struct bnad_rx_info *rx_info = rx->priv;
struct bna_ccb *ccb;
struct bnad_rx_ctrl *rx_ctrl;
int i;
@@ -1231,7 +1230,7 @@ bnad_cb_rx_cleanup(struct bnad *bnad, struct bna_rx *rx)
static void
bnad_cb_rx_post(struct bnad *bnad, struct bna_rx *rx)
{
- struct bnad_rx_info *rx_info = (struct bnad_rx_info *)rx->priv;
+ struct bnad_rx_info *rx_info = rx->priv;
struct bna_ccb *ccb;
struct bna_rcb *rcb;
struct bnad_rx_ctrl *rx_ctrl;
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index 84751bb303a6..079f9f6ae21a 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -1333,7 +1333,7 @@ static void tsnep_rx_page(struct tsnep_rx *rx, struct napi_struct *napi,
skb = tsnep_build_skb(rx, page, length);
if (skb) {
- page_pool_release_page(rx->page_pool, page);
+ skb_mark_for_recycle(skb);
rx->packets++;
rx->bytes += length;
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index a03879a27b04..9135b918dd49 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -177,16 +177,20 @@ static void ftgmac100_write_mac_addr(struct ftgmac100 *priv, const u8 *mac)
iowrite32(laddr, priv->base + FTGMAC100_OFFSET_MAC_LADR);
}
-static void ftgmac100_initial_mac(struct ftgmac100 *priv)
+static int ftgmac100_initial_mac(struct ftgmac100 *priv)
{
u8 mac[ETH_ALEN];
unsigned int m;
unsigned int l;
+ int err;
- if (!device_get_ethdev_address(priv->dev, priv->netdev)) {
+ err = of_get_ethdev_address(priv->dev->of_node, priv->netdev);
+ if (err == -EPROBE_DEFER)
+ return err;
+ if (!err) {
dev_info(priv->dev, "Read MAC address %pM from device tree\n",
priv->netdev->dev_addr);
- return;
+ return 0;
}
m = ioread32(priv->base + FTGMAC100_OFFSET_MAC_MADR);
@@ -207,6 +211,8 @@ static void ftgmac100_initial_mac(struct ftgmac100 *priv)
dev_info(priv->dev, "Generated random MAC address %pM\n",
priv->netdev->dev_addr);
}
+
+ return 0;
}
static int ftgmac100_set_mac_addr(struct net_device *dev, void *p)
@@ -1843,7 +1849,9 @@ static int ftgmac100_probe(struct platform_device *pdev)
priv->aneg_pause = true;
/* MAC address from chip or random one */
- ftgmac100_initial_mac(priv);
+ err = ftgmac100_initial_mac(priv);
+ if (err)
+ goto err_phy_connect;
np = pdev->dev.of_node;
if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac") ||
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 431f8917dc39..85e38a1ec22a 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -3497,7 +3497,7 @@ free_netdev:
return err;
}
-static int dpaa_remove(struct platform_device *pdev)
+static void dpaa_remove(struct platform_device *pdev)
{
struct net_device *net_dev;
struct dpaa_priv *priv;
@@ -3516,6 +3516,9 @@ static int dpaa_remove(struct platform_device *pdev)
phylink_destroy(priv->mac_dev->phylink);
err = dpaa_fq_free(dev, &priv->dpaa_fq_list);
+ if (err)
+ dev_err(dev, "Failed to free FQs on remove (%pE)\n",
+ ERR_PTR(err));
qman_delete_cgr_safe(&priv->ingress_cgr);
qman_release_cgrid(priv->ingress_cgr.cgrid);
@@ -3527,8 +3530,6 @@ static int dpaa_remove(struct platform_device *pdev)
dpaa_bps_free(priv);
free_netdev(net_dev);
-
- return err;
}
static const struct platform_device_id dpaa_devtype[] = {
@@ -3546,7 +3547,7 @@ static struct platform_driver dpaa_driver = {
},
.id_table = dpaa_devtype,
.probe = dpaa_eth_probe,
- .remove = dpaa_remove
+ .remove_new = dpaa_remove
};
static int __init dpaa_load(void)
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 63a053dea819..8f1edcca96c4 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -651,12 +651,9 @@ struct fec_enet_private {
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_caps;
- unsigned long last_overflow_check;
spinlock_t tmreg_lock;
struct cyclecounter cc;
struct timecounter tc;
- int rx_hwtstamp_filter;
- u32 base_incval;
u32 cycle_speed;
int hwts_rx_en;
int hwts_tx_en;
@@ -679,8 +676,6 @@ struct fec_enet_private {
struct ethtool_eee eee;
unsigned int clk_ref_rate;
- u32 rx_copybreak;
-
/* ptp clock period in ns*/
unsigned int ptp_inc;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 66b5cbdb43b9..14d0dc7ba3c9 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -325,8 +325,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
#define FEC_WOL_FLAG_ENABLE (0x1 << 1)
#define FEC_WOL_FLAG_SLEEP_ON (0x1 << 2)
-#define COPYBREAK_DEFAULT 256
-
/* Max number of allowed TCP segments for software TSO */
#define FEC_MAX_TSO_SEGS 100
#define FEC_MAX_SKB_DESCS (FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
@@ -3059,44 +3057,6 @@ static int fec_enet_set_coalesce(struct net_device *ndev,
return 0;
}
-static int fec_enet_get_tunable(struct net_device *netdev,
- const struct ethtool_tunable *tuna,
- void *data)
-{
- struct fec_enet_private *fep = netdev_priv(netdev);
- int ret = 0;
-
- switch (tuna->id) {
- case ETHTOOL_RX_COPYBREAK:
- *(u32 *)data = fep->rx_copybreak;
- break;
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
-static int fec_enet_set_tunable(struct net_device *netdev,
- const struct ethtool_tunable *tuna,
- const void *data)
-{
- struct fec_enet_private *fep = netdev_priv(netdev);
- int ret = 0;
-
- switch (tuna->id) {
- case ETHTOOL_RX_COPYBREAK:
- fep->rx_copybreak = *(u32 *)data;
- break;
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
/* LPI Sleep Ts count base on tx clk (clk_ref).
* The lpi sleep cnt value = X us / (cycle_ns).
*/
@@ -3234,8 +3194,6 @@ static const struct ethtool_ops fec_enet_ethtool_ops = {
.get_sset_count = fec_enet_get_sset_count,
#endif
.get_ts_info = fec_enet_get_ts_info,
- .get_tunable = fec_enet_get_tunable,
- .set_tunable = fec_enet_set_tunable,
.get_wol = fec_enet_get_wol,
.set_wol = fec_enet_set_wol,
.get_eee = fec_enet_get_eee,
@@ -4018,9 +3976,6 @@ static int fec_enet_init(struct net_device *ndev)
if (ret)
goto free_queue_mem;
- /* make sure MAC we just acquired is programmed into the hw */
- fec_set_mac_address(ndev, NULL);
-
/* Set receive and transmit descriptor base. */
for (i = 0; i < fep->num_rx_queues; i++) {
struct fec_enet_priv_rx_q *rxq = fep->rx_queue[i];
@@ -4486,7 +4441,6 @@ fec_probe(struct platform_device *pdev)
if (fep->bufdesc_ex && fep->ptp_clock)
netdev_info(ndev, "registered PHC device %d\n", fep->dev_id);
- fep->rx_copybreak = COPYBREAK_DEFAULT;
INIT_WORK(&fep->tx_timeout_work, fec_enet_timeout_work);
pm_runtime_mark_last_busy(&pdev->dev);
@@ -4526,7 +4480,7 @@ failed_ioremap:
return ret;
}
-static int
+static void
fec_drv_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
@@ -4562,7 +4516,6 @@ fec_drv_remove(struct platform_device *pdev)
pm_runtime_disable(&pdev->dev);
free_netdev(ndev);
- return 0;
}
static int __maybe_unused fec_suspend(struct device *dev)
@@ -4718,7 +4671,7 @@ static struct platform_driver fec_driver = {
},
.id_table = fec_devtype,
.probe = fec_probe,
- .remove = fec_drv_remove,
+ .remove_new = fec_drv_remove,
};
module_platform_driver(fec_driver);
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index b88816b71ddf..984ece5cd64f 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -974,7 +974,7 @@ err_netdev:
return rv;
}
-static int
+static void
mpc52xx_fec_remove(struct platform_device *op)
{
struct net_device *ndev;
@@ -998,8 +998,6 @@ mpc52xx_fec_remove(struct platform_device *op)
release_mem_region(ndev->base_addr, sizeof(struct mpc52xx_fec));
free_netdev(ndev);
-
- return 0;
}
#ifdef CONFIG_PM
@@ -1042,7 +1040,7 @@ static struct platform_driver mpc52xx_fec_driver = {
.of_match_table = mpc52xx_fec_match,
},
.probe = mpc52xx_fec_probe,
- .remove = mpc52xx_fec_remove,
+ .remove_new = mpc52xx_fec_remove,
#ifdef CONFIG_PM
.suspend = mpc52xx_fec_of_suspend,
.resume = mpc52xx_fec_of_resume,
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c b/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c
index 95f778cce98c..61d3776d6750 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c
@@ -117,7 +117,7 @@ static int mpc52xx_fec_mdio_probe(struct platform_device *of)
return err;
}
-static int mpc52xx_fec_mdio_remove(struct platform_device *of)
+static void mpc52xx_fec_mdio_remove(struct platform_device *of)
{
struct mii_bus *bus = platform_get_drvdata(of);
struct mpc52xx_fec_mdio_priv *priv = bus->priv;
@@ -126,8 +126,6 @@ static int mpc52xx_fec_mdio_remove(struct platform_device *of)
iounmap(priv->regs);
kfree(priv);
mdiobus_free(bus);
-
- return 0;
}
static const struct of_device_id mpc52xx_fec_mdio_match[] = {
@@ -145,7 +143,7 @@ struct platform_driver mpc52xx_fec_mdio_driver = {
.of_match_table = mpc52xx_fec_mdio_match,
},
.probe = mpc52xx_fec_mdio_probe,
- .remove = mpc52xx_fec_mdio_remove,
+ .remove_new = mpc52xx_fec_mdio_remove,
};
/* let fec driver call it, since this has to be registered before it */
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index ab86bb8562ef..afc658d2c271 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -443,21 +443,21 @@ static int fec_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
*/
static int fec_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
{
- struct fec_enet_private *adapter =
+ struct fec_enet_private *fep =
container_of(ptp, struct fec_enet_private, ptp_caps);
u64 ns;
unsigned long flags;
- mutex_lock(&adapter->ptp_clk_mutex);
+ mutex_lock(&fep->ptp_clk_mutex);
/* Check the ptp clock */
- if (!adapter->ptp_clk_on) {
- mutex_unlock(&adapter->ptp_clk_mutex);
+ if (!fep->ptp_clk_on) {
+ mutex_unlock(&fep->ptp_clk_mutex);
return -EINVAL;
}
- spin_lock_irqsave(&adapter->tmreg_lock, flags);
- ns = timecounter_read(&adapter->tc);
- spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
- mutex_unlock(&adapter->ptp_clk_mutex);
+ spin_lock_irqsave(&fep->tmreg_lock, flags);
+ ns = timecounter_read(&fep->tc);
+ spin_unlock_irqrestore(&fep->tmreg_lock, flags);
+ mutex_unlock(&fep->ptp_clk_mutex);
*ts = ns_to_timespec64(ns);
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 43665806c590..c5045891d694 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -331,12 +331,11 @@ _return_of_node_put:
return err;
}
-static int mac_remove(struct platform_device *pdev)
+static void mac_remove(struct platform_device *pdev)
{
struct mac_device *mac_dev = platform_get_drvdata(pdev);
platform_device_unregister(mac_dev->priv->eth_dev);
- return 0;
}
static struct platform_driver mac_driver = {
@@ -345,7 +344,7 @@ static struct platform_driver mac_driver = {
.of_match_table = mac_match,
},
.probe = mac_probe,
- .remove = mac_remove,
+ .remove_new = mac_remove,
};
builtin_platform_driver(mac_driver);
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 8844a9a04fcf..f9f5b28cc72e 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -1051,7 +1051,7 @@ out_free_fpi:
return ret;
}
-static int fs_enet_remove(struct platform_device *ofdev)
+static void fs_enet_remove(struct platform_device *ofdev)
{
struct net_device *ndev = platform_get_drvdata(ofdev);
struct fs_enet_private *fep = netdev_priv(ndev);
@@ -1066,7 +1066,6 @@ static int fs_enet_remove(struct platform_device *ofdev)
if (of_phy_is_fixed_link(ofdev->dev.of_node))
of_phy_deregister_fixed_link(ofdev->dev.of_node);
free_netdev(ndev);
- return 0;
}
static const struct of_device_id fs_enet_match[] = {
@@ -1113,7 +1112,7 @@ static struct platform_driver fs_enet_driver = {
.of_match_table = fs_enet_match,
},
.probe = fs_enet_probe,
- .remove = fs_enet_remove,
+ .remove_new = fs_enet_remove,
};
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
index 21de56345503..91a69fc2f7c2 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
@@ -192,7 +192,7 @@ out:
return ret;
}
-static int fs_enet_mdio_remove(struct platform_device *ofdev)
+static void fs_enet_mdio_remove(struct platform_device *ofdev)
{
struct mii_bus *bus = platform_get_drvdata(ofdev);
struct bb_info *bitbang = bus->priv;
@@ -201,8 +201,6 @@ static int fs_enet_mdio_remove(struct platform_device *ofdev)
free_mdio_bitbang(bus);
iounmap(bitbang->dir);
kfree(bitbang);
-
- return 0;
}
static const struct of_device_id fs_enet_mdio_bb_match[] = {
@@ -219,7 +217,7 @@ static struct platform_driver fs_enet_bb_mdio_driver = {
.of_match_table = fs_enet_mdio_bb_match,
},
.probe = fs_enet_mdio_probe,
- .remove = fs_enet_mdio_remove,
+ .remove_new = fs_enet_mdio_remove,
};
module_platform_driver(fs_enet_bb_mdio_driver);
diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
index 59a8f0bd0f5c..1910df250c33 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
@@ -187,7 +187,7 @@ out:
return ret;
}
-static int fs_enet_mdio_remove(struct platform_device *ofdev)
+static void fs_enet_mdio_remove(struct platform_device *ofdev)
{
struct mii_bus *bus = platform_get_drvdata(ofdev);
struct fec_info *fec = bus->priv;
@@ -196,8 +196,6 @@ static int fs_enet_mdio_remove(struct platform_device *ofdev)
iounmap(fec->fecp);
kfree(fec);
mdiobus_free(bus);
-
- return 0;
}
static const struct of_device_id fs_enet_mdio_fec_match[] = {
@@ -220,7 +218,7 @@ static struct platform_driver fs_enet_fec_mdio_driver = {
.of_match_table = fs_enet_mdio_fec_match,
},
.probe = fs_enet_mdio_probe,
- .remove = fs_enet_mdio_remove,
+ .remove_new = fs_enet_mdio_remove,
};
module_platform_driver(fs_enet_fec_mdio_driver);
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index 9d58d8334467..01d594886f52 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -511,7 +511,7 @@ error:
}
-static int fsl_pq_mdio_remove(struct platform_device *pdev)
+static void fsl_pq_mdio_remove(struct platform_device *pdev)
{
struct device *device = &pdev->dev;
struct mii_bus *bus = dev_get_drvdata(device);
@@ -521,8 +521,6 @@ static int fsl_pq_mdio_remove(struct platform_device *pdev)
iounmap(priv->map);
mdiobus_free(bus);
-
- return 0;
}
static struct platform_driver fsl_pq_mdio_driver = {
@@ -531,7 +529,7 @@ static struct platform_driver fsl_pq_mdio_driver = {
.of_match_table = fsl_pq_mdio_match,
},
.probe = fsl_pq_mdio_probe,
- .remove = fsl_pq_mdio_remove,
+ .remove_new = fsl_pq_mdio_remove,
};
module_platform_driver(fsl_pq_mdio_driver);
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 38d5013c6fed..0c898f9ee358 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -3364,7 +3364,7 @@ register_fail:
return err;
}
-static int gfar_remove(struct platform_device *ofdev)
+static void gfar_remove(struct platform_device *ofdev)
{
struct gfar_private *priv = platform_get_drvdata(ofdev);
struct device_node *np = ofdev->dev.of_node;
@@ -3381,8 +3381,6 @@ static int gfar_remove(struct platform_device *ofdev)
gfar_free_rx_queues(priv);
gfar_free_tx_queues(priv);
free_gfar_dev(priv);
-
- return 0;
}
#ifdef CONFIG_PM
@@ -3642,7 +3640,7 @@ static struct platform_driver gfar_driver = {
.of_match_table = gfar_match,
},
.probe = gfar_probe,
- .remove = gfar_remove,
+ .remove_new = gfar_remove,
};
module_platform_driver(gfar_driver);
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 7a4cb4f07c32..2b3a15f24e7c 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3753,7 +3753,7 @@ err_free_info:
return err;
}
-static int ucc_geth_remove(struct platform_device* ofdev)
+static void ucc_geth_remove(struct platform_device* ofdev)
{
struct net_device *dev = platform_get_drvdata(ofdev);
struct ucc_geth_private *ugeth = netdev_priv(dev);
@@ -3767,8 +3767,6 @@ static int ucc_geth_remove(struct platform_device* ofdev)
of_node_put(ugeth->ug_info->phy_node);
kfree(ugeth->ug_info);
free_netdev(dev);
-
- return 0;
}
static const struct of_device_id ucc_geth_match[] = {
@@ -3787,7 +3785,7 @@ static struct platform_driver ucc_geth_driver = {
.of_match_table = ucc_geth_match,
},
.probe = ucc_geth_probe,
- .remove = ucc_geth_remove,
+ .remove_new = ucc_geth_remove,
.suspend = ucc_geth_suspend,
.resume = ucc_geth_resume,
};
diff --git a/drivers/net/ethernet/google/gve/gve_desc.h b/drivers/net/ethernet/google/gve/gve_desc.h
index f4ae9e19b844..c2874cdcf40c 100644
--- a/drivers/net/ethernet/google/gve/gve_desc.h
+++ b/drivers/net/ethernet/google/gve/gve_desc.h
@@ -105,10 +105,10 @@ union gve_rx_data_slot {
__be64 addr;
};
-/* GVE Recive Packet Descriptor Seq No */
+/* GVE Receive Packet Descriptor Seq No */
#define GVE_SEQNO(x) (be16_to_cpu(x) & 0x7)
-/* GVE Recive Packet Descriptor Flags */
+/* GVE Receive Packet Descriptor Flags */
#define GVE_RXFLG(x) cpu_to_be16(1 << (3 + (x)))
#define GVE_RXF_FRAG GVE_RXFLG(3) /* IP Fragment */
#define GVE_RXF_IPV4 GVE_RXFLG(4) /* IPv4 */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 407d30ee55d2..36858a72d771 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -569,8 +569,8 @@ static void hns3_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data)
{
- struct hns3_nic_priv *nic_priv = (struct hns3_nic_priv *)handle->priv;
struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+ struct hns3_nic_priv *nic_priv = handle->priv;
struct hns3_enet_ring *ring;
u8 *stat;
int i, j;
diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c
index 9232caaf0bdc..409a89d80220 100644
--- a/drivers/net/ethernet/hisilicon/hns_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns_mdio.c
@@ -217,7 +217,7 @@ static void hns_mdio_cmd_write(struct hns_mdio_device *mdio_dev,
static int hns_mdio_write_c22(struct mii_bus *bus,
int phy_id, int regnum, u16 data)
{
- struct hns_mdio_device *mdio_dev = (struct hns_mdio_device *)bus->priv;
+ struct hns_mdio_device *mdio_dev = bus->priv;
u16 reg = (u16)(regnum & 0xffff);
u16 cmd_reg_cfg;
int ret;
@@ -259,7 +259,7 @@ static int hns_mdio_write_c22(struct mii_bus *bus,
static int hns_mdio_write_c45(struct mii_bus *bus, int phy_id, int devad,
int regnum, u16 data)
{
- struct hns_mdio_device *mdio_dev = (struct hns_mdio_device *)bus->priv;
+ struct hns_mdio_device *mdio_dev = bus->priv;
u16 reg = (u16)(regnum & 0xffff);
u16 cmd_reg_cfg;
int ret;
@@ -312,7 +312,7 @@ static int hns_mdio_write_c45(struct mii_bus *bus, int phy_id, int devad,
*/
static int hns_mdio_read_c22(struct mii_bus *bus, int phy_id, int regnum)
{
- struct hns_mdio_device *mdio_dev = (struct hns_mdio_device *)bus->priv;
+ struct hns_mdio_device *mdio_dev = bus->priv;
u16 reg = (u16)(regnum & 0xffff);
u16 reg_val;
int ret;
@@ -363,7 +363,7 @@ static int hns_mdio_read_c22(struct mii_bus *bus, int phy_id, int regnum)
static int hns_mdio_read_c45(struct mii_bus *bus, int phy_id, int devad,
int regnum)
{
- struct hns_mdio_device *mdio_dev = (struct hns_mdio_device *)bus->priv;
+ struct hns_mdio_device *mdio_dev = bus->priv;
u16 reg = (u16)(regnum & 0xffff);
u16 reg_val;
int ret;
@@ -424,7 +424,7 @@ static int hns_mdio_read_c45(struct mii_bus *bus, int phy_id, int devad,
*/
static int hns_mdio_reset(struct mii_bus *bus)
{
- struct hns_mdio_device *mdio_dev = (struct hns_mdio_device *)bus->priv;
+ struct hns_mdio_device *mdio_dev = bus->priv;
const struct hns_mdio_sc_reg *sc_reg;
int ret;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 29ad1797adce..982ae70c51e8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3585,11 +3585,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
if (ring->xsk_pool) {
ring->rx_buf_len =
xsk_pool_get_rx_frame_size(ring->xsk_pool);
- /* For AF_XDP ZC, we disallow packets to span on
- * multiple buffers, thus letting us skip that
- * handling in the fast-path.
- */
- chain_len = 1;
ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_XSK_BUFF_POOL,
NULL);
@@ -13822,6 +13817,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_XSK_ZEROCOPY |
NETDEV_XDP_ACT_RX_SG;
+ netdev->xdp_zc_max_segs = I40E_MAX_BUFFER_TXD;
} else {
/* Relate the VSI_VMDQ name to the VSI_MAIN name. Note that we
* are still limited by IFNAMSIZ, but we're adding 'v%d\0' to
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 8b8bf4880faa..0b3a27f118fb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2284,8 +2284,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
* If the buffer is an EOP buffer, this function exits returning false,
* otherwise return true indicating that this is in fact a non-EOP buffer.
*/
-static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc)
+bool i40e_is_non_eop(struct i40e_ring *rx_ring,
+ union i40e_rx_desc *rx_desc)
{
/* if we are the last buffer then there is nothing else to do */
#define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 8c3d24012c54..900b0d9ede9f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -473,6 +473,8 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
+bool i40e_is_non_eop(struct i40e_ring *rx_ring,
+ union i40e_rx_desc *rx_desc);
/**
* i40e_get_head - Retrieve head from head writeback
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index be59ba3774e1..398fb4854cbe 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -4305,6 +4305,38 @@ err_out:
}
/**
+ * i40e_check_vf_init_timeout
+ * @vf: the virtual function
+ *
+ * Check that the VF's initialization was successfully done and if not
+ * wait up to 300ms for its finish.
+ *
+ * Returns true when VF is initialized, false on timeout
+ **/
+static bool i40e_check_vf_init_timeout(struct i40e_vf *vf)
+{
+ int i;
+
+ /* When the VF is resetting wait until it is done.
+ * It can take up to 200 milliseconds, but wait for
+ * up to 300 milliseconds to be safe.
+ */
+ for (i = 0; i < 15; i++) {
+ if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states))
+ return true;
+ msleep(20);
+ }
+
+ if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
+ dev_err(&vf->pf->pdev->dev,
+ "VF %d still in reset. Try again.\n", vf->vf_id);
+ return false;
+ }
+
+ return true;
+}
+
+/**
* i40e_ndo_set_vf_mac
* @netdev: network interface device structure
* @vf_id: VF identifier
@@ -4322,7 +4354,6 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
int ret = 0;
struct hlist_node *h;
int bkt;
- u8 i;
if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
@@ -4335,21 +4366,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
goto error_param;
vf = &pf->vf[vf_id];
-
- /* When the VF is resetting wait until it is done.
- * It can take up to 200 milliseconds,
- * but wait for up to 300 milliseconds to be safe.
- * Acquire the VSI pointer only after the VF has been
- * properly initialized.
- */
- for (i = 0; i < 15; i++) {
- if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states))
- break;
- msleep(20);
- }
- if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
- dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n",
- vf_id);
+ if (!i40e_check_vf_init_timeout(vf)) {
ret = -EAGAIN;
goto error_param;
}
@@ -4451,13 +4468,11 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
}
vf = &pf->vf[vf_id];
- vsi = pf->vsi[vf->lan_vsi_idx];
- if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
- dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n",
- vf_id);
+ if (!i40e_check_vf_init_timeout(vf)) {
ret = -EAGAIN;
goto error_pvid;
}
+ vsi = pf->vsi[vf->lan_vsi_idx];
if (le16_to_cpu(vsi->info.pvid) == vlanprio)
/* duplicate request, so just return success */
@@ -4601,13 +4616,11 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
}
vf = &pf->vf[vf_id];
- vsi = pf->vsi[vf->lan_vsi_idx];
- if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
- dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n",
- vf_id);
+ if (!i40e_check_vf_init_timeout(vf)) {
ret = -EAGAIN;
goto error;
}
+ vsi = pf->vsi[vf->lan_vsi_idx];
ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
if (ret)
@@ -4774,9 +4787,7 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable)
}
vf = &(pf->vf[vf_id]);
- if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
- dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n",
- vf_id);
+ if (!i40e_check_vf_init_timeout(vf)) {
ret = -EAGAIN;
goto out;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 05ec1181471e..37f41c8a682f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -294,8 +294,14 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
{
unsigned int totalsize = xdp->data_end - xdp->data_meta;
unsigned int metasize = xdp->data - xdp->data_meta;
+ struct skb_shared_info *sinfo = NULL;
struct sk_buff *skb;
+ u32 nr_frags = 0;
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = sinfo->nr_frags;
+ }
net_prefetch(xdp->data_meta);
/* allocate a skb to store the frags */
@@ -312,6 +318,28 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
__skb_pull(skb, metasize);
}
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+
+ for (int i = 0; i < nr_frags; i++) {
+ struct skb_shared_info *skinfo = skb_shinfo(skb);
+ skb_frag_t *frag = &sinfo->frags[i];
+ struct page *page;
+ void *addr;
+
+ page = dev_alloc_page();
+ if (!page) {
+ dev_kfree_skb(skb);
+ return NULL;
+ }
+ addr = page_to_virt(page);
+
+ memcpy(addr, skb_frag_page(frag), skb_frag_size(frag));
+
+ __skb_fill_page_desc_noacc(skinfo, skinfo->nr_frags++,
+ addr, 0, skb_frag_size(frag));
+ }
+
out:
xsk_buff_free(xdp);
return skb;
@@ -322,14 +350,13 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc,
unsigned int *rx_packets,
unsigned int *rx_bytes,
- unsigned int size,
unsigned int xdp_res,
bool *failure)
{
struct sk_buff *skb;
*rx_packets = 1;
- *rx_bytes = size;
+ *rx_bytes = xdp_get_buff_len(xdp_buff);
if (likely(xdp_res == I40E_XDP_REDIR) || xdp_res == I40E_XDP_TX)
return;
@@ -363,7 +390,6 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
return;
}
- *rx_bytes = skb->len;
i40e_process_skb_fields(rx_ring, rx_desc, skb);
napi_gro_receive(&rx_ring->q_vector->napi, skb);
return;
@@ -374,6 +400,31 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
WARN_ON_ONCE(1);
}
+static int
+i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first,
+ struct xdp_buff *xdp, const unsigned int size)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(first);
+
+ if (!xdp_buff_has_frags(first)) {
+ sinfo->nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ xdp_buff_set_frags_flag(first);
+ }
+
+ if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
+ xsk_buff_free(first);
+ return -ENOMEM;
+ }
+
+ __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
+ virt_to_page(xdp->data_hard_start), 0, size);
+ sinfo->xdp_frags_size += size;
+ xsk_buff_add_frag(xdp);
+
+ return 0;
+}
+
/**
* i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring
* @rx_ring: Rx ring
@@ -384,13 +435,18 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ u16 next_to_process = rx_ring->next_to_process;
u16 next_to_clean = rx_ring->next_to_clean;
u16 count_mask = rx_ring->count - 1;
unsigned int xdp_res, xdp_xmit = 0;
+ struct xdp_buff *first = NULL;
struct bpf_prog *xdp_prog;
bool failure = false;
u16 cleaned_count;
+ if (next_to_process != next_to_clean)
+ first = *i40e_rx_bi(rx_ring, next_to_clean);
+
/* NB! xdp_prog will always be !NULL, due to the fact that
* this path is enabled by setting an XDP program.
*/
@@ -404,7 +460,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
unsigned int size;
u64 qword;
- rx_desc = I40E_RX_DESC(rx_ring, next_to_clean);
+ rx_desc = I40E_RX_DESC(rx_ring, next_to_process);
qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
/* This memory barrier is needed to keep us from reading
@@ -417,9 +473,9 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
i40e_clean_programming_status(rx_ring,
rx_desc->raw.qword[0],
qword);
- bi = *i40e_rx_bi(rx_ring, next_to_clean);
+ bi = *i40e_rx_bi(rx_ring, next_to_process);
xsk_buff_free(bi);
- next_to_clean = (next_to_clean + 1) & count_mask;
+ next_to_process = (next_to_process + 1) & count_mask;
continue;
}
@@ -428,22 +484,35 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
if (!size)
break;
- bi = *i40e_rx_bi(rx_ring, next_to_clean);
+ bi = *i40e_rx_bi(rx_ring, next_to_process);
xsk_buff_set_size(bi, size);
xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool);
- xdp_res = i40e_run_xdp_zc(rx_ring, bi, xdp_prog);
- i40e_handle_xdp_result_zc(rx_ring, bi, rx_desc, &rx_packets,
- &rx_bytes, size, xdp_res, &failure);
+ if (!first)
+ first = bi;
+ else if (i40e_add_xsk_frag(rx_ring, first, bi, size))
+ break;
+
+ next_to_process = (next_to_process + 1) & count_mask;
+
+ if (i40e_is_non_eop(rx_ring, rx_desc))
+ continue;
+
+ xdp_res = i40e_run_xdp_zc(rx_ring, first, xdp_prog);
+ i40e_handle_xdp_result_zc(rx_ring, first, rx_desc, &rx_packets,
+ &rx_bytes, xdp_res, &failure);
+ first->flags = 0;
+ next_to_clean = next_to_process;
if (failure)
break;
total_rx_packets += rx_packets;
total_rx_bytes += rx_bytes;
xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR);
- next_to_clean = (next_to_clean + 1) & count_mask;
+ first = NULL;
}
rx_ring->next_to_clean = next_to_clean;
+ rx_ring->next_to_process = next_to_process;
cleaned_count = (next_to_clean - rx_ring->next_to_use - 1) & count_mask;
if (cleaned_count >= I40E_RX_BUFFER_WRITE)
@@ -466,6 +535,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
static void i40e_xmit_pkt(struct i40e_ring *xdp_ring, struct xdp_desc *desc,
unsigned int *total_bytes)
{
+ u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(desc);
struct i40e_tx_desc *tx_desc;
dma_addr_t dma;
@@ -474,8 +544,7 @@ static void i40e_xmit_pkt(struct i40e_ring *xdp_ring, struct xdp_desc *desc,
tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
tx_desc->buffer_addr = cpu_to_le64(dma);
- tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC | I40E_TX_DESC_CMD_EOP,
- 0, desc->len, 0);
+ tx_desc->cmd_type_offset_bsz = build_ctob(cmd, 0, desc->len, 0);
*total_bytes += desc->len;
}
@@ -489,14 +558,14 @@ static void i40e_xmit_pkt_batch(struct i40e_ring *xdp_ring, struct xdp_desc *des
u32 i;
loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
+ u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(&desc[i]);
+
dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc[i].addr);
xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc[i].len);
tx_desc = I40E_TX_DESC(xdp_ring, ntu++);
tx_desc->buffer_addr = cpu_to_le64(dma);
- tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC |
- I40E_TX_DESC_CMD_EOP,
- 0, desc[i].len, 0);
+ tx_desc->cmd_type_offset_bsz = build_ctob(cmd, 0, desc[i].len, 0);
*total_bytes += desc[i].len;
}
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 817977e3039d..960277d78e09 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -47,5 +47,5 @@ ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o
ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o
ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
-ice-$(CONFIG_ICE_SWITCHDEV) += ice_eswitch.o
+ice-$(CONFIG_ICE_SWITCHDEV) += ice_eswitch.o ice_eswitch_br.o
ice-$(CONFIG_GNSS) += ice_gnss.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 4ba3d99439a0..9109006336f0 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -370,6 +370,7 @@ struct ice_vsi {
u16 rx_buf_len;
struct ice_aqc_vsi_props info; /* VSI properties */
+ struct ice_vsi_vlan_info vlan_info; /* vlan config to be restored */
/* VSI stats */
struct rtnl_link_stats64 net_stats;
@@ -517,6 +518,7 @@ enum ice_misc_thread_tasks {
struct ice_switchdev_info {
struct ice_vsi *control_vsi;
struct ice_vsi *uplink_vsi;
+ struct ice_esw_br_offloads *br_offloads;
bool is_running;
};
@@ -626,6 +628,7 @@ struct ice_pf {
struct ice_lag *lag; /* Link Aggregation information */
struct ice_switchdev_info switchdev;
+ struct ice_esw_br_port *br_port;
#define ICE_INVALID_AGG_NODE_ID 0
#define ICE_PF_AGG_NODE_ID_START 1
@@ -853,7 +856,7 @@ static inline bool ice_is_adq_active(struct ice_pf *pf)
return false;
}
-bool netif_is_ice(struct net_device *dev);
+bool netif_is_ice(const struct net_device *dev);
int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
int ice_vsi_open_ctrl(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index b678bdf96f3a..9ab9fb558b5e 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -408,7 +408,6 @@ static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring)
*/
static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
{
- int chain_len = ICE_MAX_CHAINED_RX_BUFS;
struct ice_vsi *vsi = ring->vsi;
u32 rxdid = ICE_RXDID_FLEX_NIC;
struct ice_rlan_ctx rlan_ctx;
@@ -472,17 +471,11 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
*/
rlan_ctx.showiv = 0;
- /* For AF_XDP ZC, we disallow packets to span on
- * multiple buffers, thus letting us skip that
- * handling in the fast-path.
- */
- if (ring->xsk_pool)
- chain_len = 1;
/* Max packet size for this queue - must not be set to a larger value
* than 5 x DBUF
*/
rlan_ctx.rxmax = min_t(u32, vsi->max_frame,
- chain_len * ring->rx_buf_len);
+ ICE_MAX_CHAINED_RX_BUFS * ring->rx_buf_len);
/* Rx queue threshold in units of 64 */
rlan_ctx.lrxqthresh = 1;
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
index ad0a007b7398..9a53a5e5d73e 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -4,6 +4,7 @@
#include "ice.h"
#include "ice_lib.h"
#include "ice_eswitch.h"
+#include "ice_eswitch_br.h"
#include "ice_fltr.h"
#include "ice_repr.h"
#include "ice_devlink.h"
@@ -103,17 +104,28 @@ static int ice_eswitch_setup_env(struct ice_pf *pf)
rule_added = true;
}
+ vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);
+ if (vlan_ops->dis_rx_filtering(uplink_vsi))
+ goto err_dis_rx;
+
if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override))
goto err_override_uplink;
if (ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_set_allow_override))
goto err_override_control;
+ if (ice_vsi_update_local_lb(uplink_vsi, true))
+ goto err_override_local_lb;
+
return 0;
+err_override_local_lb:
+ ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override);
err_override_control:
ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
err_override_uplink:
+ vlan_ops->ena_rx_filtering(uplink_vsi);
+err_dis_rx:
if (rule_added)
ice_clear_dflt_vsi(uplink_vsi);
err_def_rx:
@@ -306,6 +318,9 @@ void ice_eswitch_update_repr(struct ice_vsi *vsi)
repr->src_vsi = vsi;
repr->dst->u.port_info.port_id = vsi->vsi_num;
+ if (repr->br_port)
+ repr->br_port->vsi = vsi;
+
ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof);
if (ret) {
ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI);
@@ -331,6 +346,9 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
np = netdev_priv(netdev);
vsi = np->vsi;
+ if (!vsi || !ice_is_switchdev_running(vsi->back))
+ return NETDEV_TX_BUSY;
+
if (ice_is_reset_in_progress(vsi->back->state) ||
test_bit(ICE_VF_DIS, vsi->back->state))
return NETDEV_TX_BUSY;
@@ -378,9 +396,14 @@ static void ice_eswitch_release_env(struct ice_pf *pf)
{
struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi;
struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+ struct ice_vsi_vlan_ops *vlan_ops;
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);
+ ice_vsi_update_local_lb(uplink_vsi, false);
ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override);
ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
+ vlan_ops->ena_rx_filtering(uplink_vsi);
ice_clear_dflt_vsi(uplink_vsi);
ice_fltr_add_mac_and_broadcast(uplink_vsi,
uplink_vsi->port_info->mac.perm_addr,
@@ -455,16 +478,24 @@ static void ice_eswitch_napi_disable(struct ice_pf *pf)
*/
static int ice_eswitch_enable_switchdev(struct ice_pf *pf)
{
- struct ice_vsi *ctrl_vsi;
+ struct ice_vsi *ctrl_vsi, *uplink_vsi;
+
+ uplink_vsi = ice_get_main_vsi(pf);
+ if (!uplink_vsi)
+ return -ENODEV;
+
+ if (netif_is_any_bridge_port(uplink_vsi->netdev)) {
+ dev_err(ice_pf_to_dev(pf),
+ "Uplink port cannot be a bridge port\n");
+ return -EINVAL;
+ }
pf->switchdev.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info);
if (!pf->switchdev.control_vsi)
return -ENODEV;
ctrl_vsi = pf->switchdev.control_vsi;
- pf->switchdev.uplink_vsi = ice_get_main_vsi(pf);
- if (!pf->switchdev.uplink_vsi)
- goto err_vsi;
+ pf->switchdev.uplink_vsi = uplink_vsi;
if (ice_eswitch_setup_env(pf))
goto err_vsi;
@@ -480,10 +511,15 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf)
if (ice_vsi_open(ctrl_vsi))
goto err_setup_reprs;
+ if (ice_eswitch_br_offloads_init(pf))
+ goto err_br_offloads;
+
ice_eswitch_napi_enable(pf);
return 0;
+err_br_offloads:
+ ice_vsi_close(ctrl_vsi);
err_setup_reprs:
ice_repr_rem_from_all_vfs(pf);
err_repr_add:
@@ -502,8 +538,8 @@ static void ice_eswitch_disable_switchdev(struct ice_pf *pf)
struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
ice_eswitch_napi_disable(pf);
+ ice_eswitch_br_offloads_deinit(pf);
ice_eswitch_release_env(pf);
- ice_rem_adv_rule_for_vsi(&pf->hw, ctrl_vsi->idx);
ice_eswitch_release_reprs(pf, ctrl_vsi);
ice_vsi_release(ctrl_vsi);
ice_repr_rem_from_all_vfs(pf);
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch_br.c b/drivers/net/ethernet/intel/ice/ice_eswitch_br.c
new file mode 100644
index 000000000000..cc7357ed6e5f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch_br.c
@@ -0,0 +1,1309 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_eswitch_br.h"
+#include "ice_repr.h"
+#include "ice_switch.h"
+#include "ice_vlan.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_trace.h"
+
+#define ICE_ESW_BRIDGE_UPDATE_INTERVAL msecs_to_jiffies(1000)
+
+static const struct rhashtable_params ice_fdb_ht_params = {
+ .key_offset = offsetof(struct ice_esw_br_fdb_entry, data),
+ .key_len = sizeof(struct ice_esw_br_fdb_data),
+ .head_offset = offsetof(struct ice_esw_br_fdb_entry, ht_node),
+ .automatic_shrinking = true,
+};
+
+static bool ice_eswitch_br_is_dev_valid(const struct net_device *dev)
+{
+ /* Accept only PF netdev and PRs */
+ return ice_is_port_repr_netdev(dev) || netif_is_ice(dev);
+}
+
+static struct ice_esw_br_port *
+ice_eswitch_br_netdev_to_port(struct net_device *dev)
+{
+ if (ice_is_port_repr_netdev(dev)) {
+ struct ice_repr *repr = ice_netdev_to_repr(dev);
+
+ return repr->br_port;
+ } else if (netif_is_ice(dev)) {
+ struct ice_pf *pf = ice_netdev_to_pf(dev);
+
+ return pf->br_port;
+ }
+
+ return NULL;
+}
+
+static void
+ice_eswitch_br_ingress_rule_setup(struct ice_adv_rule_info *rule_info,
+ u8 pf_id, u16 vf_vsi_idx)
+{
+ rule_info->sw_act.vsi_handle = vf_vsi_idx;
+ rule_info->sw_act.flag |= ICE_FLTR_RX;
+ rule_info->sw_act.src = pf_id;
+ rule_info->priority = 5;
+}
+
+static void
+ice_eswitch_br_egress_rule_setup(struct ice_adv_rule_info *rule_info,
+ u16 pf_vsi_idx)
+{
+ rule_info->sw_act.vsi_handle = pf_vsi_idx;
+ rule_info->sw_act.flag |= ICE_FLTR_TX;
+ rule_info->flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE;
+ rule_info->flags_info.act_valid = true;
+ rule_info->priority = 5;
+}
+
+static int
+ice_eswitch_br_rule_delete(struct ice_hw *hw, struct ice_rule_query_data *rule)
+{
+ int err;
+
+ if (!rule)
+ return -EINVAL;
+
+ err = ice_rem_adv_rule_by_id(hw, rule);
+ kfree(rule);
+
+ return err;
+}
+
+static u16
+ice_eswitch_br_get_lkups_cnt(u16 vid)
+{
+ return ice_eswitch_br_is_vid_valid(vid) ? 2 : 1;
+}
+
+static void
+ice_eswitch_br_add_vlan_lkup(struct ice_adv_lkup_elem *list, u16 vid)
+{
+ if (ice_eswitch_br_is_vid_valid(vid)) {
+ list[1].type = ICE_VLAN_OFOS;
+ list[1].h_u.vlan_hdr.vlan = cpu_to_be16(vid & VLAN_VID_MASK);
+ list[1].m_u.vlan_hdr.vlan = cpu_to_be16(0xFFFF);
+ }
+}
+
+static struct ice_rule_query_data *
+ice_eswitch_br_fwd_rule_create(struct ice_hw *hw, int vsi_idx, int port_type,
+ const unsigned char *mac, u16 vid)
+{
+ struct ice_adv_rule_info rule_info = { 0 };
+ struct ice_rule_query_data *rule;
+ struct ice_adv_lkup_elem *list;
+ u16 lkups_cnt;
+ int err;
+
+ lkups_cnt = ice_eswitch_br_get_lkups_cnt(vid);
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return ERR_PTR(-ENOMEM);
+
+ list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+ if (!list) {
+ err = -ENOMEM;
+ goto err_list_alloc;
+ }
+
+ switch (port_type) {
+ case ICE_ESWITCH_BR_UPLINK_PORT:
+ ice_eswitch_br_egress_rule_setup(&rule_info, vsi_idx);
+ break;
+ case ICE_ESWITCH_BR_VF_REPR_PORT:
+ ice_eswitch_br_ingress_rule_setup(&rule_info, hw->pf_id,
+ vsi_idx);
+ break;
+ default:
+ err = -EINVAL;
+ goto err_add_rule;
+ }
+
+ list[0].type = ICE_MAC_OFOS;
+ ether_addr_copy(list[0].h_u.eth_hdr.dst_addr, mac);
+ eth_broadcast_addr(list[0].m_u.eth_hdr.dst_addr);
+
+ ice_eswitch_br_add_vlan_lkup(list, vid);
+
+ rule_info.need_pass_l2 = true;
+
+ rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI;
+
+ err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, rule);
+ if (err)
+ goto err_add_rule;
+
+ kfree(list);
+
+ return rule;
+
+err_add_rule:
+ kfree(list);
+err_list_alloc:
+ kfree(rule);
+
+ return ERR_PTR(err);
+}
+
+static struct ice_rule_query_data *
+ice_eswitch_br_guard_rule_create(struct ice_hw *hw, u16 vsi_idx,
+ const unsigned char *mac, u16 vid)
+{
+ struct ice_adv_rule_info rule_info = { 0 };
+ struct ice_rule_query_data *rule;
+ struct ice_adv_lkup_elem *list;
+ int err = -ENOMEM;
+ u16 lkups_cnt;
+
+ lkups_cnt = ice_eswitch_br_get_lkups_cnt(vid);
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ goto err_exit;
+
+ list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+ if (!list)
+ goto err_list_alloc;
+
+ list[0].type = ICE_MAC_OFOS;
+ ether_addr_copy(list[0].h_u.eth_hdr.src_addr, mac);
+ eth_broadcast_addr(list[0].m_u.eth_hdr.src_addr);
+
+ ice_eswitch_br_add_vlan_lkup(list, vid);
+
+ rule_info.allow_pass_l2 = true;
+ rule_info.sw_act.vsi_handle = vsi_idx;
+ rule_info.sw_act.fltr_act = ICE_NOP;
+ rule_info.priority = 5;
+
+ err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, rule);
+ if (err)
+ goto err_add_rule;
+
+ kfree(list);
+
+ return rule;
+
+err_add_rule:
+ kfree(list);
+err_list_alloc:
+ kfree(rule);
+err_exit:
+ return ERR_PTR(err);
+}
+
+static struct ice_esw_br_flow *
+ice_eswitch_br_flow_create(struct device *dev, struct ice_hw *hw, int vsi_idx,
+ int port_type, const unsigned char *mac, u16 vid)
+{
+ struct ice_rule_query_data *fwd_rule, *guard_rule;
+ struct ice_esw_br_flow *flow;
+ int err;
+
+ flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+ if (!flow)
+ return ERR_PTR(-ENOMEM);
+
+ fwd_rule = ice_eswitch_br_fwd_rule_create(hw, vsi_idx, port_type, mac,
+ vid);
+ err = PTR_ERR_OR_ZERO(fwd_rule);
+ if (err) {
+ dev_err(dev, "Failed to create eswitch bridge %sgress forward rule, err: %d\n",
+ port_type == ICE_ESWITCH_BR_UPLINK_PORT ? "e" : "in",
+ err);
+ goto err_fwd_rule;
+ }
+
+ guard_rule = ice_eswitch_br_guard_rule_create(hw, vsi_idx, mac, vid);
+ err = PTR_ERR_OR_ZERO(guard_rule);
+ if (err) {
+ dev_err(dev, "Failed to create eswitch bridge %sgress guard rule, err: %d\n",
+ port_type == ICE_ESWITCH_BR_UPLINK_PORT ? "e" : "in",
+ err);
+ goto err_guard_rule;
+ }
+
+ flow->fwd_rule = fwd_rule;
+ flow->guard_rule = guard_rule;
+
+ return flow;
+
+err_guard_rule:
+ ice_eswitch_br_rule_delete(hw, fwd_rule);
+err_fwd_rule:
+ kfree(flow);
+
+ return ERR_PTR(err);
+}
+
+static struct ice_esw_br_fdb_entry *
+ice_eswitch_br_fdb_find(struct ice_esw_br *bridge, const unsigned char *mac,
+ u16 vid)
+{
+ struct ice_esw_br_fdb_data data = {
+ .vid = vid,
+ };
+
+ ether_addr_copy(data.addr, mac);
+ return rhashtable_lookup_fast(&bridge->fdb_ht, &data,
+ ice_fdb_ht_params);
+}
+
+static void
+ice_eswitch_br_flow_delete(struct ice_pf *pf, struct ice_esw_br_flow *flow)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ int err;
+
+ err = ice_eswitch_br_rule_delete(&pf->hw, flow->fwd_rule);
+ if (err)
+ dev_err(dev, "Failed to delete FDB forward rule, err: %d\n",
+ err);
+
+ err = ice_eswitch_br_rule_delete(&pf->hw, flow->guard_rule);
+ if (err)
+ dev_err(dev, "Failed to delete FDB guard rule, err: %d\n",
+ err);
+
+ kfree(flow);
+}
+
+static struct ice_esw_br_vlan *
+ice_esw_br_port_vlan_lookup(struct ice_esw_br *bridge, u16 vsi_idx, u16 vid)
+{
+ struct ice_pf *pf = bridge->br_offloads->pf;
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_esw_br_port *port;
+ struct ice_esw_br_vlan *vlan;
+
+ port = xa_load(&bridge->ports, vsi_idx);
+ if (!port) {
+ dev_info(dev, "Bridge port lookup failed (vsi=%u)\n", vsi_idx);
+ return ERR_PTR(-EINVAL);
+ }
+
+ vlan = xa_load(&port->vlans, vid);
+ if (!vlan) {
+ dev_info(dev, "Bridge port vlan metadata lookup failed (vsi=%u)\n",
+ vsi_idx);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return vlan;
+}
+
+static void
+ice_eswitch_br_fdb_entry_delete(struct ice_esw_br *bridge,
+ struct ice_esw_br_fdb_entry *fdb_entry)
+{
+ struct ice_pf *pf = bridge->br_offloads->pf;
+
+ rhashtable_remove_fast(&bridge->fdb_ht, &fdb_entry->ht_node,
+ ice_fdb_ht_params);
+ list_del(&fdb_entry->list);
+
+ ice_eswitch_br_flow_delete(pf, fdb_entry->flow);
+
+ kfree(fdb_entry);
+}
+
+static void
+ice_eswitch_br_fdb_offload_notify(struct net_device *dev,
+ const unsigned char *mac, u16 vid,
+ unsigned long val)
+{
+ struct switchdev_notifier_fdb_info fdb_info = {
+ .addr = mac,
+ .vid = vid,
+ .offloaded = true,
+ };
+
+ call_switchdev_notifiers(val, dev, &fdb_info.info, NULL);
+}
+
+static void
+ice_eswitch_br_fdb_entry_notify_and_cleanup(struct ice_esw_br *bridge,
+ struct ice_esw_br_fdb_entry *entry)
+{
+ if (!(entry->flags & ICE_ESWITCH_BR_FDB_ADDED_BY_USER))
+ ice_eswitch_br_fdb_offload_notify(entry->dev, entry->data.addr,
+ entry->data.vid,
+ SWITCHDEV_FDB_DEL_TO_BRIDGE);
+ ice_eswitch_br_fdb_entry_delete(bridge, entry);
+}
+
+static void
+ice_eswitch_br_fdb_entry_find_and_delete(struct ice_esw_br *bridge,
+ const unsigned char *mac, u16 vid)
+{
+ struct ice_pf *pf = bridge->br_offloads->pf;
+ struct ice_esw_br_fdb_entry *fdb_entry;
+ struct device *dev = ice_pf_to_dev(pf);
+
+ fdb_entry = ice_eswitch_br_fdb_find(bridge, mac, vid);
+ if (!fdb_entry) {
+ dev_err(dev, "FDB entry with mac: %pM and vid: %u not found\n",
+ mac, vid);
+ return;
+ }
+
+ trace_ice_eswitch_br_fdb_entry_find_and_delete(fdb_entry);
+ ice_eswitch_br_fdb_entry_notify_and_cleanup(bridge, fdb_entry);
+}
+
+static void
+ice_eswitch_br_fdb_entry_create(struct net_device *netdev,
+ struct ice_esw_br_port *br_port,
+ bool added_by_user,
+ const unsigned char *mac, u16 vid)
+{
+ struct ice_esw_br *bridge = br_port->bridge;
+ struct ice_pf *pf = bridge->br_offloads->pf;
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_esw_br_fdb_entry *fdb_entry;
+ struct ice_esw_br_flow *flow;
+ struct ice_esw_br_vlan *vlan;
+ struct ice_hw *hw = &pf->hw;
+ unsigned long event;
+ int err;
+
+ /* untagged filtering is not yet supported */
+ if (!(bridge->flags & ICE_ESWITCH_BR_VLAN_FILTERING) && vid)
+ return;
+
+ if ((bridge->flags & ICE_ESWITCH_BR_VLAN_FILTERING)) {
+ vlan = ice_esw_br_port_vlan_lookup(bridge, br_port->vsi_idx,
+ vid);
+ if (IS_ERR(vlan)) {
+ dev_err(dev, "Failed to find vlan lookup, err: %ld\n",
+ PTR_ERR(vlan));
+ return;
+ }
+ }
+
+ fdb_entry = ice_eswitch_br_fdb_find(bridge, mac, vid);
+ if (fdb_entry)
+ ice_eswitch_br_fdb_entry_notify_and_cleanup(bridge, fdb_entry);
+
+ fdb_entry = kzalloc(sizeof(*fdb_entry), GFP_KERNEL);
+ if (!fdb_entry) {
+ err = -ENOMEM;
+ goto err_exit;
+ }
+
+ flow = ice_eswitch_br_flow_create(dev, hw, br_port->vsi_idx,
+ br_port->type, mac, vid);
+ if (IS_ERR(flow)) {
+ err = PTR_ERR(flow);
+ goto err_add_flow;
+ }
+
+ ether_addr_copy(fdb_entry->data.addr, mac);
+ fdb_entry->data.vid = vid;
+ fdb_entry->br_port = br_port;
+ fdb_entry->flow = flow;
+ fdb_entry->dev = netdev;
+ fdb_entry->last_use = jiffies;
+ event = SWITCHDEV_FDB_ADD_TO_BRIDGE;
+
+ if (added_by_user) {
+ fdb_entry->flags |= ICE_ESWITCH_BR_FDB_ADDED_BY_USER;
+ event = SWITCHDEV_FDB_OFFLOADED;
+ }
+
+ err = rhashtable_insert_fast(&bridge->fdb_ht, &fdb_entry->ht_node,
+ ice_fdb_ht_params);
+ if (err)
+ goto err_fdb_insert;
+
+ list_add(&fdb_entry->list, &bridge->fdb_list);
+ trace_ice_eswitch_br_fdb_entry_create(fdb_entry);
+
+ ice_eswitch_br_fdb_offload_notify(netdev, mac, vid, event);
+
+ return;
+
+err_fdb_insert:
+ ice_eswitch_br_flow_delete(pf, flow);
+err_add_flow:
+ kfree(fdb_entry);
+err_exit:
+ dev_err(dev, "Failed to create fdb entry, err: %d\n", err);
+}
+
+static void
+ice_eswitch_br_fdb_work_dealloc(struct ice_esw_br_fdb_work *fdb_work)
+{
+ kfree(fdb_work->fdb_info.addr);
+ kfree(fdb_work);
+}
+
+static void
+ice_eswitch_br_fdb_event_work(struct work_struct *work)
+{
+ struct ice_esw_br_fdb_work *fdb_work = ice_work_to_fdb_work(work);
+ bool added_by_user = fdb_work->fdb_info.added_by_user;
+ const unsigned char *mac = fdb_work->fdb_info.addr;
+ u16 vid = fdb_work->fdb_info.vid;
+ struct ice_esw_br_port *br_port;
+
+ rtnl_lock();
+
+ br_port = ice_eswitch_br_netdev_to_port(fdb_work->dev);
+ if (!br_port)
+ goto err_exit;
+
+ switch (fdb_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ ice_eswitch_br_fdb_entry_create(fdb_work->dev, br_port,
+ added_by_user, mac, vid);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ ice_eswitch_br_fdb_entry_find_and_delete(br_port->bridge,
+ mac, vid);
+ break;
+ default:
+ goto err_exit;
+ }
+
+err_exit:
+ rtnl_unlock();
+ dev_put(fdb_work->dev);
+ ice_eswitch_br_fdb_work_dealloc(fdb_work);
+}
+
+static struct ice_esw_br_fdb_work *
+ice_eswitch_br_fdb_work_alloc(struct switchdev_notifier_fdb_info *fdb_info,
+ struct net_device *dev,
+ unsigned long event)
+{
+ struct ice_esw_br_fdb_work *work;
+ unsigned char *mac;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_WORK(&work->work, ice_eswitch_br_fdb_event_work);
+ memcpy(&work->fdb_info, fdb_info, sizeof(work->fdb_info));
+
+ mac = kzalloc(ETH_ALEN, GFP_ATOMIC);
+ if (!mac) {
+ kfree(work);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ether_addr_copy(mac, fdb_info->addr);
+ work->fdb_info.addr = mac;
+ work->event = event;
+ work->dev = dev;
+
+ return work;
+}
+
+static int
+ice_eswitch_br_switchdev_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct switchdev_notifier_info *info = ptr;
+ struct ice_esw_br_offloads *br_offloads;
+ struct ice_esw_br_fdb_work *work;
+ struct netlink_ext_ack *extack;
+ struct net_device *upper;
+
+ br_offloads = ice_nb_to_br_offloads(nb, switchdev_nb);
+ extack = switchdev_notifier_info_to_extack(ptr);
+
+ upper = netdev_master_upper_dev_get_rcu(dev);
+ if (!upper)
+ return NOTIFY_DONE;
+
+ if (!netif_is_bridge_master(upper))
+ return NOTIFY_DONE;
+
+ if (!ice_eswitch_br_is_dev_valid(dev))
+ return NOTIFY_DONE;
+
+ if (!ice_eswitch_br_netdev_to_port(dev))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ fdb_info = container_of(info, typeof(*fdb_info), info);
+
+ work = ice_eswitch_br_fdb_work_alloc(fdb_info, dev, event);
+ if (IS_ERR(work)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to init switchdev fdb work");
+ return notifier_from_errno(PTR_ERR(work));
+ }
+ dev_hold(dev);
+
+ queue_work(br_offloads->wq, &work->work);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static void ice_eswitch_br_fdb_flush(struct ice_esw_br *bridge)
+{
+ struct ice_esw_br_fdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list)
+ ice_eswitch_br_fdb_entry_notify_and_cleanup(bridge, entry);
+}
+
+static void
+ice_eswitch_br_vlan_filtering_set(struct ice_esw_br *bridge, bool enable)
+{
+ if (enable == !!(bridge->flags & ICE_ESWITCH_BR_VLAN_FILTERING))
+ return;
+
+ ice_eswitch_br_fdb_flush(bridge);
+ if (enable)
+ bridge->flags |= ICE_ESWITCH_BR_VLAN_FILTERING;
+ else
+ bridge->flags &= ~ICE_ESWITCH_BR_VLAN_FILTERING;
+}
+
+static void
+ice_eswitch_br_clear_pvid(struct ice_esw_br_port *port)
+{
+ struct ice_vlan port_vlan = ICE_VLAN(ETH_P_8021Q, port->pvid, 0);
+ struct ice_vsi_vlan_ops *vlan_ops;
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(port->vsi);
+
+ vlan_ops->del_vlan(port->vsi, &port_vlan);
+ vlan_ops->clear_port_vlan(port->vsi);
+
+ ice_vf_vsi_disable_port_vlan(port->vsi);
+
+ port->pvid = 0;
+}
+
+static void
+ice_eswitch_br_vlan_cleanup(struct ice_esw_br_port *port,
+ struct ice_esw_br_vlan *vlan)
+{
+ struct ice_esw_br_fdb_entry *fdb_entry, *tmp;
+ struct ice_esw_br *bridge = port->bridge;
+
+ trace_ice_eswitch_br_vlan_cleanup(vlan);
+
+ list_for_each_entry_safe(fdb_entry, tmp, &bridge->fdb_list, list) {
+ if (vlan->vid == fdb_entry->data.vid)
+ ice_eswitch_br_fdb_entry_delete(bridge, fdb_entry);
+ }
+
+ xa_erase(&port->vlans, vlan->vid);
+ if (port->pvid == vlan->vid)
+ ice_eswitch_br_clear_pvid(port);
+ kfree(vlan);
+}
+
+static void ice_eswitch_br_port_vlans_flush(struct ice_esw_br_port *port)
+{
+ struct ice_esw_br_vlan *vlan;
+ unsigned long index;
+
+ xa_for_each(&port->vlans, index, vlan)
+ ice_eswitch_br_vlan_cleanup(port, vlan);
+}
+
+static int
+ice_eswitch_br_set_pvid(struct ice_esw_br_port *port,
+ struct ice_esw_br_vlan *vlan)
+{
+ struct ice_vlan port_vlan = ICE_VLAN(ETH_P_8021Q, vlan->vid, 0);
+ struct device *dev = ice_pf_to_dev(port->vsi->back);
+ struct ice_vsi_vlan_ops *vlan_ops;
+ int err;
+
+ if (port->pvid == vlan->vid || vlan->vid == 1)
+ return 0;
+
+ /* Setting port vlan on uplink isn't supported by hw */
+ if (port->type == ICE_ESWITCH_BR_UPLINK_PORT)
+ return -EOPNOTSUPP;
+
+ if (port->pvid) {
+ dev_info(dev,
+ "Port VLAN (vsi=%u, vid=%u) already exists on the port, remove it before adding new one\n",
+ port->vsi_idx, port->pvid);
+ return -EEXIST;
+ }
+
+ ice_vf_vsi_enable_port_vlan(port->vsi);
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(port->vsi);
+ err = vlan_ops->set_port_vlan(port->vsi, &port_vlan);
+ if (err)
+ return err;
+
+ err = vlan_ops->add_vlan(port->vsi, &port_vlan);
+ if (err)
+ return err;
+
+ ice_eswitch_br_port_vlans_flush(port);
+ port->pvid = vlan->vid;
+
+ return 0;
+}
+
+static struct ice_esw_br_vlan *
+ice_eswitch_br_vlan_create(u16 vid, u16 flags, struct ice_esw_br_port *port)
+{
+ struct device *dev = ice_pf_to_dev(port->vsi->back);
+ struct ice_esw_br_vlan *vlan;
+ int err;
+
+ vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+ if (!vlan)
+ return ERR_PTR(-ENOMEM);
+
+ vlan->vid = vid;
+ vlan->flags = flags;
+ if ((flags & BRIDGE_VLAN_INFO_PVID) &&
+ (flags & BRIDGE_VLAN_INFO_UNTAGGED)) {
+ err = ice_eswitch_br_set_pvid(port, vlan);
+ if (err)
+ goto err_set_pvid;
+ } else if ((flags & BRIDGE_VLAN_INFO_PVID) ||
+ (flags & BRIDGE_VLAN_INFO_UNTAGGED)) {
+ dev_info(dev, "VLAN push and pop are supported only simultaneously\n");
+ err = -EOPNOTSUPP;
+ goto err_set_pvid;
+ }
+
+ err = xa_insert(&port->vlans, vlan->vid, vlan, GFP_KERNEL);
+ if (err)
+ goto err_insert;
+
+ trace_ice_eswitch_br_vlan_create(vlan);
+
+ return vlan;
+
+err_insert:
+ if (port->pvid)
+ ice_eswitch_br_clear_pvid(port);
+err_set_pvid:
+ kfree(vlan);
+ return ERR_PTR(err);
+}
+
+static int
+ice_eswitch_br_port_vlan_add(struct ice_esw_br *bridge, u16 vsi_idx, u16 vid,
+ u16 flags, struct netlink_ext_ack *extack)
+{
+ struct ice_esw_br_port *port;
+ struct ice_esw_br_vlan *vlan;
+
+ port = xa_load(&bridge->ports, vsi_idx);
+ if (!port)
+ return -EINVAL;
+
+ if (port->pvid) {
+ dev_info(ice_pf_to_dev(port->vsi->back),
+ "Port VLAN (vsi=%u, vid=%d) exists on the port, remove it to add trunk VLANs\n",
+ port->vsi_idx, port->pvid);
+ return -EEXIST;
+ }
+
+ vlan = xa_load(&port->vlans, vid);
+ if (vlan) {
+ if (vlan->flags == flags)
+ return 0;
+
+ ice_eswitch_br_vlan_cleanup(port, vlan);
+ }
+
+ vlan = ice_eswitch_br_vlan_create(vid, flags, port);
+ if (IS_ERR(vlan)) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Failed to create VLAN entry, vid: %u, vsi: %u",
+ vid, vsi_idx);
+ return PTR_ERR(vlan);
+ }
+
+ return 0;
+}
+
+static void
+ice_eswitch_br_port_vlan_del(struct ice_esw_br *bridge, u16 vsi_idx, u16 vid)
+{
+ struct ice_esw_br_port *port;
+ struct ice_esw_br_vlan *vlan;
+
+ port = xa_load(&bridge->ports, vsi_idx);
+ if (!port)
+ return;
+
+ vlan = xa_load(&port->vlans, vid);
+ if (!vlan)
+ return;
+
+ ice_eswitch_br_vlan_cleanup(port, vlan);
+}
+
+static int
+ice_eswitch_br_port_obj_add(struct net_device *netdev, const void *ctx,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_esw_br_port *br_port = ice_eswitch_br_netdev_to_port(netdev);
+ struct switchdev_obj_port_vlan *vlan;
+ int err;
+
+ if (!br_port)
+ return -EINVAL;
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+ err = ice_eswitch_br_port_vlan_add(br_port->bridge,
+ br_port->vsi_idx, vlan->vid,
+ vlan->flags, extack);
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+ice_eswitch_br_port_obj_del(struct net_device *netdev, const void *ctx,
+ const struct switchdev_obj *obj)
+{
+ struct ice_esw_br_port *br_port = ice_eswitch_br_netdev_to_port(netdev);
+ struct switchdev_obj_port_vlan *vlan;
+
+ if (!br_port)
+ return -EINVAL;
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+ ice_eswitch_br_port_vlan_del(br_port->bridge, br_port->vsi_idx,
+ vlan->vid);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+ice_eswitch_br_port_obj_attr_set(struct net_device *netdev, const void *ctx,
+ const struct switchdev_attr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_esw_br_port *br_port = ice_eswitch_br_netdev_to_port(netdev);
+
+ if (!br_port)
+ return -EINVAL;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+ ice_eswitch_br_vlan_filtering_set(br_port->bridge,
+ attr->u.vlan_filtering);
+ return 0;
+ case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+ br_port->bridge->ageing_time =
+ clock_t_to_jiffies(attr->u.ageing_time);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+ice_eswitch_br_event_blocking(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ int err;
+
+ switch (event) {
+ case SWITCHDEV_PORT_OBJ_ADD:
+ err = switchdev_handle_port_obj_add(dev, ptr,
+ ice_eswitch_br_is_dev_valid,
+ ice_eswitch_br_port_obj_add);
+ break;
+ case SWITCHDEV_PORT_OBJ_DEL:
+ err = switchdev_handle_port_obj_del(dev, ptr,
+ ice_eswitch_br_is_dev_valid,
+ ice_eswitch_br_port_obj_del);
+ break;
+ case SWITCHDEV_PORT_ATTR_SET:
+ err = switchdev_handle_port_attr_set(dev, ptr,
+ ice_eswitch_br_is_dev_valid,
+ ice_eswitch_br_port_obj_attr_set);
+ break;
+ default:
+ err = 0;
+ }
+
+ return notifier_from_errno(err);
+}
+
+static void
+ice_eswitch_br_port_deinit(struct ice_esw_br *bridge,
+ struct ice_esw_br_port *br_port)
+{
+ struct ice_esw_br_fdb_entry *fdb_entry, *tmp;
+ struct ice_vsi *vsi = br_port->vsi;
+
+ list_for_each_entry_safe(fdb_entry, tmp, &bridge->fdb_list, list) {
+ if (br_port == fdb_entry->br_port)
+ ice_eswitch_br_fdb_entry_delete(bridge, fdb_entry);
+ }
+
+ if (br_port->type == ICE_ESWITCH_BR_UPLINK_PORT && vsi->back)
+ vsi->back->br_port = NULL;
+ else if (vsi->vf && vsi->vf->repr)
+ vsi->vf->repr->br_port = NULL;
+
+ xa_erase(&bridge->ports, br_port->vsi_idx);
+ ice_eswitch_br_port_vlans_flush(br_port);
+ kfree(br_port);
+}
+
+static struct ice_esw_br_port *
+ice_eswitch_br_port_init(struct ice_esw_br *bridge)
+{
+ struct ice_esw_br_port *br_port;
+
+ br_port = kzalloc(sizeof(*br_port), GFP_KERNEL);
+ if (!br_port)
+ return ERR_PTR(-ENOMEM);
+
+ xa_init(&br_port->vlans);
+
+ br_port->bridge = bridge;
+
+ return br_port;
+}
+
+static int
+ice_eswitch_br_vf_repr_port_init(struct ice_esw_br *bridge,
+ struct ice_repr *repr)
+{
+ struct ice_esw_br_port *br_port;
+ int err;
+
+ br_port = ice_eswitch_br_port_init(bridge);
+ if (IS_ERR(br_port))
+ return PTR_ERR(br_port);
+
+ br_port->vsi = repr->src_vsi;
+ br_port->vsi_idx = br_port->vsi->idx;
+ br_port->type = ICE_ESWITCH_BR_VF_REPR_PORT;
+ repr->br_port = br_port;
+
+ err = xa_insert(&bridge->ports, br_port->vsi_idx, br_port, GFP_KERNEL);
+ if (err) {
+ ice_eswitch_br_port_deinit(bridge, br_port);
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+ice_eswitch_br_uplink_port_init(struct ice_esw_br *bridge, struct ice_pf *pf)
+{
+ struct ice_vsi *vsi = pf->switchdev.uplink_vsi;
+ struct ice_esw_br_port *br_port;
+ int err;
+
+ br_port = ice_eswitch_br_port_init(bridge);
+ if (IS_ERR(br_port))
+ return PTR_ERR(br_port);
+
+ br_port->vsi = vsi;
+ br_port->vsi_idx = br_port->vsi->idx;
+ br_port->type = ICE_ESWITCH_BR_UPLINK_PORT;
+ pf->br_port = br_port;
+
+ err = xa_insert(&bridge->ports, br_port->vsi_idx, br_port, GFP_KERNEL);
+ if (err) {
+ ice_eswitch_br_port_deinit(bridge, br_port);
+ return err;
+ }
+
+ return 0;
+}
+
+static void
+ice_eswitch_br_ports_flush(struct ice_esw_br *bridge)
+{
+ struct ice_esw_br_port *port;
+ unsigned long i;
+
+ xa_for_each(&bridge->ports, i, port)
+ ice_eswitch_br_port_deinit(bridge, port);
+}
+
+static void
+ice_eswitch_br_deinit(struct ice_esw_br_offloads *br_offloads,
+ struct ice_esw_br *bridge)
+{
+ if (!bridge)
+ return;
+
+ /* Cleanup all the ports that were added asynchronously
+ * through NETDEV_CHANGEUPPER event.
+ */
+ ice_eswitch_br_ports_flush(bridge);
+ WARN_ON(!xa_empty(&bridge->ports));
+ xa_destroy(&bridge->ports);
+ rhashtable_destroy(&bridge->fdb_ht);
+
+ br_offloads->bridge = NULL;
+ kfree(bridge);
+}
+
+static struct ice_esw_br *
+ice_eswitch_br_init(struct ice_esw_br_offloads *br_offloads, int ifindex)
+{
+ struct ice_esw_br *bridge;
+ int err;
+
+ bridge = kzalloc(sizeof(*bridge), GFP_KERNEL);
+ if (!bridge)
+ return ERR_PTR(-ENOMEM);
+
+ err = rhashtable_init(&bridge->fdb_ht, &ice_fdb_ht_params);
+ if (err) {
+ kfree(bridge);
+ return ERR_PTR(err);
+ }
+
+ INIT_LIST_HEAD(&bridge->fdb_list);
+ bridge->br_offloads = br_offloads;
+ bridge->ifindex = ifindex;
+ bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME);
+ xa_init(&bridge->ports);
+ br_offloads->bridge = bridge;
+
+ return bridge;
+}
+
+static struct ice_esw_br *
+ice_eswitch_br_get(struct ice_esw_br_offloads *br_offloads, int ifindex,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_esw_br *bridge = br_offloads->bridge;
+
+ if (bridge) {
+ if (bridge->ifindex != ifindex) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only one bridge is supported per eswitch");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ return bridge;
+ }
+
+ /* Create the bridge if it doesn't exist yet */
+ bridge = ice_eswitch_br_init(br_offloads, ifindex);
+ if (IS_ERR(bridge))
+ NL_SET_ERR_MSG_MOD(extack, "Failed to init the bridge");
+
+ return bridge;
+}
+
+static void
+ice_eswitch_br_verify_deinit(struct ice_esw_br_offloads *br_offloads,
+ struct ice_esw_br *bridge)
+{
+ /* Remove the bridge if it exists and there are no ports left */
+ if (!bridge || !xa_empty(&bridge->ports))
+ return;
+
+ ice_eswitch_br_deinit(br_offloads, bridge);
+}
+
+static int
+ice_eswitch_br_port_unlink(struct ice_esw_br_offloads *br_offloads,
+ struct net_device *dev, int ifindex,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_esw_br_port *br_port = ice_eswitch_br_netdev_to_port(dev);
+ struct ice_esw_br *bridge;
+
+ if (!br_port) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Port representor is not attached to any bridge");
+ return -EINVAL;
+ }
+
+ if (br_port->bridge->ifindex != ifindex) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Port representor is attached to another bridge");
+ return -EINVAL;
+ }
+
+ bridge = br_port->bridge;
+
+ trace_ice_eswitch_br_port_unlink(br_port);
+ ice_eswitch_br_port_deinit(br_port->bridge, br_port);
+ ice_eswitch_br_verify_deinit(br_offloads, bridge);
+
+ return 0;
+}
+
+static int
+ice_eswitch_br_port_link(struct ice_esw_br_offloads *br_offloads,
+ struct net_device *dev, int ifindex,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_esw_br *bridge;
+ int err;
+
+ if (ice_eswitch_br_netdev_to_port(dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Port is already attached to the bridge");
+ return -EINVAL;
+ }
+
+ bridge = ice_eswitch_br_get(br_offloads, ifindex, extack);
+ if (IS_ERR(bridge))
+ return PTR_ERR(bridge);
+
+ if (ice_is_port_repr_netdev(dev)) {
+ struct ice_repr *repr = ice_netdev_to_repr(dev);
+
+ err = ice_eswitch_br_vf_repr_port_init(bridge, repr);
+ trace_ice_eswitch_br_port_link(repr->br_port);
+ } else {
+ struct ice_pf *pf = ice_netdev_to_pf(dev);
+
+ err = ice_eswitch_br_uplink_port_init(bridge, pf);
+ trace_ice_eswitch_br_port_link(pf->br_port);
+ }
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to init bridge port");
+ goto err_port_init;
+ }
+
+ return 0;
+
+err_port_init:
+ ice_eswitch_br_verify_deinit(br_offloads, bridge);
+ return err;
+}
+
+static int
+ice_eswitch_br_port_changeupper(struct notifier_block *nb, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct ice_esw_br_offloads *br_offloads;
+ struct netlink_ext_ack *extack;
+ struct net_device *upper;
+
+ br_offloads = ice_nb_to_br_offloads(nb, netdev_nb);
+
+ if (!ice_eswitch_br_is_dev_valid(dev))
+ return 0;
+
+ upper = info->upper_dev;
+ if (!netif_is_bridge_master(upper))
+ return 0;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ if (info->linking)
+ return ice_eswitch_br_port_link(br_offloads, dev,
+ upper->ifindex, extack);
+ else
+ return ice_eswitch_br_port_unlink(br_offloads, dev,
+ upper->ifindex, extack);
+}
+
+static int
+ice_eswitch_br_port_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ int err = 0;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ err = ice_eswitch_br_port_changeupper(nb, ptr);
+ break;
+ }
+
+ return notifier_from_errno(err);
+}
+
+static void
+ice_eswitch_br_offloads_dealloc(struct ice_pf *pf)
+{
+ struct ice_esw_br_offloads *br_offloads = pf->switchdev.br_offloads;
+
+ ASSERT_RTNL();
+
+ if (!br_offloads)
+ return;
+
+ ice_eswitch_br_deinit(br_offloads, br_offloads->bridge);
+
+ pf->switchdev.br_offloads = NULL;
+ kfree(br_offloads);
+}
+
+static struct ice_esw_br_offloads *
+ice_eswitch_br_offloads_alloc(struct ice_pf *pf)
+{
+ struct ice_esw_br_offloads *br_offloads;
+
+ ASSERT_RTNL();
+
+ if (pf->switchdev.br_offloads)
+ return ERR_PTR(-EEXIST);
+
+ br_offloads = kzalloc(sizeof(*br_offloads), GFP_KERNEL);
+ if (!br_offloads)
+ return ERR_PTR(-ENOMEM);
+
+ pf->switchdev.br_offloads = br_offloads;
+ br_offloads->pf = pf;
+
+ return br_offloads;
+}
+
+void
+ice_eswitch_br_offloads_deinit(struct ice_pf *pf)
+{
+ struct ice_esw_br_offloads *br_offloads;
+
+ br_offloads = pf->switchdev.br_offloads;
+ if (!br_offloads)
+ return;
+
+ cancel_delayed_work_sync(&br_offloads->update_work);
+ unregister_netdevice_notifier(&br_offloads->netdev_nb);
+ unregister_switchdev_blocking_notifier(&br_offloads->switchdev_blk);
+ unregister_switchdev_notifier(&br_offloads->switchdev_nb);
+ destroy_workqueue(br_offloads->wq);
+ /* Although notifier block is unregistered just before,
+ * so we don't get any new events, some events might be
+ * already in progress. Hold the rtnl lock and wait for
+ * them to finished.
+ */
+ rtnl_lock();
+ ice_eswitch_br_offloads_dealloc(pf);
+ rtnl_unlock();
+}
+
+static void ice_eswitch_br_update(struct ice_esw_br_offloads *br_offloads)
+{
+ struct ice_esw_br *bridge = br_offloads->bridge;
+ struct ice_esw_br_fdb_entry *entry, *tmp;
+
+ if (!bridge)
+ return;
+
+ rtnl_lock();
+ list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) {
+ if (entry->flags & ICE_ESWITCH_BR_FDB_ADDED_BY_USER)
+ continue;
+
+ if (time_is_after_eq_jiffies(entry->last_use +
+ bridge->ageing_time))
+ continue;
+
+ ice_eswitch_br_fdb_entry_notify_and_cleanup(bridge, entry);
+ }
+ rtnl_unlock();
+}
+
+static void ice_eswitch_br_update_work(struct work_struct *work)
+{
+ struct ice_esw_br_offloads *br_offloads;
+
+ br_offloads = ice_work_to_br_offloads(work);
+
+ ice_eswitch_br_update(br_offloads);
+
+ queue_delayed_work(br_offloads->wq, &br_offloads->update_work,
+ ICE_ESW_BRIDGE_UPDATE_INTERVAL);
+}
+
+int
+ice_eswitch_br_offloads_init(struct ice_pf *pf)
+{
+ struct ice_esw_br_offloads *br_offloads;
+ struct device *dev = ice_pf_to_dev(pf);
+ int err;
+
+ rtnl_lock();
+ br_offloads = ice_eswitch_br_offloads_alloc(pf);
+ rtnl_unlock();
+ if (IS_ERR(br_offloads)) {
+ dev_err(dev, "Failed to init eswitch bridge\n");
+ return PTR_ERR(br_offloads);
+ }
+
+ br_offloads->wq = alloc_ordered_workqueue("ice_bridge_wq", 0);
+ if (!br_offloads->wq) {
+ err = -ENOMEM;
+ dev_err(dev, "Failed to allocate bridge workqueue\n");
+ goto err_alloc_wq;
+ }
+
+ br_offloads->switchdev_nb.notifier_call =
+ ice_eswitch_br_switchdev_event;
+ err = register_switchdev_notifier(&br_offloads->switchdev_nb);
+ if (err) {
+ dev_err(dev,
+ "Failed to register switchdev notifier\n");
+ goto err_reg_switchdev_nb;
+ }
+
+ br_offloads->switchdev_blk.notifier_call =
+ ice_eswitch_br_event_blocking;
+ err = register_switchdev_blocking_notifier(&br_offloads->switchdev_blk);
+ if (err) {
+ dev_err(dev,
+ "Failed to register bridge blocking switchdev notifier\n");
+ goto err_reg_switchdev_blk;
+ }
+
+ br_offloads->netdev_nb.notifier_call = ice_eswitch_br_port_event;
+ err = register_netdevice_notifier(&br_offloads->netdev_nb);
+ if (err) {
+ dev_err(dev,
+ "Failed to register bridge port event notifier\n");
+ goto err_reg_netdev_nb;
+ }
+
+ INIT_DELAYED_WORK(&br_offloads->update_work,
+ ice_eswitch_br_update_work);
+ queue_delayed_work(br_offloads->wq, &br_offloads->update_work,
+ ICE_ESW_BRIDGE_UPDATE_INTERVAL);
+
+ return 0;
+
+err_reg_netdev_nb:
+ unregister_switchdev_blocking_notifier(&br_offloads->switchdev_blk);
+err_reg_switchdev_blk:
+ unregister_switchdev_notifier(&br_offloads->switchdev_nb);
+err_reg_switchdev_nb:
+ destroy_workqueue(br_offloads->wq);
+err_alloc_wq:
+ rtnl_lock();
+ ice_eswitch_br_offloads_dealloc(pf);
+ rtnl_unlock();
+
+ return err;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch_br.h b/drivers/net/ethernet/intel/ice/ice_eswitch_br.h
new file mode 100644
index 000000000000..85a8fadb2928
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch_br.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2023, Intel Corporation. */
+
+#ifndef _ICE_ESWITCH_BR_H_
+#define _ICE_ESWITCH_BR_H_
+
+#include <linux/rhashtable.h>
+#include <linux/workqueue.h>
+
+struct ice_esw_br_fdb_data {
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+};
+
+struct ice_esw_br_flow {
+ struct ice_rule_query_data *fwd_rule;
+ struct ice_rule_query_data *guard_rule;
+};
+
+enum {
+ ICE_ESWITCH_BR_FDB_ADDED_BY_USER = BIT(0),
+};
+
+struct ice_esw_br_fdb_entry {
+ struct ice_esw_br_fdb_data data;
+ struct rhash_head ht_node;
+ struct list_head list;
+
+ int flags;
+
+ struct net_device *dev;
+ struct ice_esw_br_port *br_port;
+ struct ice_esw_br_flow *flow;
+
+ unsigned long last_use;
+};
+
+enum ice_esw_br_port_type {
+ ICE_ESWITCH_BR_UPLINK_PORT = 0,
+ ICE_ESWITCH_BR_VF_REPR_PORT = 1,
+};
+
+struct ice_esw_br_port {
+ struct ice_esw_br *bridge;
+ struct ice_vsi *vsi;
+ enum ice_esw_br_port_type type;
+ u16 vsi_idx;
+ u16 pvid;
+ struct xarray vlans;
+};
+
+enum {
+ ICE_ESWITCH_BR_VLAN_FILTERING = BIT(0),
+};
+
+struct ice_esw_br {
+ struct ice_esw_br_offloads *br_offloads;
+ struct xarray ports;
+
+ struct rhashtable fdb_ht;
+ struct list_head fdb_list;
+
+ int ifindex;
+ u32 flags;
+ unsigned long ageing_time;
+};
+
+struct ice_esw_br_offloads {
+ struct ice_pf *pf;
+ struct ice_esw_br *bridge;
+ struct notifier_block netdev_nb;
+ struct notifier_block switchdev_blk;
+ struct notifier_block switchdev_nb;
+
+ struct workqueue_struct *wq;
+ struct delayed_work update_work;
+};
+
+struct ice_esw_br_fdb_work {
+ struct work_struct work;
+ struct switchdev_notifier_fdb_info fdb_info;
+ struct net_device *dev;
+ unsigned long event;
+};
+
+struct ice_esw_br_vlan {
+ u16 vid;
+ u16 flags;
+};
+
+#define ice_nb_to_br_offloads(nb, nb_name) \
+ container_of(nb, \
+ struct ice_esw_br_offloads, \
+ nb_name)
+
+#define ice_work_to_br_offloads(w) \
+ container_of(w, \
+ struct ice_esw_br_offloads, \
+ update_work.work)
+
+#define ice_work_to_fdb_work(w) \
+ container_of(w, \
+ struct ice_esw_br_fdb_work, \
+ work)
+
+static inline bool ice_eswitch_br_is_vid_valid(u16 vid)
+{
+ /* In trunk VLAN mode, for untagged traffic the bridge sends requests
+ * to offload VLAN 1 with pvid and untagged flags set. Since these
+ * flags are not supported, add a MAC filter instead.
+ */
+ return vid > 1;
+}
+
+void
+ice_eswitch_br_offloads_deinit(struct ice_pf *pf);
+int
+ice_eswitch_br_offloads_init(struct ice_pf *pf);
+
+#endif /* _ICE_ESWITCH_BR_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 0054d7e64ec3..a43c23c80565 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -4076,3 +4076,28 @@ void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx)
{
ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
}
+
+/**
+ * ice_vsi_update_local_lb - update sw block in VSI with local loopback bit
+ * @vsi: pointer to VSI structure
+ * @set: set or unset the bit
+ */
+int
+ice_vsi_update_local_lb(struct ice_vsi *vsi, bool set)
+{
+ struct ice_vsi_ctx ctx = {
+ .info = vsi->info,
+ };
+
+ ctx.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
+ if (set)
+ ctx.info.sw_flags |= ICE_AQ_VSI_SW_FLAG_LOCAL_LB;
+ else
+ ctx.info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_LOCAL_LB;
+
+ if (ice_update_vsi(&vsi->back->hw, vsi->idx, &ctx, NULL))
+ return -ENODEV;
+
+ vsi->info = ctx.info;
+ return 0;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index e985766e6bb5..1628385a9672 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -157,6 +157,7 @@ void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx);
void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx);
void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx);
+int ice_vsi_update_local_lb(struct ice_vsi *vsi, bool set);
int ice_vsi_add_vlan_zero(struct ice_vsi *vsi);
int ice_vsi_del_vlan_zero(struct ice_vsi *vsi);
bool ice_vsi_has_non_zero_vlans(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index f02d44455772..9b36cce306b8 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -80,7 +80,7 @@ ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
void *data,
void (*cleanup)(struct flow_block_cb *block_cb));
-bool netif_is_ice(struct net_device *dev)
+bool netif_is_ice(const struct net_device *dev)
{
return dev && (dev->netdev_ops == &ice_netdev_ops);
}
@@ -3392,6 +3392,7 @@ static void ice_set_ops(struct ice_vsi *vsi)
netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_XSK_ZEROCOPY |
NETDEV_XDP_ACT_RX_SG;
+ netdev->xdp_zc_max_segs = ICE_MAX_BUF_TXD;
}
/**
@@ -5703,7 +5704,7 @@ static void ice_set_rx_mode(struct net_device *netdev)
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
- if (!vsi)
+ if (!vsi || ice_is_switchdev_running(vsi->back))
return;
/* Set the flags to synchronize filters
@@ -6255,7 +6256,7 @@ static void ice_tx_dim_work(struct work_struct *work)
u16 itr;
dim = container_of(work, struct dim, work);
- rc = (struct ice_ring_container *)dim->priv;
+ rc = dim->priv;
WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile));
@@ -6275,7 +6276,7 @@ static void ice_rx_dim_work(struct work_struct *work)
u16 itr;
dim = container_of(work, struct dim, work);
- rc = (struct ice_ring_container *)dim->priv;
+ rc = dim->priv;
WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile));
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c
index e30e12321abd..c686ac0935eb 100644
--- a/drivers/net/ethernet/intel/ice/ice_repr.c
+++ b/drivers/net/ethernet/intel/ice/ice_repr.c
@@ -254,7 +254,7 @@ static const struct net_device_ops ice_repr_netdev_ops = {
* ice_is_port_repr_netdev - Check if a given netdevice is a port representor netdev
* @netdev: pointer to netdev
*/
-bool ice_is_port_repr_netdev(struct net_device *netdev)
+bool ice_is_port_repr_netdev(const struct net_device *netdev)
{
return netdev && (netdev->netdev_ops == &ice_repr_netdev_ops);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h
index 9c2a6f496b3b..e1ee2d2c1d2d 100644
--- a/drivers/net/ethernet/intel/ice/ice_repr.h
+++ b/drivers/net/ethernet/intel/ice/ice_repr.h
@@ -12,6 +12,7 @@ struct ice_repr {
struct ice_q_vector *q_vector;
struct net_device *netdev;
struct metadata_dst *dst;
+ struct ice_esw_br_port *br_port;
#ifdef CONFIG_ICE_SWITCHDEV
/* info about slow path rule */
struct ice_rule_query_data sp_rule;
@@ -27,5 +28,5 @@ void ice_repr_stop_tx_queues(struct ice_repr *repr);
void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi);
struct ice_repr *ice_netdev_to_repr(struct net_device *netdev);
-bool ice_is_port_repr_netdev(struct net_device *netdev);
+bool ice_is_port_repr_netdev(const struct net_device *netdev);
#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 6db4ca7978cb..813fef5fd748 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -2272,6 +2272,10 @@ ice_get_recp_frm_fw(struct ice_hw *hw, struct ice_sw_recipe *recps, u8 rid,
/* Propagate some data to the recipe database */
recps[idx].is_root = !!is_root;
recps[idx].priority = root_bufs.content.act_ctrl_fwd_priority;
+ recps[idx].need_pass_l2 = root_bufs.content.act_ctrl &
+ ICE_AQ_RECIPE_ACT_NEED_PASS_L2;
+ recps[idx].allow_pass_l2 = root_bufs.content.act_ctrl &
+ ICE_AQ_RECIPE_ACT_ALLOW_PASS_L2;
bitmap_zero(recps[idx].res_idxs, ICE_MAX_FV_WORDS);
if (root_bufs.content.result_indx & ICE_AQ_RECIPE_RESULT_EN) {
recps[idx].chain_idx = root_bufs.content.result_indx &
@@ -4613,13 +4617,13 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = {
* ice_find_recp - find a recipe
* @hw: pointer to the hardware structure
* @lkup_exts: extension sequence to match
- * @tun_type: type of recipe tunnel
+ * @rinfo: information regarding the rule e.g. priority and action info
*
* Returns index of matching recipe, or ICE_MAX_NUM_RECIPES if not found.
*/
static u16
ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts,
- enum ice_sw_tunnel_type tun_type)
+ const struct ice_adv_rule_info *rinfo)
{
bool refresh_required = true;
struct ice_sw_recipe *recp;
@@ -4680,9 +4684,12 @@ ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts,
}
/* If for "i"th recipe the found was never set to false
* then it means we found our match
- * Also tun type of recipe needs to be checked
+ * Also tun type and *_pass_l2 of recipe needs to be
+ * checked
*/
- if (found && recp[i].tun_type == tun_type)
+ if (found && recp[i].tun_type == rinfo->tun_type &&
+ recp[i].need_pass_l2 == rinfo->need_pass_l2 &&
+ recp[i].allow_pass_l2 == rinfo->allow_pass_l2)
return i; /* Return the recipe ID */
}
}
@@ -4952,6 +4959,7 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
unsigned long *profiles)
{
DECLARE_BITMAP(result_idx_bm, ICE_MAX_FV_WORDS);
+ struct ice_aqc_recipe_content *content;
struct ice_aqc_recipe_data_elem *tmp;
struct ice_aqc_recipe_data_elem *buf;
struct ice_recp_grp_entry *entry;
@@ -5012,6 +5020,8 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
if (status)
goto err_unroll;
+ content = &buf[recps].content;
+
/* Clear the result index of the located recipe, as this will be
* updated, if needed, later in the recipe creation process.
*/
@@ -5022,26 +5032,24 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
/* if the recipe is a non-root recipe RID should be programmed
* as 0 for the rules to be applied correctly.
*/
- buf[recps].content.rid = 0;
- memset(&buf[recps].content.lkup_indx, 0,
- sizeof(buf[recps].content.lkup_indx));
+ content->rid = 0;
+ memset(&content->lkup_indx, 0,
+ sizeof(content->lkup_indx));
/* All recipes use look-up index 0 to match switch ID. */
- buf[recps].content.lkup_indx[0] = ICE_AQ_SW_ID_LKUP_IDX;
- buf[recps].content.mask[0] =
- cpu_to_le16(ICE_AQ_SW_ID_LKUP_MASK);
+ content->lkup_indx[0] = ICE_AQ_SW_ID_LKUP_IDX;
+ content->mask[0] = cpu_to_le16(ICE_AQ_SW_ID_LKUP_MASK);
/* Setup lkup_indx 1..4 to INVALID/ignore and set the mask
* to be 0
*/
for (i = 1; i <= ICE_NUM_WORDS_RECIPE; i++) {
- buf[recps].content.lkup_indx[i] = 0x80;
- buf[recps].content.mask[i] = 0;
+ content->lkup_indx[i] = 0x80;
+ content->mask[i] = 0;
}
for (i = 0; i < entry->r_group.n_val_pairs; i++) {
- buf[recps].content.lkup_indx[i + 1] = entry->fv_idx[i];
- buf[recps].content.mask[i + 1] =
- cpu_to_le16(entry->fv_mask[i]);
+ content->lkup_indx[i + 1] = entry->fv_idx[i];
+ content->mask[i + 1] = cpu_to_le16(entry->fv_mask[i]);
}
if (rm->n_grp_count > 1) {
@@ -5055,7 +5063,7 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
}
entry->chain_idx = chain_idx;
- buf[recps].content.result_indx =
+ content->result_indx =
ICE_AQ_RECIPE_RESULT_EN |
((chain_idx << ICE_AQ_RECIPE_RESULT_DATA_S) &
ICE_AQ_RECIPE_RESULT_DATA_M);
@@ -5069,7 +5077,13 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
ICE_MAX_NUM_RECIPES);
set_bit(buf[recps].recipe_indx,
(unsigned long *)buf[recps].recipe_bitmap);
- buf[recps].content.act_ctrl_fwd_priority = rm->priority;
+ content->act_ctrl_fwd_priority = rm->priority;
+
+ if (rm->need_pass_l2)
+ content->act_ctrl |= ICE_AQ_RECIPE_ACT_NEED_PASS_L2;
+
+ if (rm->allow_pass_l2)
+ content->act_ctrl |= ICE_AQ_RECIPE_ACT_ALLOW_PASS_L2;
recps++;
}
@@ -5107,9 +5121,11 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
if (status)
goto err_unroll;
+ content = &buf[recps].content;
+
buf[recps].recipe_indx = (u8)rid;
- buf[recps].content.rid = (u8)rid;
- buf[recps].content.rid |= ICE_AQ_RECIPE_ID_IS_ROOT;
+ content->rid = (u8)rid;
+ content->rid |= ICE_AQ_RECIPE_ID_IS_ROOT;
/* the new entry created should also be part of rg_list to
* make sure we have complete recipe
*/
@@ -5121,16 +5137,13 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
goto err_unroll;
}
last_chain_entry->rid = rid;
- memset(&buf[recps].content.lkup_indx, 0,
- sizeof(buf[recps].content.lkup_indx));
+ memset(&content->lkup_indx, 0, sizeof(content->lkup_indx));
/* All recipes use look-up index 0 to match switch ID. */
- buf[recps].content.lkup_indx[0] = ICE_AQ_SW_ID_LKUP_IDX;
- buf[recps].content.mask[0] =
- cpu_to_le16(ICE_AQ_SW_ID_LKUP_MASK);
+ content->lkup_indx[0] = ICE_AQ_SW_ID_LKUP_IDX;
+ content->mask[0] = cpu_to_le16(ICE_AQ_SW_ID_LKUP_MASK);
for (i = 1; i <= ICE_NUM_WORDS_RECIPE; i++) {
- buf[recps].content.lkup_indx[i] =
- ICE_AQ_RECIPE_LKUP_IGNORE;
- buf[recps].content.mask[i] = 0;
+ content->lkup_indx[i] = ICE_AQ_RECIPE_LKUP_IGNORE;
+ content->mask[i] = 0;
}
i = 1;
@@ -5142,8 +5155,8 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
last_chain_entry->chain_idx = ICE_INVAL_CHAIN_IND;
list_for_each_entry(entry, &rm->rg_list, l_entry) {
last_chain_entry->fv_idx[i] = entry->chain_idx;
- buf[recps].content.lkup_indx[i] = entry->chain_idx;
- buf[recps].content.mask[i++] = cpu_to_le16(0xFFFF);
+ content->lkup_indx[i] = entry->chain_idx;
+ content->mask[i++] = cpu_to_le16(0xFFFF);
set_bit(entry->rid, rm->r_bitmap);
}
list_add(&last_chain_entry->l_entry, &rm->rg_list);
@@ -5155,7 +5168,7 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
status = -EINVAL;
goto err_unroll;
}
- buf[recps].content.act_ctrl_fwd_priority = rm->priority;
+ content->act_ctrl_fwd_priority = rm->priority;
recps++;
rm->root_rid = (u8)rid;
@@ -5220,6 +5233,8 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
recp->priority = buf[buf_idx].content.act_ctrl_fwd_priority;
recp->n_grp_count = rm->n_grp_count;
recp->tun_type = rm->tun_type;
+ recp->need_pass_l2 = rm->need_pass_l2;
+ recp->allow_pass_l2 = rm->allow_pass_l2;
recp->recp_created = true;
}
rm->root_buf = buf;
@@ -5388,6 +5403,9 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
/* set the recipe priority if specified */
rm->priority = (u8)rinfo->priority;
+ rm->need_pass_l2 = rinfo->need_pass_l2;
+ rm->allow_pass_l2 = rinfo->allow_pass_l2;
+
/* Find offsets from the field vector. Pick the first one for all the
* recipes.
*/
@@ -5403,7 +5421,7 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
}
/* Look for a recipe which matches our requested fv / mask list */
- *rid = ice_find_recp(hw, lkup_exts, rinfo->tun_type);
+ *rid = ice_find_recp(hw, lkup_exts, rinfo);
if (*rid < ICE_MAX_NUM_RECIPES)
/* Success if found a recipe that match the existing criteria */
goto err_unroll;
@@ -5839,7 +5857,9 @@ static bool ice_rules_equal(const struct ice_adv_rule_info *first,
return first->sw_act.flag == second->sw_act.flag &&
first->tun_type == second->tun_type &&
first->vlan_type == second->vlan_type &&
- first->src_vsi == second->src_vsi;
+ first->src_vsi == second->src_vsi &&
+ first->need_pass_l2 == second->need_pass_l2 &&
+ first->allow_pass_l2 == second->allow_pass_l2;
}
/**
@@ -6078,7 +6098,8 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
if (!(rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI ||
rinfo->sw_act.fltr_act == ICE_FWD_TO_Q ||
rinfo->sw_act.fltr_act == ICE_FWD_TO_QGRP ||
- rinfo->sw_act.fltr_act == ICE_DROP_PACKET)) {
+ rinfo->sw_act.fltr_act == ICE_DROP_PACKET ||
+ rinfo->sw_act.fltr_act == ICE_NOP)) {
status = -EIO;
goto free_pkt_profile;
}
@@ -6089,7 +6110,8 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
goto free_pkt_profile;
}
- if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI)
+ if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI ||
+ rinfo->sw_act.fltr_act == ICE_NOP)
rinfo->sw_act.fwd_id.hw_vsi_id =
ice_get_hw_vsi_num(hw, vsi_handle);
@@ -6159,6 +6181,11 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP |
ICE_SINGLE_ACT_VALID_BIT;
break;
+ case ICE_NOP:
+ act |= FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M,
+ rinfo->sw_act.fwd_id.hw_vsi_id);
+ act &= ~ICE_SINGLE_ACT_VALID_BIT;
+ break;
default:
status = -EIO;
goto err_ice_add_adv_rule;
@@ -6439,7 +6466,7 @@ ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
return -EIO;
}
- rid = ice_find_recp(hw, &lkup_exts, rinfo->tun_type);
+ rid = ice_find_recp(hw, &lkup_exts, rinfo);
/* If did not find a recipe that match the existing criteria */
if (rid == ICE_MAX_NUM_RECIPES)
return -EINVAL;
@@ -6533,59 +6560,6 @@ ice_rem_adv_rule_by_id(struct ice_hw *hw,
}
/**
- * ice_rem_adv_rule_for_vsi - removes existing advanced switch rules for a
- * given VSI handle
- * @hw: pointer to the hardware structure
- * @vsi_handle: VSI handle for which we are supposed to remove all the rules.
- *
- * This function is used to remove all the rules for a given VSI and as soon
- * as removing a rule fails, it will return immediately with the error code,
- * else it will return success.
- */
-int ice_rem_adv_rule_for_vsi(struct ice_hw *hw, u16 vsi_handle)
-{
- struct ice_adv_fltr_mgmt_list_entry *list_itr, *tmp_entry;
- struct ice_vsi_list_map_info *map_info;
- struct ice_adv_rule_info rinfo;
- struct list_head *list_head;
- struct ice_switch_info *sw;
- int status;
- u8 rid;
-
- sw = hw->switch_info;
- for (rid = 0; rid < ICE_MAX_NUM_RECIPES; rid++) {
- if (!sw->recp_list[rid].recp_created)
- continue;
- if (!sw->recp_list[rid].adv_rule)
- continue;
-
- list_head = &sw->recp_list[rid].filt_rules;
- list_for_each_entry_safe(list_itr, tmp_entry, list_head,
- list_entry) {
- rinfo = list_itr->rule_info;
-
- if (rinfo.sw_act.fltr_act == ICE_FWD_TO_VSI_LIST) {
- map_info = list_itr->vsi_list_info;
- if (!map_info)
- continue;
-
- if (!test_bit(vsi_handle, map_info->vsi_map))
- continue;
- } else if (rinfo.sw_act.vsi_handle != vsi_handle) {
- continue;
- }
-
- rinfo.sw_act.vsi_handle = vsi_handle;
- status = ice_rem_adv_rule(hw, list_itr->lkups,
- list_itr->lkups_cnt, &rinfo);
- if (status)
- return status;
- }
- }
- return 0;
-}
-
-/**
* ice_replay_vsi_adv_rule - Replay advanced rule for requested VSI
* @hw: pointer to the hardware structure
* @vsi_handle: driver VSI handle
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index c84b56fe84a5..10f5a0ae199e 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -191,6 +191,8 @@ struct ice_adv_rule_info {
u16 vlan_type;
u16 fltr_rule_id;
u32 priority;
+ u16 need_pass_l2:1;
+ u16 allow_pass_l2:1;
u16 src_vsi;
struct ice_sw_act_ctrl sw_act;
struct ice_adv_rule_flags_info flags_info;
@@ -254,6 +256,9 @@ struct ice_sw_recipe {
*/
u8 priority;
+ u8 need_pass_l2:1;
+ u8 allow_pass_l2:1;
+
struct list_head rg_list;
/* AQ buffer associated with this recipe */
@@ -379,7 +384,6 @@ int
ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
bool rm_vlan_promisc);
-int ice_rem_adv_rule_for_vsi(struct ice_hw *hw, u16 vsi_handle);
int
ice_rem_adv_rule_by_id(struct ice_hw *hw,
struct ice_rule_query_data *remove_entry);
diff --git a/drivers/net/ethernet/intel/ice/ice_trace.h b/drivers/net/ethernet/intel/ice/ice_trace.h
index ae98d5a8ff60..b2f5c9fe0149 100644
--- a/drivers/net/ethernet/intel/ice/ice_trace.h
+++ b/drivers/net/ethernet/intel/ice/ice_trace.h
@@ -21,6 +21,7 @@
#define _ICE_TRACE_H_
#include <linux/tracepoint.h>
+#include "ice_eswitch_br.h"
/* ice_trace() macro enables shared code to refer to trace points
* like:
@@ -240,6 +241,95 @@ DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_fw_req);
DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_fw_done);
DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_complete);
+DECLARE_EVENT_CLASS(ice_esw_br_fdb_template,
+ TP_PROTO(struct ice_esw_br_fdb_entry *fdb),
+ TP_ARGS(fdb),
+ TP_STRUCT__entry(__array(char, dev_name, IFNAMSIZ)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __field(int, flags)),
+ TP_fast_assign(strscpy(__entry->dev_name,
+ netdev_name(fdb->dev),
+ IFNAMSIZ);
+ memcpy(__entry->addr, fdb->data.addr, ETH_ALEN);
+ __entry->vid = fdb->data.vid;
+ __entry->flags = fdb->flags;),
+ TP_printk("net_device=%s addr=%pM vid=%u flags=%x",
+ __entry->dev_name,
+ __entry->addr,
+ __entry->vid,
+ __entry->flags)
+);
+
+DEFINE_EVENT(ice_esw_br_fdb_template,
+ ice_eswitch_br_fdb_entry_create,
+ TP_PROTO(struct ice_esw_br_fdb_entry *fdb),
+ TP_ARGS(fdb)
+);
+
+DEFINE_EVENT(ice_esw_br_fdb_template,
+ ice_eswitch_br_fdb_entry_find_and_delete,
+ TP_PROTO(struct ice_esw_br_fdb_entry *fdb),
+ TP_ARGS(fdb)
+);
+
+DECLARE_EVENT_CLASS(ice_esw_br_vlan_template,
+ TP_PROTO(struct ice_esw_br_vlan *vlan),
+ TP_ARGS(vlan),
+ TP_STRUCT__entry(__field(u16, vid)
+ __field(u16, flags)),
+ TP_fast_assign(__entry->vid = vlan->vid;
+ __entry->flags = vlan->flags;),
+ TP_printk("vid=%u flags=%x",
+ __entry->vid,
+ __entry->flags)
+);
+
+DEFINE_EVENT(ice_esw_br_vlan_template,
+ ice_eswitch_br_vlan_create,
+ TP_PROTO(struct ice_esw_br_vlan *vlan),
+ TP_ARGS(vlan)
+);
+
+DEFINE_EVENT(ice_esw_br_vlan_template,
+ ice_eswitch_br_vlan_cleanup,
+ TP_PROTO(struct ice_esw_br_vlan *vlan),
+ TP_ARGS(vlan)
+);
+
+#define ICE_ESW_BR_PORT_NAME_L 16
+
+DECLARE_EVENT_CLASS(ice_esw_br_port_template,
+ TP_PROTO(struct ice_esw_br_port *port),
+ TP_ARGS(port),
+ TP_STRUCT__entry(__field(u16, vport_num)
+ __array(char, port_type, ICE_ESW_BR_PORT_NAME_L)),
+ TP_fast_assign(__entry->vport_num = port->vsi_idx;
+ if (port->type == ICE_ESWITCH_BR_UPLINK_PORT)
+ strscpy(__entry->port_type,
+ "Uplink",
+ ICE_ESW_BR_PORT_NAME_L);
+ else
+ strscpy(__entry->port_type,
+ "VF Representor",
+ ICE_ESW_BR_PORT_NAME_L);),
+ TP_printk("vport_num=%u port type=%s",
+ __entry->vport_num,
+ __entry->port_type)
+);
+
+DEFINE_EVENT(ice_esw_br_port_template,
+ ice_eswitch_br_port_link,
+ TP_PROTO(struct ice_esw_br_port *port),
+ TP_ARGS(port)
+);
+
+DEFINE_EVENT(ice_esw_br_port_template,
+ ice_eswitch_br_port_unlink,
+ TP_PROTO(struct ice_esw_br_port *port),
+ TP_ARGS(port)
+);
+
/* End tracepoints */
#endif /* _ICE_TRACE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index a09556e57803..df9171a1a34f 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -1033,6 +1033,7 @@ enum ice_sw_fwd_act_type {
ICE_FWD_TO_Q,
ICE_FWD_TO_QGRP,
ICE_DROP_PACKET,
+ ICE_NOP,
ICE_INVAL_ACT
};
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
index b1ffb81893d4..d7b10dc67f03 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
@@ -21,6 +21,99 @@ noop_vlan(struct ice_vsi __always_unused *vsi)
return 0;
}
+static void ice_port_vlan_on(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops;
+ struct ice_pf *pf = vsi->back;
+
+ if (ice_is_dvm_ena(&pf->hw)) {
+ vlan_ops = &vsi->outer_vlan_ops;
+
+ /* setup outer VLAN ops */
+ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
+ vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+ vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+
+ /* setup inner VLAN ops */
+ vlan_ops = &vsi->inner_vlan_ops;
+ vlan_ops->add_vlan = noop_vlan_arg;
+ vlan_ops->del_vlan = noop_vlan_arg;
+ vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+ } else {
+ vlan_ops = &vsi->inner_vlan_ops;
+
+ vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
+ vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan;
+ vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan;
+ }
+ vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering;
+}
+
+static void ice_port_vlan_off(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops;
+ struct ice_pf *pf = vsi->back;
+
+ /* setup inner VLAN ops */
+ vlan_ops = &vsi->inner_vlan_ops;
+
+ vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+
+ if (ice_is_dvm_ena(&pf->hw)) {
+ vlan_ops = &vsi->outer_vlan_ops;
+
+ vlan_ops->del_vlan = ice_vsi_del_vlan;
+ vlan_ops->ena_stripping = ice_vsi_ena_outer_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_outer_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
+ } else {
+ vlan_ops->del_vlan = ice_vsi_del_vlan;
+ }
+
+ if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags))
+ vlan_ops->ena_rx_filtering = noop_vlan;
+ else
+ vlan_ops->ena_rx_filtering =
+ ice_vsi_ena_rx_vlan_filtering;
+}
+
+/**
+ * ice_vf_vsi_enable_port_vlan - Set VSI VLAN ops to support port VLAN
+ * @vsi: VF's VSI being configured
+ *
+ * The function won't create port VLAN, it only allows to create port VLAN
+ * using VLAN ops on the VF VSI.
+ */
+void ice_vf_vsi_enable_port_vlan(struct ice_vsi *vsi)
+{
+ if (WARN_ON_ONCE(!vsi->vf))
+ return;
+
+ ice_port_vlan_on(vsi);
+}
+
+/**
+ * ice_vf_vsi_disable_port_vlan - Clear VSI support for creating port VLAN
+ * @vsi: VF's VSI being configured
+ *
+ * The function should be called after removing port VLAN on VSI
+ * (using VLAN ops)
+ */
+void ice_vf_vsi_disable_port_vlan(struct ice_vsi *vsi)
+{
+ if (WARN_ON_ONCE(!vsi->vf))
+ return;
+
+ ice_port_vlan_off(vsi);
+}
+
/**
* ice_vf_vsi_init_vlan_ops - Initialize default VSI VLAN ops for VF VSI
* @vsi: VF's VSI being configured
@@ -39,91 +132,18 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi)
if (WARN_ON(!vf))
return;
- if (ice_is_dvm_ena(&pf->hw)) {
- vlan_ops = &vsi->outer_vlan_ops;
+ if (ice_vf_is_port_vlan_ena(vf))
+ ice_port_vlan_on(vsi);
+ else
+ ice_port_vlan_off(vsi);
- /* outer VLAN ops regardless of port VLAN config */
- vlan_ops->add_vlan = ice_vsi_add_vlan;
- vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering;
- vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
-
- if (ice_vf_is_port_vlan_ena(vf)) {
- /* setup outer VLAN ops */
- vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
- /* all Rx traffic should be in the domain of the
- * assigned port VLAN, so prevent disabling Rx VLAN
- * filtering
- */
- vlan_ops->dis_rx_filtering = noop_vlan;
- vlan_ops->ena_rx_filtering =
- ice_vsi_ena_rx_vlan_filtering;
-
- /* setup inner VLAN ops */
- vlan_ops = &vsi->inner_vlan_ops;
- vlan_ops->add_vlan = noop_vlan_arg;
- vlan_ops->del_vlan = noop_vlan_arg;
- vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
- vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
- vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
- vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
- } else {
- vlan_ops->dis_rx_filtering =
- ice_vsi_dis_rx_vlan_filtering;
-
- if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags))
- vlan_ops->ena_rx_filtering = noop_vlan;
- else
- vlan_ops->ena_rx_filtering =
- ice_vsi_ena_rx_vlan_filtering;
-
- vlan_ops->del_vlan = ice_vsi_del_vlan;
- vlan_ops->ena_stripping = ice_vsi_ena_outer_stripping;
- vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
- vlan_ops->ena_insertion = ice_vsi_ena_outer_insertion;
- vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
-
- /* setup inner VLAN ops */
- vlan_ops = &vsi->inner_vlan_ops;
-
- vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
- vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
- vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
- vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
- }
- } else {
- vlan_ops = &vsi->inner_vlan_ops;
+ vlan_ops = ice_is_dvm_ena(&pf->hw) ?
+ &vsi->outer_vlan_ops : &vsi->inner_vlan_ops;
- /* inner VLAN ops regardless of port VLAN config */
- vlan_ops->add_vlan = ice_vsi_add_vlan;
- vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
- vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering;
- vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
-
- if (ice_vf_is_port_vlan_ena(vf)) {
- vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
- vlan_ops->ena_rx_filtering =
- ice_vsi_ena_rx_vlan_filtering;
- /* all Rx traffic should be in the domain of the
- * assigned port VLAN, so prevent disabling Rx VLAN
- * filtering
- */
- vlan_ops->dis_rx_filtering = noop_vlan;
- } else {
- vlan_ops->dis_rx_filtering =
- ice_vsi_dis_rx_vlan_filtering;
- if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags))
- vlan_ops->ena_rx_filtering = noop_vlan;
- else
- vlan_ops->ena_rx_filtering =
- ice_vsi_ena_rx_vlan_filtering;
-
- vlan_ops->del_vlan = ice_vsi_del_vlan;
- vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
- vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
- vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
- vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
- }
- }
+ vlan_ops->add_vlan = ice_vsi_add_vlan;
+ vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+ vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering;
+ vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h
index 875a4e615f39..df8aa09df3e3 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h
@@ -13,7 +13,11 @@ void ice_vf_vsi_cfg_svm_legacy_vlan_mode(struct ice_vsi *vsi);
#ifdef CONFIG_PCI_IOV
void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi);
+void ice_vf_vsi_enable_port_vlan(struct ice_vsi *vsi);
+void ice_vf_vsi_disable_port_vlan(struct ice_vsi *vsi);
#else
static inline void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) { }
+static inline void ice_vf_vsi_enable_port_vlan(struct ice_vsi *vsi) { }
+static inline void ice_vf_vsi_disable_port_vlan(struct ice_vsi *vsi) { }
#endif /* CONFIG_PCI_IOV */
#endif /* _ICE_PF_VSI_VLAN_OPS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
index 5b4a0abb4607..76266e709a39 100644
--- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
@@ -202,6 +202,24 @@ int ice_vsi_dis_inner_insertion(struct ice_vsi *vsi)
return ice_vsi_manage_vlan_insertion(vsi);
}
+static void
+ice_save_vlan_info(struct ice_aqc_vsi_props *info,
+ struct ice_vsi_vlan_info *vlan)
+{
+ vlan->sw_flags2 = info->sw_flags2;
+ vlan->inner_vlan_flags = info->inner_vlan_flags;
+ vlan->outer_vlan_flags = info->outer_vlan_flags;
+}
+
+static void
+ice_restore_vlan_info(struct ice_aqc_vsi_props *info,
+ struct ice_vsi_vlan_info *vlan)
+{
+ info->sw_flags2 = vlan->sw_flags2;
+ info->inner_vlan_flags = vlan->inner_vlan_flags;
+ info->outer_vlan_flags = vlan->outer_vlan_flags;
+}
+
/**
* __ice_vsi_set_inner_port_vlan - set port VLAN VSI context settings to enable a port VLAN
* @vsi: the VSI to update
@@ -218,6 +236,7 @@ static int __ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, u16 pvid_info)
if (!ctxt)
return -ENOMEM;
+ ice_save_vlan_info(&vsi->info, &vsi->vlan_info);
ctxt->info = vsi->info;
info = &ctxt->info;
info->inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTUNTAGGED |
@@ -259,6 +278,33 @@ int ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
return __ice_vsi_set_inner_port_vlan(vsi, port_vlan_info);
}
+int ice_vsi_clear_inner_port_vlan(struct ice_vsi *vsi)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_aqc_vsi_props *info;
+ struct ice_vsi_ctx *ctxt;
+ int ret;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ice_restore_vlan_info(&vsi->info, &vsi->vlan_info);
+ vsi->info.port_based_inner_vlan = 0;
+ ctxt->info = vsi->info;
+ info = &ctxt->info;
+ info->valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
+ ICE_AQ_VSI_PROP_SW_VALID);
+
+ ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (ret)
+ dev_err(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %d aq_err %s\n",
+ ret, ice_aq_str(hw->adminq.sq_last_status));
+
+ kfree(ctxt);
+ return ret;
+}
+
/**
* ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
* @vsi: VSI to enable or disable VLAN pruning on
@@ -647,6 +693,7 @@ __ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, u16 vlan_info, u16 tpid)
if (!ctxt)
return -ENOMEM;
+ ice_save_vlan_info(&vsi->info, &vsi->vlan_info);
ctxt->info = vsi->info;
ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
@@ -689,9 +736,6 @@ __ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, u16 vlan_info, u16 tpid)
* used if DVM is supported. Also, this function should never be called directly
* as it should be part of ice_vsi_vlan_ops if it's needed.
*
- * This function does not support clearing the port VLAN as there is currently
- * no use case for this.
- *
* Use the ice_vlan structure passed in to set this VSI in a port VLAN.
*/
int ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
@@ -705,3 +749,37 @@ int ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
return __ice_vsi_set_outer_port_vlan(vsi, port_vlan_info, vlan->tpid);
}
+
+/**
+ * ice_vsi_clear_outer_port_vlan - clear outer port vlan
+ * @vsi: VSI to configure
+ *
+ * The function is restoring previously set vlan config (saved in
+ * vsi->vlan_info). Setting happens in port vlan configuration.
+ */
+int ice_vsi_clear_outer_port_vlan(struct ice_vsi *vsi)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ int err;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ice_restore_vlan_info(&vsi->info, &vsi->vlan_info);
+ vsi->info.port_based_outer_vlan = 0;
+ ctxt->info = vsi->info;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID |
+ ICE_AQ_VSI_PROP_SW_VALID);
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for clearing outer port based VLAN failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+
+ kfree(ctxt);
+ return err;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h
index f459909490ec..f0d84d11bd5b 100644
--- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h
@@ -7,6 +7,12 @@
#include <linux/types.h>
#include "ice_vlan.h"
+struct ice_vsi_vlan_info {
+ u8 sw_flags2;
+ u8 inner_vlan_flags;
+ u8 outer_vlan_flags;
+};
+
struct ice_vsi;
int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
@@ -17,6 +23,7 @@ int ice_vsi_dis_inner_stripping(struct ice_vsi *vsi);
int ice_vsi_ena_inner_insertion(struct ice_vsi *vsi, u16 tpid);
int ice_vsi_dis_inner_insertion(struct ice_vsi *vsi);
int ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+int ice_vsi_clear_inner_port_vlan(struct ice_vsi *vsi);
int ice_vsi_ena_rx_vlan_filtering(struct ice_vsi *vsi);
int ice_vsi_dis_rx_vlan_filtering(struct ice_vsi *vsi);
@@ -28,5 +35,6 @@ int ice_vsi_dis_outer_stripping(struct ice_vsi *vsi);
int ice_vsi_ena_outer_insertion(struct ice_vsi *vsi, u16 tpid);
int ice_vsi_dis_outer_insertion(struct ice_vsi *vsi);
int ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+int ice_vsi_clear_outer_port_vlan(struct ice_vsi *vsi);
#endif /* _ICE_VSI_VLAN_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h
index 5b47568f6256..b2d2330dedcb 100644
--- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h
@@ -21,6 +21,7 @@ struct ice_vsi_vlan_ops {
int (*ena_tx_filtering)(struct ice_vsi *vsi);
int (*dis_tx_filtering)(struct ice_vsi *vsi);
int (*set_port_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan);
+ int (*clear_port_vlan)(struct ice_vsi *vsi);
};
void ice_vsi_init_vlan_ops(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index a7fe2b4ce655..2a3f0834e139 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -546,19 +546,6 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
}
/**
- * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
- * @rx_ring: Rx ring
- */
-static void ice_bump_ntc(struct ice_rx_ring *rx_ring)
-{
- int ntc = rx_ring->next_to_clean + 1;
-
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
- prefetch(ICE_RX_DESC(rx_ring, ntc));
-}
-
-/**
* ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
* @rx_ring: Rx ring
* @xdp: Pointer to XDP buffer
@@ -572,8 +559,14 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
unsigned int totalsize = xdp->data_end - xdp->data_meta;
unsigned int metasize = xdp->data - xdp->data_meta;
+ struct skb_shared_info *sinfo = NULL;
struct sk_buff *skb;
+ u32 nr_frags = 0;
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = sinfo->nr_frags;
+ }
net_prefetch(xdp->data_meta);
skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
@@ -589,6 +582,29 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
__skb_pull(skb, metasize);
}
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+
+ for (int i = 0; i < nr_frags; i++) {
+ struct skb_shared_info *skinfo = skb_shinfo(skb);
+ skb_frag_t *frag = &sinfo->frags[i];
+ struct page *page;
+ void *addr;
+
+ page = dev_alloc_page();
+ if (!page) {
+ dev_kfree_skb(skb);
+ return NULL;
+ }
+ addr = page_to_virt(page);
+
+ memcpy(addr, skb_frag_page(frag), skb_frag_size(frag));
+
+ __skb_fill_page_desc_noacc(skinfo, skinfo->nr_frags++,
+ addr, 0, skb_frag_size(frag));
+ }
+
+out:
xsk_buff_free(xdp);
return skb;
}
@@ -597,7 +613,7 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
* ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
* @xdp_ring: XDP Tx ring
*/
-static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
+static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
{
u16 ntc = xdp_ring->next_to_clean;
struct ice_tx_desc *tx_desc;
@@ -619,7 +635,7 @@ static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
}
if (!completed_frames)
- return;
+ return 0;
if (likely(!xdp_ring->xdp_tx_active)) {
xsk_frames = completed_frames;
@@ -649,6 +665,8 @@ skip:
xdp_ring->next_to_clean -= cnt;
if (xsk_frames)
xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+
+ return completed_frames;
}
/**
@@ -666,37 +684,72 @@ skip:
static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
struct ice_tx_ring *xdp_ring)
{
+ struct skb_shared_info *sinfo = NULL;
u32 size = xdp->data_end - xdp->data;
u32 ntu = xdp_ring->next_to_use;
struct ice_tx_desc *tx_desc;
struct ice_tx_buf *tx_buf;
- dma_addr_t dma;
-
- if (ICE_DESC_UNUSED(xdp_ring) < ICE_RING_QUARTER(xdp_ring)) {
- ice_clean_xdp_irq_zc(xdp_ring);
- if (!ICE_DESC_UNUSED(xdp_ring)) {
- xdp_ring->ring_stats->tx_stats.tx_busy++;
- return ICE_XDP_CONSUMED;
- }
+ struct xdp_buff *head;
+ u32 nr_frags = 0;
+ u32 free_space;
+ u32 frag = 0;
+
+ free_space = ICE_DESC_UNUSED(xdp_ring);
+ if (free_space < ICE_RING_QUARTER(xdp_ring))
+ free_space += ice_clean_xdp_irq_zc(xdp_ring);
+
+ if (unlikely(!free_space))
+ goto busy;
+
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = sinfo->nr_frags;
+ if (free_space < nr_frags + 1)
+ goto busy;
}
- dma = xsk_buff_xdp_get_dma(xdp);
- xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size);
-
- tx_buf = &xdp_ring->tx_buf[ntu];
- tx_buf->xdp = xdp;
- tx_buf->type = ICE_TX_BUF_XSK_TX;
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
- tx_desc->buf_addr = cpu_to_le64(dma);
- tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
- 0, size, 0);
- xdp_ring->xdp_tx_active++;
+ tx_buf = &xdp_ring->tx_buf[ntu];
+ head = xdp;
+
+ for (;;) {
+ dma_addr_t dma;
+
+ dma = xsk_buff_xdp_get_dma(xdp);
+ xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size);
+
+ tx_buf->xdp = xdp;
+ tx_buf->type = ICE_TX_BUF_XSK_TX;
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0);
+ /* account for each xdp_buff from xsk_buff_pool */
+ xdp_ring->xdp_tx_active++;
+
+ if (++ntu == xdp_ring->count)
+ ntu = 0;
+
+ if (frag == nr_frags)
+ break;
+
+ tx_desc = ICE_TX_DESC(xdp_ring, ntu);
+ tx_buf = &xdp_ring->tx_buf[ntu];
+
+ xdp = xsk_buff_get_frag(head);
+ size = skb_frag_size(&sinfo->frags[frag]);
+ frag++;
+ }
- if (++ntu == xdp_ring->count)
- ntu = 0;
xdp_ring->next_to_use = ntu;
+ /* update last descriptor from a frame with EOP */
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(ICE_TX_DESC_CMD_EOP << ICE_TXD_QW1_CMD_S);
return ICE_XDP_TX;
+
+busy:
+ xdp_ring->ring_stats->tx_stats.tx_busy++;
+
+ return ICE_XDP_CONSUMED;
}
/**
@@ -752,6 +805,34 @@ out_failure:
return result;
}
+static int
+ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first,
+ struct xdp_buff *xdp, const unsigned int size)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(first);
+
+ if (!size)
+ return 0;
+
+ if (!xdp_buff_has_frags(first)) {
+ sinfo->nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ xdp_buff_set_frags_flag(first);
+ }
+
+ if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
+ xsk_buff_free(first);
+ return -ENOMEM;
+ }
+
+ __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
+ virt_to_page(xdp->data_hard_start), 0, size);
+ sinfo->xdp_frags_size += size;
+ xsk_buff_add_frag(xdp);
+
+ return 0;
+}
+
/**
* ice_clean_rx_irq_zc - consumes packets from the hardware ring
* @rx_ring: AF_XDP Rx ring
@@ -762,9 +843,14 @@ out_failure:
int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ struct xsk_buff_pool *xsk_pool = rx_ring->xsk_pool;
+ u32 ntc = rx_ring->next_to_clean;
+ u32 ntu = rx_ring->next_to_use;
+ struct xdp_buff *first = NULL;
struct ice_tx_ring *xdp_ring;
unsigned int xdp_xmit = 0;
struct bpf_prog *xdp_prog;
+ u32 cnt = rx_ring->count;
bool failure = false;
int entries_to_alloc;
@@ -774,6 +860,9 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
xdp_ring = rx_ring->xdp_ring;
+ if (ntc != rx_ring->first_desc)
+ first = *ice_xdp_buf(rx_ring, rx_ring->first_desc);
+
while (likely(total_rx_packets < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;
unsigned int size, xdp_res = 0;
@@ -783,7 +872,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
u16 vlan_tag = 0;
u16 rx_ptype;
- rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+ rx_desc = ICE_RX_DESC(rx_ring, ntc);
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
@@ -795,51 +884,61 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
*/
dma_rmb();
- if (unlikely(rx_ring->next_to_clean == rx_ring->next_to_use))
+ if (unlikely(ntc == ntu))
break;
- xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean);
+ xdp = *ice_xdp_buf(rx_ring, ntc);
size = le16_to_cpu(rx_desc->wb.pkt_len) &
ICE_RX_FLX_DESC_PKT_LEN_M;
- if (!size) {
- xdp->data = NULL;
- xdp->data_end = NULL;
- xdp->data_hard_start = NULL;
- xdp->data_meta = NULL;
- goto construct_skb;
- }
xsk_buff_set_size(xdp, size);
- xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool);
+ xsk_buff_dma_sync_for_cpu(xdp, xsk_pool);
+
+ if (!first) {
+ first = xdp;
+ xdp_buff_clear_frags_flag(first);
+ } else if (ice_add_xsk_frag(rx_ring, first, xdp, size)) {
+ break;
+ }
+
+ if (++ntc == cnt)
+ ntc = 0;
- xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring);
+ if (ice_is_non_eop(rx_ring, rx_desc))
+ continue;
+
+ xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring);
if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
xdp_xmit |= xdp_res;
} else if (xdp_res == ICE_XDP_EXIT) {
failure = true;
+ first = NULL;
+ rx_ring->first_desc = ntc;
break;
} else if (xdp_res == ICE_XDP_CONSUMED) {
- xsk_buff_free(xdp);
+ xsk_buff_free(first);
} else if (xdp_res == ICE_XDP_PASS) {
goto construct_skb;
}
- total_rx_bytes += size;
+ total_rx_bytes += xdp_get_buff_len(first);
total_rx_packets++;
- ice_bump_ntc(rx_ring);
+ first = NULL;
+ rx_ring->first_desc = ntc;
continue;
construct_skb:
/* XDP_PASS path */
- skb = ice_construct_skb_zc(rx_ring, xdp);
+ skb = ice_construct_skb_zc(rx_ring, first);
if (!skb) {
rx_ring->ring_stats->rx_stats.alloc_buf_failed++;
break;
}
- ice_bump_ntc(rx_ring);
+ first = NULL;
+ rx_ring->first_desc = ntc;
if (eth_skb_pad(skb)) {
skb = NULL;
@@ -858,18 +957,22 @@ construct_skb:
ice_receive_skb(rx_ring, skb, vlan_tag);
}
- entries_to_alloc = ICE_DESC_UNUSED(rx_ring);
+ rx_ring->next_to_clean = ntc;
+ entries_to_alloc = ICE_RX_DESC_UNUSED(rx_ring);
if (entries_to_alloc > ICE_RING_QUARTER(rx_ring))
failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc);
ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0);
ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
- if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
- if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
- xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
+ if (xsk_uses_need_wakeup(xsk_pool)) {
+ /* ntu could have changed when allocating entries above, so
+ * use rx_ring value instead of stack based one
+ */
+ if (failure || ntc == rx_ring->next_to_use)
+ xsk_set_rx_need_wakeup(xsk_pool);
else
- xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);
+ xsk_clear_rx_need_wakeup(xsk_pool);
return (int)total_rx_packets;
}
@@ -894,7 +997,7 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
tx_desc->buf_addr = cpu_to_le64(dma);
- tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+ tx_desc->cmd_type_offset_bsz = ice_build_ctob(xsk_is_eop_desc(desc),
0, desc->len, 0);
*total_bytes += desc->len;
@@ -921,7 +1024,7 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de
tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
tx_desc->buf_addr = cpu_to_le64(dma);
- tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+ tx_desc->cmd_type_offset_bsz = ice_build_ctob(xsk_is_eop_desc(&descs[i]),
0, descs[i].len, 0);
*total_bytes += descs[i].len;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index bdeb36790d77..2a10254edbbd 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -6143,6 +6143,26 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
return 0;
}
+static void igc_taprio_stats(struct net_device *dev,
+ struct tc_taprio_qopt_stats *stats)
+{
+ /* When Strict_End is enabled, the tx_overruns counter
+ * will always be zero.
+ */
+ stats->tx_overruns = 0;
+}
+
+static void igc_taprio_queue_stats(struct net_device *dev,
+ struct tc_taprio_qopt_queue_stats *queue_stats)
+{
+ struct tc_taprio_qopt_stats *stats = &queue_stats->stats;
+
+ /* When Strict_End is enabled, the tx_overruns counter
+ * will always be zero.
+ */
+ stats->tx_overruns = 0;
+}
+
static int igc_save_qbv_schedule(struct igc_adapter *adapter,
struct tc_taprio_qopt_offload *qopt)
{
@@ -6153,11 +6173,20 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
size_t n;
int i;
- if (qopt->cmd == TAPRIO_CMD_DESTROY)
+ switch (qopt->cmd) {
+ case TAPRIO_CMD_REPLACE:
+ break;
+ case TAPRIO_CMD_DESTROY:
return igc_tsn_clear_schedule(adapter);
-
- if (qopt->cmd != TAPRIO_CMD_REPLACE)
+ case TAPRIO_CMD_STATS:
+ igc_taprio_stats(adapter->netdev, &qopt->stats);
+ return 0;
+ case TAPRIO_CMD_QUEUE_STATS:
+ igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats);
+ return 0;
+ default:
return -EOPNOTSUPP;
+ }
if (qopt->base_time < 0)
return -ERANGE;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
index 75e83ea2a926..0f9bc4f8ec3b 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
@@ -593,8 +593,6 @@ static int mvpp2_dbgfs_c2_entry_init(struct dentry *parent,
sprintf(c2_entry_name, "%03d", id);
c2_entry_dir = debugfs_create_dir(c2_entry_name, parent);
- if (!c2_entry_dir)
- return -ENOMEM;
entry = &priv->dbgfs_entries->c2_entries[id];
@@ -626,8 +624,6 @@ static int mvpp2_dbgfs_flow_tbl_entry_init(struct dentry *parent,
sprintf(flow_tbl_entry_name, "%03d", id);
flow_tbl_entry_dir = debugfs_create_dir(flow_tbl_entry_name, parent);
- if (!flow_tbl_entry_dir)
- return -ENOMEM;
entry = &priv->dbgfs_entries->flt_entries[id];
@@ -646,12 +642,8 @@ static int mvpp2_dbgfs_cls_init(struct dentry *parent, struct mvpp2 *priv)
int i, ret;
cls_dir = debugfs_create_dir("classifier", parent);
- if (!cls_dir)
- return -ENOMEM;
c2_dir = debugfs_create_dir("c2", cls_dir);
- if (!c2_dir)
- return -ENOMEM;
for (i = 0; i < MVPP22_CLS_C2_N_ENTRIES; i++) {
ret = mvpp2_dbgfs_c2_entry_init(c2_dir, priv, i);
@@ -660,8 +652,6 @@ static int mvpp2_dbgfs_cls_init(struct dentry *parent, struct mvpp2 *priv)
}
flow_tbl_dir = debugfs_create_dir("flow_table", cls_dir);
- if (!flow_tbl_dir)
- return -ENOMEM;
for (i = 0; i < MVPP2_CLS_FLOWS_TBL_SIZE; i++) {
ret = mvpp2_dbgfs_flow_tbl_entry_init(flow_tbl_dir, priv, i);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index eba307eee2b2..ed66c5989102 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -235,7 +235,7 @@ M(NPC_GET_KEX_CFG, 0x600c, npc_get_kex_cfg, \
M(NPC_INSTALL_FLOW, 0x600d, npc_install_flow, \
npc_install_flow_req, npc_install_flow_rsp) \
M(NPC_DELETE_FLOW, 0x600e, npc_delete_flow, \
- npc_delete_flow_req, msg_rsp) \
+ npc_delete_flow_req, npc_delete_flow_rsp) \
M(NPC_MCAM_READ_ENTRY, 0x600f, npc_mcam_read_entry, \
npc_mcam_read_entry_req, \
npc_mcam_read_entry_rsp) \
@@ -1491,6 +1491,8 @@ struct npc_install_flow_req {
u8 vtag0_op;
u16 vtag1_def;
u8 vtag1_op;
+ /* old counter value */
+ u16 cntr_val;
};
struct npc_install_flow_rsp {
@@ -1506,6 +1508,11 @@ struct npc_delete_flow_req {
u8 all; /* PF + VFs */
};
+struct npc_delete_flow_rsp {
+ struct mbox_msghdr hdr;
+ u16 cntr_val;
+};
+
struct npc_mcam_read_entry_req {
struct mbox_msghdr hdr;
u16 entry; /* MCAM entry to read */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 952319453701..9c365cc3e736 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -1192,7 +1192,7 @@ find_rule:
write_req.enable_entry = (u8)enable;
/* if counter is available then clear and use it */
if (req->set_cntr && rule->has_cntr) {
- rvu_write64(rvu, blkaddr, NPC_AF_MATCH_STATX(rule->cntr), 0x00);
+ rvu_write64(rvu, blkaddr, NPC_AF_MATCH_STATX(rule->cntr), req->cntr_val);
write_req.set_cntr = 1;
write_req.cntr = rule->cntr;
}
@@ -1407,12 +1407,13 @@ static int npc_delete_flow(struct rvu *rvu, struct rvu_npc_mcam_rule *rule,
int rvu_mbox_handler_npc_delete_flow(struct rvu *rvu,
struct npc_delete_flow_req *req,
- struct msg_rsp *rsp)
+ struct npc_delete_flow_rsp *rsp)
{
struct npc_mcam *mcam = &rvu->hw->mcam;
struct rvu_npc_mcam_rule *iter, *tmp;
u16 pcifunc = req->hdr.pcifunc;
struct list_head del_list;
+ int blkaddr;
INIT_LIST_HEAD(&del_list);
@@ -1428,6 +1429,10 @@ int rvu_mbox_handler_npc_delete_flow(struct rvu *rvu,
list_move_tail(&iter->list, &del_list);
/* single rule */
} else if (req->entry == iter->entry) {
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+ if (blkaddr)
+ rsp->cntr_val = rvu_read64(rvu, blkaddr,
+ NPC_AF_MATCH_STATX(iter->cntr));
list_move_tail(&iter->list, &del_list);
break;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
index 592b317f4637..854045ed3b06 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
@@ -158,6 +158,7 @@ void rvu_switch_enable(struct rvu *rvu)
struct npc_mcam_alloc_entry_req alloc_req = { 0 };
struct npc_mcam_alloc_entry_rsp alloc_rsp = { 0 };
struct npc_delete_flow_req uninstall_req = { 0 };
+ struct npc_delete_flow_rsp uninstall_rsp = { 0 };
struct npc_mcam_free_entry_req free_req = { 0 };
struct rvu_switch *rswitch = &rvu->rswitch;
struct msg_rsp rsp;
@@ -197,7 +198,7 @@ void rvu_switch_enable(struct rvu *rvu)
uninstall_rules:
uninstall_req.start = rswitch->start_entry;
uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1;
- rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp);
+ rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &uninstall_rsp);
kfree(rswitch->entry2pcifunc);
free_entries:
free_req.all = 1;
@@ -209,6 +210,7 @@ exit:
void rvu_switch_disable(struct rvu *rvu)
{
struct npc_delete_flow_req uninstall_req = { 0 };
+ struct npc_delete_flow_rsp uninstall_rsp = { 0 };
struct npc_mcam_free_entry_req free_req = { 0 };
struct rvu_switch *rswitch = &rvu->rswitch;
struct rvu_hwinfo *hw = rvu->hw;
@@ -250,7 +252,7 @@ void rvu_switch_disable(struct rvu *rvu)
uninstall_req.start = rswitch->start_entry;
uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1;
free_req.all = 1;
- rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp);
+ rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &uninstall_rsp);
rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp);
rswitch->used_entries = 0;
kfree(rswitch->entry2pcifunc);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 77c8f650f7ac..8cdd92dd9762 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -774,6 +774,7 @@ int otx2_txsch_alloc(struct otx2_nic *pfvf)
rsp->schq_list[lvl][schq];
pfvf->hw.txschq_link_cfg_lvl = rsp->link_cfg_lvl;
+ pfvf->hw.txschq_aggr_lvl_rr_prio = rsp->aggr_lvl_rr_prio;
return 0;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index ba8091131ec0..25e99fd2e3fd 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -224,6 +224,7 @@ struct otx2_hw {
/* NIX */
u8 txschq_link_cfg_lvl;
+ u8 txschq_aggr_lvl_rr_prio;
u16 txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
u16 matchall_ipolicer;
u32 dwrr_mtu;
@@ -360,13 +361,8 @@ struct otx2_flow_config {
struct list_head flow_list;
u32 dmacflt_max_flows;
u16 max_flows;
-};
-
-struct otx2_tc_info {
- /* hash table to store TC offloaded flows */
- struct rhashtable flow_table;
- struct rhashtable_params flow_ht_params;
- unsigned long *tc_entries_bitmap;
+ struct list_head flow_list_tc;
+ bool ntuple;
};
struct dev_hw_ops {
@@ -491,7 +487,6 @@ struct otx2_nic {
/* NPC MCAM */
struct otx2_flow_config *flow_cfg;
struct otx2_mac_table *mac_table;
- struct otx2_tc_info tc_info;
u64 reset_count;
struct work_struct reset_task;
@@ -1063,7 +1058,6 @@ int otx2_init_tc(struct otx2_nic *nic);
void otx2_shutdown_tc(struct otx2_nic *nic);
int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
void *type_data);
-int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic);
/* CGX/RPM DMAC filters support */
int otx2_dmacflt_get_max_cnt(struct otx2_nic *pf);
int otx2_dmacflt_add(struct otx2_nic *pf, const u8 *mac, u32 bit_pos);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
index 63ef7c41d18d..4e1130496573 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
@@ -41,7 +41,6 @@ static int otx2_dl_mcam_count_set(struct devlink *devlink, u32 id,
return 0;
otx2_alloc_mcam_entries(pfvf, ctx->val.vu16);
- otx2_tc_alloc_ent_bitmap(pfvf);
return 0;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index c47d91da32dc..9efcec549834 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -764,6 +764,7 @@ static int otx2_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc)
struct otx2_nic *pfvf = netdev_priv(dev);
int ret = -EOPNOTSUPP;
+ pfvf->flow_cfg->ntuple = ntuple;
switch (nfc->cmd) {
case ETHTOOL_SRXFH:
ret = otx2_set_rss_hash_opts(pfvf, nfc);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
index 2d7713a1a153..4762dbea64a1 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
@@ -276,6 +276,7 @@ int otx2vf_mcam_flow_init(struct otx2_nic *pfvf)
flow_cfg = pfvf->flow_cfg;
INIT_LIST_HEAD(&flow_cfg->flow_list);
+ INIT_LIST_HEAD(&flow_cfg->flow_list_tc);
flow_cfg->max_flows = 0;
return 0;
@@ -298,6 +299,7 @@ int otx2_mcam_flow_init(struct otx2_nic *pf)
return -ENOMEM;
INIT_LIST_HEAD(&pf->flow_cfg->flow_list);
+ INIT_LIST_HEAD(&pf->flow_cfg->flow_list_tc);
/* Allocate bare minimum number of MCAM entries needed for
* unicast and ntuple filters.
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 5e56b6c3e60a..1e6fc23eca4f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -34,9 +34,8 @@ struct otx2_tc_flow_stats {
};
struct otx2_tc_flow {
- struct rhash_head node;
+ struct list_head list;
unsigned long cookie;
- unsigned int bitpos;
struct rcu_head rcu;
struct otx2_tc_flow_stats stats;
spinlock_t lock; /* lock for stats */
@@ -44,31 +43,10 @@ struct otx2_tc_flow {
u16 entry;
u16 leaf_profile;
bool is_act_police;
+ u32 prio;
+ struct npc_install_flow_req req;
};
-int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic)
-{
- struct otx2_tc_info *tc = &nic->tc_info;
-
- if (!nic->flow_cfg->max_flows)
- return 0;
-
- /* Max flows changed, free the existing bitmap */
- kfree(tc->tc_entries_bitmap);
-
- tc->tc_entries_bitmap =
- kcalloc(BITS_TO_LONGS(nic->flow_cfg->max_flows),
- sizeof(long), GFP_KERNEL);
- if (!tc->tc_entries_bitmap) {
- netdev_err(nic->netdev,
- "Unable to alloc TC flow entries bitmap\n");
- return -ENOMEM;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap);
-
static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst,
u32 *burst_exp, u32 *burst_mantissa)
{
@@ -707,8 +685,117 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
return otx2_tc_parse_actions(nic, &rule->action, req, f, node);
}
-static int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry)
+static void otx2_destroy_tc_flow_list(struct otx2_nic *pfvf)
+{
+ struct otx2_flow_config *flow_cfg = pfvf->flow_cfg;
+ struct otx2_tc_flow *iter, *tmp;
+
+ if (!(pfvf->flags & OTX2_FLAG_MCAM_ENTRIES_ALLOC))
+ return;
+
+ list_for_each_entry_safe(iter, tmp, &flow_cfg->flow_list_tc, list) {
+ list_del(&iter->list);
+ kfree(iter);
+ flow_cfg->nr_flows--;
+ }
+}
+
+static struct otx2_tc_flow *otx2_tc_get_entry_by_cookie(struct otx2_flow_config *flow_cfg,
+ unsigned long cookie)
+{
+ struct otx2_tc_flow *tmp;
+
+ list_for_each_entry(tmp, &flow_cfg->flow_list_tc, list) {
+ if (tmp->cookie == cookie)
+ return tmp;
+ }
+
+ return NULL;
+}
+
+static struct otx2_tc_flow *otx2_tc_get_entry_by_index(struct otx2_flow_config *flow_cfg,
+ int index)
{
+ struct otx2_tc_flow *tmp;
+ int i = 0;
+
+ list_for_each_entry(tmp, &flow_cfg->flow_list_tc, list) {
+ if (i == index)
+ return tmp;
+ i++;
+ }
+
+ return NULL;
+}
+
+static void otx2_tc_del_from_flow_list(struct otx2_flow_config *flow_cfg,
+ struct otx2_tc_flow *node)
+{
+ struct list_head *pos, *n;
+ struct otx2_tc_flow *tmp;
+
+ list_for_each_safe(pos, n, &flow_cfg->flow_list_tc) {
+ tmp = list_entry(pos, struct otx2_tc_flow, list);
+ if (node == tmp) {
+ list_del(&node->list);
+ return;
+ }
+ }
+}
+
+static int otx2_tc_add_to_flow_list(struct otx2_flow_config *flow_cfg,
+ struct otx2_tc_flow *node)
+{
+ struct list_head *pos, *n;
+ struct otx2_tc_flow *tmp;
+ int index = 0;
+
+ /* If the flow list is empty then add the new node */
+ if (list_empty(&flow_cfg->flow_list_tc)) {
+ list_add(&node->list, &flow_cfg->flow_list_tc);
+ return index;
+ }
+
+ list_for_each_safe(pos, n, &flow_cfg->flow_list_tc) {
+ tmp = list_entry(pos, struct otx2_tc_flow, list);
+ if (node->prio < tmp->prio)
+ break;
+ index++;
+ }
+
+ list_add(&node->list, pos->prev);
+ return index;
+}
+
+static int otx2_add_mcam_flow_entry(struct otx2_nic *nic, struct npc_install_flow_req *req)
+{
+ struct npc_install_flow_req *tmp_req;
+ int err;
+
+ mutex_lock(&nic->mbox.lock);
+ tmp_req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox);
+ if (!tmp_req) {
+ mutex_unlock(&nic->mbox.lock);
+ return -ENOMEM;
+ }
+
+ memcpy(tmp_req, req, sizeof(struct npc_install_flow_req));
+ /* Send message to AF */
+ err = otx2_sync_mbox_msg(&nic->mbox);
+ if (err) {
+ netdev_err(nic->netdev, "Failed to install MCAM flow entry %d\n",
+ req->entry);
+ mutex_unlock(&nic->mbox.lock);
+ return -EFAULT;
+ }
+
+ mutex_unlock(&nic->mbox.lock);
+ return 0;
+}
+
+static int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry, u16 *cntr_val)
+{
+ struct npc_delete_flow_rsp *rsp;
struct npc_delete_flow_req *req;
int err;
@@ -729,22 +816,113 @@ static int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry)
mutex_unlock(&nic->mbox.lock);
return -EFAULT;
}
+
+ if (cntr_val) {
+ rsp = (struct npc_delete_flow_rsp *)otx2_mbox_get_rsp(&nic->mbox.mbox,
+ 0, &req->hdr);
+ if (IS_ERR(rsp)) {
+ netdev_err(nic->netdev, "Failed to get MCAM delete response for entry %d\n",
+ entry);
+ mutex_unlock(&nic->mbox.lock);
+ return -EFAULT;
+ }
+
+ *cntr_val = rsp->cntr_val;
+ }
+
mutex_unlock(&nic->mbox.lock);
+ return 0;
+}
+
+static int otx2_tc_update_mcam_table_del_req(struct otx2_nic *nic,
+ struct otx2_flow_config *flow_cfg,
+ struct otx2_tc_flow *node)
+{
+ struct list_head *pos, *n;
+ struct otx2_tc_flow *tmp;
+ int i = 0, index = 0;
+ u16 cntr_val;
+
+ /* Find and delete the entry from the list and re-install
+ * all the entries from beginning to the index of the
+ * deleted entry to higher mcam indexes.
+ */
+ list_for_each_safe(pos, n, &flow_cfg->flow_list_tc) {
+ tmp = list_entry(pos, struct otx2_tc_flow, list);
+ if (node == tmp) {
+ list_del(&tmp->list);
+ break;
+ }
+
+ otx2_del_mcam_flow_entry(nic, tmp->entry, &cntr_val);
+ tmp->entry++;
+ tmp->req.entry = tmp->entry;
+ tmp->req.cntr_val = cntr_val;
+ index++;
+ }
+
+ list_for_each_safe(pos, n, &flow_cfg->flow_list_tc) {
+ if (i == index)
+ break;
+
+ tmp = list_entry(pos, struct otx2_tc_flow, list);
+ otx2_add_mcam_flow_entry(nic, &tmp->req);
+ i++;
+ }
return 0;
}
+static int otx2_tc_update_mcam_table_add_req(struct otx2_nic *nic,
+ struct otx2_flow_config *flow_cfg,
+ struct otx2_tc_flow *node)
+{
+ int mcam_idx = flow_cfg->max_flows - flow_cfg->nr_flows - 1;
+ struct otx2_tc_flow *tmp;
+ int list_idx, i;
+ u16 cntr_val;
+
+ /* Find the index of the entry(list_idx) whose priority
+ * is greater than the new entry and re-install all
+ * the entries from beginning to list_idx to higher
+ * mcam indexes.
+ */
+ list_idx = otx2_tc_add_to_flow_list(flow_cfg, node);
+ for (i = 0; i < list_idx; i++) {
+ tmp = otx2_tc_get_entry_by_index(flow_cfg, i);
+ if (!tmp)
+ return -ENOMEM;
+
+ otx2_del_mcam_flow_entry(nic, tmp->entry, &cntr_val);
+ tmp->entry = flow_cfg->flow_ent[mcam_idx];
+ tmp->req.entry = tmp->entry;
+ tmp->req.cntr_val = cntr_val;
+ otx2_add_mcam_flow_entry(nic, &tmp->req);
+ mcam_idx++;
+ }
+
+ return mcam_idx;
+}
+
+static int otx2_tc_update_mcam_table(struct otx2_nic *nic,
+ struct otx2_flow_config *flow_cfg,
+ struct otx2_tc_flow *node,
+ bool add_req)
+{
+ if (add_req)
+ return otx2_tc_update_mcam_table_add_req(nic, flow_cfg, node);
+
+ return otx2_tc_update_mcam_table_del_req(nic, flow_cfg, node);
+}
+
static int otx2_tc_del_flow(struct otx2_nic *nic,
struct flow_cls_offload *tc_flow_cmd)
{
struct otx2_flow_config *flow_cfg = nic->flow_cfg;
- struct otx2_tc_info *tc_info = &nic->tc_info;
struct otx2_tc_flow *flow_node;
int err;
- flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
- &tc_flow_cmd->cookie,
- tc_info->flow_ht_params);
+ flow_node = otx2_tc_get_entry_by_cookie(flow_cfg, tc_flow_cmd->cookie);
if (!flow_node) {
netdev_err(nic->netdev, "tc flow not found for cookie 0x%lx\n",
tc_flow_cmd->cookie);
@@ -772,16 +950,10 @@ static int otx2_tc_del_flow(struct otx2_nic *nic,
mutex_unlock(&nic->mbox.lock);
}
- otx2_del_mcam_flow_entry(nic, flow_node->entry);
-
- WARN_ON(rhashtable_remove_fast(&nic->tc_info.flow_table,
- &flow_node->node,
- nic->tc_info.flow_ht_params));
+ otx2_del_mcam_flow_entry(nic, flow_node->entry, NULL);
+ otx2_tc_update_mcam_table(nic, flow_cfg, flow_node, false);
kfree_rcu(flow_node, rcu);
-
- clear_bit(flow_node->bitpos, tc_info->tc_entries_bitmap);
flow_cfg->nr_flows--;
-
return 0;
}
@@ -790,15 +962,14 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
{
struct netlink_ext_ack *extack = tc_flow_cmd->common.extack;
struct otx2_flow_config *flow_cfg = nic->flow_cfg;
- struct otx2_tc_info *tc_info = &nic->tc_info;
struct otx2_tc_flow *new_node, *old_node;
struct npc_install_flow_req *req, dummy;
- int rc, err;
+ int rc, err, mcam_idx;
if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT))
return -ENOMEM;
- if (bitmap_full(tc_info->tc_entries_bitmap, flow_cfg->max_flows)) {
+ if (flow_cfg->nr_flows == flow_cfg->max_flows) {
NL_SET_ERR_MSG_MOD(extack,
"Free MCAM entry not available to add the flow");
return -ENOMEM;
@@ -810,6 +981,7 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
return -ENOMEM;
spin_lock_init(&new_node->lock);
new_node->cookie = tc_flow_cmd->cookie;
+ new_node->prio = tc_flow_cmd->common.prio;
memset(&dummy, 0, sizeof(struct npc_install_flow_req));
@@ -820,12 +992,11 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
}
/* If a flow exists with the same cookie, delete it */
- old_node = rhashtable_lookup_fast(&tc_info->flow_table,
- &tc_flow_cmd->cookie,
- tc_info->flow_ht_params);
+ old_node = otx2_tc_get_entry_by_cookie(flow_cfg, tc_flow_cmd->cookie);
if (old_node)
otx2_tc_del_flow(nic, tc_flow_cmd);
+ mcam_idx = otx2_tc_update_mcam_table(nic, flow_cfg, new_node, true);
mutex_lock(&nic->mbox.lock);
req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox);
if (!req) {
@@ -836,11 +1007,8 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
memcpy(&dummy.hdr, &req->hdr, sizeof(struct mbox_msghdr));
memcpy(req, &dummy, sizeof(struct npc_install_flow_req));
-
- new_node->bitpos = find_first_zero_bit(tc_info->tc_entries_bitmap,
- flow_cfg->max_flows);
req->channel = nic->hw.rx_chan_base;
- req->entry = flow_cfg->flow_ent[flow_cfg->max_flows - new_node->bitpos - 1];
+ req->entry = flow_cfg->flow_ent[mcam_idx];
req->intf = NIX_INTF_RX;
req->set_cntr = 1;
new_node->entry = req->entry;
@@ -850,26 +1018,18 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
if (rc) {
NL_SET_ERR_MSG_MOD(extack, "Failed to install MCAM flow entry");
mutex_unlock(&nic->mbox.lock);
- kfree_rcu(new_node, rcu);
goto free_leaf;
}
- mutex_unlock(&nic->mbox.lock);
- /* add new flow to flow-table */
- rc = rhashtable_insert_fast(&nic->tc_info.flow_table, &new_node->node,
- nic->tc_info.flow_ht_params);
- if (rc) {
- otx2_del_mcam_flow_entry(nic, req->entry);
- kfree_rcu(new_node, rcu);
- goto free_leaf;
- }
+ mutex_unlock(&nic->mbox.lock);
+ memcpy(&new_node->req, req, sizeof(struct npc_install_flow_req));
- set_bit(new_node->bitpos, tc_info->tc_entries_bitmap);
flow_cfg->nr_flows++;
-
return 0;
free_leaf:
+ otx2_tc_del_from_flow_list(flow_cfg, new_node);
+ kfree_rcu(new_node, rcu);
if (new_node->is_act_police) {
mutex_lock(&nic->mbox.lock);
@@ -896,16 +1056,13 @@ free_leaf:
static int otx2_tc_get_flow_stats(struct otx2_nic *nic,
struct flow_cls_offload *tc_flow_cmd)
{
- struct otx2_tc_info *tc_info = &nic->tc_info;
struct npc_mcam_get_stats_req *req;
struct npc_mcam_get_stats_rsp *rsp;
struct otx2_tc_flow_stats *stats;
struct otx2_tc_flow *flow_node;
int err;
- flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
- &tc_flow_cmd->cookie,
- tc_info->flow_ht_params);
+ flow_node = otx2_tc_get_entry_by_cookie(nic->flow_cfg, tc_flow_cmd->cookie);
if (!flow_node) {
netdev_info(nic->netdev, "tc flow not found for cookie %lx",
tc_flow_cmd->cookie);
@@ -1053,12 +1210,20 @@ static int otx2_setup_tc_block_ingress_cb(enum tc_setup_type type,
void *type_data, void *cb_priv)
{
struct otx2_nic *nic = cb_priv;
+ bool ntuple;
if (!tc_cls_can_offload_and_chain0(nic->netdev, type_data))
return -EOPNOTSUPP;
+ ntuple = nic->netdev->features & NETIF_F_NTUPLE;
switch (type) {
case TC_SETUP_CLSFLOWER:
+ if (ntuple) {
+ netdev_warn(nic->netdev,
+ "Can't install TC flower offload rule when NTUPLE is active");
+ return -EOPNOTSUPP;
+ }
+
return otx2_setup_tc_cls_flower(nic, type_data);
case TC_SETUP_CLSMATCHALL:
return otx2_setup_tc_ingress_matchall(nic, type_data);
@@ -1143,18 +1308,8 @@ int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
}
EXPORT_SYMBOL(otx2_setup_tc);
-static const struct rhashtable_params tc_flow_ht_params = {
- .head_offset = offsetof(struct otx2_tc_flow, node),
- .key_offset = offsetof(struct otx2_tc_flow, cookie),
- .key_len = sizeof(((struct otx2_tc_flow *)0)->cookie),
- .automatic_shrinking = true,
-};
-
int otx2_init_tc(struct otx2_nic *nic)
{
- struct otx2_tc_info *tc = &nic->tc_info;
- int err;
-
/* Exclude receive queue 0 being used for police action */
set_bit(0, &nic->rq_bmap);
@@ -1164,25 +1319,12 @@ int otx2_init_tc(struct otx2_nic *nic)
return -EINVAL;
}
- err = otx2_tc_alloc_ent_bitmap(nic);
- if (err)
- return err;
-
- tc->flow_ht_params = tc_flow_ht_params;
- err = rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
- if (err) {
- kfree(tc->tc_entries_bitmap);
- tc->tc_entries_bitmap = NULL;
- }
- return err;
+ return 0;
}
EXPORT_SYMBOL(otx2_init_tc);
void otx2_shutdown_tc(struct otx2_nic *nic)
{
- struct otx2_tc_info *tc = &nic->tc_info;
-
- kfree(tc->tc_entries_bitmap);
- rhashtable_destroy(&tc->flow_table);
+ otx2_destroy_tc_flow_list(nic);
}
EXPORT_SYMBOL(otx2_shutdown_tc);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
index d3a76c5ccda8..1e77bbf5d22a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
@@ -19,6 +19,9 @@
#define OTX2_QOS_CLASS_NONE 0
#define OTX2_QOS_DEFAULT_PRIO 0xF
#define OTX2_QOS_INVALID_SQ 0xFFFF
+#define OTX2_QOS_INVALID_TXSCHQ_IDX 0xFFFF
+#define CN10K_MAX_RR_WEIGHT GENMASK_ULL(13, 0)
+#define OTX2_MAX_RR_QUANTUM GENMASK_ULL(23, 0)
static void otx2_qos_update_tx_netdev_queues(struct otx2_nic *pfvf)
{
@@ -65,11 +68,24 @@ static void otx2_qos_get_regaddr(struct otx2_qos_node *node,
}
}
+static int otx2_qos_quantum_to_dwrr_weight(struct otx2_nic *pfvf, u32 quantum)
+{
+ u32 weight;
+
+ weight = quantum / pfvf->hw.dwrr_mtu;
+ if (quantum % pfvf->hw.dwrr_mtu)
+ weight += 1;
+
+ return weight;
+}
+
static void otx2_config_sched_shaping(struct otx2_nic *pfvf,
struct otx2_qos_node *node,
struct nix_txschq_config *cfg,
int *num_regs)
{
+ u32 rr_weight;
+ u32 quantum;
u64 maxrate;
otx2_qos_get_regaddr(node, cfg, *num_regs);
@@ -86,8 +102,17 @@ static void otx2_config_sched_shaping(struct otx2_nic *pfvf,
return;
}
- /* configure priority */
- cfg->regval[*num_regs] = (node->schq - node->parent->prio_anchor) << 24;
+ /* configure priority/quantum */
+ if (node->is_static) {
+ cfg->regval[*num_regs] =
+ (node->schq - node->parent->prio_anchor) << 24;
+ } else {
+ quantum = node->quantum ?
+ node->quantum : pfvf->tx_max_pktlen;
+ rr_weight = otx2_qos_quantum_to_dwrr_weight(pfvf, quantum);
+ cfg->regval[*num_regs] = node->parent->child_dwrr_prio << 24 |
+ rr_weight;
+ }
(*num_regs)++;
/* configure PIR */
@@ -195,9 +220,8 @@ static int otx2_qos_txschq_set_parent_topology(struct otx2_nic *pfvf,
cfg->reg[0] = NIX_AF_TL1X_TOPOLOGY(parent->schq);
cfg->regval[0] = (u64)parent->prio_anchor << 32;
- if (parent->level == NIX_TXSCH_LVL_TL1)
- cfg->regval[0] |= (u64)TXSCH_TL1_DFLT_RR_PRIO << 1;
-
+ cfg->regval[0] |= ((parent->child_dwrr_prio != OTX2_QOS_DEFAULT_PRIO) ?
+ parent->child_dwrr_prio : 0) << 1;
cfg->num_regs++;
rc = otx2_sync_mbox_msg(&pfvf->mbox);
@@ -315,9 +339,14 @@ static void otx2_qos_fill_cfg_tl(struct otx2_qos_node *parent,
list_for_each_entry(node, &parent->child_list, list) {
otx2_qos_fill_cfg_tl(node, cfg);
- cfg->schq_contig[node->level]++;
otx2_qos_fill_cfg_schq(node, cfg);
}
+
+ /* Assign the required number of transmit schedular queues under the
+ * given class
+ */
+ cfg->schq_contig[parent->level - 1] += parent->child_dwrr_cnt +
+ parent->max_static_prio + 1;
}
static void otx2_qos_prepare_txschq_cfg(struct otx2_nic *pfvf,
@@ -378,10 +407,12 @@ otx2_qos_alloc_root(struct otx2_nic *pfvf)
return ERR_PTR(-ENOMEM);
node->parent = NULL;
- if (!is_otx2_vf(pfvf->pcifunc))
+ if (!is_otx2_vf(pfvf->pcifunc)) {
node->level = NIX_TXSCH_LVL_TL1;
- else
+ } else {
node->level = NIX_TXSCH_LVL_TL2;
+ node->child_dwrr_prio = OTX2_QOS_DEFAULT_PRIO;
+ }
WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER);
node->classid = OTX2_QOS_ROOT_CLASSID;
@@ -401,9 +432,13 @@ static int otx2_qos_add_child_node(struct otx2_qos_node *parent,
struct otx2_qos_node *tmp_node;
struct list_head *tmp;
+ if (node->prio > parent->max_static_prio)
+ parent->max_static_prio = node->prio;
+
for (tmp = head->next; tmp != head; tmp = tmp->next) {
tmp_node = list_entry(tmp, struct otx2_qos_node, list);
- if (tmp_node->prio == node->prio)
+ if (tmp_node->prio == node->prio &&
+ tmp_node->is_static)
return -EEXIST;
if (tmp_node->prio > node->prio) {
list_add_tail(&node->list, tmp);
@@ -434,6 +469,10 @@ static int otx2_qos_alloc_txschq_node(struct otx2_nic *pfvf,
txschq_node->rate = 0;
txschq_node->ceil = 0;
txschq_node->prio = 0;
+ txschq_node->quantum = 0;
+ txschq_node->is_static = true;
+ txschq_node->child_dwrr_prio = OTX2_QOS_DEFAULT_PRIO;
+ txschq_node->txschq_idx = OTX2_QOS_INVALID_TXSCHQ_IDX;
mutex_lock(&pfvf->qos.qos_lock);
list_add_tail(&txschq_node->list, &node->child_schq_list);
@@ -459,7 +498,7 @@ static struct otx2_qos_node *
otx2_qos_sw_create_leaf_node(struct otx2_nic *pfvf,
struct otx2_qos_node *parent,
u16 classid, u32 prio, u64 rate, u64 ceil,
- u16 qid)
+ u32 quantum, u16 qid, bool static_cfg)
{
struct otx2_qos_node *node;
int err;
@@ -476,6 +515,10 @@ otx2_qos_sw_create_leaf_node(struct otx2_nic *pfvf,
node->rate = otx2_convert_rate(rate);
node->ceil = otx2_convert_rate(ceil);
node->prio = prio;
+ node->quantum = quantum;
+ node->is_static = static_cfg;
+ node->child_dwrr_prio = OTX2_QOS_DEFAULT_PRIO;
+ node->txschq_idx = OTX2_QOS_INVALID_TXSCHQ_IDX;
__set_bit(qid, pfvf->qos.qos_sq_bmap);
@@ -622,12 +665,28 @@ static int otx2_qos_txschq_alloc(struct otx2_nic *pfvf,
}
pfvf->qos.link_cfg_lvl = rsp->link_cfg_lvl;
+ pfvf->hw.txschq_aggr_lvl_rr_prio = rsp->aggr_lvl_rr_prio;
out:
mutex_unlock(&mbox->lock);
return rc;
}
+static void otx2_qos_free_unused_txschq(struct otx2_nic *pfvf,
+ struct otx2_qos_cfg *cfg)
+{
+ int lvl, idx, schq;
+
+ for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+ for (idx = 0; idx < cfg->schq_contig[lvl]; idx++) {
+ if (!cfg->schq_index_used[lvl][idx]) {
+ schq = cfg->schq_contig_list[lvl][idx];
+ otx2_txschq_free_one(pfvf, lvl, schq);
+ }
+ }
+ }
+}
+
static void otx2_qos_txschq_fill_cfg_schq(struct otx2_nic *pfvf,
struct otx2_qos_node *node,
struct otx2_qos_cfg *cfg)
@@ -652,9 +711,11 @@ static void otx2_qos_txschq_fill_cfg_tl(struct otx2_nic *pfvf,
list_for_each_entry(tmp, &node->child_list, list) {
otx2_qos_txschq_fill_cfg_tl(pfvf, tmp, cfg);
cnt = cfg->static_node_pos[tmp->level];
- tmp->schq = cfg->schq_contig_list[tmp->level][cnt];
+ tmp->schq = cfg->schq_contig_list[tmp->level][tmp->txschq_idx];
+ cfg->schq_index_used[tmp->level][tmp->txschq_idx] = true;
if (cnt == 0)
- node->prio_anchor = tmp->schq;
+ node->prio_anchor =
+ cfg->schq_contig_list[tmp->level][0];
cfg->static_node_pos[tmp->level]++;
otx2_qos_txschq_fill_cfg_schq(pfvf, tmp, cfg);
}
@@ -667,9 +728,87 @@ static void otx2_qos_txschq_fill_cfg(struct otx2_nic *pfvf,
mutex_lock(&pfvf->qos.qos_lock);
otx2_qos_txschq_fill_cfg_tl(pfvf, node, cfg);
otx2_qos_txschq_fill_cfg_schq(pfvf, node, cfg);
+ otx2_qos_free_unused_txschq(pfvf, cfg);
mutex_unlock(&pfvf->qos.qos_lock);
}
+static void __otx2_qos_assign_base_idx_tl(struct otx2_nic *pfvf,
+ struct otx2_qos_node *tmp,
+ unsigned long *child_idx_bmap,
+ int child_cnt)
+{
+ int idx;
+
+ if (tmp->txschq_idx != OTX2_QOS_INVALID_TXSCHQ_IDX)
+ return;
+
+ /* assign static nodes 1:1 prio mapping first, then remaining nodes */
+ for (idx = 0; idx < child_cnt; idx++) {
+ if (tmp->is_static && tmp->prio == idx &&
+ !test_bit(idx, child_idx_bmap)) {
+ tmp->txschq_idx = idx;
+ set_bit(idx, child_idx_bmap);
+ return;
+ } else if (!tmp->is_static && idx >= tmp->prio &&
+ !test_bit(idx, child_idx_bmap)) {
+ tmp->txschq_idx = idx;
+ set_bit(idx, child_idx_bmap);
+ return;
+ }
+ }
+}
+
+static int otx2_qos_assign_base_idx_tl(struct otx2_nic *pfvf,
+ struct otx2_qos_node *node)
+{
+ unsigned long *child_idx_bmap;
+ struct otx2_qos_node *tmp;
+ int child_cnt;
+
+ list_for_each_entry(tmp, &node->child_list, list)
+ tmp->txschq_idx = OTX2_QOS_INVALID_TXSCHQ_IDX;
+
+ /* allocate child index array */
+ child_cnt = node->child_dwrr_cnt + node->max_static_prio + 1;
+ child_idx_bmap = kcalloc(BITS_TO_LONGS(child_cnt),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!child_idx_bmap)
+ return -ENOMEM;
+
+ list_for_each_entry(tmp, &node->child_list, list)
+ otx2_qos_assign_base_idx_tl(pfvf, tmp);
+
+ /* assign base index of static priority children first */
+ list_for_each_entry(tmp, &node->child_list, list) {
+ if (!tmp->is_static)
+ continue;
+ __otx2_qos_assign_base_idx_tl(pfvf, tmp, child_idx_bmap,
+ child_cnt);
+ }
+
+ /* assign base index of dwrr priority children */
+ list_for_each_entry(tmp, &node->child_list, list)
+ __otx2_qos_assign_base_idx_tl(pfvf, tmp, child_idx_bmap,
+ child_cnt);
+
+ kfree(child_idx_bmap);
+
+ return 0;
+}
+
+static int otx2_qos_assign_base_idx(struct otx2_nic *pfvf,
+ struct otx2_qos_node *node)
+{
+ int ret = 0;
+
+ mutex_lock(&pfvf->qos.qos_lock);
+ ret = otx2_qos_assign_base_idx_tl(pfvf, node);
+ mutex_unlock(&pfvf->qos.qos_lock);
+
+ return ret;
+}
+
static int otx2_qos_txschq_push_cfg_schq(struct otx2_nic *pfvf,
struct otx2_qos_node *node,
struct otx2_qos_cfg *cfg)
@@ -761,8 +900,10 @@ static void otx2_qos_free_cfg(struct otx2_nic *pfvf, struct otx2_qos_cfg *cfg)
for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
for (idx = 0; idx < cfg->schq_contig[lvl]; idx++) {
- schq = cfg->schq_contig_list[lvl][idx];
- otx2_txschq_free_one(pfvf, lvl, schq);
+ if (cfg->schq_index_used[lvl][idx]) {
+ schq = cfg->schq_contig_list[lvl][idx];
+ otx2_txschq_free_one(pfvf, lvl, schq);
+ }
}
}
}
@@ -838,6 +979,10 @@ static int otx2_qos_push_txschq_cfg(struct otx2_nic *pfvf,
if (ret)
return -ENOSPC;
+ ret = otx2_qos_assign_base_idx(pfvf, node);
+ if (ret)
+ return -ENOMEM;
+
if (!(pfvf->netdev->flags & IFF_UP)) {
otx2_qos_txschq_fill_cfg(pfvf, node, cfg);
return 0;
@@ -894,6 +1039,13 @@ static int otx2_qos_root_add(struct otx2_nic *pfvf, u16 htb_maj_id, u16 htb_defc
goto free_root_node;
}
+ /* Update TL1 RR PRIO */
+ if (root->level == NIX_TXSCH_LVL_TL1) {
+ root->child_dwrr_prio = pfvf->hw.txschq_aggr_lvl_rr_prio;
+ netdev_dbg(pfvf->netdev,
+ "TL1 DWRR Priority %d\n", root->child_dwrr_prio);
+ }
+
if (!(pfvf->netdev->flags & IFF_UP) ||
root->level == NIX_TXSCH_LVL_TL1) {
root->schq = new_cfg->schq_list[root->level][0];
@@ -940,37 +1092,126 @@ static int otx2_qos_root_destroy(struct otx2_nic *pfvf)
return 0;
}
+static int otx2_qos_validate_quantum(struct otx2_nic *pfvf, u32 quantum)
+{
+ u32 rr_weight = otx2_qos_quantum_to_dwrr_weight(pfvf, quantum);
+ int err = 0;
+
+ /* Max Round robin weight supported by octeontx2 and CN10K
+ * is different. Validate accordingly
+ */
+ if (is_dev_otx2(pfvf->pdev))
+ err = (rr_weight > OTX2_MAX_RR_QUANTUM) ? -EINVAL : 0;
+ else if (rr_weight > CN10K_MAX_RR_WEIGHT)
+ err = -EINVAL;
+
+ return err;
+}
+
+static int otx2_qos_validate_dwrr_cfg(struct otx2_qos_node *parent,
+ struct netlink_ext_ack *extack,
+ struct otx2_nic *pfvf,
+ u64 prio, u64 quantum)
+{
+ int err;
+
+ err = otx2_qos_validate_quantum(pfvf, quantum);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported quantum value");
+ return err;
+ }
+
+ if (parent->child_dwrr_prio == OTX2_QOS_DEFAULT_PRIO) {
+ parent->child_dwrr_prio = prio;
+ } else if (prio != parent->child_dwrr_prio) {
+ NL_SET_ERR_MSG_MOD(extack, "Only one DWRR group is allowed");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static int otx2_qos_validate_configuration(struct otx2_qos_node *parent,
struct netlink_ext_ack *extack,
struct otx2_nic *pfvf,
- u64 prio)
+ u64 prio, bool static_cfg)
{
- if (test_bit(prio, parent->prio_bmap)) {
- NL_SET_ERR_MSG_MOD(extack,
- "Static priority child with same priority exists");
+ if (prio == parent->child_dwrr_prio && static_cfg) {
+ NL_SET_ERR_MSG_MOD(extack, "DWRR child group with same priority exists");
return -EEXIST;
}
- if (prio == TXSCH_TL1_DFLT_RR_PRIO) {
+ if (static_cfg && test_bit(prio, parent->prio_bmap)) {
NL_SET_ERR_MSG_MOD(extack,
- "Priority is reserved for Round Robin");
- return -EINVAL;
+ "Static priority child with same priority exists");
+ return -EEXIST;
}
return 0;
}
+static void otx2_reset_dwrr_prio(struct otx2_qos_node *parent, u64 prio)
+{
+ /* For PF, root node dwrr priority is static */
+ if (parent->level == NIX_TXSCH_LVL_TL1)
+ return;
+
+ if (parent->child_dwrr_prio != OTX2_QOS_DEFAULT_PRIO) {
+ parent->child_dwrr_prio = OTX2_QOS_DEFAULT_PRIO;
+ clear_bit(prio, parent->prio_bmap);
+ }
+}
+
+static bool is_qos_node_dwrr(struct otx2_qos_node *parent,
+ struct otx2_nic *pfvf,
+ u64 prio)
+{
+ struct otx2_qos_node *node;
+ bool ret = false;
+
+ if (parent->child_dwrr_prio == prio)
+ return true;
+
+ mutex_lock(&pfvf->qos.qos_lock);
+ list_for_each_entry(node, &parent->child_list, list) {
+ if (prio == node->prio) {
+ if (parent->child_dwrr_prio != OTX2_QOS_DEFAULT_PRIO &&
+ parent->child_dwrr_prio != prio)
+ continue;
+
+ if (otx2_qos_validate_quantum(pfvf, node->quantum)) {
+ netdev_err(pfvf->netdev,
+ "Unsupported quantum value for existing classid=0x%x quantum=%d prio=%d",
+ node->classid, node->quantum,
+ node->prio);
+ break;
+ }
+ /* mark old node as dwrr */
+ node->is_static = false;
+ parent->child_dwrr_cnt++;
+ parent->child_static_cnt--;
+ ret = true;
+ break;
+ }
+ }
+ mutex_unlock(&pfvf->qos.qos_lock);
+
+ return ret;
+}
+
static int otx2_qos_leaf_alloc_queue(struct otx2_nic *pfvf, u16 classid,
u32 parent_classid, u64 rate, u64 ceil,
- u64 prio, struct netlink_ext_ack *extack)
+ u64 prio, u32 quantum,
+ struct netlink_ext_ack *extack)
{
struct otx2_qos_cfg *old_cfg, *new_cfg;
struct otx2_qos_node *node, *parent;
int qid, ret, err;
+ bool static_cfg;
netdev_dbg(pfvf->netdev,
- "TC_HTB_LEAF_ALLOC_QUEUE: classid=0x%x parent_classid=0x%x rate=%lld ceil=%lld prio=%lld\n",
- classid, parent_classid, rate, ceil, prio);
+ "TC_HTB_LEAF_ALLOC_QUEUE: classid=0x%x parent_classid=0x%x rate=%lld ceil=%lld prio=%lld quantum=%d\n",
+ classid, parent_classid, rate, ceil, prio, quantum);
if (prio > OTX2_QOS_MAX_PRIO) {
NL_SET_ERR_MSG_MOD(extack, "Valid priority range 0 to 7");
@@ -978,6 +1219,12 @@ static int otx2_qos_leaf_alloc_queue(struct otx2_nic *pfvf, u16 classid,
goto out;
}
+ if (!quantum || quantum > INT_MAX) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid quantum, range 1 - 2147483647 bytes");
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
/* get parent node */
parent = otx2_sw_node_find(pfvf, parent_classid);
if (!parent) {
@@ -991,10 +1238,24 @@ static int otx2_qos_leaf_alloc_queue(struct otx2_nic *pfvf, u16 classid,
goto out;
}
- ret = otx2_qos_validate_configuration(parent, extack, pfvf, prio);
+ static_cfg = !is_qos_node_dwrr(parent, pfvf, prio);
+ ret = otx2_qos_validate_configuration(parent, extack, pfvf, prio,
+ static_cfg);
if (ret)
goto out;
+ if (!static_cfg) {
+ ret = otx2_qos_validate_dwrr_cfg(parent, extack, pfvf, prio,
+ quantum);
+ if (ret)
+ goto out;
+ }
+
+ if (static_cfg)
+ parent->child_static_cnt++;
+ else
+ parent->child_dwrr_cnt++;
+
set_bit(prio, parent->prio_bmap);
/* read current txschq configuration */
@@ -1019,7 +1280,7 @@ static int otx2_qos_leaf_alloc_queue(struct otx2_nic *pfvf, u16 classid,
/* allocate and initialize a new child node */
node = otx2_qos_sw_create_leaf_node(pfvf, parent, classid, prio, rate,
- ceil, qid);
+ ceil, quantum, qid, static_cfg);
if (IS_ERR(node)) {
NL_SET_ERR_MSG_MOD(extack, "Unable to allocate leaf node");
ret = PTR_ERR(node);
@@ -1067,6 +1328,11 @@ free_node:
free_old_cfg:
kfree(old_cfg);
reset_prio:
+ if (static_cfg)
+ parent->child_static_cnt--;
+ else
+ parent->child_dwrr_cnt--;
+
clear_bit(prio, parent->prio_bmap);
out:
return ret;
@@ -1074,10 +1340,11 @@ out:
static int otx2_qos_leaf_to_inner(struct otx2_nic *pfvf, u16 classid,
u16 child_classid, u64 rate, u64 ceil, u64 prio,
- struct netlink_ext_ack *extack)
+ u32 quantum, struct netlink_ext_ack *extack)
{
struct otx2_qos_cfg *old_cfg, *new_cfg;
struct otx2_qos_node *node, *child;
+ bool static_cfg;
int ret, err;
u16 qid;
@@ -1091,6 +1358,12 @@ static int otx2_qos_leaf_to_inner(struct otx2_nic *pfvf, u16 classid,
goto out;
}
+ if (!quantum || quantum > INT_MAX) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid quantum, range 1 - 2147483647 bytes");
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
/* find node related to classid */
node = otx2_sw_node_find(pfvf, classid);
if (!node) {
@@ -1105,6 +1378,19 @@ static int otx2_qos_leaf_to_inner(struct otx2_nic *pfvf, u16 classid,
goto out;
}
+ static_cfg = !is_qos_node_dwrr(node, pfvf, prio);
+ if (!static_cfg) {
+ ret = otx2_qos_validate_dwrr_cfg(node, extack, pfvf, prio,
+ quantum);
+ if (ret)
+ goto out;
+ }
+
+ if (static_cfg)
+ node->child_static_cnt++;
+ else
+ node->child_dwrr_cnt++;
+
set_bit(prio, node->prio_bmap);
/* store the qid to assign to leaf node */
@@ -1127,7 +1413,8 @@ static int otx2_qos_leaf_to_inner(struct otx2_nic *pfvf, u16 classid,
/* allocate and initialize a new child node */
child = otx2_qos_sw_create_leaf_node(pfvf, node, child_classid,
- prio, rate, ceil, qid);
+ prio, rate, ceil, quantum,
+ qid, static_cfg);
if (IS_ERR(child)) {
NL_SET_ERR_MSG_MOD(extack, "Unable to allocate leaf node");
ret = PTR_ERR(child);
@@ -1178,6 +1465,10 @@ free_node:
free_old_cfg:
kfree(old_cfg);
reset_prio:
+ if (static_cfg)
+ node->child_static_cnt--;
+ else
+ node->child_dwrr_cnt--;
clear_bit(prio, node->prio_bmap);
out:
return ret;
@@ -1187,6 +1478,7 @@ static int otx2_qos_leaf_del(struct otx2_nic *pfvf, u16 *classid,
struct netlink_ext_ack *extack)
{
struct otx2_qos_node *node, *parent;
+ int dwrr_del_node = false;
u64 prio;
u16 qid;
@@ -1202,12 +1494,27 @@ static int otx2_qos_leaf_del(struct otx2_nic *pfvf, u16 *classid,
prio = node->prio;
qid = node->qid;
+ if (!node->is_static)
+ dwrr_del_node = true;
+
otx2_qos_disable_sq(pfvf, node->qid);
otx2_qos_destroy_node(pfvf, node);
pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ;
- clear_bit(prio, parent->prio_bmap);
+ if (dwrr_del_node) {
+ parent->child_dwrr_cnt--;
+ } else {
+ parent->child_static_cnt--;
+ clear_bit(prio, parent->prio_bmap);
+ }
+
+ /* Reset DWRR priority if all dwrr nodes are deleted */
+ if (!parent->child_dwrr_cnt)
+ otx2_reset_dwrr_prio(parent, prio);
+
+ if (!parent->child_static_cnt)
+ parent->max_static_prio = 0;
return 0;
}
@@ -1217,6 +1524,7 @@ static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force
{
struct otx2_qos_node *node, *parent;
struct otx2_qos_cfg *new_cfg;
+ int dwrr_del_node = false;
u64 prio;
int err;
u16 qid;
@@ -1241,11 +1549,26 @@ static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force
return -ENOENT;
}
+ if (!node->is_static)
+ dwrr_del_node = true;
+
/* destroy the leaf node */
otx2_qos_destroy_node(pfvf, node);
pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ;
- clear_bit(prio, parent->prio_bmap);
+ if (dwrr_del_node) {
+ parent->child_dwrr_cnt--;
+ } else {
+ parent->child_static_cnt--;
+ clear_bit(prio, parent->prio_bmap);
+ }
+
+ /* Reset DWRR priority if all dwrr nodes are deleted */
+ if (!parent->child_dwrr_cnt)
+ otx2_reset_dwrr_prio(parent, prio);
+
+ if (!parent->child_static_cnt)
+ parent->max_static_prio = 0;
/* create downstream txschq entries to parent */
err = otx2_qos_alloc_txschq_node(pfvf, parent);
@@ -1298,10 +1621,12 @@ void otx2_qos_config_txschq(struct otx2_nic *pfvf)
if (!root)
return;
- err = otx2_qos_txschq_config(pfvf, root);
- if (err) {
- netdev_err(pfvf->netdev, "Error update txschq configuration\n");
- goto root_destroy;
+ if (root->level != NIX_TXSCH_LVL_TL1) {
+ err = otx2_qos_txschq_config(pfvf, root);
+ if (err) {
+ netdev_err(pfvf->netdev, "Error update txschq configuration\n");
+ goto root_destroy;
+ }
}
err = otx2_qos_txschq_push_cfg_tl(pfvf, root, NULL);
@@ -1334,7 +1659,8 @@ int otx2_setup_tc_htb(struct net_device *ndev, struct tc_htb_qopt_offload *htb)
res = otx2_qos_leaf_alloc_queue(pfvf, htb->classid,
htb->parent_classid,
htb->rate, htb->ceil,
- htb->prio, htb->extack);
+ htb->prio, htb->quantum,
+ htb->extack);
if (res < 0)
return res;
htb->qid = res;
@@ -1343,7 +1669,7 @@ int otx2_setup_tc_htb(struct net_device *ndev, struct tc_htb_qopt_offload *htb)
return otx2_qos_leaf_to_inner(pfvf, htb->parent_classid,
htb->classid, htb->rate,
htb->ceil, htb->prio,
- htb->extack);
+ htb->quantum, htb->extack);
case TC_HTB_LEAF_DEL:
return otx2_qos_leaf_del(pfvf, &htb->classid, htb->extack);
case TC_HTB_LEAF_DEL_LAST:
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.h b/drivers/net/ethernet/marvell/octeontx2/nic/qos.h
index 19773284be27..221bd0438f60 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.h
@@ -35,6 +35,7 @@ struct otx2_qos_cfg {
int dwrr_node_pos[NIX_TXSCH_LVL_CNT];
u16 schq_contig_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
u16 schq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
+ bool schq_index_used[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
};
struct otx2_qos {
@@ -59,10 +60,18 @@ struct otx2_qos_node {
u64 ceil;
u32 classid;
u32 prio;
- u16 schq; /* hw txschq */
+ u32 quantum;
+ /* hw txschq */
+ u16 schq;
u16 qid;
u16 prio_anchor;
+ u16 max_static_prio;
+ u16 child_dwrr_cnt;
+ u16 child_static_cnt;
+ u16 child_dwrr_prio;
+ u16 txschq_idx; /* txschq allocation index */
u8 level;
+ bool is_static;
};
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_path.c b/drivers/net/ethernet/mediatek/mtk_eth_path.c
index 317e447f4991..7c27a19c4d8f 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_path.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_path.c
@@ -15,10 +15,10 @@
struct mtk_eth_muxc {
const char *name;
int cap_bit;
- int (*set_path)(struct mtk_eth *eth, int path);
+ int (*set_path)(struct mtk_eth *eth, u64 path);
};
-static const char *mtk_eth_path_name(int path)
+static const char *mtk_eth_path_name(u64 path)
{
switch (path) {
case MTK_ETH_PATH_GMAC1_RGMII:
@@ -40,10 +40,10 @@ static const char *mtk_eth_path_name(int path)
}
}
-static int set_mux_gdm1_to_gmac1_esw(struct mtk_eth *eth, int path)
+static int set_mux_gdm1_to_gmac1_esw(struct mtk_eth *eth, u64 path)
{
bool updated = true;
- u32 val, mask, set;
+ u32 mask, set, reg;
switch (path) {
case MTK_ETH_PATH_GMAC1_SGMII:
@@ -59,11 +59,13 @@ static int set_mux_gdm1_to_gmac1_esw(struct mtk_eth *eth, int path)
break;
}
- if (updated) {
- val = mtk_r32(eth, MTK_MAC_MISC);
- val = (val & mask) | set;
- mtk_w32(eth, val, MTK_MAC_MISC);
- }
+ if (mtk_is_netsys_v3_or_greater(eth))
+ reg = MTK_MAC_MISC_V3;
+ else
+ reg = MTK_MAC_MISC;
+
+ if (updated)
+ mtk_m32(eth, mask, set, reg);
dev_dbg(eth->dev, "path %s in %s updated = %d\n",
mtk_eth_path_name(path), __func__, updated);
@@ -71,7 +73,7 @@ static int set_mux_gdm1_to_gmac1_esw(struct mtk_eth *eth, int path)
return 0;
}
-static int set_mux_gmac2_gmac0_to_gephy(struct mtk_eth *eth, int path)
+static int set_mux_gmac2_gmac0_to_gephy(struct mtk_eth *eth, u64 path)
{
unsigned int val = 0;
bool updated = true;
@@ -94,7 +96,7 @@ static int set_mux_gmac2_gmac0_to_gephy(struct mtk_eth *eth, int path)
return 0;
}
-static int set_mux_u3_gmac2_to_qphy(struct mtk_eth *eth, int path)
+static int set_mux_u3_gmac2_to_qphy(struct mtk_eth *eth, u64 path)
{
unsigned int val = 0, mask = 0, reg = 0;
bool updated = true;
@@ -125,7 +127,7 @@ static int set_mux_u3_gmac2_to_qphy(struct mtk_eth *eth, int path)
return 0;
}
-static int set_mux_gmac1_gmac2_to_sgmii_rgmii(struct mtk_eth *eth, int path)
+static int set_mux_gmac1_gmac2_to_sgmii_rgmii(struct mtk_eth *eth, u64 path)
{
unsigned int val = 0;
bool updated = true;
@@ -163,7 +165,7 @@ static int set_mux_gmac1_gmac2_to_sgmii_rgmii(struct mtk_eth *eth, int path)
return 0;
}
-static int set_mux_gmac12_to_gephy_sgmii(struct mtk_eth *eth, int path)
+static int set_mux_gmac12_to_gephy_sgmii(struct mtk_eth *eth, u64 path)
{
unsigned int val = 0;
bool updated = true;
@@ -218,7 +220,7 @@ static const struct mtk_eth_muxc mtk_eth_muxc[] = {
},
};
-static int mtk_eth_mux_setup(struct mtk_eth *eth, int path)
+static int mtk_eth_mux_setup(struct mtk_eth *eth, u64 path)
{
int i, err = 0;
@@ -249,7 +251,7 @@ out:
int mtk_gmac_sgmii_path_setup(struct mtk_eth *eth, int mac_id)
{
- int path;
+ u64 path;
path = (mac_id == 0) ? MTK_ETH_PATH_GMAC1_SGMII :
MTK_ETH_PATH_GMAC2_SGMII;
@@ -260,7 +262,7 @@ int mtk_gmac_sgmii_path_setup(struct mtk_eth *eth, int mac_id)
int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id)
{
- int path = 0;
+ u64 path = 0;
if (mac_id == 1)
path = MTK_ETH_PATH_GMAC2_GEPHY;
@@ -274,7 +276,7 @@ int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id)
int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id)
{
- int path;
+ u64 path;
path = (mac_id == 0) ? MTK_ETH_PATH_GMAC1_RGMII :
MTK_ETH_PATH_GMAC2_RGMII;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 2d15342c260a..30e3935e83f9 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -152,6 +152,54 @@ static const struct mtk_reg_map mt7986_reg_map = {
.pse_oq_sta = 0x01a0,
};
+static const struct mtk_reg_map mt7988_reg_map = {
+ .tx_irq_mask = 0x461c,
+ .tx_irq_status = 0x4618,
+ .pdma = {
+ .rx_ptr = 0x6900,
+ .rx_cnt_cfg = 0x6904,
+ .pcrx_ptr = 0x6908,
+ .glo_cfg = 0x6a04,
+ .rst_idx = 0x6a08,
+ .delay_irq = 0x6a0c,
+ .irq_status = 0x6a20,
+ .irq_mask = 0x6a28,
+ .adma_rx_dbg0 = 0x6a38,
+ .int_grp = 0x6a50,
+ },
+ .qdma = {
+ .qtx_cfg = 0x4400,
+ .qtx_sch = 0x4404,
+ .rx_ptr = 0x4500,
+ .rx_cnt_cfg = 0x4504,
+ .qcrx_ptr = 0x4508,
+ .glo_cfg = 0x4604,
+ .rst_idx = 0x4608,
+ .delay_irq = 0x460c,
+ .fc_th = 0x4610,
+ .int_grp = 0x4620,
+ .hred = 0x4644,
+ .ctx_ptr = 0x4700,
+ .dtx_ptr = 0x4704,
+ .crx_ptr = 0x4710,
+ .drx_ptr = 0x4714,
+ .fq_head = 0x4720,
+ .fq_tail = 0x4724,
+ .fq_count = 0x4728,
+ .fq_blen = 0x472c,
+ .tx_sch_rate = 0x4798,
+ },
+ .gdm1_cnt = 0x1c00,
+ .gdma_to_ppe = 0x3333,
+ .ppe_base = 0x2000,
+ .wdma_base = {
+ [0] = 0x4800,
+ [1] = 0x4c00,
+ },
+ .pse_iq_sta = 0x0180,
+ .pse_oq_sta = 0x01a0,
+};
+
/* strings used by ethtool */
static const struct mtk_ethtool_stats {
char str[ETH_GSTRING_LEN];
@@ -179,10 +227,54 @@ static const struct mtk_ethtool_stats {
};
static const char * const mtk_clks_source_name[] = {
- "ethif", "sgmiitop", "esw", "gp0", "gp1", "gp2", "fe", "trgpll",
- "sgmii_tx250m", "sgmii_rx250m", "sgmii_cdr_ref", "sgmii_cdr_fb",
- "sgmii2_tx250m", "sgmii2_rx250m", "sgmii2_cdr_ref", "sgmii2_cdr_fb",
- "sgmii_ck", "eth2pll", "wocpu0", "wocpu1", "netsys0", "netsys1"
+ "ethif",
+ "sgmiitop",
+ "esw",
+ "gp0",
+ "gp1",
+ "gp2",
+ "gp3",
+ "xgp1",
+ "xgp2",
+ "xgp3",
+ "crypto",
+ "fe",
+ "trgpll",
+ "sgmii_tx250m",
+ "sgmii_rx250m",
+ "sgmii_cdr_ref",
+ "sgmii_cdr_fb",
+ "sgmii2_tx250m",
+ "sgmii2_rx250m",
+ "sgmii2_cdr_ref",
+ "sgmii2_cdr_fb",
+ "sgmii_ck",
+ "eth2pll",
+ "wocpu0",
+ "wocpu1",
+ "netsys0",
+ "netsys1",
+ "ethwarp_wocpu2",
+ "ethwarp_wocpu1",
+ "ethwarp_wocpu0",
+ "top_usxgmii0_sel",
+ "top_usxgmii1_sel",
+ "top_sgm0_sel",
+ "top_sgm1_sel",
+ "top_xfi_phy0_xtal_sel",
+ "top_xfi_phy1_xtal_sel",
+ "top_eth_gmii_sel",
+ "top_eth_refck_50m_sel",
+ "top_eth_sys_200m_sel",
+ "top_eth_sys_sel",
+ "top_eth_xgmii_sel",
+ "top_eth_mii_sel",
+ "top_netsys_sel",
+ "top_netsys_500m_sel",
+ "top_netsys_pao_2x_sel",
+ "top_netsys_sync_250m_sel",
+ "top_netsys_ppefb_250m_sel",
+ "top_netsys_warp_sel",
};
void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
@@ -195,7 +287,7 @@ u32 mtk_r32(struct mtk_eth *eth, unsigned reg)
return __raw_readl(eth->base + reg);
}
-static u32 mtk_m32(struct mtk_eth *eth, u32 mask, u32 set, unsigned reg)
+u32 mtk_m32(struct mtk_eth *eth, u32 mask, u32 set, unsigned int reg)
{
u32 val;
@@ -385,10 +477,8 @@ static int mt7621_gmac0_rgmii_adjust(struct mtk_eth *eth,
}
static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth,
- phy_interface_t interface, int speed)
+ phy_interface_t interface)
{
- unsigned long rate;
- u32 tck, rck, intf;
int ret;
if (interface == PHY_INTERFACE_MODE_TRGMII) {
@@ -399,30 +489,20 @@ static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth,
return;
}
- if (speed == SPEED_1000) {
- intf = INTF_MODE_RGMII_1000;
- rate = 250000000;
- rck = RCK_CTRL_RGMII_1000;
- tck = TCK_CTRL_RGMII_1000;
- } else {
- intf = INTF_MODE_RGMII_10_100;
- rate = 500000000;
- rck = RCK_CTRL_RGMII_10_100;
- tck = TCK_CTRL_RGMII_10_100;
- }
-
- mtk_w32(eth, intf, INTF_MODE);
-
- regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0,
- ETHSYS_TRGMII_CLK_SEL362_5,
- ETHSYS_TRGMII_CLK_SEL362_5);
+ dev_err(eth->dev, "Missing PLL configuration, ethernet may not work\n");
+}
- ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], rate);
- if (ret)
- dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret);
+static void mtk_setup_bridge_switch(struct mtk_eth *eth)
+{
+ /* Force Port1 XGMAC Link Up */
+ mtk_m32(eth, 0, MTK_XGMAC_FORCE_LINK(MTK_GMAC1_ID),
+ MTK_XGMAC_STS(MTK_GMAC1_ID));
- mtk_w32(eth, rck, TRGMII_RCK_CTRL);
- mtk_w32(eth, tck, TRGMII_TCK_CTRL);
+ /* Adjust GSW bridge IPG to 11 */
+ mtk_m32(eth, GSWTX_IPG_MASK | GSWRX_IPG_MASK,
+ (GSW_IPG_11 << GSWTX_IPG_SHIFT) |
+ (GSW_IPG_11 << GSWRX_IPG_SHIFT),
+ MTK_GSW_CFG);
}
static struct phylink_pcs *mtk_mac_select_pcs(struct phylink_config *config,
@@ -484,6 +564,8 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
goto init_err;
}
break;
+ case PHY_INTERFACE_MODE_INTERNAL:
+ break;
default:
goto err_phy;
}
@@ -498,17 +580,8 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
state->interface))
goto err_phy;
} else {
- /* FIXME: this is incorrect. Not only does it
- * use state->speed (which is not guaranteed
- * to be correct) but it also makes use of it
- * in a code path that will only be reachable
- * when the PHY interface mode changes, not
- * when the speed changes. Consequently, RGMII
- * is probably broken.
- */
mtk_gmac0_rgmii_adjust(mac->hw,
- state->interface,
- state->speed);
+ state->interface);
/* mt7623_pad_clk_setup */
for (i = 0 ; i < NUM_TRGMII_CTRL; i++)
@@ -562,6 +635,15 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
return;
}
+ /* Setup gmac */
+ if (mtk_is_netsys_v3_or_greater(eth) &&
+ mac->interface == PHY_INTERFACE_MODE_INTERNAL) {
+ mtk_w32(mac->hw, MTK_GDMA_XGDM_SEL, MTK_GDMA_EG_CTRL(mac->id));
+ mtk_w32(mac->hw, MAC_MCR_FORCE_LINK_DOWN, MTK_MAC_MCR(mac->id));
+
+ mtk_setup_bridge_switch(eth);
+ }
+
return;
err_phy:
@@ -602,38 +684,6 @@ static int mtk_mac_finish(struct phylink_config *config, unsigned int mode,
return 0;
}
-static void mtk_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state)
-{
- struct mtk_mac *mac = container_of(config, struct mtk_mac,
- phylink_config);
- u32 pmsr = mtk_r32(mac->hw, MTK_MAC_MSR(mac->id));
-
- state->link = (pmsr & MAC_MSR_LINK);
- state->duplex = (pmsr & MAC_MSR_DPX) >> 1;
-
- switch (pmsr & (MAC_MSR_SPEED_1000 | MAC_MSR_SPEED_100)) {
- case 0:
- state->speed = SPEED_10;
- break;
- case MAC_MSR_SPEED_100:
- state->speed = SPEED_100;
- break;
- case MAC_MSR_SPEED_1000:
- state->speed = SPEED_1000;
- break;
- default:
- state->speed = SPEED_UNKNOWN;
- break;
- }
-
- state->pause &= (MLO_PAUSE_RX | MLO_PAUSE_TX);
- if (pmsr & MAC_MSR_RX_FC)
- state->pause |= MLO_PAUSE_RX;
- if (pmsr & MAC_MSR_TX_FC)
- state->pause |= MLO_PAUSE_TX;
-}
-
static void mtk_mac_link_down(struct phylink_config *config, unsigned int mode,
phy_interface_t interface)
{
@@ -659,7 +709,7 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx,
FIELD_PREP(MTK_QTX_SCH_MIN_RATE_MAN, 1) |
FIELD_PREP(MTK_QTX_SCH_MIN_RATE_EXP, 4) |
MTK_QTX_SCH_LEAKY_BUCKET_SIZE;
- if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v1(eth))
val |= MTK_QTX_SCH_LEAKY_BUCKET_EN;
if (IS_ENABLED(CONFIG_SOC_MT7621)) {
@@ -756,7 +806,6 @@ static void mtk_mac_link_up(struct phylink_config *config,
static const struct phylink_mac_ops mtk_phylink_ops = {
.mac_select_pcs = mtk_mac_select_pcs,
- .mac_pcs_get_state = mtk_mac_pcs_get_state,
.mac_config = mtk_mac_config,
.mac_finish = mtk_mac_finish,
.mac_link_down = mtk_mac_link_down,
@@ -807,11 +856,15 @@ static int mtk_mdio_init(struct mtk_eth *eth)
}
divider = min_t(unsigned int, DIV_ROUND_UP(MDC_MAX_FREQ, max_clk), 63);
+ /* Configure MDC Turbo Mode */
+ if (mtk_is_netsys_v3_or_greater(eth))
+ mtk_m32(eth, 0, MISC_MDC_TURBO, MTK_MAC_MISC_V3);
+
/* Configure MDC Divider */
- val = mtk_r32(eth, MTK_PPSC);
- val &= ~PPSC_MDC_CFG;
- val |= FIELD_PREP(PPSC_MDC_CFG, divider) | PPSC_MDC_TURBO;
- mtk_w32(eth, val, MTK_PPSC);
+ val = FIELD_PREP(PPSC_MDC_CFG, divider);
+ if (!mtk_is_netsys_v3_or_greater(eth))
+ val |= PPSC_MDC_TURBO;
+ mtk_m32(eth, PPSC_MDC_CFG, val, MTK_PPSC);
dev_dbg(eth->dev, "MDC is running on %d Hz\n", MDC_MAX_FREQ / divider);
@@ -943,17 +996,32 @@ void mtk_stats_update_mac(struct mtk_mac *mac)
mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x20 + offs);
hw_stats->rx_flow_control_packets +=
mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x24 + offs);
- hw_stats->tx_skip +=
- mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x28 + offs);
- hw_stats->tx_collisions +=
- mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x2c + offs);
- hw_stats->tx_bytes +=
- mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x30 + offs);
- stats = mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x34 + offs);
- if (stats)
- hw_stats->tx_bytes += (stats << 32);
- hw_stats->tx_packets +=
- mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x38 + offs);
+
+ if (mtk_is_netsys_v3_or_greater(eth)) {
+ hw_stats->tx_skip +=
+ mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x50 + offs);
+ hw_stats->tx_collisions +=
+ mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x54 + offs);
+ hw_stats->tx_bytes +=
+ mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x40 + offs);
+ stats = mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x44 + offs);
+ if (stats)
+ hw_stats->tx_bytes += (stats << 32);
+ hw_stats->tx_packets +=
+ mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x48 + offs);
+ } else {
+ hw_stats->tx_skip +=
+ mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x28 + offs);
+ hw_stats->tx_collisions +=
+ mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x2c + offs);
+ hw_stats->tx_bytes +=
+ mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x30 + offs);
+ stats = mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x34 + offs);
+ if (stats)
+ hw_stats->tx_bytes += (stats << 32);
+ hw_stats->tx_packets +=
+ mtk_r32(mac->hw, reg_map->gdm1_cnt + 0x38 + offs);
+ }
}
u64_stats_update_end(&hw_stats->syncp);
@@ -963,7 +1031,7 @@ static void mtk_stats_update(struct mtk_eth *eth)
{
int i;
- for (i = 0; i < MTK_MAC_COUNT; i++) {
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
if (!eth->mac[i] || !eth->mac[i]->hw_stats)
continue;
if (spin_trylock(&eth->mac[i]->hw_stats->stats_lock)) {
@@ -1037,7 +1105,7 @@ static bool mtk_rx_get_desc(struct mtk_eth *eth, struct mtk_rx_dma_v2 *rxd,
rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
rxd->rxd3 = READ_ONCE(dma_rxd->rxd3);
rxd->rxd4 = READ_ONCE(dma_rxd->rxd4);
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
rxd->rxd5 = READ_ONCE(dma_rxd->rxd5);
rxd->rxd6 = READ_ONCE(dma_rxd->rxd6);
}
@@ -1095,7 +1163,7 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
txd->txd3 = TX_DMA_PLEN0(MTK_QDMA_PAGE_SIZE);
txd->txd4 = 0;
- if (MTK_HAS_CAPS(soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
txd->txd5 = 0;
txd->txd6 = 0;
txd->txd7 = 0;
@@ -1257,7 +1325,19 @@ static void mtk_tx_set_dma_desc_v2(struct net_device *dev, void *txd,
data |= TX_DMA_LS0;
WRITE_ONCE(desc->txd3, data);
- data = (mac->id + 1) << TX_DMA_FPORT_SHIFT_V2; /* forward port */
+ /* set forward port */
+ switch (mac->id) {
+ case MTK_GMAC1_ID:
+ data = PSE_GDM1_PORT << TX_DMA_FPORT_SHIFT_V2;
+ break;
+ case MTK_GMAC2_ID:
+ data = PSE_GDM2_PORT << TX_DMA_FPORT_SHIFT_V2;
+ break;
+ case MTK_GMAC3_ID:
+ data = PSE_GDM3_PORT << TX_DMA_FPORT_SHIFT_V2;
+ break;
+ }
+
data |= TX_DMA_SWC_V2 | QID_BITS_V2(info->qid);
WRITE_ONCE(desc->txd4, data);
@@ -1268,6 +1348,8 @@ static void mtk_tx_set_dma_desc_v2(struct net_device *dev, void *txd,
/* tx checksum offload */
if (info->csum)
data |= TX_DMA_CHKSUM_V2;
+ if (mtk_is_netsys_v3_or_greater(eth) && netdev_uses_dsa(dev))
+ data |= TX_DMA_SPTAG_V3;
}
WRITE_ONCE(desc->txd5, data);
@@ -1286,7 +1368,7 @@ static void mtk_tx_set_dma_desc(struct net_device *dev, void *txd,
struct mtk_mac *mac = netdev_priv(dev);
struct mtk_eth *eth = mac->hw;
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
mtk_tx_set_dma_desc_v2(dev, txd, info);
else
mtk_tx_set_dma_desc_v1(dev, txd, info);
@@ -1333,8 +1415,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
mtk_tx_set_dma_desc(dev, itxd, &txd_info);
itx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
- itx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
- MTK_TX_FLAGS_FPORT1;
+ itx_buf->mac_id = mac->id;
setup_tx_buf(eth, itx_buf, itxd_pdma, txd_info.addr, txd_info.size,
k++);
@@ -1382,8 +1463,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
memset(tx_buf, 0, sizeof(*tx_buf));
tx_buf->data = (void *)MTK_DMA_DUMMY_DESC;
tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
- tx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
- MTK_TX_FLAGS_FPORT1;
+ tx_buf->mac_id = mac->id;
setup_tx_buf(eth, tx_buf, txd_pdma, txd_info.addr,
txd_info.size, k++);
@@ -1468,7 +1548,7 @@ static int mtk_queue_stopped(struct mtk_eth *eth)
{
int i;
- for (i = 0; i < MTK_MAC_COUNT; i++) {
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
if (!eth->netdev[i])
continue;
if (netif_queue_stopped(eth->netdev[i]))
@@ -1482,7 +1562,7 @@ static void mtk_wake_queue(struct mtk_eth *eth)
{
int i;
- for (i = 0; i < MTK_MAC_COUNT; i++) {
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
if (!eth->netdev[i])
continue;
netif_tx_wake_all_queues(eth->netdev[i]);
@@ -1593,7 +1673,7 @@ static void mtk_update_rx_cpu_idx(struct mtk_eth *eth)
static bool mtk_page_pool_enabled(struct mtk_eth *eth)
{
- return MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2);
+ return eth->soc->version == 2;
}
static struct page_pool *mtk_create_page_pool(struct mtk_eth *eth,
@@ -1685,7 +1765,7 @@ static int mtk_xdp_frame_map(struct mtk_eth *eth, struct net_device *dev,
}
mtk_tx_set_dma_desc(dev, txd, txd_info);
- tx_buf->flags |= !mac->id ? MTK_TX_FLAGS_FPORT0 : MTK_TX_FLAGS_FPORT1;
+ tx_buf->mac_id = mac->id;
tx_buf->type = dma_map ? MTK_TYPE_XDP_NDO : MTK_TYPE_XDP_TX;
tx_buf->data = (void *)MTK_DMA_DUMMY_DESC;
@@ -1935,13 +2015,26 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
break;
/* find out which mac the packet come from. values start at 1 */
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
- mac = RX_DMA_GET_SPORT_V2(trxd.rxd5) - 1;
- else if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) &&
- !(trxd.rxd4 & RX_DMA_SPECIAL_TAG))
+ if (mtk_is_netsys_v2_or_greater(eth)) {
+ u32 val = RX_DMA_GET_SPORT_V2(trxd.rxd5);
+
+ switch (val) {
+ case PSE_GDM1_PORT:
+ case PSE_GDM2_PORT:
+ mac = val - 1;
+ break;
+ case PSE_GDM3_PORT:
+ mac = MTK_GMAC3_ID;
+ break;
+ default:
+ break;
+ }
+ } else if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) &&
+ !(trxd.rxd4 & RX_DMA_SPECIAL_TAG)) {
mac = RX_DMA_GET_SPORT(trxd.rxd4) - 1;
+ }
- if (unlikely(mac < 0 || mac >= MTK_MAC_COUNT ||
+ if (unlikely(mac < 0 || mac >= MTK_MAX_DEVS ||
!eth->netdev[mac]))
goto release_desc;
@@ -2031,7 +2124,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
skb->dev = netdev;
bytes += skb->len;
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
reason = FIELD_GET(MTK_RXD5_PPE_CPU_REASON, trxd.rxd5);
hash = trxd.rxd5 & MTK_RXD5_FOE_ENTRY;
if (hash != MTK_RXD5_FOE_ENTRY)
@@ -2056,8 +2149,8 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
/* When using VLAN untagging in combination with DSA, the
* hardware treats the MTK special tag as a VLAN and untags it.
*/
- if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) &&
- (trxd.rxd2 & RX_DMA_VTAG) && netdev_uses_dsa(netdev)) {
+ if (mtk_is_netsys_v1(eth) && (trxd.rxd2 & RX_DMA_VTAG) &&
+ netdev_uses_dsa(netdev)) {
unsigned int port = RX_DMA_VPID(trxd.rxd3) & GENMASK(2, 0);
if (port < ARRAY_SIZE(eth->dsa_meta) &&
@@ -2161,7 +2254,6 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget,
while ((cpu != dma) && budget) {
u32 next_cpu = desc->txd2;
- int mac = 0;
desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0)
@@ -2169,15 +2261,13 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget,
tx_buf = mtk_desc_to_tx_buf(ring, desc,
eth->soc->txrx.txd_size);
- if (tx_buf->flags & MTK_TX_FLAGS_FPORT1)
- mac = 1;
-
if (!tx_buf->data)
break;
if (tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) {
if (tx_buf->type == MTK_TYPE_SKB)
- mtk_poll_tx_done(eth, state, mac, tx_buf->data);
+ mtk_poll_tx_done(eth, state, tx_buf->mac_id,
+ tx_buf->data);
budget--;
}
@@ -2367,7 +2457,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
txd->txd2 = next_ptr;
txd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
txd->txd4 = 0;
- if (MTK_HAS_CAPS(soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
txd->txd5 = 0;
txd->txd6 = 0;
txd->txd7 = 0;
@@ -2420,14 +2510,14 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
FIELD_PREP(MTK_QTX_SCH_MIN_RATE_MAN, 1) |
FIELD_PREP(MTK_QTX_SCH_MIN_RATE_EXP, 4) |
MTK_QTX_SCH_LEAKY_BUCKET_SIZE;
- if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v1(eth))
val |= MTK_QTX_SCH_LEAKY_BUCKET_EN;
mtk_w32(eth, val, soc->reg_map->qdma.qtx_sch + ofs);
ofs += MTK_QTX_OFFSET;
}
val = MTK_QDMA_TX_SCH_MAX_WFQ | (MTK_QDMA_TX_SCH_MAX_WFQ << 16);
mtk_w32(eth, val, soc->reg_map->qdma.tx_sch_rate);
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
mtk_w32(eth, val, soc->reg_map->qdma.tx_sch_rate + 4);
} else {
mtk_w32(eth, ring->phys_pdma, MT7628_TX_BASE_PTR0);
@@ -2556,7 +2646,7 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
rxd->rxd3 = 0;
rxd->rxd4 = 0;
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
rxd->rxd5 = 0;
rxd->rxd6 = 0;
rxd->rxd7 = 0;
@@ -2978,7 +3068,7 @@ static void mtk_dma_free(struct mtk_eth *eth)
const struct mtk_soc_data *soc = eth->soc;
int i;
- for (i = 0; i < MTK_MAC_COUNT; i++)
+ for (i = 0; i < MTK_MAX_DEVS; i++)
if (eth->netdev[i])
netdev_reset_queue(eth->netdev[i]);
if (eth->scratch_ring) {
@@ -3104,7 +3194,7 @@ static int mtk_start_dma(struct mtk_eth *eth)
MTK_TX_BT_32DWORDS | MTK_NDP_CO_PRO |
MTK_RX_2B_OFFSET | MTK_TX_WB_DDONE;
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
val |= MTK_MUTLI_CNT | MTK_RESV_BUF |
MTK_WCOMP_EN | MTK_DMAD_WR_WDONE |
MTK_CHK_DDONE_EN | MTK_LEAKY_BUCKET_EN;
@@ -3132,8 +3222,13 @@ static void mtk_gdm_config(struct mtk_eth *eth, u32 config)
if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628))
return;
- for (i = 0; i < MTK_MAC_COUNT; i++) {
- u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i));
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
+ u32 val;
+
+ if (!eth->netdev[i])
+ continue;
+
+ val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i));
/* default setup the forward port to send frame to PDMA */
val &= ~0xffff;
@@ -3143,7 +3238,7 @@ static void mtk_gdm_config(struct mtk_eth *eth, u32 config)
val |= config;
- if (eth->netdev[i] && netdev_uses_dsa(eth->netdev[i]))
+ if (netdev_uses_dsa(eth->netdev[i]))
val |= MTK_GDMA_SPECIAL_TAG;
mtk_w32(eth, val, MTK_GDMA_FWD_CFG(i));
@@ -3250,7 +3345,7 @@ static int mtk_open(struct net_device *dev)
phylink_start(mac->phylink);
netif_tx_start_all_queues(dev);
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
return 0;
if (mtk_uses_dsa(dev) && !eth->prog) {
@@ -3516,7 +3611,7 @@ static void mtk_hw_reset(struct mtk_eth *eth)
{
u32 val;
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
regmap_write(eth->ethsys, ETHSYS_FE_RST_CHK_IDLE_EN, 0);
val = RSTCTRL_PPE0_V2;
} else {
@@ -3528,7 +3623,7 @@ static void mtk_hw_reset(struct mtk_eth *eth)
ethsys_reset(eth, RSTCTRL_ETH | RSTCTRL_FE | val);
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
regmap_write(eth->ethsys, ETHSYS_FE_RST_CHK_IDLE_EN,
0x3ffffff);
}
@@ -3554,7 +3649,7 @@ static void mtk_hw_warm_reset(struct mtk_eth *eth)
return;
}
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
rst_mask = RSTCTRL_ETH | RSTCTRL_PPE0_V2;
else
rst_mask = RSTCTRL_ETH | RSTCTRL_PPE0;
@@ -3724,7 +3819,7 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset)
else
mtk_hw_reset(eth);
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
/* Set FE to PDMAv2 if necessary */
val = mtk_r32(eth, MTK_FE_GLO_MISC);
mtk_w32(eth, val | BIT(4), MTK_FE_GLO_MISC);
@@ -3745,15 +3840,15 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset)
* up with the more appropriate value when mtk_mac_config call is being
* invoked.
*/
- for (i = 0; i < MTK_MAC_COUNT; i++) {
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
struct net_device *dev = eth->netdev[i];
- mtk_w32(eth, MAC_MCR_FORCE_LINK_DOWN, MTK_MAC_MCR(i));
- if (dev) {
- struct mtk_mac *mac = netdev_priv(dev);
+ if (!dev)
+ continue;
- mtk_set_mcr_max_rx(mac, dev->mtu + MTK_RX_ETH_HLEN);
- }
+ mtk_w32(eth, MAC_MCR_FORCE_LINK_DOWN, MTK_MAC_MCR(i));
+ mtk_set_mcr_max_rx(netdev_priv(dev),
+ dev->mtu + MTK_RX_ETH_HLEN);
}
/* Indicates CDM to parse the MTK special tag from CPU
@@ -3761,7 +3856,7 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset)
*/
val = mtk_r32(eth, MTK_CDMQ_IG_CTRL);
mtk_w32(eth, val | MTK_CDMQ_STAG_EN, MTK_CDMQ_IG_CTRL);
- if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v1(eth)) {
val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
mtk_w32(eth, val | MTK_CDMP_STAG_EN, MTK_CDMP_IG_CTRL);
@@ -3783,7 +3878,24 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset)
mtk_w32(eth, eth->soc->txrx.rx_irq_done_mask, reg_map->qdma.int_grp + 4);
mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP);
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v3_or_greater(eth)) {
+ /* PSE should not drop port1, port8 and port9 packets */
+ mtk_w32(eth, 0x00000302, PSE_DROP_CFG);
+
+ /* GDM and CDM Threshold */
+ mtk_w32(eth, 0x00000707, MTK_CDMW0_THRES);
+ mtk_w32(eth, 0x00000077, MTK_CDMW1_THRES);
+
+ /* Disable GDM1 RX CRC stripping */
+ mtk_m32(eth, MTK_GDMA_STRP_CRC, 0, MTK_GDMA_FWD_CFG(0));
+
+ /* PSE GDM3 MIB counter has incorrect hw default values,
+ * so the driver ought to read clear the values beforehand
+ * in case ethtool retrieve wrong mib values.
+ */
+ for (i = 0; i < 0x80; i += 0x4)
+ mtk_r32(eth, reg_map->gdm1_cnt + 0x100 + i);
+ } else if (!mtk_is_netsys_v1(eth)) {
/* PSE should not drop port8 and port9 packets from WDMA Tx */
mtk_w32(eth, 0x00000300, PSE_DROP_CFG);
@@ -3933,7 +4045,7 @@ static void mtk_pending_work(struct work_struct *work)
mtk_prepare_for_reset(eth);
/* stop all devices to make sure that dma is properly shut down */
- for (i = 0; i < MTK_MAC_COUNT; i++) {
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
if (!eth->netdev[i] || !netif_running(eth->netdev[i]))
continue;
@@ -3949,8 +4061,8 @@ static void mtk_pending_work(struct work_struct *work)
mtk_hw_init(eth, true);
/* restart DMA and enable IRQs */
- for (i = 0; i < MTK_MAC_COUNT; i++) {
- if (!test_bit(i, &restart))
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
+ if (!eth->netdev[i] || !test_bit(i, &restart))
continue;
if (mtk_open(eth->netdev[i])) {
@@ -3977,7 +4089,7 @@ static int mtk_free_dev(struct mtk_eth *eth)
{
int i;
- for (i = 0; i < MTK_MAC_COUNT; i++) {
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
if (!eth->netdev[i])
continue;
free_netdev(eth->netdev[i]);
@@ -3996,7 +4108,7 @@ static int mtk_unreg_dev(struct mtk_eth *eth)
{
int i;
- for (i = 0; i < MTK_MAC_COUNT; i++) {
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
struct mtk_mac *mac;
if (!eth->netdev[i])
continue;
@@ -4298,7 +4410,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
}
id = be32_to_cpup(_id);
- if (id >= MTK_MAC_COUNT) {
+ if (id >= MTK_MAX_DEVS) {
dev_err(eth->dev, "%d is not a valid mac id\n", id);
return -EINVAL;
}
@@ -4346,7 +4458,11 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
}
spin_lock_init(&mac->hw_stats->stats_lock);
u64_stats_init(&mac->hw_stats->syncp);
- mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
+
+ if (mtk_is_netsys_v3_or_greater(eth))
+ mac->hw_stats->reg_offset = id * 0x80;
+ else
+ mac->hw_stats->reg_offset = id * 0x40;
/* phylink create */
err = of_get_phy_mode(np, &phy_mode);
@@ -4361,18 +4477,22 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
mac->phylink_config.dev = &eth->netdev[id]->dev;
mac->phylink_config.type = PHYLINK_NETDEV;
- /* This driver makes use of state->speed in mac_config */
- mac->phylink_config.legacy_pre_march2020 = true;
mac->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
MAC_10 | MAC_100 | MAC_1000 | MAC_2500FD;
- __set_bit(PHY_INTERFACE_MODE_MII,
- mac->phylink_config.supported_interfaces);
- __set_bit(PHY_INTERFACE_MODE_GMII,
- mac->phylink_config.supported_interfaces);
+ /* MT7623 gmac0 is now missing its speed-specific PLL configuration
+ * in its .mac_config method (since state->speed is not valid there.
+ * Disable support for MII, GMII and RGMII.
+ */
+ if (!mac->hw->soc->disable_pll_modes || mac->id != 0) {
+ __set_bit(PHY_INTERFACE_MODE_MII,
+ mac->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ mac->phylink_config.supported_interfaces);
- if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_RGMII))
- phy_interface_set_rgmii(mac->phylink_config.supported_interfaces);
+ if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_RGMII))
+ phy_interface_set_rgmii(mac->phylink_config.supported_interfaces);
+ }
if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_TRGMII) && !mac->id)
__set_bit(PHY_INTERFACE_MODE_TRGMII,
@@ -4396,6 +4516,17 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
mac->phylink_config.supported_interfaces);
}
+ if (mtk_is_netsys_v3_or_greater(mac->hw) &&
+ MTK_HAS_CAPS(mac->hw->soc->caps, MTK_ESW_BIT) &&
+ id == MTK_GMAC1_ID) {
+ mac->phylink_config.mac_capabilities = MAC_ASYM_PAUSE |
+ MAC_SYM_PAUSE |
+ MAC_10000FD;
+ phy_interface_zero(mac->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+ mac->phylink_config.supported_interfaces);
+ }
+
phylink = phylink_create(&mac->phylink_config,
of_fwnode_handle(mac->of_node),
phy_mode, &mtk_phylink_ops);
@@ -4454,7 +4585,7 @@ void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev)
rtnl_lock();
- for (i = 0; i < MTK_MAC_COUNT; i++) {
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
dev = eth->netdev[i];
if (!dev || !(dev->flags & IFF_UP))
@@ -4584,7 +4715,7 @@ static int mtk_probe(struct platform_device *pdev)
}
}
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) {
err = -EINVAL;
@@ -4692,9 +4823,8 @@ static int mtk_probe(struct platform_device *pdev)
}
if (eth->soc->offload_version) {
- u32 num_ppe;
+ u32 num_ppe = mtk_is_netsys_v2_or_greater(eth) ? 2 : 1;
- num_ppe = MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) ? 2 : 1;
num_ppe = min_t(u32, ARRAY_SIZE(eth->ppe), num_ppe);
for (i = 0; i < num_ppe; i++) {
u32 ppe_addr = eth->soc->reg_map->ppe_base + i * 0x400;
@@ -4761,7 +4891,7 @@ static int mtk_remove(struct platform_device *pdev)
int i;
/* stop all devices to make sure that dma is properly shut down */
- for (i = 0; i < MTK_MAC_COUNT; i++) {
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
if (!eth->netdev[i])
continue;
mtk_stop(eth->netdev[i]);
@@ -4786,6 +4916,7 @@ static const struct mtk_soc_data mt2701_data = {
.hw_features = MTK_HW_FEATURES,
.required_clks = MT7623_CLKS_BITMAP,
.required_pctl = true,
+ .version = 1,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
.rxd_size = sizeof(struct mtk_rx_dma),
@@ -4802,9 +4933,10 @@ static const struct mtk_soc_data mt7621_data = {
.hw_features = MTK_HW_FEATURES,
.required_clks = MT7621_CLKS_BITMAP,
.required_pctl = false,
+ .version = 1,
.offload_version = 1,
.hash_offset = 2,
- .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
+ .foe_entry_size = MTK_FOE_ENTRY_V1_SIZE,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
.rxd_size = sizeof(struct mtk_rx_dma),
@@ -4822,10 +4954,11 @@ static const struct mtk_soc_data mt7622_data = {
.hw_features = MTK_HW_FEATURES,
.required_clks = MT7622_CLKS_BITMAP,
.required_pctl = false,
+ .version = 1,
.offload_version = 2,
.hash_offset = 2,
.has_accounting = true,
- .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
+ .foe_entry_size = MTK_FOE_ENTRY_V1_SIZE,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
.rxd_size = sizeof(struct mtk_rx_dma),
@@ -4842,9 +4975,11 @@ static const struct mtk_soc_data mt7623_data = {
.hw_features = MTK_HW_FEATURES,
.required_clks = MT7623_CLKS_BITMAP,
.required_pctl = true,
+ .version = 1,
.offload_version = 1,
.hash_offset = 2,
- .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
+ .foe_entry_size = MTK_FOE_ENTRY_V1_SIZE,
+ .disable_pll_modes = true,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
.rxd_size = sizeof(struct mtk_rx_dma),
@@ -4863,6 +4998,7 @@ static const struct mtk_soc_data mt7629_data = {
.required_clks = MT7629_CLKS_BITMAP,
.required_pctl = false,
.has_accounting = true,
+ .version = 1,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
.rxd_size = sizeof(struct mtk_rx_dma),
@@ -4880,10 +5016,11 @@ static const struct mtk_soc_data mt7981_data = {
.hw_features = MTK_HW_FEATURES,
.required_clks = MT7981_CLKS_BITMAP,
.required_pctl = false,
+ .version = 2,
.offload_version = 2,
.hash_offset = 4,
- .foe_entry_size = sizeof(struct mtk_foe_entry),
.has_accounting = true,
+ .foe_entry_size = MTK_FOE_ENTRY_V2_SIZE,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma_v2),
.rxd_size = sizeof(struct mtk_rx_dma_v2),
@@ -4901,10 +5038,29 @@ static const struct mtk_soc_data mt7986_data = {
.hw_features = MTK_HW_FEATURES,
.required_clks = MT7986_CLKS_BITMAP,
.required_pctl = false,
+ .version = 2,
.offload_version = 2,
.hash_offset = 4,
- .foe_entry_size = sizeof(struct mtk_foe_entry),
.has_accounting = true,
+ .foe_entry_size = MTK_FOE_ENTRY_V2_SIZE,
+ .txrx = {
+ .txd_size = sizeof(struct mtk_tx_dma_v2),
+ .rxd_size = sizeof(struct mtk_rx_dma_v2),
+ .rx_irq_done_mask = MTK_RX_DONE_INT_V2,
+ .rx_dma_l4_valid = RX_DMA_L4_VALID_V2,
+ .dma_max_len = MTK_TX_DMA_BUF_LEN_V2,
+ .dma_len_offset = 8,
+ },
+};
+
+static const struct mtk_soc_data mt7988_data = {
+ .reg_map = &mt7988_reg_map,
+ .ana_rgc3 = 0x128,
+ .caps = MT7988_CAPS,
+ .hw_features = MTK_HW_FEATURES,
+ .required_clks = MT7988_CLKS_BITMAP,
+ .required_pctl = false,
+ .version = 3,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma_v2),
.rxd_size = sizeof(struct mtk_rx_dma_v2),
@@ -4921,6 +5077,7 @@ static const struct mtk_soc_data rt5350_data = {
.hw_features = MTK_HW_FEATURES_MT7628,
.required_clks = MT7628_CLKS_BITMAP,
.required_pctl = false,
+ .version = 1,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
.rxd_size = sizeof(struct mtk_rx_dma),
@@ -4932,14 +5089,15 @@ static const struct mtk_soc_data rt5350_data = {
};
const struct of_device_id of_mtk_match[] = {
- { .compatible = "mediatek,mt2701-eth", .data = &mt2701_data},
- { .compatible = "mediatek,mt7621-eth", .data = &mt7621_data},
- { .compatible = "mediatek,mt7622-eth", .data = &mt7622_data},
- { .compatible = "mediatek,mt7623-eth", .data = &mt7623_data},
- { .compatible = "mediatek,mt7629-eth", .data = &mt7629_data},
- { .compatible = "mediatek,mt7981-eth", .data = &mt7981_data},
- { .compatible = "mediatek,mt7986-eth", .data = &mt7986_data},
- { .compatible = "ralink,rt5350-eth", .data = &rt5350_data},
+ { .compatible = "mediatek,mt2701-eth", .data = &mt2701_data },
+ { .compatible = "mediatek,mt7621-eth", .data = &mt7621_data },
+ { .compatible = "mediatek,mt7622-eth", .data = &mt7622_data },
+ { .compatible = "mediatek,mt7623-eth", .data = &mt7623_data },
+ { .compatible = "mediatek,mt7629-eth", .data = &mt7629_data },
+ { .compatible = "mediatek,mt7981-eth", .data = &mt7981_data },
+ { .compatible = "mediatek,mt7986-eth", .data = &mt7986_data },
+ { .compatible = "mediatek,mt7988-eth", .data = &mt7988_data },
+ { .compatible = "ralink,rt5350-eth", .data = &rt5350_data },
{},
};
MODULE_DEVICE_TABLE(of, of_mtk_match);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 707445f6bcb1..80d17729e557 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -33,7 +33,6 @@
#define MTK_TX_DMA_BUF_LEN_V2 0xffff
#define MTK_QDMA_RING_SIZE 2048
#define MTK_DMA_SIZE 512
-#define MTK_MAC_COUNT 2
#define MTK_RX_ETH_HLEN (ETH_HLEN + ETH_FCS_LEN)
#define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN)
#define MTK_DMA_DUMMY_DESC 0xffffffff
@@ -118,14 +117,21 @@
#define MTK_CDMP_EG_CTRL 0x404
/* GDM Exgress Control Register */
-#define MTK_GDMA_FWD_CFG(x) (0x500 + (x * 0x1000))
+#define MTK_GDMA_FWD_CFG(x) ({ typeof(x) _x = (x); (_x == MTK_GMAC3_ID) ? \
+ 0x540 : 0x500 + (_x * 0x1000); })
#define MTK_GDMA_SPECIAL_TAG BIT(24)
#define MTK_GDMA_ICS_EN BIT(22)
#define MTK_GDMA_TCS_EN BIT(21)
#define MTK_GDMA_UCS_EN BIT(20)
+#define MTK_GDMA_STRP_CRC BIT(16)
#define MTK_GDMA_TO_PDMA 0x0
#define MTK_GDMA_DROP_ALL 0x7777
+/* GDM Egress Control Register */
+#define MTK_GDMA_EG_CTRL(x) ({ typeof(x) _x = (x); (_x == MTK_GMAC3_ID) ? \
+ 0x544 : 0x504 + (_x * 0x1000); })
+#define MTK_GDMA_XGDM_SEL BIT(31)
+
/* Unicast Filter MAC Address Register - Low */
#define MTK_GDMA_MAC_ADRL(x) (0x508 + (x * 0x1000))
@@ -288,8 +294,6 @@
/* QDMA Interrupt grouping registers */
#define MTK_RLS_DONE_INT BIT(0)
-#define MTK_STAT_OFFSET 0x40
-
/* QDMA TX NUM */
#define QID_BITS_V2(x) (((x) & 0x3f) << 16)
#define MTK_QDMA_GMAC2_QID 8
@@ -302,6 +306,8 @@
#define TX_DMA_CHKSUM_V2 (0x7 << 28)
#define TX_DMA_TSO_V2 BIT(31)
+#define TX_DMA_SPTAG_V3 BIT(27)
+
/* QDMA V2 descriptor txd4 */
#define TX_DMA_FPORT_SHIFT_V2 8
#define TX_DMA_FPORT_MASK_V2 0xf
@@ -389,7 +395,26 @@
#define PHY_IAC_TIMEOUT HZ
#define MTK_MAC_MISC 0x1000c
+#define MTK_MAC_MISC_V3 0x10010
#define MTK_MUX_TO_ESW BIT(0)
+#define MISC_MDC_TURBO BIT(4)
+
+/* XMAC status registers */
+#define MTK_XGMAC_STS(x) (((x) == MTK_GMAC3_ID) ? 0x1001C : 0x1000C)
+#define MTK_XGMAC_FORCE_LINK(x) (((x) == MTK_GMAC2_ID) ? BIT(31) : BIT(15))
+#define MTK_USXGMII_PCS_LINK BIT(8)
+#define MTK_XGMAC_RX_FC BIT(5)
+#define MTK_XGMAC_TX_FC BIT(4)
+#define MTK_USXGMII_PCS_MODE GENMASK(3, 1)
+#define MTK_XGMAC_LINK_STS BIT(0)
+
+/* GSW bridge registers */
+#define MTK_GSW_CFG (0x10080)
+#define GSWTX_IPG_MASK GENMASK(19, 16)
+#define GSWTX_IPG_SHIFT 16
+#define GSWRX_IPG_MASK GENMASK(3, 0)
+#define GSWRX_IPG_SHIFT 0
+#define GSW_IPG_11 11
/* Mac control registers */
#define MTK_MAC_MCR(x) (0x10100 + (x * 0x100))
@@ -635,12 +660,6 @@ enum mtk_tx_flags {
*/
MTK_TX_FLAGS_SINGLE0 = 0x01,
MTK_TX_FLAGS_PAGE0 = 0x02,
-
- /* MTK_TX_FLAGS_FPORTx allows tracking which port the transmitted
- * SKB out instead of looking up through hardware TX descriptor.
- */
- MTK_TX_FLAGS_FPORT0 = 0x04,
- MTK_TX_FLAGS_FPORT1 = 0x08,
};
/* This enum allows us to identify how the clock is defined on the array of the
@@ -653,6 +672,11 @@ enum mtk_clks_map {
MTK_CLK_GP0,
MTK_CLK_GP1,
MTK_CLK_GP2,
+ MTK_CLK_GP3,
+ MTK_CLK_XGP1,
+ MTK_CLK_XGP2,
+ MTK_CLK_XGP3,
+ MTK_CLK_CRYPTO,
MTK_CLK_FE,
MTK_CLK_TRGPLL,
MTK_CLK_SGMII_TX_250M,
@@ -669,63 +693,145 @@ enum mtk_clks_map {
MTK_CLK_WOCPU1,
MTK_CLK_NETSYS0,
MTK_CLK_NETSYS1,
+ MTK_CLK_ETHWARP_WOCPU2,
+ MTK_CLK_ETHWARP_WOCPU1,
+ MTK_CLK_ETHWARP_WOCPU0,
+ MTK_CLK_TOP_USXGMII_SBUS_0_SEL,
+ MTK_CLK_TOP_USXGMII_SBUS_1_SEL,
+ MTK_CLK_TOP_SGM_0_SEL,
+ MTK_CLK_TOP_SGM_1_SEL,
+ MTK_CLK_TOP_XFI_PHY_0_XTAL_SEL,
+ MTK_CLK_TOP_XFI_PHY_1_XTAL_SEL,
+ MTK_CLK_TOP_ETH_GMII_SEL,
+ MTK_CLK_TOP_ETH_REFCK_50M_SEL,
+ MTK_CLK_TOP_ETH_SYS_200M_SEL,
+ MTK_CLK_TOP_ETH_SYS_SEL,
+ MTK_CLK_TOP_ETH_XGMII_SEL,
+ MTK_CLK_TOP_ETH_MII_SEL,
+ MTK_CLK_TOP_NETSYS_SEL,
+ MTK_CLK_TOP_NETSYS_500M_SEL,
+ MTK_CLK_TOP_NETSYS_PAO_2X_SEL,
+ MTK_CLK_TOP_NETSYS_SYNC_250M_SEL,
+ MTK_CLK_TOP_NETSYS_PPEFB_250M_SEL,
+ MTK_CLK_TOP_NETSYS_WARP_SEL,
MTK_CLK_MAX
};
-#define MT7623_CLKS_BITMAP (BIT(MTK_CLK_ETHIF) | BIT(MTK_CLK_ESW) | \
- BIT(MTK_CLK_GP1) | BIT(MTK_CLK_GP2) | \
- BIT(MTK_CLK_TRGPLL))
-#define MT7622_CLKS_BITMAP (BIT(MTK_CLK_ETHIF) | BIT(MTK_CLK_ESW) | \
- BIT(MTK_CLK_GP0) | BIT(MTK_CLK_GP1) | \
- BIT(MTK_CLK_GP2) | \
- BIT(MTK_CLK_SGMII_TX_250M) | \
- BIT(MTK_CLK_SGMII_RX_250M) | \
- BIT(MTK_CLK_SGMII_CDR_REF) | \
- BIT(MTK_CLK_SGMII_CDR_FB) | \
- BIT(MTK_CLK_SGMII_CK) | \
- BIT(MTK_CLK_ETH2PLL))
+#define MT7623_CLKS_BITMAP (BIT_ULL(MTK_CLK_ETHIF) | BIT_ULL(MTK_CLK_ESW) | \
+ BIT_ULL(MTK_CLK_GP1) | BIT_ULL(MTK_CLK_GP2) | \
+ BIT_ULL(MTK_CLK_TRGPLL))
+#define MT7622_CLKS_BITMAP (BIT_ULL(MTK_CLK_ETHIF) | BIT_ULL(MTK_CLK_ESW) | \
+ BIT_ULL(MTK_CLK_GP0) | BIT_ULL(MTK_CLK_GP1) | \
+ BIT_ULL(MTK_CLK_GP2) | \
+ BIT_ULL(MTK_CLK_SGMII_TX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII_RX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII_CDR_REF) | \
+ BIT_ULL(MTK_CLK_SGMII_CDR_FB) | \
+ BIT_ULL(MTK_CLK_SGMII_CK) | \
+ BIT_ULL(MTK_CLK_ETH2PLL))
#define MT7621_CLKS_BITMAP (0)
#define MT7628_CLKS_BITMAP (0)
-#define MT7629_CLKS_BITMAP (BIT(MTK_CLK_ETHIF) | BIT(MTK_CLK_ESW) | \
- BIT(MTK_CLK_GP0) | BIT(MTK_CLK_GP1) | \
- BIT(MTK_CLK_GP2) | BIT(MTK_CLK_FE) | \
- BIT(MTK_CLK_SGMII_TX_250M) | \
- BIT(MTK_CLK_SGMII_RX_250M) | \
- BIT(MTK_CLK_SGMII_CDR_REF) | \
- BIT(MTK_CLK_SGMII_CDR_FB) | \
- BIT(MTK_CLK_SGMII2_TX_250M) | \
- BIT(MTK_CLK_SGMII2_RX_250M) | \
- BIT(MTK_CLK_SGMII2_CDR_REF) | \
- BIT(MTK_CLK_SGMII2_CDR_FB) | \
- BIT(MTK_CLK_SGMII_CK) | \
- BIT(MTK_CLK_ETH2PLL) | BIT(MTK_CLK_SGMIITOP))
-#define MT7981_CLKS_BITMAP (BIT(MTK_CLK_FE) | BIT(MTK_CLK_GP2) | BIT(MTK_CLK_GP1) | \
- BIT(MTK_CLK_WOCPU0) | \
- BIT(MTK_CLK_SGMII_TX_250M) | \
- BIT(MTK_CLK_SGMII_RX_250M) | \
- BIT(MTK_CLK_SGMII_CDR_REF) | \
- BIT(MTK_CLK_SGMII_CDR_FB) | \
- BIT(MTK_CLK_SGMII2_TX_250M) | \
- BIT(MTK_CLK_SGMII2_RX_250M) | \
- BIT(MTK_CLK_SGMII2_CDR_REF) | \
- BIT(MTK_CLK_SGMII2_CDR_FB) | \
- BIT(MTK_CLK_SGMII_CK))
-#define MT7986_CLKS_BITMAP (BIT(MTK_CLK_FE) | BIT(MTK_CLK_GP2) | BIT(MTK_CLK_GP1) | \
- BIT(MTK_CLK_WOCPU1) | BIT(MTK_CLK_WOCPU0) | \
- BIT(MTK_CLK_SGMII_TX_250M) | \
- BIT(MTK_CLK_SGMII_RX_250M) | \
- BIT(MTK_CLK_SGMII_CDR_REF) | \
- BIT(MTK_CLK_SGMII_CDR_FB) | \
- BIT(MTK_CLK_SGMII2_TX_250M) | \
- BIT(MTK_CLK_SGMII2_RX_250M) | \
- BIT(MTK_CLK_SGMII2_CDR_REF) | \
- BIT(MTK_CLK_SGMII2_CDR_FB))
+#define MT7629_CLKS_BITMAP (BIT_ULL(MTK_CLK_ETHIF) | BIT_ULL(MTK_CLK_ESW) | \
+ BIT_ULL(MTK_CLK_GP0) | BIT_ULL(MTK_CLK_GP1) | \
+ BIT_ULL(MTK_CLK_GP2) | BIT_ULL(MTK_CLK_FE) | \
+ BIT_ULL(MTK_CLK_SGMII_TX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII_RX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII_CDR_REF) | \
+ BIT_ULL(MTK_CLK_SGMII_CDR_FB) | \
+ BIT_ULL(MTK_CLK_SGMII2_TX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII2_RX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII2_CDR_REF) | \
+ BIT_ULL(MTK_CLK_SGMII2_CDR_FB) | \
+ BIT_ULL(MTK_CLK_SGMII_CK) | \
+ BIT_ULL(MTK_CLK_ETH2PLL) | BIT_ULL(MTK_CLK_SGMIITOP))
+#define MT7981_CLKS_BITMAP (BIT_ULL(MTK_CLK_FE) | BIT_ULL(MTK_CLK_GP2) | \
+ BIT_ULL(MTK_CLK_GP1) | \
+ BIT_ULL(MTK_CLK_WOCPU0) | \
+ BIT_ULL(MTK_CLK_SGMII_TX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII_RX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII_CDR_REF) | \
+ BIT_ULL(MTK_CLK_SGMII_CDR_FB) | \
+ BIT_ULL(MTK_CLK_SGMII2_TX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII2_RX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII2_CDR_REF) | \
+ BIT_ULL(MTK_CLK_SGMII2_CDR_FB) | \
+ BIT_ULL(MTK_CLK_SGMII_CK))
+#define MT7986_CLKS_BITMAP (BIT_ULL(MTK_CLK_FE) | BIT_ULL(MTK_CLK_GP2) | \
+ BIT_ULL(MTK_CLK_GP1) | \
+ BIT_ULL(MTK_CLK_WOCPU1) | BIT_ULL(MTK_CLK_WOCPU0) | \
+ BIT_ULL(MTK_CLK_SGMII_TX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII_RX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII_CDR_REF) | \
+ BIT_ULL(MTK_CLK_SGMII_CDR_FB) | \
+ BIT_ULL(MTK_CLK_SGMII2_TX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII2_RX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII2_CDR_REF) | \
+ BIT_ULL(MTK_CLK_SGMII2_CDR_FB))
+#define MT7988_CLKS_BITMAP (BIT_ULL(MTK_CLK_FE) | BIT_ULL(MTK_CLK_ESW) | \
+ BIT_ULL(MTK_CLK_GP1) | BIT_ULL(MTK_CLK_GP2) | \
+ BIT_ULL(MTK_CLK_GP3) | BIT_ULL(MTK_CLK_XGP1) | \
+ BIT_ULL(MTK_CLK_XGP2) | BIT_ULL(MTK_CLK_XGP3) | \
+ BIT_ULL(MTK_CLK_CRYPTO) | \
+ BIT_ULL(MTK_CLK_SGMII_TX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII_RX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII2_TX_250M) | \
+ BIT_ULL(MTK_CLK_SGMII2_RX_250M) | \
+ BIT_ULL(MTK_CLK_ETHWARP_WOCPU2) | \
+ BIT_ULL(MTK_CLK_ETHWARP_WOCPU1) | \
+ BIT_ULL(MTK_CLK_ETHWARP_WOCPU0) | \
+ BIT_ULL(MTK_CLK_TOP_USXGMII_SBUS_0_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_USXGMII_SBUS_1_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_SGM_0_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_SGM_1_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_XFI_PHY_0_XTAL_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_XFI_PHY_1_XTAL_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_ETH_GMII_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_ETH_REFCK_50M_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_ETH_SYS_200M_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_ETH_SYS_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_ETH_XGMII_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_ETH_MII_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_NETSYS_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_NETSYS_500M_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_NETSYS_PAO_2X_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_NETSYS_SYNC_250M_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_NETSYS_PPEFB_250M_SEL) | \
+ BIT_ULL(MTK_CLK_TOP_NETSYS_WARP_SEL))
enum mtk_dev_state {
MTK_HW_INIT,
MTK_RESETTING
};
+/* PSE Port Definition */
+enum mtk_pse_port {
+ PSE_ADMA_PORT = 0,
+ PSE_GDM1_PORT,
+ PSE_GDM2_PORT,
+ PSE_PPE0_PORT,
+ PSE_PPE1_PORT,
+ PSE_QDMA_TX_PORT,
+ PSE_QDMA_RX_PORT,
+ PSE_DROP_PORT,
+ PSE_WDMA0_PORT,
+ PSE_WDMA1_PORT,
+ PSE_TDMA_PORT,
+ PSE_NONE_PORT,
+ PSE_PPE2_PORT,
+ PSE_WDMA2_PORT,
+ PSE_EIP197_PORT,
+ PSE_GDM3_PORT,
+ PSE_PORT_MAX
+};
+
+/* GMAC Identifier */
+enum mtk_gmac_id {
+ MTK_GMAC1_ID = 0,
+ MTK_GMAC2_ID,
+ MTK_GMAC3_ID,
+ MTK_GMAC_ID_MAX
+};
+
enum mtk_tx_buf_type {
MTK_TYPE_SKB,
MTK_TYPE_XDP_TX,
@@ -744,7 +850,8 @@ struct mtk_tx_buf {
enum mtk_tx_buf_type type;
void *data;
- u32 flags;
+ u16 mac_id;
+ u16 flags;
DEFINE_DMA_UNMAP_ADDR(dma_addr0);
DEFINE_DMA_UNMAP_LEN(dma_len0);
DEFINE_DMA_UNMAP_ADDR(dma_addr1);
@@ -820,7 +927,6 @@ enum mkt_eth_capabilities {
MTK_SHARED_INT_BIT,
MTK_TRGMII_MT7621_CLK_BIT,
MTK_QDMA_BIT,
- MTK_NETSYS_V2_BIT,
MTK_SOC_MT7628_BIT,
MTK_RSTCTRL_PPE1_BIT,
MTK_U3_COPHY_V2_BIT,
@@ -843,42 +949,41 @@ enum mkt_eth_capabilities {
};
/* Supported hardware group on SoCs */
-#define MTK_RGMII BIT(MTK_RGMII_BIT)
-#define MTK_TRGMII BIT(MTK_TRGMII_BIT)
-#define MTK_SGMII BIT(MTK_SGMII_BIT)
-#define MTK_ESW BIT(MTK_ESW_BIT)
-#define MTK_GEPHY BIT(MTK_GEPHY_BIT)
-#define MTK_MUX BIT(MTK_MUX_BIT)
-#define MTK_INFRA BIT(MTK_INFRA_BIT)
-#define MTK_SHARED_SGMII BIT(MTK_SHARED_SGMII_BIT)
-#define MTK_HWLRO BIT(MTK_HWLRO_BIT)
-#define MTK_SHARED_INT BIT(MTK_SHARED_INT_BIT)
-#define MTK_TRGMII_MT7621_CLK BIT(MTK_TRGMII_MT7621_CLK_BIT)
-#define MTK_QDMA BIT(MTK_QDMA_BIT)
-#define MTK_NETSYS_V2 BIT(MTK_NETSYS_V2_BIT)
-#define MTK_SOC_MT7628 BIT(MTK_SOC_MT7628_BIT)
-#define MTK_RSTCTRL_PPE1 BIT(MTK_RSTCTRL_PPE1_BIT)
-#define MTK_U3_COPHY_V2 BIT(MTK_U3_COPHY_V2_BIT)
+#define MTK_RGMII BIT_ULL(MTK_RGMII_BIT)
+#define MTK_TRGMII BIT_ULL(MTK_TRGMII_BIT)
+#define MTK_SGMII BIT_ULL(MTK_SGMII_BIT)
+#define MTK_ESW BIT_ULL(MTK_ESW_BIT)
+#define MTK_GEPHY BIT_ULL(MTK_GEPHY_BIT)
+#define MTK_MUX BIT_ULL(MTK_MUX_BIT)
+#define MTK_INFRA BIT_ULL(MTK_INFRA_BIT)
+#define MTK_SHARED_SGMII BIT_ULL(MTK_SHARED_SGMII_BIT)
+#define MTK_HWLRO BIT_ULL(MTK_HWLRO_BIT)
+#define MTK_SHARED_INT BIT_ULL(MTK_SHARED_INT_BIT)
+#define MTK_TRGMII_MT7621_CLK BIT_ULL(MTK_TRGMII_MT7621_CLK_BIT)
+#define MTK_QDMA BIT_ULL(MTK_QDMA_BIT)
+#define MTK_SOC_MT7628 BIT_ULL(MTK_SOC_MT7628_BIT)
+#define MTK_RSTCTRL_PPE1 BIT_ULL(MTK_RSTCTRL_PPE1_BIT)
+#define MTK_U3_COPHY_V2 BIT_ULL(MTK_U3_COPHY_V2_BIT)
#define MTK_ETH_MUX_GDM1_TO_GMAC1_ESW \
- BIT(MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT)
+ BIT_ULL(MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT)
#define MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY \
- BIT(MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY_BIT)
+ BIT_ULL(MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY_BIT)
#define MTK_ETH_MUX_U3_GMAC2_TO_QPHY \
- BIT(MTK_ETH_MUX_U3_GMAC2_TO_QPHY_BIT)
+ BIT_ULL(MTK_ETH_MUX_U3_GMAC2_TO_QPHY_BIT)
#define MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII \
- BIT(MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII_BIT)
+ BIT_ULL(MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII_BIT)
#define MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII \
- BIT(MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII_BIT)
+ BIT_ULL(MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII_BIT)
/* Supported path present on SoCs */
-#define MTK_ETH_PATH_GMAC1_RGMII BIT(MTK_ETH_PATH_GMAC1_RGMII_BIT)
-#define MTK_ETH_PATH_GMAC1_TRGMII BIT(MTK_ETH_PATH_GMAC1_TRGMII_BIT)
-#define MTK_ETH_PATH_GMAC1_SGMII BIT(MTK_ETH_PATH_GMAC1_SGMII_BIT)
-#define MTK_ETH_PATH_GMAC2_RGMII BIT(MTK_ETH_PATH_GMAC2_RGMII_BIT)
-#define MTK_ETH_PATH_GMAC2_SGMII BIT(MTK_ETH_PATH_GMAC2_SGMII_BIT)
-#define MTK_ETH_PATH_GMAC2_GEPHY BIT(MTK_ETH_PATH_GMAC2_GEPHY_BIT)
-#define MTK_ETH_PATH_GDM1_ESW BIT(MTK_ETH_PATH_GDM1_ESW_BIT)
+#define MTK_ETH_PATH_GMAC1_RGMII BIT_ULL(MTK_ETH_PATH_GMAC1_RGMII_BIT)
+#define MTK_ETH_PATH_GMAC1_TRGMII BIT_ULL(MTK_ETH_PATH_GMAC1_TRGMII_BIT)
+#define MTK_ETH_PATH_GMAC1_SGMII BIT_ULL(MTK_ETH_PATH_GMAC1_SGMII_BIT)
+#define MTK_ETH_PATH_GMAC2_RGMII BIT_ULL(MTK_ETH_PATH_GMAC2_RGMII_BIT)
+#define MTK_ETH_PATH_GMAC2_SGMII BIT_ULL(MTK_ETH_PATH_GMAC2_SGMII_BIT)
+#define MTK_ETH_PATH_GMAC2_GEPHY BIT_ULL(MTK_ETH_PATH_GMAC2_GEPHY_BIT)
+#define MTK_ETH_PATH_GDM1_ESW BIT_ULL(MTK_ETH_PATH_GDM1_ESW_BIT)
#define MTK_GMAC1_RGMII (MTK_ETH_PATH_GMAC1_RGMII | MTK_RGMII)
#define MTK_GMAC1_TRGMII (MTK_ETH_PATH_GMAC1_TRGMII | MTK_TRGMII)
@@ -934,11 +1039,13 @@ enum mkt_eth_capabilities {
#define MT7981_CAPS (MTK_GMAC1_SGMII | MTK_GMAC2_SGMII | MTK_GMAC2_GEPHY | \
MTK_MUX_GMAC12_TO_GEPHY_SGMII | MTK_QDMA | \
MTK_MUX_U3_GMAC2_TO_QPHY | MTK_U3_COPHY_V2 | \
- MTK_NETSYS_V2 | MTK_RSTCTRL_PPE1)
+ MTK_RSTCTRL_PPE1)
#define MT7986_CAPS (MTK_GMAC1_SGMII | MTK_GMAC2_SGMII | \
MTK_MUX_GMAC12_TO_GEPHY_SGMII | MTK_QDMA | \
- MTK_NETSYS_V2 | MTK_RSTCTRL_PPE1)
+ MTK_RSTCTRL_PPE1)
+
+#define MT7988_CAPS (MTK_GDM1_ESW | MTK_QDMA | MTK_RSTCTRL_PPE1)
struct mtk_tx_dma_desc_info {
dma_addr_t addr;
@@ -1009,6 +1116,7 @@ struct mtk_reg_map {
* @required_pctl A bool value to show whether the SoC requires
* the extra setup for those pins used by GMAC.
* @hash_offset Flow table hash offset.
+ * @version SoC version.
* @foe_entry_size Foe table entry size.
* @has_accounting Bool indicating support for accounting of
* offloaded flows.
@@ -1022,14 +1130,16 @@ struct mtk_reg_map {
struct mtk_soc_data {
const struct mtk_reg_map *reg_map;
u32 ana_rgc3;
- u32 caps;
- u32 required_clks;
+ u64 caps;
+ u64 required_clks;
bool required_pctl;
u8 offload_version;
u8 hash_offset;
+ u8 version;
u16 foe_entry_size;
netdev_features_t hw_features;
bool has_accounting;
+ bool disable_pll_modes;
struct {
u32 txd_size;
u32 rxd_size;
@@ -1042,8 +1152,8 @@ struct mtk_soc_data {
#define MTK_DMA_MONITOR_TIMEOUT msecs_to_jiffies(1000)
-/* currently no SoC has more than 2 macs */
-#define MTK_MAX_DEVS 2
+/* currently no SoC has more than 3 macs */
+#define MTK_MAX_DEVS 3
/* struct mtk_eth - This is the main datasructure for holding the state
* of the driver
@@ -1182,6 +1292,21 @@ struct mtk_mac {
/* the struct describing the SoC. these are declared in the soc_xyz.c files */
extern const struct of_device_id of_mtk_match[];
+static inline bool mtk_is_netsys_v1(struct mtk_eth *eth)
+{
+ return eth->soc->version == 1;
+}
+
+static inline bool mtk_is_netsys_v2_or_greater(struct mtk_eth *eth)
+{
+ return eth->soc->version > 1;
+}
+
+static inline bool mtk_is_netsys_v3_or_greater(struct mtk_eth *eth)
+{
+ return eth->soc->version > 2;
+}
+
static inline struct mtk_foe_entry *
mtk_foe_get_entry(struct mtk_ppe *ppe, u16 hash)
{
@@ -1192,7 +1317,7 @@ mtk_foe_get_entry(struct mtk_ppe *ppe, u16 hash)
static inline u32 mtk_get_ib1_ts_mask(struct mtk_eth *eth)
{
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
return MTK_FOE_IB1_BIND_TIMESTAMP_V2;
return MTK_FOE_IB1_BIND_TIMESTAMP;
@@ -1200,7 +1325,7 @@ static inline u32 mtk_get_ib1_ts_mask(struct mtk_eth *eth)
static inline u32 mtk_get_ib1_ppoe_mask(struct mtk_eth *eth)
{
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
return MTK_FOE_IB1_BIND_PPPOE_V2;
return MTK_FOE_IB1_BIND_PPPOE;
@@ -1208,7 +1333,7 @@ static inline u32 mtk_get_ib1_ppoe_mask(struct mtk_eth *eth)
static inline u32 mtk_get_ib1_vlan_tag_mask(struct mtk_eth *eth)
{
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
return MTK_FOE_IB1_BIND_VLAN_TAG_V2;
return MTK_FOE_IB1_BIND_VLAN_TAG;
@@ -1216,7 +1341,7 @@ static inline u32 mtk_get_ib1_vlan_tag_mask(struct mtk_eth *eth)
static inline u32 mtk_get_ib1_vlan_layer_mask(struct mtk_eth *eth)
{
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
return MTK_FOE_IB1_BIND_VLAN_LAYER_V2;
return MTK_FOE_IB1_BIND_VLAN_LAYER;
@@ -1224,7 +1349,7 @@ static inline u32 mtk_get_ib1_vlan_layer_mask(struct mtk_eth *eth)
static inline u32 mtk_prep_ib1_vlan_layer(struct mtk_eth *eth, u32 val)
{
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
return FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER_V2, val);
return FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, val);
@@ -1232,7 +1357,7 @@ static inline u32 mtk_prep_ib1_vlan_layer(struct mtk_eth *eth, u32 val)
static inline u32 mtk_get_ib1_vlan_layer(struct mtk_eth *eth, u32 val)
{
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
return FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER_V2, val);
return FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, val);
@@ -1240,7 +1365,7 @@ static inline u32 mtk_get_ib1_vlan_layer(struct mtk_eth *eth, u32 val)
static inline u32 mtk_get_ib1_pkt_type_mask(struct mtk_eth *eth)
{
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
return MTK_FOE_IB1_PACKET_TYPE_V2;
return MTK_FOE_IB1_PACKET_TYPE;
@@ -1248,7 +1373,7 @@ static inline u32 mtk_get_ib1_pkt_type_mask(struct mtk_eth *eth)
static inline u32 mtk_get_ib1_pkt_type(struct mtk_eth *eth, u32 val)
{
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
return FIELD_GET(MTK_FOE_IB1_PACKET_TYPE_V2, val);
return FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, val);
@@ -1256,7 +1381,7 @@ static inline u32 mtk_get_ib1_pkt_type(struct mtk_eth *eth, u32 val)
static inline u32 mtk_get_ib2_multicast_mask(struct mtk_eth *eth)
{
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
return MTK_FOE_IB2_MULTICAST_V2;
return MTK_FOE_IB2_MULTICAST;
@@ -1267,6 +1392,7 @@ void mtk_stats_update_mac(struct mtk_mac *mac);
void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg);
u32 mtk_r32(struct mtk_eth *eth, unsigned reg);
+u32 mtk_m32(struct mtk_eth *eth, u32 mask, u32 set, unsigned int reg);
int mtk_gmac_sgmii_path_setup(struct mtk_eth *eth, int mac_id);
int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id);
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
index 9129821f3ab8..2f0e682449ef 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -208,7 +208,7 @@ int mtk_foe_entry_prepare(struct mtk_eth *eth, struct mtk_foe_entry *entry,
memset(entry, 0, sizeof(*entry));
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE_V2, type) |
FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
@@ -272,7 +272,7 @@ int mtk_foe_entry_set_pse_port(struct mtk_eth *eth,
u32 *ib2 = mtk_foe_entry_ib2(eth, entry);
u32 val = *ib2;
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
val &= ~MTK_FOE_IB2_DEST_PORT_V2;
val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT_V2, port);
} else {
@@ -423,7 +423,7 @@ int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry,
struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(eth, entry);
u32 *ib2 = mtk_foe_entry_ib2(eth, entry);
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
*ib2 &= ~MTK_FOE_IB2_PORT_MG_V2;
*ib2 |= FIELD_PREP(MTK_FOE_IB2_RX_IDX, txq) |
MTK_FOE_IB2_WDMA_WINFO_V2;
@@ -447,7 +447,7 @@ int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry,
{
u32 *ib2 = mtk_foe_entry_ib2(eth, entry);
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
*ib2 &= ~MTK_FOE_IB2_QID_V2;
*ib2 |= FIELD_PREP(MTK_FOE_IB2_QID_V2, queue);
*ib2 |= MTK_FOE_IB2_PSE_QOS_V2;
@@ -603,7 +603,7 @@ __mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
struct mtk_foe_entry *hwe;
u32 val;
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP_V2;
entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP_V2,
timestamp);
@@ -619,7 +619,7 @@ __mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
hwe->ib1 = entry->ib1;
if (ppe->accounting) {
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
val = MTK_FOE_IB2_MIB_CNT_V2;
else
val = MTK_FOE_IB2_MIB_CNT;
@@ -979,7 +979,7 @@ void mtk_ppe_start(struct mtk_ppe *ppe)
MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
MTK_PPE_ENTRIES_SHIFT);
- if (MTK_HAS_CAPS(ppe->eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(ppe->eth))
val |= MTK_PPE_TB_CFG_INFO_SEL;
ppe_w32(ppe, MTK_PPE_TB_CFG, val);
@@ -995,7 +995,7 @@ void mtk_ppe_start(struct mtk_ppe *ppe)
MTK_PPE_FLOW_CFG_IP4_NAPT |
MTK_PPE_FLOW_CFG_IP4_DSLITE |
MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
- if (MTK_HAS_CAPS(ppe->eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(ppe->eth))
val |= MTK_PPE_MD_TOAP_BYP_CRSN0 |
MTK_PPE_MD_TOAP_BYP_CRSN1 |
MTK_PPE_MD_TOAP_BYP_CRSN2 |
@@ -1037,7 +1037,7 @@ void mtk_ppe_start(struct mtk_ppe *ppe)
ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
- if (MTK_HAS_CAPS(ppe->eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(ppe->eth)) {
ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT1, 0xcb777);
ppe_w32(ppe, MTK_PPE_SBW_CTRL, 0x7f);
}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
index e51de31a52ec..fb6bf58172d9 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -216,6 +216,9 @@ struct mtk_foe_ipv6_6rd {
struct mtk_foe_mac_info l2;
};
+#define MTK_FOE_ENTRY_V1_SIZE 80
+#define MTK_FOE_ENTRY_V2_SIZE 96
+
struct mtk_foe_entry {
u32 ib1;
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index 02eebff02d45..a70a5417c173 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -193,7 +193,7 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) {
mtk_foe_entry_set_wdma(eth, foe, info.wdma_idx, info.queue,
info.bss, info.wcid);
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ if (mtk_is_netsys_v2_or_greater(eth)) {
switch (info.wdma_idx) {
case 0:
pse_port = 8;
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index 985cff910f30..5f062ecb402c 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -1091,7 +1091,7 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev)
} else {
struct mtk_eth *eth = dev->hw->eth;
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (mtk_is_netsys_v2_or_greater(eth))
wed_set(dev, MTK_WED_RESET_IDX,
MTK_WED_RESET_IDX_RX_V2);
else
@@ -1907,7 +1907,7 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
hw->wdma = wdma;
hw->index = index;
hw->irq = irq;
- hw->version = MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) ? 2 : 1;
+ hw->version = mtk_is_netsys_v1(eth) ? 1 : 2;
if (hw->version == 1) {
hw->mirror = syscon_regmap_lookup_by_phandle(eth_np,
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index f1716a83a4d3..24d0c7c46878 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -294,7 +294,7 @@ static bool check_duplicate_entry(struct mlx4_dev *dev, u8 port,
struct mlx4_promisc_qp *dqp, *tmp_dqp;
if (port < 1 || port > dev->caps.num_ports)
- return NULL;
+ return false;
s_steer = &mlx4_priv(dev)->steer[port - 1];
@@ -375,7 +375,7 @@ static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port,
bool ret = false;
if (port < 1 || port > dev->caps.num_ports)
- return NULL;
+ return false;
s_steer = &mlx4_priv(dev)->steer[port - 1];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
index 1874c2f0587f..244bc15a42ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -379,9 +379,9 @@ int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_
if (!htb && htb_qopt->command != TC_HTB_CREATE)
return -EINVAL;
- if (htb_qopt->prio) {
+ if (htb_qopt->prio || htb_qopt->quantum) {
NL_SET_ERR_MSG_MOD(htb_qopt->extack,
- "prio parameter is not supported by device with HTB offload enabled.");
+ "prio and quantum parameters are not supported by device with HTB offload enabled.");
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 891d39b4bfd4..658b7d8d50c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -354,6 +354,12 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
mlx5e_ipsec_init_limits(sa_entry, attrs);
mlx5e_ipsec_init_macs(sa_entry, attrs);
+
+ if (x->encap) {
+ attrs->encap = true;
+ attrs->sport = x->encap->encap_sport;
+ attrs->dport = x->encap->encap_dport;
+ }
}
static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
@@ -387,8 +393,25 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
if (x->encap) {
- NL_SET_ERR_MSG_MOD(extack, "Encapsulated xfrm state may not be offloaded");
- return -EINVAL;
+ if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) {
+ NL_SET_ERR_MSG_MOD(extack, "Encapsulation is not supported");
+ return -EINVAL;
+ }
+
+ if (x->encap->encap_type != UDP_ENCAP_ESPINUDP) {
+ NL_SET_ERR_MSG_MOD(extack, "Encapsulation other than UDP is not supported");
+ return -EINVAL;
+ }
+
+ if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) {
+ NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in packet offload mode only");
+ return -EINVAL;
+ }
+
+ if (x->props.mode != XFRM_MODE_TRANSPORT) {
+ NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in transport mode only");
+ return -EINVAL;
+ }
}
if (!x->aead) {
NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 4e9887171508..7a7047263618 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -94,13 +94,20 @@ struct mlx5_accel_esp_xfrm_attrs {
u8 dir : 2;
u8 type : 2;
u8 drop : 1;
+ u8 encap : 1;
u8 family;
struct mlx5_replay_esn replay_esn;
u32 authsize;
u32 reqid;
struct mlx5_ipsec_lft lft;
- u8 smac[ETH_ALEN];
- u8 dmac[ETH_ALEN];
+ union {
+ u8 smac[ETH_ALEN];
+ __be16 sport;
+ };
+ union {
+ u8 dmac[ETH_ALEN];
+ __be16 dport;
+ };
};
enum mlx5_ipsec_cap {
@@ -110,6 +117,7 @@ enum mlx5_ipsec_cap {
MLX5_IPSEC_CAP_ROCE = 1 << 3,
MLX5_IPSEC_CAP_PRIO = 1 << 4,
MLX5_IPSEC_CAP_TUNNEL = 1 << 5,
+ MLX5_IPSEC_CAP_ESPINUDP = 1 << 6,
};
struct mlx5e_priv;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index dbe87bf89c0d..47baf983147f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -951,37 +951,70 @@ free_reformatbf:
return -EINVAL;
}
+static int get_reformat_type(struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ if (attrs->encap)
+ return MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP;
+ return MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
+ case XFRM_DEV_OFFLOAD_OUT:
+ if (attrs->family == AF_INET) {
+ if (attrs->encap)
+ return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV4;
+ return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
+ }
+
+ if (attrs->encap)
+ return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV6;
+ return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;
+ default:
+ WARN_ON(true);
+ }
+
+ return -EINVAL;
+}
+
static int
setup_pkt_transport_reformat(struct mlx5_accel_esp_xfrm_attrs *attrs,
struct mlx5_pkt_reformat_params *reformat_params)
{
- u8 *reformatbf;
+ struct udphdr *udphdr;
+ char *reformatbf;
+ size_t bfflen;
__be32 spi;
+ void *hdr;
+
+ reformat_params->type = get_reformat_type(attrs);
+ if (reformat_params->type < 0)
+ return reformat_params->type;
switch (attrs->dir) {
case XFRM_DEV_OFFLOAD_IN:
- reformat_params->type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
break;
case XFRM_DEV_OFFLOAD_OUT:
- if (attrs->family == AF_INET)
- reformat_params->type =
- MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
- else
- reformat_params->type =
- MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;
-
- reformatbf = kzalloc(MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE,
- GFP_KERNEL);
+ bfflen = MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE;
+ if (attrs->encap)
+ bfflen += sizeof(*udphdr);
+
+ reformatbf = kzalloc(bfflen, GFP_KERNEL);
if (!reformatbf)
return -ENOMEM;
+ hdr = reformatbf;
+ if (attrs->encap) {
+ udphdr = (struct udphdr *)reformatbf;
+ udphdr->source = attrs->sport;
+ udphdr->dest = attrs->dport;
+ hdr += sizeof(*udphdr);
+ }
+
/* convert to network format */
spi = htonl(attrs->spi);
- memcpy(reformatbf, &spi, sizeof(spi));
+ memcpy(hdr, &spi, sizeof(spi));
reformat_params->param_0 = attrs->authsize;
- reformat_params->size =
- MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE;
+ reformat_params->size = bfflen;
reformat_params->data = reformatbf;
break;
default:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
index a3554bde3e07..5ff06263c5bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -54,6 +54,12 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
reformat_l3_esp_tunnel_to_l2))
caps |= MLX5_IPSEC_CAP_TUNNEL;
+
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev,
+ reformat_add_esp_transport_over_udp) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ reformat_del_esp_transport_over_udp))
+ caps |= MLX5_IPSEC_CAP_ESPINUDP;
}
if (mlx5_get_roce_state(mdev) &&
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 3ca9fce759ea..71cad6bb6e62 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -29,7 +29,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_nve.o spectrum_nve_vxlan.o \
spectrum_dpipe.o spectrum_trap.o \
spectrum_ethtool.o spectrum_policer.o \
- spectrum_pgt.o
+ spectrum_pgt.o spectrum_port_range.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
mlxsw_spectrum-$(CONFIG_PTP_1588_CLOCK) += spectrum_ptp.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
index f0b2963ebac3..7870327d921b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
@@ -43,6 +43,7 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4),
MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4),
MLXSW_AFK_ELEMENT_INFO_U32(FDB_MISS, 0x40, 0, 1),
+ MLXSW_AFK_ELEMENT_INFO_U32(L4_PORT_RANGE, 0x40, 1, 16),
};
struct mlxsw_afk {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
index 65a4abadc7db..2eac7582c31a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
@@ -36,6 +36,7 @@ enum mlxsw_afk_element {
MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB,
MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB,
MLXSW_AFK_ELEMENT_FDB_MISS,
+ MLXSW_AFK_ELEMENT_L4_PORT_RANGE,
MLXSW_AFK_ELEMENT_MAX,
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index 0107cbc32fc7..d637c0348fa1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -32,6 +32,7 @@ struct mlxsw_env {
const struct mlxsw_bus_info *bus_info;
u8 max_module_count; /* Maximum number of modules per-slot. */
u8 num_of_slots; /* Including the main board. */
+ u8 max_eeprom_len; /* Maximum module EEPROM transaction length. */
struct mutex line_cards_lock; /* Protects line cards. */
struct mlxsw_env_line_card *line_cards[];
};
@@ -111,7 +112,7 @@ mlxsw_env_validate_cable_ident(struct mlxsw_core *core, u8 slot_index, int id,
if (err)
return err;
- mlxsw_reg_mcia_pack(mcia_pl, slot_index, id, 0,
+ mlxsw_reg_mcia_pack(mcia_pl, slot_index, id,
MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1,
MLXSW_REG_MCIA_I2C_ADDR_LOW);
err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl);
@@ -146,6 +147,7 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, u8 slot_index,
int module, u16 offset, u16 size, void *data,
bool qsfp, unsigned int *p_read_size)
{
+ struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core);
char mcia_pl[MLXSW_REG_MCIA_LEN];
char *eeprom_tmp;
u16 i2c_addr;
@@ -153,11 +155,7 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, u8 slot_index,
int status;
int err;
- /* MCIA register accepts buffer size <= 48. Page of size 128 should be
- * read by chunks of size 48, 48, 32. Align the size of the last chunk
- * to avoid reading after the end of the page.
- */
- size = min_t(u16, size, MLXSW_REG_MCIA_EEPROM_SIZE);
+ size = min_t(u16, size, mlxsw_env->max_eeprom_len);
if (offset < MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH &&
offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH)
@@ -188,7 +186,7 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, u8 slot_index,
}
}
- mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, 0, page, offset, size,
+ mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, page, offset, size,
i2c_addr);
err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcia), mcia_pl);
@@ -266,12 +264,12 @@ mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, u8 slot_index,
page = MLXSW_REG_MCIA_TH_PAGE_CMIS_NUM;
else
page = MLXSW_REG_MCIA_TH_PAGE_NUM;
- mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, 0, page,
+ mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, page,
MLXSW_REG_MCIA_TH_PAGE_OFF + off,
MLXSW_REG_MCIA_TH_ITEM_SIZE,
MLXSW_REG_MCIA_I2C_ADDR_LOW);
} else {
- mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, 0,
+ mlxsw_reg_mcia_pack(mcia_pl, slot_index, module,
MLXSW_REG_MCIA_PAGE0_LO,
off, MLXSW_REG_MCIA_TH_ITEM_SIZE,
MLXSW_REG_MCIA_I2C_ADDR_HIGH);
@@ -489,9 +487,9 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core,
u8 size;
size = min_t(u8, page->length - bytes_read,
- MLXSW_REG_MCIA_EEPROM_SIZE);
+ mlxsw_env->max_eeprom_len);
- mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, 0, page->page,
+ mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, page->page,
device_addr + bytes_read, size,
page->i2c_address);
mlxsw_reg_mcia_bank_number_set(mcia_pl, page->bank);
@@ -1359,6 +1357,26 @@ static struct mlxsw_linecards_event_ops mlxsw_env_event_ops = {
.got_inactive = mlxsw_env_got_inactive,
};
+static int mlxsw_env_max_module_eeprom_len_query(struct mlxsw_env *mlxsw_env)
+{
+ char mcam_pl[MLXSW_REG_MCAM_LEN];
+ bool mcia_128b_supported;
+ int err;
+
+ mlxsw_reg_mcam_pack(mcam_pl,
+ MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
+ err = mlxsw_reg_query(mlxsw_env->core, MLXSW_REG(mcam), mcam_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_MCIA_128B,
+ &mcia_128b_supported);
+
+ mlxsw_env->max_eeprom_len = mcia_128b_supported ? 128 : 48;
+
+ return 0;
+}
+
int mlxsw_env_init(struct mlxsw_core *mlxsw_core,
const struct mlxsw_bus_info *bus_info,
struct mlxsw_env **p_env)
@@ -1427,10 +1445,15 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core,
if (err)
goto err_type_set;
+ err = mlxsw_env_max_module_eeprom_len_query(env);
+ if (err)
+ goto err_eeprom_len_query;
+
env->line_cards[0]->active = true;
return 0;
+err_eeprom_len_query:
err_type_set:
mlxsw_env_module_event_disable(env, 0);
err_mlxsw_env_module_event_enable:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 8165bf31a99a..4b90ae44b476 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -2799,6 +2799,78 @@ static inline void mlxsw_reg_ptar_unpack(char *payload, char *tcam_region_info)
mlxsw_reg_ptar_tcam_region_info_memcpy_from(payload, tcam_region_info);
}
+/* PPRR - Policy-Engine Port Range Register
+ * ----------------------------------------
+ * This register is used for configuring port range identification.
+ */
+#define MLXSW_REG_PPRR_ID 0x3008
+#define MLXSW_REG_PPRR_LEN 0x14
+
+MLXSW_REG_DEFINE(pprr, MLXSW_REG_PPRR_ID, MLXSW_REG_PPRR_LEN);
+
+/* reg_pprr_ipv4
+ * Apply port range register to IPv4 packets.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, ipv4, 0x00, 31, 1);
+
+/* reg_pprr_ipv6
+ * Apply port range register to IPv6 packets.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, ipv6, 0x00, 30, 1);
+
+/* reg_pprr_src
+ * Apply port range register to source L4 ports.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, src, 0x00, 29, 1);
+
+/* reg_pprr_dst
+ * Apply port range register to destination L4 ports.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, dst, 0x00, 28, 1);
+
+/* reg_pprr_tcp
+ * Apply port range register to TCP packets.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, tcp, 0x00, 27, 1);
+
+/* reg_pprr_udp
+ * Apply port range register to UDP packets.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, udp, 0x00, 26, 1);
+
+/* reg_pprr_register_index
+ * Index of Port Range Register being accessed.
+ * Range is 0..cap_max_acl_l4_port_range-1.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pprr, register_index, 0x00, 0, 8);
+
+/* reg_prrr_port_range_min
+ * Minimum port range for comparison.
+ * Match is defined as:
+ * port_range_min <= packet_port <= port_range_max.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, port_range_min, 0x04, 16, 16);
+
+/* reg_prrr_port_range_max
+ * Maximum port range for comparison.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, port_range_max, 0x04, 0, 16);
+
+static inline void mlxsw_reg_pprr_pack(char *payload, u8 register_index)
+{
+ MLXSW_REG_ZERO(pprr, payload);
+ mlxsw_reg_pprr_register_index_set(payload, register_index);
+}
+
/* PPBS - Policy-Engine Policy Based Switching Register
* ----------------------------------------------------
* This register retrieves and sets Policy Based Switching Table entries.
@@ -9568,18 +9640,10 @@ static inline void mlxsw_reg_mtbr_temp_unpack(char *payload, int rec_ind,
*/
#define MLXSW_REG_MCIA_ID 0x9014
-#define MLXSW_REG_MCIA_LEN 0x40
+#define MLXSW_REG_MCIA_LEN 0x94
MLXSW_REG_DEFINE(mcia, MLXSW_REG_MCIA_ID, MLXSW_REG_MCIA_LEN);
-/* reg_mcia_l
- * Lock bit. Setting this bit will lock the access to the specific
- * cable. Used for updating a full page in a cable EPROM. Any access
- * other then subsequence writes will fail while the port is locked.
- * Access: RW
- */
-MLXSW_ITEM32(reg, mcia, l, 0x00, 31, 1);
-
/* reg_mcia_module
* Module number.
* Access: Index
@@ -9644,7 +9708,6 @@ MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16);
#define MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH 256
#define MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH 128
-#define MLXSW_REG_MCIA_EEPROM_SIZE 48
#define MLXSW_REG_MCIA_I2C_ADDR_LOW 0x50
#define MLXSW_REG_MCIA_I2C_ADDR_HIGH 0x51
#define MLXSW_REG_MCIA_PAGE0_LO_OFF 0xa0
@@ -9681,7 +9744,7 @@ enum mlxsw_reg_mcia_eeprom_module_info {
* Bytes to read/write.
* Access: RW
*/
-MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE);
+MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, 128);
/* This is used to access the optional upper pages (1-3) in the QSFP+
* memory map. Page 1 is available on offset 256 through 383, page 2 -
@@ -9692,14 +9755,12 @@ MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE);
MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH + 1)
static inline void mlxsw_reg_mcia_pack(char *payload, u8 slot_index, u8 module,
- u8 lock, u8 page_number,
- u16 device_addr, u8 size,
+ u8 page_number, u16 device_addr, u8 size,
u8 i2c_device_addr)
{
MLXSW_REG_ZERO(mcia, payload);
mlxsw_reg_mcia_slot_set(payload, slot_index);
mlxsw_reg_mcia_module_set(payload, module);
- mlxsw_reg_mcia_l_set(payload, lock);
mlxsw_reg_mcia_page_number_set(payload, page_number);
mlxsw_reg_mcia_device_address_set(payload, device_addr);
mlxsw_reg_mcia_size_set(payload, size);
@@ -10509,6 +10570,79 @@ static inline void mlxsw_reg_mcda_pack(char *payload, u32 update_handle,
mlxsw_reg_mcda_data_set(payload, i, *(u32 *) &data[i * 4]);
}
+/* MCAM - Management Capabilities Mask Register
+ * --------------------------------------------
+ * Reports the device supported management features.
+ */
+#define MLXSW_REG_MCAM_ID 0x907F
+#define MLXSW_REG_MCAM_LEN 0x48
+
+MLXSW_REG_DEFINE(mcam, MLXSW_REG_MCAM_ID, MLXSW_REG_MCAM_LEN);
+
+enum mlxsw_reg_mcam_feature_group {
+ /* Enhanced features. */
+ MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES,
+};
+
+/* reg_mcam_feature_group
+ * Feature list mask index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mcam, feature_group, 0x00, 16, 8);
+
+enum mlxsw_reg_mcam_mng_feature_cap_mask_bits {
+ /* If set, MCIA supports 128 bytes payloads. Otherwise, 48 bytes. */
+ MLXSW_REG_MCAM_MCIA_128B = 34,
+};
+
+#define MLXSW_REG_BYTES_PER_DWORD 0x4
+
+/* reg_mcam_mng_feature_cap_mask
+ * Supported port's enhanced features.
+ * Based on feature_group index.
+ * When bit is set, the feature is supported in the device.
+ * Access: RO
+ */
+#define MLXSW_REG_MCAM_MNG_FEATURE_CAP_MASK_DWORD(_dw_num, _offset) \
+ MLXSW_ITEM_BIT_ARRAY(reg, mcam, mng_feature_cap_mask_dw##_dw_num, \
+ _offset, MLXSW_REG_BYTES_PER_DWORD, 1)
+
+/* The access to the bits in the field 'mng_feature_cap_mask' is not same to
+ * other mask fields in other registers. In most of the cases bit #0 is the
+ * first one in the last dword. In MCAM register, the first dword contains bits
+ * #0-#31 and so on, so the access to the bits is simpler using bit array per
+ * dword. Declare each dword of 'mng_feature_cap_mask' field separately.
+ */
+MLXSW_REG_MCAM_MNG_FEATURE_CAP_MASK_DWORD(0, 0x28);
+MLXSW_REG_MCAM_MNG_FEATURE_CAP_MASK_DWORD(1, 0x2C);
+MLXSW_REG_MCAM_MNG_FEATURE_CAP_MASK_DWORD(2, 0x30);
+MLXSW_REG_MCAM_MNG_FEATURE_CAP_MASK_DWORD(3, 0x34);
+
+static inline void
+mlxsw_reg_mcam_pack(char *payload, enum mlxsw_reg_mcam_feature_group feat_group)
+{
+ MLXSW_REG_ZERO(mcam, payload);
+ mlxsw_reg_mcam_feature_group_set(payload, feat_group);
+}
+
+static inline void
+mlxsw_reg_mcam_unpack(char *payload,
+ enum mlxsw_reg_mcam_mng_feature_cap_mask_bits bit,
+ bool *p_mng_feature_cap_val)
+{
+ int offset = bit % (MLXSW_REG_BYTES_PER_DWORD * BITS_PER_BYTE);
+ int dword = bit / (MLXSW_REG_BYTES_PER_DWORD * BITS_PER_BYTE);
+ u8 (*getters[])(const char *, u16) = {
+ mlxsw_reg_mcam_mng_feature_cap_mask_dw0_get,
+ mlxsw_reg_mcam_mng_feature_cap_mask_dw1_get,
+ mlxsw_reg_mcam_mng_feature_cap_mask_dw2_get,
+ mlxsw_reg_mcam_mng_feature_cap_mask_dw3_get,
+ };
+
+ if (!WARN_ON_ONCE(dword >= ARRAY_SIZE(getters)))
+ *p_mng_feature_cap_val = getters[dword](payload, offset);
+}
+
/* MPSC - Monitoring Packet Sampling Configuration Register
* --------------------------------------------------------
* MPSC Register is used to configure the Packet Sampling mechanism.
@@ -12819,6 +12953,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(pacl),
MLXSW_REG(pagt),
MLXSW_REG(ptar),
+ MLXSW_REG(pprr),
MLXSW_REG(ppbs),
MLXSW_REG(prcr),
MLXSW_REG(pefa),
@@ -12901,10 +13036,11 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(mcion),
MLXSW_REG(mtpps),
MLXSW_REG(mtutc),
- MLXSW_REG(mpsc),
MLXSW_REG(mcqi),
MLXSW_REG(mcc),
MLXSW_REG(mcda),
+ MLXSW_REG(mcam),
+ MLXSW_REG(mpsc),
MLXSW_REG(mgpc),
MLXSW_REG(mprs),
MLXSW_REG(mogcr),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
index 19ae0d1c74a8..89dd2777ec4d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/resources.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -39,6 +39,7 @@ enum mlxsw_res_id {
MLXSW_RES_ID_ACL_FLEX_KEYS,
MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE,
MLXSW_RES_ID_ACL_ACTIONS_PER_SET,
+ MLXSW_RES_ID_ACL_MAX_L4_PORT_RANGE,
MLXSW_RES_ID_ACL_MAX_ERPT_BANKS,
MLXSW_RES_ID_ACL_MAX_ERPT_BANK_SIZE,
MLXSW_RES_ID_ACL_MAX_LARGE_KEY_ID,
@@ -99,6 +100,7 @@ static u16 mlxsw_res_ids[] = {
[MLXSW_RES_ID_ACL_FLEX_KEYS] = 0x2910,
[MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE] = 0x2911,
[MLXSW_RES_ID_ACL_ACTIONS_PER_SET] = 0x2912,
+ [MLXSW_RES_ID_ACL_MAX_L4_PORT_RANGE] = 0x2920,
[MLXSW_RES_ID_ACL_MAX_ERPT_BANKS] = 0x2940,
[MLXSW_RES_ID_ACL_MAX_ERPT_BANK_SIZE] = 0x2941,
[MLXSW_RES_ID_ACL_MAX_LARGE_KEY_ID] = 0x2942,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 25a01dafde1b..f0f6af3ec7c5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1132,8 +1132,8 @@ static int mlxsw_sp_port_add_vid(struct net_device *dev,
return PTR_ERR_OR_ZERO(mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid));
}
-static int mlxsw_sp_port_kill_vid(struct net_device *dev,
- __be16 __always_unused proto, u16 vid)
+int mlxsw_sp_port_kill_vid(struct net_device *dev,
+ __be16 __always_unused proto, u16 vid)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
@@ -3188,6 +3188,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_nve_init;
}
+ err = mlxsw_sp_port_range_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize port ranges\n");
+ goto err_port_range_init;
+ }
+
err = mlxsw_sp_acl_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
@@ -3280,6 +3286,8 @@ err_ptp_clock_init:
err_router_init:
mlxsw_sp_acl_fini(mlxsw_sp);
err_acl_init:
+ mlxsw_sp_port_range_fini(mlxsw_sp);
+err_port_range_init:
mlxsw_sp_nve_fini(mlxsw_sp);
err_nve_init:
mlxsw_sp_ipv6_addr_ht_fini(mlxsw_sp);
@@ -3462,6 +3470,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
}
mlxsw_sp_router_fini(mlxsw_sp);
mlxsw_sp_acl_fini(mlxsw_sp);
+ mlxsw_sp_port_range_fini(mlxsw_sp);
mlxsw_sp_nve_fini(mlxsw_sp);
mlxsw_sp_ipv6_addr_ht_fini(mlxsw_sp);
mlxsw_sp_afa_fini(mlxsw_sp);
@@ -3730,6 +3739,26 @@ static int mlxsw_sp_resources_rifs_register(struct mlxsw_core *mlxsw_core)
&size_params);
}
+static int
+mlxsw_sp_resources_port_range_register(struct mlxsw_core *mlxsw_core)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_core);
+ struct devlink_resource_size_params size_params;
+ u64 max;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_core, ACL_MAX_L4_PORT_RANGE))
+ return -EIO;
+
+ max = MLXSW_CORE_RES_GET(mlxsw_core, ACL_MAX_L4_PORT_RANGE);
+ devlink_resource_size_params_init(&size_params, max, max, 1,
+ DEVLINK_RESOURCE_UNIT_ENTRY);
+
+ return devl_resource_register(devlink, "port_range_registers", max,
+ MLXSW_SP_RESOURCE_PORT_RANGE_REGISTERS,
+ DEVLINK_RESOURCE_ID_PARENT_TOP,
+ &size_params);
+}
+
static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
{
int err;
@@ -3758,8 +3787,13 @@ static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
if (err)
goto err_resources_rifs_register;
+ err = mlxsw_sp_resources_port_range_register(mlxsw_core);
+ if (err)
+ goto err_resources_port_range_register;
+
return 0;
+err_resources_port_range_register:
err_resources_rifs_register:
err_resources_rif_mac_profile_register:
err_policer_resources_register:
@@ -3797,8 +3831,13 @@ static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core)
if (err)
goto err_resources_rifs_register;
+ err = mlxsw_sp_resources_port_range_register(mlxsw_core);
+ if (err)
+ goto err_resources_port_range_register;
+
return 0;
+err_resources_port_range_register:
err_resources_rifs_register:
err_resources_rif_mac_profile_register:
err_policer_resources_register:
@@ -4298,6 +4337,88 @@ static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp,
return -EBUSY;
}
+static int mlxsw_sp_lag_uppers_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *lag_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *upper_dev;
+ struct net_device *master;
+ struct list_head *iter;
+ int done = 0;
+ int err;
+
+ master = netdev_master_upper_dev_get(lag_dev);
+ if (master && netif_is_bridge_master(master)) {
+ err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, lag_dev, master,
+ extack);
+ if (err)
+ return err;
+ }
+
+ netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
+ if (!is_vlan_dev(upper_dev))
+ continue;
+
+ master = netdev_master_upper_dev_get(upper_dev);
+ if (master && netif_is_bridge_master(master)) {
+ err = mlxsw_sp_port_bridge_join(mlxsw_sp_port,
+ upper_dev, master,
+ extack);
+ if (err)
+ goto err_port_bridge_join;
+ }
+
+ ++done;
+ }
+
+ return 0;
+
+err_port_bridge_join:
+ netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
+ if (!is_vlan_dev(upper_dev))
+ continue;
+
+ master = netdev_master_upper_dev_get(upper_dev);
+ if (!master || !netif_is_bridge_master(master))
+ continue;
+
+ if (!done--)
+ break;
+
+ mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, master);
+ }
+
+ master = netdev_master_upper_dev_get(lag_dev);
+ if (master && netif_is_bridge_master(master))
+ mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lag_dev, master);
+
+ return err;
+}
+
+static void
+mlxsw_sp_lag_uppers_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *lag_dev)
+{
+ struct net_device *upper_dev;
+ struct net_device *master;
+ struct list_head *iter;
+
+ netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
+ if (!is_vlan_dev(upper_dev))
+ continue;
+
+ master = netdev_master_upper_dev_get(upper_dev);
+ if (!master)
+ continue;
+
+ mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, master);
+ }
+
+ master = netdev_master_upper_dev_get(lag_dev);
+ if (master)
+ mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lag_dev, master);
+}
+
static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *lag_dev,
struct netlink_ext_ack *extack)
@@ -4322,6 +4443,12 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index);
if (err)
return err;
+
+ err = mlxsw_sp_lag_uppers_bridge_join(mlxsw_sp_port, lag_dev,
+ extack);
+ if (err)
+ goto err_lag_uppers_bridge_join;
+
err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index);
if (err)
goto err_col_port_add;
@@ -4342,8 +4469,14 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
if (err)
goto err_router_join;
+ err = mlxsw_sp_netdevice_enslavement_replay(mlxsw_sp, lag_dev, extack);
+ if (err)
+ goto err_replay;
+
return 0;
+err_replay:
+ mlxsw_sp_router_port_leave_lag(mlxsw_sp_port, lag_dev);
err_router_join:
lag->ref_count--;
mlxsw_sp_port->lagged = 0;
@@ -4351,6 +4484,8 @@ err_router_join:
mlxsw_sp_port->local_port);
mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
err_col_port_add:
+ mlxsw_sp_lag_uppers_bridge_leave(mlxsw_sp_port, lag_dev);
+err_lag_uppers_bridge_join:
if (!lag->ref_count)
mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
return err;
@@ -4600,9 +4735,62 @@ static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev,
return true;
}
+static bool mlxsw_sp_netdev_is_master(struct net_device *upper_dev,
+ struct net_device *dev)
+{
+ return upper_dev == netdev_master_upper_dev_get(dev);
+}
+
+static int __mlxsw_sp_netdevice_event(struct mlxsw_sp *mlxsw_sp,
+ unsigned long event, void *ptr,
+ bool process_foreign);
+
+static int mlxsw_sp_netdevice_validate_uppers(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *upper_dev;
+ struct list_head *iter;
+ int err;
+
+ netdev_for_each_upper_dev_rcu(dev, upper_dev, iter) {
+ struct netdev_notifier_changeupper_info info = {
+ .info = {
+ .dev = dev,
+ .extack = extack,
+ },
+ .master = mlxsw_sp_netdev_is_master(upper_dev, dev),
+ .upper_dev = upper_dev,
+ .linking = true,
+
+ /* upper_info is relevant for LAG devices. But we would
+ * only need this if LAG were a valid upper above
+ * another upper (e.g. a bridge that is a member of a
+ * LAG), and that is never a valid configuration. So we
+ * can keep this as NULL.
+ */
+ .upper_info = NULL,
+ };
+
+ err = __mlxsw_sp_netdevice_event(mlxsw_sp,
+ NETDEV_PRECHANGEUPPER,
+ &info, true);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_netdevice_validate_uppers(mlxsw_sp, upper_dev,
+ extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
struct net_device *dev,
- unsigned long event, void *ptr)
+ unsigned long event, void *ptr,
+ bool replay_deslavement)
{
struct netdev_notifier_changeupper_info *info;
struct mlxsw_sp_port *mlxsw_sp_port;
@@ -4640,8 +4828,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
upper_dev))) {
- NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
- return -EINVAL;
+ err = mlxsw_sp_netdevice_validate_uppers(mlxsw_sp,
+ upper_dev,
+ extack);
+ if (err)
+ return err;
}
if (netif_is_lag_master(upper_dev) &&
!mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev,
@@ -4656,11 +4847,6 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port");
return -EINVAL;
}
- if (netif_is_macvlan(upper_dev) &&
- !mlxsw_sp_rif_exists(mlxsw_sp, lower_dev)) {
- NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
- return -EOPNOTSUPP;
- }
if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN");
return -EINVAL;
@@ -4707,15 +4893,20 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
case NETDEV_CHANGEUPPER:
upper_dev = info->upper_dev;
if (netif_is_bridge_master(upper_dev)) {
- if (info->linking)
+ if (info->linking) {
err = mlxsw_sp_port_bridge_join(mlxsw_sp_port,
lower_dev,
upper_dev,
extack);
- else
+ } else {
mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
lower_dev,
upper_dev);
+ if (!replay_deslavement)
+ break;
+ mlxsw_sp_netdevice_deslavement_replay(mlxsw_sp,
+ lower_dev);
+ }
} else if (netif_is_lag_master(upper_dev)) {
if (info->linking) {
err = mlxsw_sp_port_lag_join(mlxsw_sp_port,
@@ -4724,6 +4915,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port);
mlxsw_sp_port_lag_leave(mlxsw_sp_port,
upper_dev);
+ mlxsw_sp_netdevice_deslavement_replay(mlxsw_sp,
+ dev);
}
} else if (netif_is_ovs_master(upper_dev)) {
if (info->linking)
@@ -4776,13 +4969,15 @@ static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev,
static int mlxsw_sp_netdevice_port_event(struct net_device *lower_dev,
struct net_device *port_dev,
- unsigned long event, void *ptr)
+ unsigned long event, void *ptr,
+ bool replay_deslavement)
{
switch (event) {
case NETDEV_PRECHANGEUPPER:
case NETDEV_CHANGEUPPER:
return mlxsw_sp_netdevice_port_upper_event(lower_dev, port_dev,
- event, ptr);
+ event, ptr,
+ replay_deslavement);
case NETDEV_CHANGELOWERSTATE:
return mlxsw_sp_netdevice_port_lower_event(port_dev, event,
ptr);
@@ -4791,6 +4986,30 @@ static int mlxsw_sp_netdevice_port_event(struct net_device *lower_dev,
return 0;
}
+/* Called for LAG or its upper VLAN after the per-LAG-lower processing was done,
+ * to do any per-LAG / per-LAG-upper processing.
+ */
+static int mlxsw_sp_netdevice_post_lag_event(struct net_device *dev,
+ unsigned long event,
+ void *ptr)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(dev);
+ struct netdev_notifier_changeupper_info *info = ptr;
+
+ if (!mlxsw_sp)
+ return 0;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ if (info->linking)
+ break;
+ if (netif_is_bridge_master(info->upper_dev))
+ mlxsw_sp_netdevice_deslavement_replay(mlxsw_sp, dev);
+ break;
+ }
+ return 0;
+}
+
static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev,
unsigned long event, void *ptr)
{
@@ -4801,19 +5020,19 @@ static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev,
netdev_for_each_lower_dev(lag_dev, dev, iter) {
if (mlxsw_sp_port_dev_check(dev)) {
ret = mlxsw_sp_netdevice_port_event(lag_dev, dev, event,
- ptr);
+ ptr, false);
if (ret)
return ret;
}
}
- return 0;
+ return mlxsw_sp_netdevice_post_lag_event(lag_dev, event, ptr);
}
static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
struct net_device *dev,
unsigned long event, void *ptr,
- u16 vid)
+ u16 vid, bool replay_deslavement)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
@@ -4844,27 +5063,30 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
upper_dev))) {
- NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
- return -EINVAL;
- }
- if (netif_is_macvlan(upper_dev) &&
- !mlxsw_sp_rif_exists(mlxsw_sp, vlan_dev)) {
- NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
- return -EOPNOTSUPP;
+ err = mlxsw_sp_netdevice_validate_uppers(mlxsw_sp,
+ upper_dev,
+ extack);
+ if (err)
+ return err;
}
break;
case NETDEV_CHANGEUPPER:
upper_dev = info->upper_dev;
if (netif_is_bridge_master(upper_dev)) {
- if (info->linking)
+ if (info->linking) {
err = mlxsw_sp_port_bridge_join(mlxsw_sp_port,
vlan_dev,
upper_dev,
extack);
- else
+ } else {
mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
vlan_dev,
upper_dev);
+ if (!replay_deslavement)
+ break;
+ mlxsw_sp_netdevice_deslavement_replay(mlxsw_sp,
+ vlan_dev);
+ }
} else if (netif_is_macvlan(upper_dev)) {
if (!info->linking)
mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
@@ -4888,26 +5110,26 @@ static int mlxsw_sp_netdevice_lag_port_vlan_event(struct net_device *vlan_dev,
if (mlxsw_sp_port_dev_check(dev)) {
ret = mlxsw_sp_netdevice_port_vlan_event(vlan_dev, dev,
event, ptr,
- vid);
+ vid, false);
if (ret)
return ret;
}
}
- return 0;
+ return mlxsw_sp_netdevice_post_lag_event(vlan_dev, event, ptr);
}
-static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev,
+static int mlxsw_sp_netdevice_bridge_vlan_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *vlan_dev,
struct net_device *br_dev,
unsigned long event, void *ptr,
- u16 vid)
+ u16 vid, bool process_foreign)
{
- struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
struct netdev_notifier_changeupper_info *info = ptr;
struct netlink_ext_ack *extack;
struct net_device *upper_dev;
- if (!mlxsw_sp)
+ if (!process_foreign && !mlxsw_sp_lower_get(vlan_dev))
return 0;
extack = netdev_notifier_info_to_extack(&info->info);
@@ -4920,13 +5142,6 @@ static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev,
NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
return -EOPNOTSUPP;
}
- if (!info->linking)
- break;
- if (netif_is_macvlan(upper_dev) &&
- !mlxsw_sp_rif_exists(mlxsw_sp, vlan_dev)) {
- NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
- return -EOPNOTSUPP;
- }
break;
case NETDEV_CHANGEUPPER:
upper_dev = info->upper_dev;
@@ -4940,36 +5155,42 @@ static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev,
return 0;
}
-static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev,
- unsigned long event, void *ptr)
+static int mlxsw_sp_netdevice_vlan_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *vlan_dev,
+ unsigned long event, void *ptr,
+ bool process_foreign)
{
struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
u16 vid = vlan_dev_vlan_id(vlan_dev);
if (mlxsw_sp_port_dev_check(real_dev))
return mlxsw_sp_netdevice_port_vlan_event(vlan_dev, real_dev,
- event, ptr, vid);
+ event, ptr, vid,
+ true);
else if (netif_is_lag_master(real_dev))
return mlxsw_sp_netdevice_lag_port_vlan_event(vlan_dev,
real_dev, event,
ptr, vid);
else if (netif_is_bridge_master(real_dev))
- return mlxsw_sp_netdevice_bridge_vlan_event(vlan_dev, real_dev,
- event, ptr, vid);
+ return mlxsw_sp_netdevice_bridge_vlan_event(mlxsw_sp, vlan_dev,
+ real_dev, event,
+ ptr, vid,
+ process_foreign);
return 0;
}
-static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
- unsigned long event, void *ptr)
+static int mlxsw_sp_netdevice_bridge_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *br_dev,
+ unsigned long event, void *ptr,
+ bool process_foreign)
{
- struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(br_dev);
struct netdev_notifier_changeupper_info *info = ptr;
struct netlink_ext_ack *extack;
struct net_device *upper_dev;
u16 proto;
- if (!mlxsw_sp)
+ if (!process_foreign && !mlxsw_sp_lower_get(br_dev))
return 0;
extack = netdev_notifier_info_to_extack(&info->info);
@@ -4997,11 +5218,6 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
NL_SET_ERR_MSG_MOD(extack, "VLAN uppers are only supported with 802.1q VLAN protocol");
return -EOPNOTSUPP;
}
- if (netif_is_macvlan(upper_dev) &&
- !mlxsw_sp_rif_exists(mlxsw_sp, br_dev)) {
- NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
- return -EOPNOTSUPP;
- }
break;
case NETDEV_CHANGEUPPER:
upper_dev = info->upper_dev;
@@ -5107,35 +5323,48 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp,
return 0;
}
-static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
+static int __mlxsw_sp_netdevice_event(struct mlxsw_sp *mlxsw_sp,
+ unsigned long event, void *ptr,
+ bool process_foreign)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct mlxsw_sp_span_entry *span_entry;
- struct mlxsw_sp *mlxsw_sp;
int err = 0;
- mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
if (event == NETDEV_UNREGISTER) {
span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
if (span_entry)
mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
}
- mlxsw_sp_span_respin(mlxsw_sp);
if (netif_is_vxlan(dev))
err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr);
else if (mlxsw_sp_port_dev_check(dev))
- err = mlxsw_sp_netdevice_port_event(dev, dev, event, ptr);
+ err = mlxsw_sp_netdevice_port_event(dev, dev, event, ptr, true);
else if (netif_is_lag_master(dev))
err = mlxsw_sp_netdevice_lag_event(dev, event, ptr);
else if (is_vlan_dev(dev))
- err = mlxsw_sp_netdevice_vlan_event(dev, event, ptr);
+ err = mlxsw_sp_netdevice_vlan_event(mlxsw_sp, dev, event, ptr,
+ process_foreign);
else if (netif_is_bridge_master(dev))
- err = mlxsw_sp_netdevice_bridge_event(dev, event, ptr);
+ err = mlxsw_sp_netdevice_bridge_event(mlxsw_sp, dev, event, ptr,
+ process_foreign);
else if (netif_is_macvlan(dev))
err = mlxsw_sp_netdevice_macvlan_event(dev, event, ptr);
+ return err;
+}
+
+static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlxsw_sp *mlxsw_sp;
+ int err;
+
+ mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+ mlxsw_sp_span_respin(mlxsw_sp);
+ err = __mlxsw_sp_netdevice_event(mlxsw_sp, event, ptr, false);
+
return notifier_from_errno(err);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 231e364cbb7c..65eaa181e0aa 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -69,6 +69,7 @@ enum mlxsw_sp_resource_id {
MLXSW_SP_RESOURCE_SINGLE_RATE_POLICERS,
MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
MLXSW_SP_RESOURCE_RIFS,
+ MLXSW_SP_RESOURCE_PORT_RANGE_REGISTERS,
};
struct mlxsw_sp_port;
@@ -175,6 +176,7 @@ struct mlxsw_sp {
struct mlxsw_sp_acl *acl;
struct mlxsw_sp_fid_core *fid_core;
struct mlxsw_sp_policer_core *policer_core;
+ struct mlxsw_sp_port_range_core *pr_core;
struct mlxsw_sp_kvdl *kvdl;
struct mlxsw_sp_nve *nve;
struct notifier_block netdevice_nb;
@@ -698,6 +700,8 @@ int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
struct mlxsw_sp_port_vlan *
mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
void mlxsw_sp_port_vlan_destroy(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
+int mlxsw_sp_port_kill_vid(struct net_device *dev,
+ __be16 __always_unused proto, u16 vid);
int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
u16 vid_end, bool is_member, bool untagged);
int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
@@ -865,9 +869,13 @@ struct mlxsw_sp_acl_rule_info {
egress_bind_blocker:1,
counter_valid:1,
policer_index_valid:1,
- ipv6_valid:1;
+ ipv6_valid:1,
+ src_port_range_reg_valid:1,
+ dst_port_range_reg_valid:1;
unsigned int counter_index;
u16 policer_index;
+ u8 src_port_range_reg_index;
+ u8 dst_port_range_reg_index;
struct {
u32 prev_val;
enum mlxsw_sp_acl_mangle_field prev_field;
@@ -992,7 +1000,8 @@ void mlxsw_sp_acl_ruleset_prio_get(struct mlxsw_sp_acl_ruleset *ruleset,
struct mlxsw_sp_acl_rule_info *
mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl,
struct mlxsw_afa_block *afa_block);
-void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei);
+void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei);
int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei);
void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei,
unsigned int priority);
@@ -1484,4 +1493,18 @@ int mlxsw_sp_pgt_entry_port_set(struct mlxsw_sp *mlxsw_sp, u16 mid,
int mlxsw_sp_pgt_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_pgt_fini(struct mlxsw_sp *mlxsw_sp);
+/* spectrum_port_range.c */
+struct mlxsw_sp_port_range {
+ u16 min;
+ u16 max;
+ u8 source:1; /* Source or destination */
+};
+
+int mlxsw_sp_port_range_reg_get(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_port_range *range,
+ struct netlink_ext_ack *extack,
+ u8 *p_prr_index);
+void mlxsw_sp_port_range_reg_put(struct mlxsw_sp *mlxsw_sp, u8 prr_index);
+int mlxsw_sp_port_range_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_port_range_fini(struct mlxsw_sp *mlxsw_sp);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
index 3a636f753607..dfcdd37e797b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
@@ -90,7 +90,7 @@ mlxsw_sp1_acl_ctcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
err_entry_add:
err_rulei_commit:
err_rulei_act_continue:
- mlxsw_sp_acl_rulei_destroy(rulei);
+ mlxsw_sp_acl_rulei_destroy(mlxsw_sp, rulei);
err_rulei_create:
mlxsw_sp_acl_ctcam_chunk_fini(&region->catchall.cchunk);
return err;
@@ -105,7 +105,7 @@ mlxsw_sp1_acl_ctcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &region->cregion,
&region->catchall.cchunk,
&region->catchall.centry);
- mlxsw_sp_acl_rulei_destroy(rulei);
+ mlxsw_sp_acl_rulei_destroy(mlxsw_sp, rulei);
mlxsw_sp_acl_ctcam_chunk_fini(&region->catchall.cchunk);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 0423ac262d89..186161a3459d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -339,10 +339,17 @@ err_afa_block_create:
return ERR_PTR(err);
}
-void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei)
+void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei)
{
if (rulei->action_created)
mlxsw_afa_block_destroy(rulei->act_block);
+ if (rulei->src_port_range_reg_valid)
+ mlxsw_sp_port_range_reg_put(mlxsw_sp,
+ rulei->src_port_range_reg_index);
+ if (rulei->dst_port_range_reg_valid)
+ mlxsw_sp_port_range_reg_put(mlxsw_sp,
+ rulei->dst_port_range_reg_index);
kfree(rulei);
}
@@ -834,7 +841,7 @@ void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp,
{
struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
- mlxsw_sp_acl_rulei_destroy(rule->rulei);
+ mlxsw_sp_acl_rulei_destroy(mlxsw_sp, rule->rulei);
kfree(rule);
mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
index 4dea39f2b304..b7f58605b6c7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
@@ -31,12 +31,14 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = {
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = {
MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4),
+ MLXSW_AFK_ELEMENT_INST_U32(L4_PORT_RANGE, 0x04, 16, 16),
MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = {
MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x00, 4),
+ MLXSW_AFK_ELEMENT_INST_U32(L4_PORT_RANGE, 0x04, 16, 16),
MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
};
@@ -205,6 +207,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l4_0[] = {
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l4_2[] = {
MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x04, 16, 9), /* TCP_CONTROL + TCP_ECN */
+ MLXSW_AFK_ELEMENT_INST_U32(L4_PORT_RANGE, 0x04, 0, 16),
};
static const struct mlxsw_afk_block mlxsw_sp2_afk_blocks[] = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 72917f09e806..8329100479b3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -418,6 +418,68 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+static int
+mlxsw_sp_flower_parse_ports_range(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct flow_cls_offload *f, u8 ip_proto)
+{
+ const struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_match_ports_range match;
+ u32 key_mask_value = 0;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS_RANGE))
+ return 0;
+
+ if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "Only UDP and TCP keys are supported");
+ return -EINVAL;
+ }
+
+ flow_rule_match_ports_range(rule, &match);
+
+ if (match.mask->tp_min.src) {
+ struct mlxsw_sp_port_range range = {
+ .min = ntohs(match.key->tp_min.src),
+ .max = ntohs(match.key->tp_max.src),
+ .source = true,
+ };
+ u8 prr_index;
+ int err;
+
+ err = mlxsw_sp_port_range_reg_get(mlxsw_sp, &range,
+ f->common.extack, &prr_index);
+ if (err)
+ return err;
+
+ rulei->src_port_range_reg_index = prr_index;
+ rulei->src_port_range_reg_valid = true;
+ key_mask_value |= BIT(prr_index);
+ }
+
+ if (match.mask->tp_min.dst) {
+ struct mlxsw_sp_port_range range = {
+ .min = ntohs(match.key->tp_min.dst),
+ .max = ntohs(match.key->tp_max.dst),
+ };
+ u8 prr_index;
+ int err;
+
+ err = mlxsw_sp_port_range_reg_get(mlxsw_sp, &range,
+ f->common.extack, &prr_index);
+ if (err)
+ return err;
+
+ rulei->dst_port_range_reg_index = prr_index;
+ rulei->dst_port_range_reg_valid = true;
+ key_mask_value |= BIT(prr_index);
+ }
+
+ mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_L4_PORT_RANGE,
+ key_mask_value, key_mask_value);
+
+ return 0;
+}
+
static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_rule_info *rulei,
struct flow_cls_offload *f,
@@ -503,6 +565,7 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
BIT(FLOW_DISSECTOR_KEY_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS_RANGE) |
BIT(FLOW_DISSECTOR_KEY_TCP) |
BIT(FLOW_DISSECTOR_KEY_IP) |
BIT(FLOW_DISSECTOR_KEY_VLAN))) {
@@ -604,6 +667,11 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
err = mlxsw_sp_flower_parse_ports(mlxsw_sp, rulei, f, ip_proto);
if (err)
return err;
+
+ err = mlxsw_sp_flower_parse_ports_range(mlxsw_sp, rulei, f, ip_proto);
+ if (err)
+ return err;
+
err = mlxsw_sp_flower_parse_tcp(mlxsw_sp, rulei, f, ip_proto);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_port_range.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_port_range.c
new file mode 100644
index 000000000000..2d193de12be6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_port_range.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/bits.h>
+#include <linux/netlink.h>
+#include <linux/refcount.h>
+#include <linux/xarray.h>
+#include <net/devlink.h>
+
+#include "spectrum.h"
+
+struct mlxsw_sp_port_range_reg {
+ struct mlxsw_sp_port_range range;
+ refcount_t refcount;
+ u32 index;
+};
+
+struct mlxsw_sp_port_range_core {
+ struct xarray prr_xa;
+ struct xa_limit prr_ids;
+ atomic_t prr_count;
+};
+
+static int
+mlxsw_sp_port_range_reg_configure(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_port_range_reg *prr)
+{
+ char pprr_pl[MLXSW_REG_PPRR_LEN];
+
+ /* We do not care if packet is IPv4/IPv6 and TCP/UDP, so set all four
+ * fields.
+ */
+ mlxsw_reg_pprr_pack(pprr_pl, prr->index);
+ mlxsw_reg_pprr_ipv4_set(pprr_pl, true);
+ mlxsw_reg_pprr_ipv6_set(pprr_pl, true);
+ mlxsw_reg_pprr_src_set(pprr_pl, prr->range.source);
+ mlxsw_reg_pprr_dst_set(pprr_pl, !prr->range.source);
+ mlxsw_reg_pprr_tcp_set(pprr_pl, true);
+ mlxsw_reg_pprr_udp_set(pprr_pl, true);
+ mlxsw_reg_pprr_port_range_min_set(pprr_pl, prr->range.min);
+ mlxsw_reg_pprr_port_range_max_set(pprr_pl, prr->range.max);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pprr), pprr_pl);
+}
+
+static struct mlxsw_sp_port_range_reg *
+mlxsw_sp_port_range_reg_create(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_port_range *range,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_port_range_core *pr_core = mlxsw_sp->pr_core;
+ struct mlxsw_sp_port_range_reg *prr;
+ int err;
+
+ prr = kzalloc(sizeof(*prr), GFP_KERNEL);
+ if (!prr)
+ return ERR_PTR(-ENOMEM);
+
+ prr->range = *range;
+ refcount_set(&prr->refcount, 1);
+
+ err = xa_alloc(&pr_core->prr_xa, &prr->index, prr, pr_core->prr_ids,
+ GFP_KERNEL);
+ if (err) {
+ if (err == -EBUSY)
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of port range registers");
+ goto err_xa_alloc;
+ }
+
+ err = mlxsw_sp_port_range_reg_configure(mlxsw_sp, prr);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to configure port range register");
+ goto err_reg_configure;
+ }
+
+ atomic_inc(&pr_core->prr_count);
+
+ return prr;
+
+err_reg_configure:
+ xa_erase(&pr_core->prr_xa, prr->index);
+err_xa_alloc:
+ kfree(prr);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_port_range_reg_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_port_range_reg *prr)
+{
+ struct mlxsw_sp_port_range_core *pr_core = mlxsw_sp->pr_core;
+
+ atomic_dec(&pr_core->prr_count);
+ xa_erase(&pr_core->prr_xa, prr->index);
+ kfree(prr);
+}
+
+static struct mlxsw_sp_port_range_reg *
+mlxsw_sp_port_range_reg_find(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_port_range *range)
+{
+ struct mlxsw_sp_port_range_core *pr_core = mlxsw_sp->pr_core;
+ struct mlxsw_sp_port_range_reg *prr;
+ unsigned long index;
+
+ xa_for_each(&pr_core->prr_xa, index, prr) {
+ if (prr->range.min == range->min &&
+ prr->range.max == range->max &&
+ prr->range.source == range->source)
+ return prr;
+ }
+
+ return NULL;
+}
+
+int mlxsw_sp_port_range_reg_get(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_port_range *range,
+ struct netlink_ext_ack *extack,
+ u8 *p_prr_index)
+{
+ struct mlxsw_sp_port_range_reg *prr;
+
+ prr = mlxsw_sp_port_range_reg_find(mlxsw_sp, range);
+ if (prr) {
+ refcount_inc(&prr->refcount);
+ *p_prr_index = prr->index;
+ return 0;
+ }
+
+ prr = mlxsw_sp_port_range_reg_create(mlxsw_sp, range, extack);
+ if (IS_ERR(prr))
+ return PTR_ERR(prr);
+
+ *p_prr_index = prr->index;
+
+ return 0;
+}
+
+void mlxsw_sp_port_range_reg_put(struct mlxsw_sp *mlxsw_sp, u8 prr_index)
+{
+ struct mlxsw_sp_port_range_core *pr_core = mlxsw_sp->pr_core;
+ struct mlxsw_sp_port_range_reg *prr;
+
+ prr = xa_load(&pr_core->prr_xa, prr_index);
+ if (WARN_ON(!prr))
+ return;
+
+ if (!refcount_dec_and_test(&prr->refcount))
+ return;
+
+ mlxsw_sp_port_range_reg_destroy(mlxsw_sp, prr);
+}
+
+static u64 mlxsw_sp_port_range_reg_occ_get(void *priv)
+{
+ struct mlxsw_sp_port_range_core *pr_core = priv;
+
+ return atomic_read(&pr_core->prr_count);
+}
+
+int mlxsw_sp_port_range_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_port_range_core *pr_core;
+ struct mlxsw_core *core = mlxsw_sp->core;
+ u64 max;
+
+ if (!MLXSW_CORE_RES_VALID(core, ACL_MAX_L4_PORT_RANGE))
+ return -EIO;
+ max = MLXSW_CORE_RES_GET(core, ACL_MAX_L4_PORT_RANGE);
+
+ /* Each port range register is represented using a single bit in the
+ * two bytes "l4_port_range" ACL key element.
+ */
+ WARN_ON(max > BITS_PER_BYTE * sizeof(u16));
+
+ pr_core = kzalloc(sizeof(*mlxsw_sp->pr_core), GFP_KERNEL);
+ if (!pr_core)
+ return -ENOMEM;
+ mlxsw_sp->pr_core = pr_core;
+
+ pr_core->prr_ids.max = max - 1;
+ xa_init_flags(&pr_core->prr_xa, XA_FLAGS_ALLOC);
+
+ devl_resource_occ_get_register(priv_to_devlink(core),
+ MLXSW_SP_RESOURCE_PORT_RANGE_REGISTERS,
+ mlxsw_sp_port_range_reg_occ_get,
+ pr_core);
+
+ return 0;
+}
+
+void mlxsw_sp_port_range_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_port_range_core *pr_core = mlxsw_sp->pr_core;
+
+ devl_resource_occ_get_unregister(priv_to_devlink(mlxsw_sp->core),
+ MLXSW_SP_RESOURCE_PORT_RANGE_REGISTERS);
+ WARN_ON(!xa_empty(&pr_core->prr_xa));
+ xa_destroy(&pr_core->prr_xa);
+ kfree(pr_core);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index b32adf277a22..57f0faac836c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -139,6 +139,7 @@ struct mlxsw_sp_rif_ops {
struct netlink_ext_ack *extack);
void (*deconfigure)(struct mlxsw_sp_rif *rif);
struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
+ const struct mlxsw_sp_rif_params *params,
struct netlink_ext_ack *extack);
void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
};
@@ -2871,6 +2872,21 @@ static bool mlxsw_sp_dev_lower_is_port(struct net_device *dev)
return !!mlxsw_sp_port;
}
+static int mlxsw_sp_router_schedule_neigh_work(struct mlxsw_sp_router *router,
+ struct neighbour *n)
+{
+ struct net *net;
+
+ net = neigh_parms_net(n->parms);
+
+ /* Take a reference to ensure the neighbour won't be destructed until we
+ * drop the reference in delayed work.
+ */
+ neigh_clone(n);
+ return mlxsw_sp_router_schedule_work(net, router, n,
+ mlxsw_sp_router_neigh_event_work);
+}
+
static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -2878,7 +2894,6 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
unsigned long interval;
struct neigh_parms *p;
struct neighbour *n;
- struct net *net;
router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
@@ -2902,7 +2917,6 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
break;
case NETEVENT_NEIGH_UPDATE:
n = ptr;
- net = neigh_parms_net(n->parms);
if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
return NOTIFY_DONE;
@@ -2910,13 +2924,7 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
if (!mlxsw_sp_dev_lower_is_port(n->dev))
return NOTIFY_DONE;
- /* Take a reference to ensure the neighbour won't be
- * destructed until we drop the reference in delayed
- * work.
- */
- neigh_clone(n);
- return mlxsw_sp_router_schedule_work(net, router, n,
- mlxsw_sp_router_neigh_event_work);
+ return mlxsw_sp_router_schedule_neigh_work(router, n);
case NETEVENT_IPV4_MPATH_HASH_UPDATE:
case NETEVENT_IPV6_MPATH_HASH_UPDATE:
@@ -2975,6 +2983,52 @@ static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
}
}
+struct mlxsw_sp_neigh_rif_made_sync {
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_rif *rif;
+ int err;
+};
+
+static void mlxsw_sp_neigh_rif_made_sync_each(struct neighbour *n, void *data)
+{
+ struct mlxsw_sp_neigh_rif_made_sync *rms = data;
+ int rc;
+
+ if (rms->err)
+ return;
+ if (n->dev != mlxsw_sp_rif_dev(rms->rif))
+ return;
+ rc = mlxsw_sp_router_schedule_neigh_work(rms->mlxsw_sp->router, n);
+ if (rc != NOTIFY_DONE)
+ rms->err = -ENOMEM;
+}
+
+static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_neigh_rif_made_sync rms = {
+ .mlxsw_sp = mlxsw_sp,
+ .rif = rif,
+ };
+
+ neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
+ if (rms.err)
+ goto err_arp;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ neigh_for_each(&nd_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
+#endif
+ if (rms.err)
+ goto err_nd;
+
+ return 0;
+
+err_nd:
+err_arp:
+ mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
+ return rms.err;
+}
+
enum mlxsw_sp_nexthop_type {
MLXSW_SP_NEXTHOP_TYPE_ETH,
MLXSW_SP_NEXTHOP_TYPE_IPIP,
@@ -4396,6 +4450,19 @@ err_neigh_init:
return err;
}
+static int mlxsw_sp_nexthop_type_rif_made(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ switch (nh->type) {
+ case MLXSW_SP_NEXTHOP_TYPE_ETH:
+ return mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
+ case MLXSW_SP_NEXTHOP_TYPE_IPIP:
+ break;
+ }
+
+ return 0;
+}
+
static void mlxsw_sp_nexthop_type_rif_gone(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh)
{
@@ -4524,6 +4591,35 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
}
}
+static int mlxsw_sp_nexthop_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_nexthop *nh, *tmp;
+ unsigned int n = 0;
+ int err;
+
+ list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list,
+ crif_list_node) {
+ err = mlxsw_sp_nexthop_type_rif_made(mlxsw_sp, nh);
+ if (err)
+ goto err_nexthop_type_rif;
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
+ n++;
+ }
+
+ return 0;
+
+err_nexthop_type_rif:
+ list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list,
+ crif_list_node) {
+ if (!n--)
+ break;
+ mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
+ }
+ return err;
+}
+
static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif)
{
@@ -7884,6 +7980,26 @@ static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
+static int mlxsw_sp_router_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif)
+{
+ int err;
+
+ err = mlxsw_sp_neigh_rif_made_sync(mlxsw_sp, rif);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_nexthop_rif_made_sync(mlxsw_sp, rif);
+ if (err)
+ goto err_nexthop;
+
+ return 0;
+
+err_nexthop:
+ mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
+ return err;
+}
+
static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif)
{
@@ -8300,7 +8416,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
rif->rif_entries = rif_entries;
if (ops->fid_get) {
- fid = ops->fid_get(rif, extack);
+ fid = ops->fid_get(rif, params, extack);
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
goto err_fid_get;
@@ -8321,6 +8437,10 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
goto err_mr_rif_add;
}
+ err = mlxsw_sp_router_rif_made_sync(mlxsw_sp, rif);
+ if (err)
+ goto err_rif_made_sync;
+
if (netdev_offload_xstats_enabled(params->dev,
NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
err = mlxsw_sp_router_port_l3_stats_enable(rif);
@@ -8335,6 +8455,8 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
return rif;
err_stats_enable:
+ mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
+err_rif_made_sync:
err_mr_rif_add:
for (i--; i >= 0; i--)
mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
@@ -8410,6 +8532,110 @@ out:
mutex_unlock(&mlxsw_sp->router->lock);
}
+static void mlxsw_sp_rif_destroy_vlan_upper(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *br_dev,
+ u16 vid)
+{
+ struct net_device *upper_dev;
+ struct mlxsw_sp_crif *crif;
+
+ rcu_read_lock();
+ upper_dev = __vlan_find_dev_deep_rcu(br_dev, htons(ETH_P_8021Q), vid);
+ rcu_read_unlock();
+
+ if (!upper_dev)
+ return;
+
+ crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, upper_dev);
+ if (!crif || !crif->rif)
+ return;
+
+ mlxsw_sp_rif_destroy(crif->rif);
+}
+
+static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *l3_dev,
+ int lower_pvid,
+ unsigned long event,
+ struct netlink_ext_ack *extack);
+
+int mlxsw_sp_router_bridge_vlan_add(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *br_dev,
+ u16 new_vid, bool is_pvid,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_rif *old_rif;
+ struct mlxsw_sp_rif *new_rif;
+ struct net_device *upper_dev;
+ u16 old_pvid = 0;
+ u16 new_pvid;
+ int err = 0;
+
+ mutex_lock(&mlxsw_sp->router->lock);
+ old_rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev);
+ if (old_rif) {
+ /* If the RIF on the bridge is not a VLAN RIF, we shouldn't have
+ * gotten a PVID notification.
+ */
+ if (WARN_ON(old_rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN))
+ old_rif = NULL;
+ else
+ old_pvid = mlxsw_sp_fid_8021q_vid(old_rif->fid);
+ }
+
+ if (is_pvid)
+ new_pvid = new_vid;
+ else if (old_pvid == new_vid)
+ new_pvid = 0;
+ else
+ goto out;
+
+ if (old_pvid == new_pvid)
+ goto out;
+
+ if (new_pvid) {
+ struct mlxsw_sp_rif_params params = {
+ .dev = br_dev,
+ .vid = new_pvid,
+ };
+
+ /* If there is a VLAN upper with the same VID as the new PVID,
+ * kill its RIF, if there is one.
+ */
+ mlxsw_sp_rif_destroy_vlan_upper(mlxsw_sp, br_dev, new_pvid);
+
+ if (mlxsw_sp_dev_addr_list_empty(br_dev))
+ goto out;
+ new_rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
+ if (IS_ERR(new_rif)) {
+ err = PTR_ERR(new_rif);
+ goto out;
+ }
+
+ if (old_pvid)
+ mlxsw_sp_rif_migrate_destroy(mlxsw_sp, old_rif, new_rif,
+ true);
+ } else {
+ mlxsw_sp_rif_destroy(old_rif);
+ }
+
+ if (old_pvid) {
+ rcu_read_lock();
+ upper_dev = __vlan_find_dev_deep_rcu(br_dev, htons(ETH_P_8021Q),
+ old_pvid);
+ rcu_read_unlock();
+ if (upper_dev)
+ err = mlxsw_sp_inetaddr_bridge_event(mlxsw_sp,
+ upper_dev,
+ new_pvid,
+ NETDEV_UP, extack);
+ }
+
+out:
+ mutex_unlock(&mlxsw_sp->router->lock);
+ return err;
+}
+
static void
mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
@@ -8664,21 +8890,24 @@ __mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
{
struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
- struct mlxsw_sp_rif_params params = {
- .dev = l3_dev,
- };
+ struct mlxsw_sp_rif_params params;
u16 vid = mlxsw_sp_port_vlan->vid;
struct mlxsw_sp_rif *rif;
struct mlxsw_sp_fid *fid;
int err;
+ params = (struct mlxsw_sp_rif_params) {
+ .dev = l3_dev,
+ .vid = vid,
+ };
+
mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
if (IS_ERR(rif))
return PTR_ERR(rif);
/* FID was already created, just take a reference */
- fid = rif->ops->fid_get(rif, extack);
+ fid = rif->ops->fid_get(rif, &params, extack);
err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
if (err)
goto err_fid_port_vid_map;
@@ -8776,10 +9005,11 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
}
static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
- unsigned long event,
+ unsigned long event, bool nomaster,
struct netlink_ext_ack *extack)
{
- if (netif_is_any_bridge_port(port_dev) || netif_is_lag_port(port_dev))
+ if (!nomaster && (netif_is_any_bridge_port(port_dev) ||
+ netif_is_lag_port(port_dev)))
return 0;
return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
@@ -8810,10 +9040,10 @@ static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
}
static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
- unsigned long event,
+ unsigned long event, bool nomaster,
struct netlink_ext_ack *extack)
{
- if (netif_is_bridge_port(lag_dev))
+ if (!nomaster && netif_is_bridge_port(lag_dev))
return 0;
return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
@@ -8822,6 +9052,7 @@ static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *l3_dev,
+ int lower_pvid,
unsigned long event,
struct netlink_ext_ack *extack)
{
@@ -8829,6 +9060,7 @@ static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
.dev = l3_dev,
};
struct mlxsw_sp_rif *rif;
+ int err;
switch (event) {
case NETDEV_UP:
@@ -8840,7 +9072,21 @@ static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported");
return -EOPNOTSUPP;
}
+ err = br_vlan_get_pvid(l3_dev, &params.vid);
+ if (err)
+ return err;
+ if (!params.vid)
+ return 0;
+ } else if (is_vlan_dev(l3_dev)) {
+ params.vid = vlan_dev_vlan_id(l3_dev);
+
+ /* If the VID matches PVID of the bridge below, the
+ * bridge owns the RIF for this VLAN. Don't do anything.
+ */
+ if ((int)params.vid == lower_pvid)
+ return 0;
}
+
rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
if (IS_ERR(rif))
return PTR_ERR(rif);
@@ -8856,24 +9102,32 @@ static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *vlan_dev,
- unsigned long event,
+ unsigned long event, bool nomaster,
struct netlink_ext_ack *extack)
{
struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
u16 vid = vlan_dev_vlan_id(vlan_dev);
+ u16 lower_pvid;
+ int err;
- if (netif_is_bridge_port(vlan_dev))
+ if (!nomaster && netif_is_bridge_port(vlan_dev))
return 0;
- if (mlxsw_sp_port_dev_check(real_dev))
+ if (mlxsw_sp_port_dev_check(real_dev)) {
return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
event, vid, extack);
- else if (netif_is_lag_master(real_dev))
+ } else if (netif_is_lag_master(real_dev)) {
return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
vid, extack);
- else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
- return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
+ } else if (netif_is_bridge_master(real_dev) &&
+ br_vlan_enabled(real_dev)) {
+ err = br_vlan_get_pvid(real_dev, &lower_pvid);
+ if (err)
+ return err;
+ return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev,
+ lower_pvid, event,
extack);
+ }
return 0;
}
@@ -8927,10 +9181,8 @@ static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
int err;
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
- if (!rif) {
- NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
- return -EOPNOTSUPP;
- }
+ if (!rif)
+ return 0;
err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
mlxsw_sp_fid_index(rif->fid), true);
@@ -9000,19 +9252,21 @@ static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *dev,
- unsigned long event,
+ unsigned long event, bool nomaster,
struct netlink_ext_ack *extack)
{
if (mlxsw_sp_port_dev_check(dev))
- return mlxsw_sp_inetaddr_port_event(dev, event, extack);
+ return mlxsw_sp_inetaddr_port_event(dev, event, nomaster,
+ extack);
else if (netif_is_lag_master(dev))
- return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
+ return mlxsw_sp_inetaddr_lag_event(dev, event, nomaster,
+ extack);
else if (netif_is_bridge_master(dev))
- return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
+ return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, -1, event,
extack);
else if (is_vlan_dev(dev))
return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
- extack);
+ nomaster, extack);
else if (netif_is_macvlan(dev))
return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
extack);
@@ -9039,7 +9293,8 @@ static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
if (!mlxsw_sp_rif_should_config(rif, dev, event))
goto out;
- err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
+ err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, false,
+ NULL);
out:
mutex_unlock(&router->lock);
return notifier_from_errno(err);
@@ -9063,7 +9318,8 @@ static int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
if (!mlxsw_sp_rif_should_config(rif, dev, event))
goto out;
- err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
+ err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false,
+ ivi->extack);
out:
mutex_unlock(&mlxsw_sp->router->lock);
return notifier_from_errno(err);
@@ -9092,7 +9348,7 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
if (!mlxsw_sp_rif_should_config(rif, dev, event))
goto out;
- __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
+ __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false, NULL);
out:
mutex_unlock(&mlxsw_sp->router->lock);
rtnl_unlock();
@@ -9146,7 +9402,8 @@ static int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
if (!mlxsw_sp_rif_should_config(rif, dev, event))
goto out;
- err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
+ err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false,
+ i6vi->extack);
out:
mutex_unlock(&mlxsw_sp->router->lock);
return notifier_from_errno(err);
@@ -9466,10 +9723,11 @@ static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
*/
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
if (rif)
- __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
+ __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, false,
extack);
- return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
+ return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, false,
+ extack);
}
static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
@@ -9480,7 +9738,7 @@ static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
if (!rif)
return;
- __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
+ __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, false, NULL);
}
static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
@@ -9523,6 +9781,116 @@ mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
return err;
}
+struct mlxsw_sp_router_replay_inetaddr_up {
+ struct mlxsw_sp *mlxsw_sp;
+ struct netlink_ext_ack *extack;
+ unsigned int done;
+ bool deslavement;
+};
+
+static int mlxsw_sp_router_replay_inetaddr_up(struct net_device *dev,
+ struct netdev_nested_priv *priv)
+{
+ struct mlxsw_sp_router_replay_inetaddr_up *ctx = priv->data;
+ bool nomaster = ctx->deslavement;
+ struct mlxsw_sp_crif *crif;
+ int err;
+
+ if (mlxsw_sp_dev_addr_list_empty(dev))
+ return 0;
+
+ crif = mlxsw_sp_crif_lookup(ctx->mlxsw_sp->router, dev);
+ if (!crif || crif->rif)
+ return 0;
+
+ if (!mlxsw_sp_rif_should_config(crif->rif, dev, NETDEV_UP))
+ return 0;
+
+ err = __mlxsw_sp_inetaddr_event(ctx->mlxsw_sp, dev, NETDEV_UP,
+ nomaster, ctx->extack);
+ if (err)
+ return err;
+
+ ctx->done++;
+ return 0;
+}
+
+static int mlxsw_sp_router_unreplay_inetaddr_up(struct net_device *dev,
+ struct netdev_nested_priv *priv)
+{
+ struct mlxsw_sp_router_replay_inetaddr_up *ctx = priv->data;
+ bool nomaster = ctx->deslavement;
+ struct mlxsw_sp_crif *crif;
+
+ if (!ctx->done)
+ return 0;
+
+ if (mlxsw_sp_dev_addr_list_empty(dev))
+ return 0;
+
+ crif = mlxsw_sp_crif_lookup(ctx->mlxsw_sp->router, dev);
+ if (!crif || !crif->rif)
+ return 0;
+
+ /* We are rolling back NETDEV_UP, so ask for that. */
+ if (!mlxsw_sp_rif_should_config(crif->rif, dev, NETDEV_UP))
+ return 0;
+
+ __mlxsw_sp_inetaddr_event(ctx->mlxsw_sp, dev, NETDEV_DOWN, nomaster,
+ NULL);
+
+ ctx->done--;
+ return 0;
+}
+
+int mlxsw_sp_netdevice_enslavement_replay(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *upper_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_router_replay_inetaddr_up ctx = {
+ .mlxsw_sp = mlxsw_sp,
+ .extack = extack,
+ .deslavement = false,
+ };
+ struct netdev_nested_priv priv = {
+ .data = &ctx,
+ };
+ int err;
+
+ err = mlxsw_sp_router_replay_inetaddr_up(upper_dev, &priv);
+ if (err)
+ return err;
+
+ err = netdev_walk_all_upper_dev_rcu(upper_dev,
+ mlxsw_sp_router_replay_inetaddr_up,
+ &priv);
+ if (err)
+ goto err_replay_up;
+
+ return 0;
+
+err_replay_up:
+ netdev_walk_all_upper_dev_rcu(upper_dev,
+ mlxsw_sp_router_unreplay_inetaddr_up,
+ &priv);
+ mlxsw_sp_router_unreplay_inetaddr_up(upper_dev, &priv);
+ return err;
+}
+
+void mlxsw_sp_netdevice_deslavement_replay(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *dev)
+{
+ struct mlxsw_sp_router_replay_inetaddr_up ctx = {
+ .mlxsw_sp = mlxsw_sp,
+ .deslavement = true,
+ };
+ struct netdev_nested_priv priv = {
+ .data = &ctx,
+ };
+
+ mlxsw_sp_router_replay_inetaddr_up(dev, &priv);
+}
+
static int
mlxsw_sp_port_vid_router_join_existing(struct mlxsw_sp_port *mlxsw_sp_port,
u16 vid, struct net_device *dev,
@@ -9539,15 +9907,84 @@ mlxsw_sp_port_vid_router_join_existing(struct mlxsw_sp_port *mlxsw_sp_port,
dev, extack);
}
+static void
+mlxsw_sp_port_vid_router_leave(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
+ struct net_device *dev)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port,
+ vid);
+ if (WARN_ON(!mlxsw_sp_port_vlan))
+ return;
+
+ __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
+}
+
static int __mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *lag_dev,
struct netlink_ext_ack *extack)
{
u16 default_vid = MLXSW_SP_DEFAULT_VID;
+ struct net_device *upper_dev;
+ struct list_head *iter;
+ int done = 0;
+ u16 vid;
+ int err;
- return mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port,
- default_vid, lag_dev,
- extack);
+ err = mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, default_vid,
+ lag_dev, extack);
+ if (err)
+ return err;
+
+ netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
+ if (!is_vlan_dev(upper_dev))
+ continue;
+
+ vid = vlan_dev_vlan_id(upper_dev);
+ err = mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, vid,
+ upper_dev, extack);
+ if (err)
+ goto err_router_join_dev;
+
+ ++done;
+ }
+
+ return 0;
+
+err_router_join_dev:
+ netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
+ if (!is_vlan_dev(upper_dev))
+ continue;
+ if (!done--)
+ break;
+
+ vid = vlan_dev_vlan_id(upper_dev);
+ mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, vid, upper_dev);
+ }
+
+ mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, default_vid, lag_dev);
+ return err;
+}
+
+static void
+__mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *lag_dev)
+{
+ u16 default_vid = MLXSW_SP_DEFAULT_VID;
+ struct net_device *upper_dev;
+ struct list_head *iter;
+ u16 vid;
+
+ netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
+ if (!is_vlan_dev(upper_dev))
+ continue;
+
+ vid = vlan_dev_vlan_id(upper_dev);
+ mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, vid, upper_dev);
+ }
+
+ mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, default_vid, lag_dev);
}
int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -9563,6 +10000,14 @@ int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
return err;
}
+void mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *lag_dev)
+{
+ mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock);
+ __mlxsw_sp_router_port_leave_lag(mlxsw_sp_port, lag_dev);
+ mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock);
+}
+
static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -9608,6 +10053,40 @@ out:
return notifier_from_errno(err);
}
+struct mlxsw_sp_macvlan_replay {
+ struct mlxsw_sp *mlxsw_sp;
+ struct netlink_ext_ack *extack;
+};
+
+static int mlxsw_sp_macvlan_replay_upper(struct net_device *dev,
+ struct netdev_nested_priv *priv)
+{
+ const struct mlxsw_sp_macvlan_replay *rms = priv->data;
+ struct netlink_ext_ack *extack = rms->extack;
+ struct mlxsw_sp *mlxsw_sp = rms->mlxsw_sp;
+
+ if (!netif_is_macvlan(dev))
+ return 0;
+
+ return mlxsw_sp_rif_macvlan_add(mlxsw_sp, dev, extack);
+}
+
+static int mlxsw_sp_macvlan_replay(struct mlxsw_sp_rif *rif,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_macvlan_replay rms = {
+ .mlxsw_sp = rif->mlxsw_sp,
+ .extack = extack,
+ };
+ struct netdev_nested_priv priv = {
+ .data = &rms,
+ };
+
+ return netdev_walk_all_upper_dev_rcu(mlxsw_sp_rif_dev(rif),
+ mlxsw_sp_macvlan_replay_upper,
+ &priv);
+}
+
static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
struct netdev_nested_priv *priv)
{
@@ -9630,7 +10109,6 @@ static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
if (!netif_is_macvlan_port(dev))
return 0;
- netdev_warn(dev, "Router interface is deleted. Upper macvlans will not work\n");
return netdev_walk_all_upper_dev_rcu(dev,
__mlxsw_sp_rif_macvlan_flush, &priv);
}
@@ -9688,6 +10166,10 @@ static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif,
if (err)
goto err_rif_subport_op;
+ err = mlxsw_sp_macvlan_replay(rif, extack);
+ if (err)
+ goto err_macvlan_replay;
+
err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
mlxsw_sp_fid_index(rif->fid), true);
if (err)
@@ -9703,6 +10185,8 @@ err_fid_rif_set:
mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
mlxsw_sp_fid_index(rif->fid), false);
err_rif_fdb_op:
+ mlxsw_sp_rif_macvlan_flush(rif);
+err_macvlan_replay:
mlxsw_sp_rif_subport_op(rif, false);
err_rif_subport_op:
mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, mac_profile);
@@ -9724,6 +10208,7 @@ static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
static struct mlxsw_sp_fid *
mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
+ const struct mlxsw_sp_rif_params *params,
struct netlink_ext_ack *extack)
{
return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
@@ -9788,6 +10273,10 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif,
if (err)
goto err_fid_bc_flood_set;
+ err = mlxsw_sp_macvlan_replay(rif, extack);
+ if (err)
+ goto err_macvlan_replay;
+
err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
mlxsw_sp_fid_index(rif->fid), true);
if (err)
@@ -9803,6 +10292,8 @@ err_fid_rif_set:
mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
mlxsw_sp_fid_index(rif->fid), false);
err_rif_fdb_op:
+ mlxsw_sp_rif_macvlan_flush(rif);
+err_macvlan_replay:
mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
@@ -9836,6 +10327,7 @@ static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
static struct mlxsw_sp_fid *
mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
+ const struct mlxsw_sp_rif_params *params,
struct netlink_ext_ack *extack)
{
int rif_ifindex = mlxsw_sp_rif_dev_ifindex(rif);
@@ -9869,27 +10361,22 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
static struct mlxsw_sp_fid *
mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
+ const struct mlxsw_sp_rif_params *params,
struct netlink_ext_ack *extack)
{
struct net_device *dev = mlxsw_sp_rif_dev(rif);
struct net_device *br_dev;
- u16 vid;
- int err;
+
+ if (WARN_ON(!params->vid))
+ return ERR_PTR(-EINVAL);
if (is_vlan_dev(dev)) {
- vid = vlan_dev_vlan_id(dev);
br_dev = vlan_dev_real_dev(dev);
if (WARN_ON(!netif_is_bridge_master(br_dev)))
return ERR_PTR(-EINVAL);
- } else {
- err = br_vlan_get_pvid(dev, &vid);
- if (err < 0 || !vid) {
- NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
- return ERR_PTR(-EINVAL);
- }
}
- return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
+ return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, params->vid);
}
static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
@@ -9954,6 +10441,10 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid,
if (err)
goto err_fid_bc_flood_set;
+ err = mlxsw_sp_macvlan_replay(rif, extack);
+ if (err)
+ goto err_macvlan_replay;
+
err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
mlxsw_sp_fid_index(rif->fid), true);
if (err)
@@ -9969,6 +10460,8 @@ err_fid_rif_set:
mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
mlxsw_sp_fid_index(rif->fid), false);
err_rif_fdb_op:
+ mlxsw_sp_rif_macvlan_flush(rif);
+err_macvlan_replay:
mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 9a2669a08480..ed3b628caafe 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -171,8 +171,19 @@ int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp);
int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp);
struct net_device *
mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev);
+int mlxsw_sp_router_bridge_vlan_add(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *dev,
+ u16 new_vid, bool is_pvid,
+ struct netlink_ext_ack *extack);
int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *lag_dev,
struct netlink_ext_ack *extack);
+void mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *lag_dev);
+int mlxsw_sp_netdevice_enslavement_replay(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *upper_dev,
+ struct netlink_ext_ack *extack);
+void mlxsw_sp_netdevice_deslavement_replay(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *dev);
#endif /* _MLXSW_ROUTER_H_*/
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index d88e62bc759f..dffb67c1038e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -384,6 +384,91 @@ mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge,
return __mlxsw_sp_bridge_port_find(bridge_device, brport_dev);
}
+static int mlxsw_sp_port_obj_add(struct net_device *dev, const void *ctx,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack);
+static int mlxsw_sp_port_obj_del(struct net_device *dev, const void *ctx,
+ const struct switchdev_obj *obj);
+
+struct mlxsw_sp_bridge_port_replay_switchdev_objs {
+ struct net_device *brport_dev;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ int done;
+};
+
+static int
+mlxsw_sp_bridge_port_replay_switchdev_objs(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct switchdev_notifier_port_obj_info *port_obj_info = ptr;
+ struct netlink_ext_ack *extack = port_obj_info->info.extack;
+ struct mlxsw_sp_bridge_port_replay_switchdev_objs *rso;
+ int err = 0;
+
+ rso = (void *)port_obj_info->info.ctx;
+
+ if (event != SWITCHDEV_PORT_OBJ_ADD ||
+ dev != rso->brport_dev)
+ goto out;
+
+ /* When a port is joining the bridge through a LAG, there likely are
+ * VLANs configured on that LAG already. The replay will thus attempt to
+ * have the given port-vlans join the corresponding FIDs. But the LAG
+ * netdevice has already called the ndo_vlan_rx_add_vid NDO for its VLAN
+ * memberships, back before CHANGEUPPER was distributed and netdevice
+ * master set. So now before propagating the VLAN events further, we
+ * first need to kill the corresponding VID at the mlxsw_sp_port.
+ *
+ * Note that this doesn't need to be rolled back on failure -- if the
+ * replay fails, the enslavement is off, and the VIDs would be killed by
+ * LAG anyway as part of its rollback.
+ */
+ if (port_obj_info->obj->id == SWITCHDEV_OBJ_ID_PORT_VLAN) {
+ u16 vid = SWITCHDEV_OBJ_PORT_VLAN(port_obj_info->obj)->vid;
+
+ err = mlxsw_sp_port_kill_vid(rso->mlxsw_sp_port->dev, 0, vid);
+ if (err)
+ goto out;
+ }
+
+ ++rso->done;
+ err = mlxsw_sp_port_obj_add(rso->mlxsw_sp_port->dev, NULL,
+ port_obj_info->obj, extack);
+
+out:
+ return notifier_from_errno(err);
+}
+
+static struct notifier_block mlxsw_sp_bridge_port_replay_switchdev_objs_nb = {
+ .notifier_call = mlxsw_sp_bridge_port_replay_switchdev_objs,
+};
+
+static int
+mlxsw_sp_bridge_port_unreplay_switchdev_objs(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct switchdev_notifier_port_obj_info *port_obj_info = ptr;
+ struct mlxsw_sp_bridge_port_replay_switchdev_objs *rso;
+
+ rso = (void *)port_obj_info->info.ctx;
+
+ if (event != SWITCHDEV_PORT_OBJ_ADD ||
+ dev != rso->brport_dev)
+ return NOTIFY_DONE;
+ if (!rso->done--)
+ return NOTIFY_STOP;
+
+ mlxsw_sp_port_obj_del(rso->mlxsw_sp_port->dev, NULL,
+ port_obj_info->obj);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block mlxsw_sp_bridge_port_unreplay_switchdev_objs_nb = {
+ .notifier_call = mlxsw_sp_bridge_port_unreplay_switchdev_objs,
+};
+
static struct mlxsw_sp_bridge_port *
mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
struct net_device *brport_dev,
@@ -1479,29 +1564,15 @@ err_port_vlan_set:
}
static int
-mlxsw_sp_br_ban_rif_pvid_change(struct mlxsw_sp *mlxsw_sp,
- const struct net_device *br_dev,
- const struct switchdev_obj_port_vlan *vlan)
+mlxsw_sp_br_rif_pvid_change(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *br_dev,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
{
- u16 pvid;
-
- pvid = mlxsw_sp_rif_vid(mlxsw_sp, br_dev);
- if (!pvid)
- return 0;
-
- if (vlan->flags & BRIDGE_VLAN_INFO_PVID) {
- if (vlan->vid != pvid) {
- netdev_err(br_dev, "Can't change PVID, it's used by router interface\n");
- return -EBUSY;
- }
- } else {
- if (vlan->vid == pvid) {
- netdev_err(br_dev, "Can't remove PVID, it's used by router interface\n");
- return -EBUSY;
- }
- }
+ bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
- return 0;
+ return mlxsw_sp_router_bridge_vlan_add(mlxsw_sp, br_dev, vlan->vid,
+ flag_pvid, extack);
}
static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -1518,8 +1589,8 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
int err = 0;
if (br_vlan_enabled(orig_dev))
- err = mlxsw_sp_br_ban_rif_pvid_change(mlxsw_sp,
- orig_dev, vlan);
+ err = mlxsw_sp_br_rif_pvid_change(mlxsw_sp, orig_dev,
+ vlan, extack);
if (!err)
err = -EOPNOTSUPP;
return err;
@@ -2365,6 +2436,33 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
}
static int
+mlxsw_sp_bridge_port_replay(struct mlxsw_sp_bridge_port *bridge_port,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_bridge_port_replay_switchdev_objs rso = {
+ .brport_dev = bridge_port->dev,
+ .mlxsw_sp_port = mlxsw_sp_port,
+ };
+ struct notifier_block *nb;
+ int err;
+
+ nb = &mlxsw_sp_bridge_port_replay_switchdev_objs_nb;
+ err = switchdev_bridge_port_replay(bridge_port->dev, mlxsw_sp_port->dev,
+ &rso, NULL, nb, extack);
+ if (err)
+ goto err_replay;
+
+ return 0;
+
+err_replay:
+ nb = &mlxsw_sp_bridge_port_unreplay_switchdev_objs_nb;
+ switchdev_bridge_port_replay(bridge_port->dev, mlxsw_sp_port->dev,
+ &rso, NULL, nb, extack);
+ return err;
+}
+
+static int
mlxsw_sp_bridge_vlan_aware_port_join(struct mlxsw_sp_bridge_port *bridge_port,
struct mlxsw_sp_port *mlxsw_sp_port,
struct netlink_ext_ack *extack)
@@ -2378,7 +2476,7 @@ mlxsw_sp_bridge_vlan_aware_port_join(struct mlxsw_sp_bridge_port *bridge_port,
if (mlxsw_sp_port->default_vlan->fid)
mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port->default_vlan);
- return 0;
+ return mlxsw_sp_bridge_port_replay(bridge_port, mlxsw_sp_port, extack);
}
static int
@@ -2550,6 +2648,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
struct net_device *dev = bridge_port->dev;
u16 vid;
+ int err;
vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : MLXSW_SP_DEFAULT_VID;
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
@@ -2565,8 +2664,20 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
if (mlxsw_sp_port_vlan->fid)
mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
- return mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port,
- extack);
+ err = mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port,
+ extack);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_bridge_port_replay(bridge_port, mlxsw_sp_port, extack);
+ if (err)
+ goto err_replay;
+
+ return 0;
+
+err_replay:
+ mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
+ return err;
}
static void
@@ -2783,8 +2894,15 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
if (err)
goto err_port_join;
+ err = mlxsw_sp_netdevice_enslavement_replay(mlxsw_sp, br_dev, extack);
+ if (err)
+ goto err_replay;
+
return 0;
+err_replay:
+ bridge_device->ops->port_leave(bridge_device, bridge_port,
+ mlxsw_sp_port);
err_port_join:
mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port);
return err;
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 8f3f78b68592..3765d3389a9a 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -300,8 +300,11 @@ static void mana_gd_ring_doorbell(struct gdma_context *gc, u32 db_index,
void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue)
{
+ /* Hardware Spec specifies that software client should set 0 for
+ * wqe_cnt for Receive Queues. This value is not used in Send Queues.
+ */
mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type,
- queue->id, queue->head * GDMA_WQE_BU_SIZE, 1);
+ queue->id, queue->head * GDMA_WQE_BU_SIZE, 0);
}
void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index a499e460594b..ac2acc9aca9d 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1386,8 +1386,8 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq)
recv_buf_oob = &rxq->rx_oobs[curr_index];
- err = mana_gd_post_and_ring(rxq->gdma_rq, &recv_buf_oob->wqe_req,
- &recv_buf_oob->wqe_inf);
+ err = mana_gd_post_work_request(rxq->gdma_rq, &recv_buf_oob->wqe_req,
+ &recv_buf_oob->wqe_inf);
if (WARN_ON_ONCE(err))
return;
@@ -1657,6 +1657,12 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
mana_process_rx_cqe(rxq, cq, &comp[i]);
}
+ if (comp_read > 0) {
+ struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context;
+
+ mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq);
+ }
+
if (rxq->xdp_flush)
xdp_do_flush();
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 6b1fb5708434..f18c791cf698 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -924,7 +924,7 @@ static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *addr)
*/
static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
{
- u32 new_ctrl, update;
+ u32 new_ctrl, new_ctrl_w1, update;
unsigned int r;
int err;
@@ -937,14 +937,29 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG;
- nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
- nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
+ if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FREELIST_EN)) {
+ nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
+ nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
+ }
nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
err = nfp_net_reconfig(nn, update);
if (err)
nn_err(nn, "Could not disable device: %d\n", err);
+ if (nn->cap_w1 & NFP_NET_CFG_CTRL_FREELIST_EN) {
+ new_ctrl_w1 = nn->dp.ctrl_w1;
+ new_ctrl_w1 &= ~NFP_NET_CFG_CTRL_FREELIST_EN;
+ nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
+ nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
+
+ nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, new_ctrl_w1);
+ err = nfp_net_reconfig(nn, update);
+ if (err)
+ nn_err(nn, "Could not disable FREELIST_EN: %d\n", err);
+ nn->dp.ctrl_w1 = new_ctrl_w1;
+ }
+
for (r = 0; r < nn->dp.num_rx_rings; r++) {
nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]);
if (nfp_net_has_xsk_pool_slow(&nn->dp, nn->dp.rx_rings[r].idx))
@@ -964,11 +979,12 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
*/
static int nfp_net_set_config_and_enable(struct nfp_net *nn)
{
- u32 bufsz, new_ctrl, update = 0;
+ u32 bufsz, new_ctrl, new_ctrl_w1, update = 0;
unsigned int r;
int err;
new_ctrl = nn->dp.ctrl;
+ new_ctrl_w1 = nn->dp.ctrl_w1;
if (nn->dp.ctrl & NFP_NET_CFG_CTRL_RSS_ANY) {
nfp_net_rss_write_key(nn);
@@ -1001,16 +1017,25 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn)
bufsz = nn->dp.fl_bufsz - nn->dp.rx_dma_off - NFP_NET_RX_BUF_NON_DATA;
nn_writel(nn, NFP_NET_CFG_FLBUFSZ, bufsz);
- /* Enable device */
- new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
+ /* Enable device
+ * Step 1: Replace the CTRL_ENABLE by NFP_NET_CFG_CTRL_FREELIST_EN if
+ * FREELIST_EN exits.
+ */
+ if (nn->cap_w1 & NFP_NET_CFG_CTRL_FREELIST_EN)
+ new_ctrl_w1 |= NFP_NET_CFG_CTRL_FREELIST_EN;
+ else
+ new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
update |= NFP_NET_CFG_UPDATE_GEN;
update |= NFP_NET_CFG_UPDATE_MSIX;
update |= NFP_NET_CFG_UPDATE_RING;
if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
+ /* Step 2: Send the configuration and write the freelist.
+ * - The freelist only need to be written once.
+ */
nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
- nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, nn->dp.ctrl_w1);
+ nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, new_ctrl_w1);
err = nfp_net_reconfig(nn, update);
if (err) {
nfp_net_clear_config_and_disable(nn);
@@ -1018,10 +1043,25 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn)
}
nn->dp.ctrl = new_ctrl;
+ nn->dp.ctrl_w1 = new_ctrl_w1;
for (r = 0; r < nn->dp.num_rx_rings; r++)
nfp_net_rx_ring_fill_freelist(&nn->dp, &nn->dp.rx_rings[r]);
+ /* Step 3: Do the NFP_NET_CFG_CTRL_ENABLE. Send the configuration.
+ */
+ if (nn->cap_w1 & NFP_NET_CFG_CTRL_FREELIST_EN) {
+ new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
+ nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
+
+ err = nfp_net_reconfig(nn, update);
+ if (err) {
+ nfp_net_clear_config_and_disable(nn);
+ return err;
+ }
+ nn->dp.ctrl = new_ctrl;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 669b9dccb6a9..3e63f6d6a563 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -268,6 +268,7 @@
#define NFP_NET_CFG_CTRL_PKT_TYPE (0x1 << 0) /* Pkttype offload */
#define NFP_NET_CFG_CTRL_IPSEC (0x1 << 1) /* IPsec offload */
#define NFP_NET_CFG_CTRL_MCAST_FILTER (0x1 << 2) /* Multicast Filter */
+#define NFP_NET_CFG_CTRL_FREELIST_EN (0x1 << 6) /* Freelist enable flag bit */
#define NFP_NET_CFG_CAP_WORD1 0x00a4
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index ab7d217b98b3..d6ce113a4210 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -213,29 +213,18 @@ out:
return ret;
}
-static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+static void ionic_clear_pci(struct ionic *ionic)
{
- struct device *dev = &pdev->dev;
- struct ionic *ionic;
- int num_vfs;
- int err;
-
- ionic = ionic_devlink_alloc(dev);
- if (!ionic)
- return -ENOMEM;
-
- ionic->pdev = pdev;
- ionic->dev = dev;
- pci_set_drvdata(pdev, ionic);
- mutex_init(&ionic->dev_cmd_lock);
+ ionic_unmap_bars(ionic);
+ pci_release_regions(ionic->pdev);
+ pci_disable_device(ionic->pdev);
+}
- /* Query system for DMA addressing limitation for the device. */
- err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(IONIC_ADDR_LEN));
- if (err) {
- dev_err(dev, "Unable to obtain 64-bit DMA for consistent allocations, aborting. err=%d\n",
- err);
- goto err_out_clear_drvdata;
- }
+static int ionic_setup_one(struct ionic *ionic)
+{
+ struct pci_dev *pdev = ionic->pdev;
+ struct device *dev = ionic->dev;
+ int err;
ionic_debugfs_add_dev(ionic);
@@ -249,20 +238,19 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err = pci_request_regions(pdev, IONIC_DRV_NAME);
if (err) {
dev_err(dev, "Cannot request PCI regions: %d, aborting\n", err);
- goto err_out_pci_disable_device;
+ goto err_out_clear_pci;
}
-
pcie_print_link_status(pdev);
err = ionic_map_bars(ionic);
if (err)
- goto err_out_pci_release_regions;
+ goto err_out_clear_pci;
/* Configure the device */
err = ionic_setup(ionic);
if (err) {
dev_err(dev, "Cannot setup device: %d, aborting\n", err);
- goto err_out_unmap_bars;
+ goto err_out_clear_pci;
}
pci_set_master(pdev);
@@ -279,24 +267,64 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_teardown;
}
- /* Configure the ports */
+ /* Configure the port */
err = ionic_port_identify(ionic);
if (err) {
dev_err(dev, "Cannot identify port: %d, aborting\n", err);
- goto err_out_reset;
+ goto err_out_teardown;
}
err = ionic_port_init(ionic);
if (err) {
dev_err(dev, "Cannot init port: %d, aborting\n", err);
- goto err_out_reset;
+ goto err_out_teardown;
+ }
+
+ return 0;
+
+err_out_teardown:
+ ionic_dev_teardown(ionic);
+err_out_clear_pci:
+ ionic_clear_pci(ionic);
+err_out_debugfs_del_dev:
+ ionic_debugfs_del_dev(ionic);
+
+ return err;
+}
+
+static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct device *dev = &pdev->dev;
+ struct ionic *ionic;
+ int num_vfs;
+ int err;
+
+ ionic = ionic_devlink_alloc(dev);
+ if (!ionic)
+ return -ENOMEM;
+
+ ionic->pdev = pdev;
+ ionic->dev = dev;
+ pci_set_drvdata(pdev, ionic);
+ mutex_init(&ionic->dev_cmd_lock);
+
+ /* Query system for DMA addressing limitation for the device. */
+ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(IONIC_ADDR_LEN));
+ if (err) {
+ dev_err(dev, "Unable to obtain 64-bit DMA for consistent allocations, aborting. err=%d\n",
+ err);
+ goto err_out;
}
+ err = ionic_setup_one(ionic);
+ if (err)
+ goto err_out;
+
/* Allocate and init the LIF */
err = ionic_lif_size(ionic);
if (err) {
dev_err(dev, "Cannot size LIF: %d, aborting\n", err);
- goto err_out_port_reset;
+ goto err_out_pci;
}
err = ionic_lif_alloc(ionic);
@@ -347,21 +375,10 @@ err_out_free_lifs:
ionic->lif = NULL;
err_out_free_irqs:
ionic_bus_free_irq_vectors(ionic);
-err_out_port_reset:
- ionic_port_reset(ionic);
-err_out_reset:
- ionic_reset(ionic);
-err_out_teardown:
+err_out_pci:
ionic_dev_teardown(ionic);
-err_out_unmap_bars:
- ionic_unmap_bars(ionic);
-err_out_pci_release_regions:
- pci_release_regions(pdev);
-err_out_pci_disable_device:
- pci_disable_device(pdev);
-err_out_debugfs_del_dev:
- ionic_debugfs_del_dev(ionic);
-err_out_clear_drvdata:
+ ionic_clear_pci(ionic);
+err_out:
mutex_destroy(&ionic->dev_cmd_lock);
ionic_devlink_free(ionic);
@@ -386,20 +403,71 @@ static void ionic_remove(struct pci_dev *pdev)
ionic_port_reset(ionic);
ionic_reset(ionic);
ionic_dev_teardown(ionic);
- ionic_unmap_bars(ionic);
- pci_release_regions(pdev);
- pci_disable_device(pdev);
+ ionic_clear_pci(ionic);
ionic_debugfs_del_dev(ionic);
mutex_destroy(&ionic->dev_cmd_lock);
ionic_devlink_free(ionic);
}
+static void ionic_reset_prepare(struct pci_dev *pdev)
+{
+ struct ionic *ionic = pci_get_drvdata(pdev);
+ struct ionic_lif *lif = ionic->lif;
+
+ dev_dbg(ionic->dev, "%s: device stopping\n", __func__);
+
+ del_timer_sync(&ionic->watchdog_timer);
+ cancel_work_sync(&lif->deferred.work);
+
+ mutex_lock(&lif->queue_lock);
+ ionic_stop_queues_reconfig(lif);
+ ionic_txrx_free(lif);
+ ionic_lif_deinit(lif);
+ ionic_qcqs_free(lif);
+ mutex_unlock(&lif->queue_lock);
+
+ ionic_dev_teardown(ionic);
+ ionic_clear_pci(ionic);
+ ionic_debugfs_del_dev(ionic);
+}
+
+static void ionic_reset_done(struct pci_dev *pdev)
+{
+ struct ionic *ionic = pci_get_drvdata(pdev);
+ struct ionic_lif *lif = ionic->lif;
+ int err;
+
+ err = ionic_setup_one(ionic);
+ if (err)
+ goto err_out;
+
+ ionic_debugfs_add_sizes(ionic);
+ ionic_debugfs_add_lif(ionic->lif);
+
+ err = ionic_restart_lif(lif);
+ if (err)
+ goto err_out;
+
+ mod_timer(&ionic->watchdog_timer, jiffies + 1);
+
+err_out:
+ dev_dbg(ionic->dev, "%s: device recovery %s\n",
+ __func__, err ? "failed" : "done");
+}
+
+static const struct pci_error_handlers ionic_err_handler = {
+ /* FLR handling */
+ .reset_prepare = ionic_reset_prepare,
+ .reset_done = ionic_reset_done,
+};
+
static struct pci_driver ionic_driver = {
.name = IONIC_DRV_NAME,
.id_table = ionic_id_table,
.probe = ionic_probe,
.remove = ionic_remove,
.sriov_configure = ionic_sriov_configure,
+ .err_handler = &ionic_err_handler
};
int ionic_bus_register_driver(void)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 612b0015dc43..adc05f944c14 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -434,7 +434,7 @@ static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq)
}
}
-static void ionic_qcqs_free(struct ionic_lif *lif)
+void ionic_qcqs_free(struct ionic_lif *lif)
{
struct device *dev = lif->ionic->dev;
struct ionic_qcq *adminqcq;
@@ -1754,7 +1754,7 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa)
return ionic_lif_addr_add(netdev_priv(netdev), mac);
}
-static void ionic_stop_queues_reconfig(struct ionic_lif *lif)
+void ionic_stop_queues_reconfig(struct ionic_lif *lif)
{
/* Stop and clean the queues before reconfiguration */
netif_device_detach(lif->netdev);
@@ -2009,7 +2009,7 @@ static void ionic_txrx_deinit(struct ionic_lif *lif)
}
}
-static void ionic_txrx_free(struct ionic_lif *lif)
+void ionic_txrx_free(struct ionic_lif *lif)
{
unsigned int i;
@@ -3266,27 +3266,11 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif)
dev_info(ionic->dev, "FW Down: LIFs stopped\n");
}
-static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
+int ionic_restart_lif(struct ionic_lif *lif)
{
struct ionic *ionic = lif->ionic;
int err;
- if (!test_bit(IONIC_LIF_F_FW_RESET, lif->state))
- return;
-
- dev_info(ionic->dev, "FW Up: restarting LIFs\n");
-
- ionic_init_devinfo(ionic);
- err = ionic_identify(ionic);
- if (err)
- goto err_out;
- err = ionic_port_identify(ionic);
- if (err)
- goto err_out;
- err = ionic_port_init(ionic);
- if (err)
- goto err_out;
-
mutex_lock(&lif->queue_lock);
if (test_and_clear_bit(IONIC_LIF_F_BROKEN, lif->state))
@@ -3322,12 +3306,8 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
clear_bit(IONIC_LIF_F_FW_RESET, lif->state);
ionic_link_status_check_request(lif, CAN_SLEEP);
netif_device_attach(lif->netdev);
- dev_info(ionic->dev, "FW Up: LIFs restarted\n");
-
- /* restore the hardware timestamping queues */
- ionic_lif_hwstamp_replay(lif);
- return;
+ return 0;
err_txrx_free:
ionic_txrx_free(lif);
@@ -3337,6 +3317,46 @@ err_qcqs_free:
ionic_qcqs_free(lif);
err_unlock:
mutex_unlock(&lif->queue_lock);
+
+ return err;
+}
+
+static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
+{
+ struct ionic *ionic = lif->ionic;
+ int err;
+
+ if (!test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+ return;
+
+ dev_info(ionic->dev, "FW Up: restarting LIFs\n");
+
+ /* This is a little different from what happens at
+ * probe time because the LIF already exists so we
+ * just need to reanimate it.
+ */
+ ionic_init_devinfo(ionic);
+ err = ionic_identify(ionic);
+ if (err)
+ goto err_out;
+ err = ionic_port_identify(ionic);
+ if (err)
+ goto err_out;
+ err = ionic_port_init(ionic);
+ if (err)
+ goto err_out;
+
+ err = ionic_restart_lif(lif);
+ if (err)
+ goto err_out;
+
+ dev_info(ionic->dev, "FW Up: LIFs restarted\n");
+
+ /* restore the hardware timestamping queues */
+ ionic_lif_hwstamp_replay(lif);
+
+ return;
+
err_out:
dev_err(ionic->dev, "FW Up: LIFs restart failed - err %d\n", err);
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index fd2ea670e7d8..457c24195ca6 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -325,6 +325,11 @@ void ionic_lif_deinit(struct ionic_lif *lif);
int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr);
int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr);
+void ionic_stop_queues_reconfig(struct ionic_lif *lif);
+void ionic_txrx_free(struct ionic_lif *lif);
+void ionic_qcqs_free(struct ionic_lif *lif);
+int ionic_restart_lif(struct ionic_lif *lif);
+
int ionic_lif_register(struct ionic_lif *lif);
void ionic_lif_unregister(struct ionic_lif *lif);
int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 174dc8908b72..c362bff3cb83 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -552,7 +552,7 @@ static void smsc911x_mac_write(struct smsc911x_data *pdata,
/* Get a phy register */
static int smsc911x_mii_read(struct mii_bus *bus, int phyaddr, int regidx)
{
- struct smsc911x_data *pdata = (struct smsc911x_data *)bus->priv;
+ struct smsc911x_data *pdata = bus->priv;
unsigned long flags;
unsigned int addr;
int i, reg;
@@ -591,7 +591,7 @@ out:
static int smsc911x_mii_write(struct mii_bus *bus, int phyaddr, int regidx,
u16 val)
{
- struct smsc911x_data *pdata = (struct smsc911x_data *)bus->priv;
+ struct smsc911x_data *pdata = bus->priv;
unsigned long flags;
unsigned int addr;
int i, reg;
diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c
index 71fbb358bb7d..3b26f1d86beb 100644
--- a/drivers/net/ethernet/smsc/smsc9420.c
+++ b/drivers/net/ethernet/smsc/smsc9420.c
@@ -102,7 +102,7 @@ static inline void smsc9420_pci_flush_write(struct smsc9420_pdata *pd)
static int smsc9420_mii_read(struct mii_bus *bus, int phyaddr, int regidx)
{
- struct smsc9420_pdata *pd = (struct smsc9420_pdata *)bus->priv;
+ struct smsc9420_pdata *pd = bus->priv;
unsigned long flags;
u32 addr;
int i, reg = -EIO;
@@ -140,7 +140,7 @@ out:
static int smsc9420_mii_write(struct mii_bus *bus, int phyaddr, int regidx,
u16 val)
{
- struct smsc9420_pdata *pd = (struct smsc9420_pdata *)bus->priv;
+ struct smsc9420_pdata *pd = bus->priv;
unsigned long flags;
u32 addr;
int i, reg = -EIO;
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 16e67c18b6f7..57f2137bbe9d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -59,13 +59,25 @@
/* #define FRAME_FILTER_DEBUG */
struct stmmac_txq_stats {
- unsigned long tx_pkt_n;
- unsigned long tx_normal_irq_n;
+ u64 tx_bytes;
+ u64 tx_packets;
+ u64 tx_pkt_n;
+ u64 tx_normal_irq_n;
+ u64 napi_poll;
+ u64 tx_clean;
+ u64 tx_set_ic_bit;
+ u64 tx_tso_frames;
+ u64 tx_tso_nfrags;
+ struct u64_stats_sync syncp;
};
struct stmmac_rxq_stats {
- unsigned long rx_pkt_n;
- unsigned long rx_normal_irq_n;
+ u64 rx_bytes;
+ u64 rx_packets;
+ u64 rx_pkt_n;
+ u64 rx_normal_irq_n;
+ u64 napi_poll;
+ struct u64_stats_sync syncp;
};
/* Extra statistic and debug information exposed by ethtool */
@@ -81,6 +93,7 @@ struct stmmac_extra_stats {
unsigned long tx_frame_flushed;
unsigned long tx_payload_error;
unsigned long tx_ip_header_error;
+ unsigned long tx_collision;
/* Receive errors */
unsigned long rx_desc;
unsigned long sa_filter_fail;
@@ -113,14 +126,6 @@ struct stmmac_extra_stats {
/* Tx/Rx IRQ Events */
unsigned long rx_early_irq;
unsigned long threshold;
- unsigned long tx_pkt_n;
- unsigned long rx_pkt_n;
- unsigned long normal_irq_n;
- unsigned long rx_normal_irq_n;
- unsigned long napi_poll;
- unsigned long tx_normal_irq_n;
- unsigned long tx_clean;
- unsigned long tx_set_ic_bit;
unsigned long irq_receive_pmt_irq_n;
/* MMC info */
unsigned long mmc_tx_irq_n;
@@ -190,18 +195,16 @@ struct stmmac_extra_stats {
unsigned long mtl_rx_fifo_ctrl_active;
unsigned long mac_rx_frame_ctrl_fifo;
unsigned long mac_gmii_rx_proto_engine;
- /* TSO */
- unsigned long tx_tso_frames;
- unsigned long tx_tso_nfrags;
/* EST */
unsigned long mtl_est_cgce;
unsigned long mtl_est_hlbs;
unsigned long mtl_est_hlbf;
unsigned long mtl_est_btre;
unsigned long mtl_est_btrlm;
- /* per queue statistics */
- struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES];
- struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES];
+ unsigned long rx_dropped;
+ unsigned long rx_errors;
+ unsigned long tx_dropped;
+ unsigned long tx_errors;
};
/* Safety Feature statistics exposed by ethtool */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index 9f88530c5e8c..b5efd9c2eac7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -113,7 +113,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
/* dwc-qos needs GMAC4, AAL, TSO and PMT */
plat_dat->has_gmac4 = 1;
plat_dat->dma_cfg->aal = 1;
- plat_dat->tso_en = 1;
+ plat_dat->flags |= STMMAC_FLAG_TSO_EN;
plat_dat->pmt = 1;
return 0;
@@ -359,7 +359,7 @@ bypass_clk_reset_gpio:
data->fix_mac_speed = tegra_eqos_fix_speed;
data->init = tegra_eqos_init;
data->bsp_priv = eqos;
- data->sph_disable = 1;
+ data->flags |= STMMAC_FLAG_SPH_DISABLE;
err = tegra_eqos_init(pdev, eqos);
if (err < 0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index ab9f876b6df7..0ffae785d8bd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -326,10 +326,10 @@ static int intel_crosststamp(ktime_t *device,
/* Both internal crosstimestamping and external triggered event
* timestamping cannot be run concurrently.
*/
- if (priv->plat->ext_snapshot_en)
+ if (priv->plat->flags & STMMAC_FLAG_EXT_SNAPSHOT_EN)
return -EBUSY;
- priv->plat->int_snapshot_en = 1;
+ priv->plat->flags |= STMMAC_FLAG_INT_SNAPSHOT_EN;
mutex_lock(&priv->aux_ts_lock);
/* Enable Internal snapshot trigger */
@@ -350,7 +350,7 @@ static int intel_crosststamp(ktime_t *device,
break;
default:
mutex_unlock(&priv->aux_ts_lock);
- priv->plat->int_snapshot_en = 0;
+ priv->plat->flags &= ~STMMAC_FLAG_INT_SNAPSHOT_EN;
return -EINVAL;
}
writel(acr_value, ptpaddr + PTP_ACR);
@@ -376,7 +376,7 @@ static int intel_crosststamp(ktime_t *device,
if (!wait_event_interruptible_timeout(priv->tstamp_busy_wait,
stmmac_cross_ts_isr(priv),
HZ / 100)) {
- priv->plat->int_snapshot_en = 0;
+ priv->plat->flags &= ~STMMAC_FLAG_INT_SNAPSHOT_EN;
return -ETIMEDOUT;
}
@@ -395,7 +395,7 @@ static int intel_crosststamp(ktime_t *device,
}
system->cycles *= intel_priv->crossts_adj;
- priv->plat->int_snapshot_en = 0;
+ priv->plat->flags &= ~STMMAC_FLAG_INT_SNAPSHOT_EN;
return 0;
}
@@ -458,8 +458,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
plat->has_gmac = 0;
plat->has_gmac4 = 1;
plat->force_sf_dma_mode = 0;
- plat->tso_en = 1;
- plat->sph_disable = 1;
+ plat->flags |= (STMMAC_FLAG_TSO_EN | STMMAC_FLAG_SPH_DISABLE);
/* Multiplying factor to the clk_eee_i clock time
* period to make it closer to 100 ns. This value
@@ -561,7 +560,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
/* Set the maxmtu to a default of JUMBO_LEN */
plat->maxmtu = JUMBO_LEN;
- plat->vlan_fail_q_en = true;
+ plat->flags |= STMMAC_FLAG_VLAN_FAIL_Q_EN;
/* Use the last Rx queue */
plat->vlan_fail_q = plat->rx_queues_to_use - 1;
@@ -610,7 +609,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
plat->ext_snapshot_num = AUX_SNAPSHOT0;
plat->crosststamp = intel_crosststamp;
- plat->int_snapshot_en = 0;
+ plat->flags &= ~STMMAC_FLAG_INT_SNAPSHOT_EN;
/* Setup MSI vector offset specific to Intel mGbE controller */
plat->msi_mac_vec = 29;
@@ -628,7 +627,7 @@ static int ehl_common_data(struct pci_dev *pdev,
{
plat->rx_queues_to_use = 8;
plat->tx_queues_to_use = 8;
- plat->use_phy_wol = 1;
+ plat->flags |= STMMAC_FLAG_USE_PHY_WOL;
plat->safety_feat_cfg->tsoee = 1;
plat->safety_feat_cfg->mrxpee = 1;
@@ -954,7 +953,7 @@ static int stmmac_config_single_msi(struct pci_dev *pdev,
res->irq = pci_irq_vector(pdev, 0);
res->wol_irq = res->irq;
- plat->multi_msi_en = 0;
+ plat->flags &= ~STMMAC_FLAG_MULTI_MSI_EN;
dev_info(&pdev->dev, "%s: Single IRQ enablement successful\n",
__func__);
@@ -1006,7 +1005,7 @@ static int stmmac_config_multi_msi(struct pci_dev *pdev,
if (plat->msi_sfty_ue_vec < STMMAC_MSI_VEC_MAX)
res->sfty_ue_irq = pci_irq_vector(pdev, plat->msi_sfty_ue_vec);
- plat->multi_msi_en = 1;
+ plat->flags |= STMMAC_FLAG_MULTI_MSI_EN;
dev_info(&pdev->dev, "%s: multi MSI enablement successful\n", __func__);
return 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index 73c1dfa7ecb1..4d877d75642d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -588,7 +588,10 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev,
int i;
plat->interface = priv_plat->phy_mode;
- plat->use_phy_wol = priv_plat->mac_wol ? 0 : 1;
+ if (priv_plat->mac_wol)
+ plat->flags |= STMMAC_FLAG_USE_PHY_WOL;
+ else
+ plat->flags &= ~STMMAC_FLAG_USE_PHY_WOL;
plat->riwt_off = 1;
plat->maxmtu = ETH_DATA_LEN;
plat->host_dma_width = priv_plat->variant->dma_bit_mask;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index e62940414e54..735525ba8b93 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -4,10 +4,10 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
+#include <linux/of_net.h>
#include <linux/platform_device.h>
#include <linux/phy.h>
#include <linux/phy/phy.h>
-#include <linux/property.h>
#include "stmmac.h"
#include "stmmac_platform.h"
@@ -104,7 +104,7 @@ struct qcom_ethqos {
struct clk *link_clk;
struct phy *serdes_phy;
unsigned int speed;
- int phy_mode;
+ phy_interface_t phy_mode;
const struct ethqos_emac_por *por;
unsigned int num_por;
@@ -706,12 +706,13 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
ret = stmmac_get_platform_resources(pdev, &stmmac_res);
if (ret)
- return ret;
+ return dev_err_probe(dev, ret,
+ "Failed to get platform resources\n");
plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat)) {
- dev_err(dev, "dt configuration failed\n");
- return PTR_ERR(plat_dat);
+ return dev_err_probe(dev, PTR_ERR(plat_dat),
+ "dt configuration failed\n");
}
plat_dat->clks_config = ethqos_clks_config;
@@ -720,7 +721,9 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
if (!ethqos)
return -ENOMEM;
- ethqos->phy_mode = device_get_phy_mode(dev);
+ ret = of_get_phy_mode(np, &ethqos->phy_mode);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to get phy mode\n");
switch (ethqos->phy_mode) {
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_RGMII_ID:
@@ -731,16 +734,17 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
case PHY_INTERFACE_MODE_SGMII:
ethqos->configure_func = ethqos_configure_sgmii;
break;
- case -ENODEV:
- return -ENODEV;
default:
+ dev_err(dev, "Unsupported phy mode %s\n",
+ phy_modes(ethqos->phy_mode));
return -EINVAL;
}
ethqos->pdev = pdev;
ethqos->rgmii_base = devm_platform_ioremap_resource_byname(pdev, "rgmii");
if (IS_ERR(ethqos->rgmii_base))
- return PTR_ERR(ethqos->rgmii_base);
+ return dev_err_probe(dev, PTR_ERR(ethqos->rgmii_base),
+ "Failed to map rgmii resource\n");
ethqos->mac_base = stmmac_res.addr;
@@ -752,7 +756,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
ethqos->link_clk = devm_clk_get(dev, data->link_clk_name ?: "rgmii");
if (IS_ERR(ethqos->link_clk))
- return PTR_ERR(ethqos->link_clk);
+ return dev_err_probe(dev, PTR_ERR(ethqos->link_clk),
+ "Failed to get link_clk\n");
ret = ethqos_clks_config(ethqos, true);
if (ret)
@@ -764,7 +769,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
ethqos->serdes_phy = devm_phy_optional_get(dev, "serdes");
if (IS_ERR(ethqos->serdes_phy))
- return PTR_ERR(ethqos->serdes_phy);
+ return dev_err_probe(dev, PTR_ERR(ethqos->serdes_phy),
+ "Failed to get serdes phy\n");
ethqos->speed = SPEED_1000;
ethqos_update_link_clk(ethqos, SPEED_1000);
@@ -777,10 +783,12 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
if (ethqos->has_emac_ge_3)
plat_dat->dwmac4_addrs = &data->dwmac4_addrs;
plat_dat->pmt = 1;
- plat_dat->tso_en = of_property_read_bool(np, "snps,tso");
+ if (of_property_read_bool(np, "snps,tso"))
+ plat_dat->flags |= STMMAC_FLAG_TSO_EN;
if (of_device_is_compatible(np, "qcom,qcs404-ethqos"))
- plat_dat->rx_clk_runs_in_lpi = 1;
- plat_dat->has_integrated_pcs = data->has_integrated_pcs;
+ plat_dat->flags |= STMMAC_FLAG_RX_CLK_RUNS_IN_LPI;
+ if (data->has_integrated_pcs)
+ plat_dat->flags |= STMMAC_FLAG_HAS_INTEGRATED_PCS;
if (ethqos->serdes_phy) {
plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 1e714380d125..b607279e8cbd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -440,8 +440,10 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
struct stmmac_extra_stats *x, u32 chan,
u32 dir)
{
- u32 v;
+ struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
+ struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
int ret = 0;
+ u32 v;
v = readl(ioaddr + EMAC_INT_STA);
@@ -452,7 +454,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
if (v & EMAC_TX_INT) {
ret |= handle_tx;
- x->tx_normal_irq_n++;
+ u64_stats_update_begin(&tx_q->txq_stats.syncp);
+ tx_q->txq_stats.tx_normal_irq_n++;
+ u64_stats_update_end(&tx_q->txq_stats.syncp);
}
if (v & EMAC_TX_DMA_STOP_INT)
@@ -474,7 +478,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
if (v & EMAC_RX_INT) {
ret |= handle_rx;
- x->rx_normal_irq_n++;
+ u64_stats_update_begin(&rx_q->rxq_stats.syncp);
+ rx_q->rxq_stats.rx_normal_irq_n++;
+ u64_stats_update_end(&rx_q->rxq_stats.syncp);
}
if (v & EMAC_RX_BUF_UA_INT)
@@ -1227,7 +1233,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
plat_dat->interface = interface;
plat_dat->rx_coe = STMMAC_RX_COE_TYPE2;
plat_dat->tx_coe = 1;
- plat_dat->has_sun8i = true;
+ plat_dat->flags |= STMMAC_FLAG_HAS_SUN8I;
plat_dat->bsp_priv = gmac;
plat_dat->init = sun8i_dwmac_init;
plat_dat->exit = sun8i_dwmac_exit;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
index f8367c5b490b..99e2e5a5cd60 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
@@ -290,7 +290,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
}
plat->has_xgmac = 1;
- plat->tso_en = 1;
+ plat->flags |= STMMAC_FLAG_TSO_EN;
plat->pmt = 1;
plat->bsp_priv = mgbe;
@@ -337,7 +337,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
/* Program SID */
writel(MGBE_SID, mgbe->hv + MGBE_WRAP_AXI_ASID0_CTRL);
- plat->serdes_up_after_phy_linkup = 1;
+ plat->flags |= STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP;
err = stmmac_dvr_probe(&pdev->dev, plat, &res);
if (err < 0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
index 1c32b1788f02..dea270f60cc3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
@@ -82,29 +82,24 @@ static void dwmac100_dump_dma_regs(struct stmmac_priv *priv,
}
/* DMA controller has two counters to track the number of the missed frames. */
-static void dwmac100_dma_diagnostic_fr(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+static void dwmac100_dma_diagnostic_fr(struct stmmac_extra_stats *x,
void __iomem *ioaddr)
{
u32 csr8 = readl(ioaddr + DMA_MISSED_FRAME_CTR);
if (unlikely(csr8)) {
if (csr8 & DMA_MISSED_FRAME_OVE) {
- stats->rx_over_errors += 0x800;
x->rx_overflow_cntr += 0x800;
} else {
unsigned int ove_cntr;
ove_cntr = ((csr8 & DMA_MISSED_FRAME_OVE_CNTR) >> 17);
- stats->rx_over_errors += ove_cntr;
x->rx_overflow_cntr += ove_cntr;
}
if (csr8 & DMA_MISSED_FRAME_OVE_M) {
- stats->rx_missed_errors += 0xffff;
x->rx_missed_cntr += 0xffff;
} else {
unsigned int miss_f = (csr8 & DMA_MISSED_FRAME_M_CNTR);
- stats->rx_missed_errors += miss_f;
x->rx_missed_cntr += miss_f;
}
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 6a011d8633e8..89a14084c611 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -13,8 +13,7 @@
#include "dwmac4.h"
#include "dwmac4_descs.h"
-static int dwmac4_wrback_get_tx_status(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+static int dwmac4_wrback_get_tx_status(struct stmmac_extra_stats *x,
struct dma_desc *p,
void __iomem *ioaddr)
{
@@ -40,15 +39,13 @@ static int dwmac4_wrback_get_tx_status(struct net_device_stats *stats,
x->tx_frame_flushed++;
if (unlikely(tdes3 & TDES3_LOSS_CARRIER)) {
x->tx_losscarrier++;
- stats->tx_carrier_errors++;
}
if (unlikely(tdes3 & TDES3_NO_CARRIER)) {
x->tx_carrier++;
- stats->tx_carrier_errors++;
}
if (unlikely((tdes3 & TDES3_LATE_COLLISION) ||
(tdes3 & TDES3_EXCESSIVE_COLLISION)))
- stats->collisions +=
+ x->tx_collision +=
(tdes3 & TDES3_COLLISION_COUNT_MASK)
>> TDES3_COLLISION_COUNT_SHIFT;
@@ -73,8 +70,7 @@ static int dwmac4_wrback_get_tx_status(struct net_device_stats *stats,
return ret;
}
-static int dwmac4_wrback_get_rx_status(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+static int dwmac4_wrback_get_rx_status(struct stmmac_extra_stats *x,
struct dma_desc *p)
{
unsigned int rdes1 = le32_to_cpu(p->des1);
@@ -93,7 +89,7 @@ static int dwmac4_wrback_get_rx_status(struct net_device_stats *stats,
if (unlikely(rdes3 & RDES3_ERROR_SUMMARY)) {
if (unlikely(rdes3 & RDES3_GIANT_PACKET))
- stats->rx_length_errors++;
+ x->rx_length++;
if (unlikely(rdes3 & RDES3_OVERFLOW_ERROR))
x->rx_gmac_overflow++;
@@ -103,10 +99,8 @@ static int dwmac4_wrback_get_rx_status(struct net_device_stats *stats,
if (unlikely(rdes3 & RDES3_RECEIVE_ERROR))
x->rx_mii++;
- if (unlikely(rdes3 & RDES3_CRC_ERROR)) {
+ if (unlikely(rdes3 & RDES3_CRC_ERROR))
x->rx_crc_errors++;
- stats->rx_crc_errors++;
- }
if (unlikely(rdes3 & RDES3_DRIBBLE_ERROR))
x->dribbling_bit++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index 03ceb6a94073..980e5f8a37ec 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -171,6 +171,8 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan));
u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
+ struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
+ struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
int ret = 0;
if (dir == DMA_DIR_RX)
@@ -198,18 +200,19 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
}
}
/* TX/RX NORMAL interrupts */
- if (likely(intr_status & DMA_CHAN_STATUS_NIS))
- x->normal_irq_n++;
if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
- x->rx_normal_irq_n++;
- x->rxq_stats[chan].rx_normal_irq_n++;
+ u64_stats_update_begin(&rx_q->rxq_stats.syncp);
+ rx_q->rxq_stats.rx_normal_irq_n++;
+ u64_stats_update_end(&rx_q->rxq_stats.syncp);
ret |= handle_rx;
}
if (likely(intr_status & DMA_CHAN_STATUS_TI)) {
- x->tx_normal_irq_n++;
- x->txq_stats[chan].tx_normal_irq_n++;
+ u64_stats_update_begin(&tx_q->txq_stats.syncp);
+ tx_q->txq_stats.tx_normal_irq_n++;
+ u64_stats_update_end(&tx_q->txq_stats.syncp);
ret |= handle_tx;
}
+
if (unlikely(intr_status & DMA_CHAN_STATUS_TBU))
ret |= handle_tx;
if (unlikely(intr_status & DMA_CHAN_STATUS_ERI))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index 0b6f999a8305..aaa09b16b016 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -10,6 +10,7 @@
#include <linux/iopoll.h>
#include "common.h"
#include "dwmac_dma.h"
+#include "stmmac.h"
#define GMAC_HI_REG_AE 0x80000000
@@ -161,6 +162,8 @@ static void show_rx_process_state(unsigned int status)
int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
struct stmmac_extra_stats *x, u32 chan, u32 dir)
{
+ struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
+ struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
int ret = 0;
/* read the status register (CSR5) */
u32 intr_status = readl(ioaddr + DMA_STATUS);
@@ -208,17 +211,20 @@ int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
}
/* TX/RX NORMAL interrupts */
if (likely(intr_status & DMA_STATUS_NIS)) {
- x->normal_irq_n++;
if (likely(intr_status & DMA_STATUS_RI)) {
u32 value = readl(ioaddr + DMA_INTR_ENA);
/* to schedule NAPI on real RIE event. */
if (likely(value & DMA_INTR_ENA_RIE)) {
- x->rx_normal_irq_n++;
+ u64_stats_update_begin(&rx_q->rxq_stats.syncp);
+ rx_q->rxq_stats.rx_normal_irq_n++;
+ u64_stats_update_end(&rx_q->rxq_stats.syncp);
ret |= handle_rx;
}
}
if (likely(intr_status & DMA_STATUS_TI)) {
- x->tx_normal_irq_n++;
+ u64_stats_update_begin(&tx_q->txq_stats.syncp);
+ tx_q->txq_stats.tx_normal_irq_n++;
+ u64_stats_update_end(&tx_q->txq_stats.syncp);
ret |= handle_tx;
}
if (unlikely(intr_status & DMA_STATUS_ERI))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 1913385df685..153321fe42c3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -165,7 +165,7 @@
#define XGMAC_DCS_SHIFT 16
#define XGMAC_ADDRx_LOW(x) (0x00000304 + (x) * 0x8)
#define XGMAC_L3L4_ADDR_CTRL 0x00000c00
-#define XGMAC_IDDR GENMASK(15, 8)
+#define XGMAC_IDDR GENMASK(16, 8)
#define XGMAC_IDDR_SHIFT 8
#define XGMAC_IDDR_FNUM 4
#define XGMAC_TT BIT(1)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index 13c347ee8be9..fc82862a612c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -8,8 +8,7 @@
#include "common.h"
#include "dwxgmac2.h"
-static int dwxgmac2_get_tx_status(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+static int dwxgmac2_get_tx_status(struct stmmac_extra_stats *x,
struct dma_desc *p, void __iomem *ioaddr)
{
unsigned int tdes3 = le32_to_cpu(p->des3);
@@ -23,8 +22,7 @@ static int dwxgmac2_get_tx_status(struct net_device_stats *stats,
return ret;
}
-static int dwxgmac2_get_rx_status(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+static int dwxgmac2_get_rx_status(struct stmmac_extra_stats *x,
struct dma_desc *p)
{
unsigned int rdes3 = le32_to_cpu(p->des3);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 070bd912580b..b09395f5edcb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -337,6 +337,8 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
struct stmmac_extra_stats *x, u32 chan,
u32 dir)
{
+ struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
+ struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan));
u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
int ret = 0;
@@ -364,16 +366,16 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
/* TX/RX NORMAL interrupts */
if (likely(intr_status & XGMAC_NIS)) {
- x->normal_irq_n++;
-
if (likely(intr_status & XGMAC_RI)) {
- x->rx_normal_irq_n++;
- x->rxq_stats[chan].rx_normal_irq_n++;
+ u64_stats_update_begin(&rx_q->rxq_stats.syncp);
+ rx_q->rxq_stats.rx_normal_irq_n++;
+ u64_stats_update_end(&rx_q->rxq_stats.syncp);
ret |= handle_rx;
}
if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
- x->tx_normal_irq_n++;
- x->txq_stats[chan].tx_normal_irq_n++;
+ u64_stats_update_begin(&tx_q->txq_stats.syncp);
+ tx_q->txq_stats.tx_normal_irq_n++;
+ u64_stats_update_end(&tx_q->txq_stats.syncp);
ret |= handle_tx;
}
}
@@ -408,6 +410,16 @@ static int dwxgmac2_get_hw_feature(void __iomem *ioaddr,
/* MAC HW feature 1 */
hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1);
dma_cap->l3l4fnum = (hw_cap & XGMAC_HWFEAT_L3L4FNUM) >> 27;
+ /* If L3L4FNUM < 8, then the number of L3L4 filters supported by
+ * XGMAC is equal to L3L4FNUM. From L3L4FNUM >= 8 the number of
+ * L3L4 filters goes on like 8, 16, 32, ... Current maximum of
+ * L3L4FNUM = 10.
+ */
+ if (dma_cap->l3l4fnum >= 8 && dma_cap->l3l4fnum <= 10)
+ dma_cap->l3l4fnum = 8 << (dma_cap->l3l4fnum - 8);
+ else if (dma_cap->l3l4fnum > 10)
+ dma_cap->l3l4fnum = 32;
+
dma_cap->hash_tb_sz = (hw_cap & XGMAC_HWFEAT_HASHTBLSZ) >> 24;
dma_cap->rssen = (hw_cap & XGMAC_HWFEAT_RSSEN) >> 20;
dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18;
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index a91d8f13a931..937b7a0466fc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -12,8 +12,7 @@
#include "common.h"
#include "descs_com.h"
-static int enh_desc_get_tx_status(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+static int enh_desc_get_tx_status(struct stmmac_extra_stats *x,
struct dma_desc *p, void __iomem *ioaddr)
{
unsigned int tdes0 = le32_to_cpu(p->des0);
@@ -38,15 +37,13 @@ static int enh_desc_get_tx_status(struct net_device_stats *stats,
if (unlikely(tdes0 & ETDES0_LOSS_CARRIER)) {
x->tx_losscarrier++;
- stats->tx_carrier_errors++;
}
if (unlikely(tdes0 & ETDES0_NO_CARRIER)) {
x->tx_carrier++;
- stats->tx_carrier_errors++;
}
if (unlikely((tdes0 & ETDES0_LATE_COLLISION) ||
(tdes0 & ETDES0_EXCESSIVE_COLLISIONS)))
- stats->collisions +=
+ x->tx_collision +=
(tdes0 & ETDES0_COLLISION_COUNT_MASK) >> 3;
if (unlikely(tdes0 & ETDES0_EXCESSIVE_DEFERRAL))
@@ -117,8 +114,7 @@ static int enh_desc_coe_rdes0(int ipc_err, int type, int payload_err)
return ret;
}
-static void enh_desc_get_ext_status(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+static void enh_desc_get_ext_status(struct stmmac_extra_stats *x,
struct dma_extended_desc *p)
{
unsigned int rdes0 = le32_to_cpu(p->basic.des0);
@@ -182,8 +178,7 @@ static void enh_desc_get_ext_status(struct net_device_stats *stats,
}
}
-static int enh_desc_get_rx_status(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+static int enh_desc_get_rx_status(struct stmmac_extra_stats *x,
struct dma_desc *p)
{
unsigned int rdes0 = le32_to_cpu(p->des0);
@@ -193,14 +188,14 @@ static int enh_desc_get_rx_status(struct net_device_stats *stats,
return dma_own;
if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) {
- stats->rx_length_errors++;
+ x->rx_length++;
return discard_frame;
}
if (unlikely(rdes0 & RDES0_ERROR_SUMMARY)) {
if (unlikely(rdes0 & RDES0_DESCRIPTOR_ERROR)) {
x->rx_desc++;
- stats->rx_length_errors++;
+ x->rx_length++;
}
if (unlikely(rdes0 & RDES0_OVERFLOW_ERROR))
x->rx_gmac_overflow++;
@@ -209,7 +204,7 @@ static int enh_desc_get_rx_status(struct net_device_stats *stats,
pr_err("\tIPC Csum Error/Giant frame\n");
if (unlikely(rdes0 & RDES0_COLLISION))
- stats->collisions++;
+ x->rx_collision++;
if (unlikely(rdes0 & RDES0_RECEIVE_WATCHDOG))
x->rx_watchdog++;
@@ -218,7 +213,6 @@ static int enh_desc_get_rx_status(struct net_device_stats *stats,
if (unlikely(rdes0 & RDES0_CRC_ERROR)) {
x->rx_crc_errors++;
- stats->rx_crc_errors++;
}
ret = discard_frame;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 6ee7cf07cfd7..40147ef24963 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -57,8 +57,7 @@ struct stmmac_desc_ops {
/* Last tx segment reports the transmit status */
int (*get_tx_ls)(struct dma_desc *p);
/* Return the transmit status looking at the TDES1 */
- int (*tx_status)(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+ int (*tx_status)(struct stmmac_extra_stats *x,
struct dma_desc *p, void __iomem *ioaddr);
/* Get the buffer size from the descriptor */
int (*get_tx_len)(struct dma_desc *p);
@@ -67,11 +66,9 @@ struct stmmac_desc_ops {
/* Get the receive frame size */
int (*get_rx_frame_len)(struct dma_desc *p, int rx_coe_type);
/* Return the reception status looking at the RDES1 */
- int (*rx_status)(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+ int (*rx_status)(struct stmmac_extra_stats *x,
struct dma_desc *p);
- void (*rx_extended_status)(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+ void (*rx_extended_status)(struct stmmac_extra_stats *x,
struct dma_extended_desc *p);
/* Set tx timestamp enable bit */
void (*enable_tx_timestamp) (struct dma_desc *p);
@@ -191,8 +188,7 @@ struct stmmac_dma_ops {
void (*dma_tx_mode)(struct stmmac_priv *priv, void __iomem *ioaddr,
int mode, u32 channel, int fifosz, u8 qmode);
/* To track extra statistic (if supported) */
- void (*dma_diagnostic_fr)(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+ void (*dma_diagnostic_fr)(struct stmmac_extra_stats *x,
void __iomem *ioaddr);
void (*enable_dma_transmission) (void __iomem *ioaddr);
void (*enable_dma_irq)(struct stmmac_priv *priv, void __iomem *ioaddr,
@@ -536,6 +532,7 @@ struct stmmac_hwtimestamp {
void (*get_systime) (void __iomem *ioaddr, u64 *systime);
void (*get_ptptime)(void __iomem *ioaddr, u64 *ptp_time);
void (*timestamp_interrupt)(struct stmmac_priv *priv);
+ void (*correct_latency)(struct stmmac_priv *priv);
};
#define stmmac_config_hw_tstamping(__priv, __args...) \
@@ -554,6 +551,8 @@ struct stmmac_hwtimestamp {
stmmac_do_void_callback(__priv, ptp, get_ptptime, __args)
#define stmmac_timestamp_interrupt(__priv, __args...) \
stmmac_do_void_callback(__priv, ptp, timestamp_interrupt, __args)
+#define stmmac_correct_latency(__priv, __args...) \
+ stmmac_do_void_callback(__priv, ptp, correct_latency, __args)
struct stmmac_tx_queue;
struct stmmac_rx_queue;
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index 350e6670a576..68a7cfcb1d8f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -12,8 +12,7 @@
#include "common.h"
#include "descs_com.h"
-static int ndesc_get_tx_status(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+static int ndesc_get_tx_status(struct stmmac_extra_stats *x,
struct dma_desc *p, void __iomem *ioaddr)
{
unsigned int tdes0 = le32_to_cpu(p->des0);
@@ -31,15 +30,12 @@ static int ndesc_get_tx_status(struct net_device_stats *stats,
if (unlikely(tdes0 & TDES0_ERROR_SUMMARY)) {
if (unlikely(tdes0 & TDES0_UNDERFLOW_ERROR)) {
x->tx_underflow++;
- stats->tx_fifo_errors++;
}
if (unlikely(tdes0 & TDES0_NO_CARRIER)) {
x->tx_carrier++;
- stats->tx_carrier_errors++;
}
if (unlikely(tdes0 & TDES0_LOSS_CARRIER)) {
x->tx_losscarrier++;
- stats->tx_carrier_errors++;
}
if (unlikely((tdes0 & TDES0_EXCESSIVE_DEFERRAL) ||
(tdes0 & TDES0_EXCESSIVE_COLLISIONS) ||
@@ -47,7 +43,7 @@ static int ndesc_get_tx_status(struct net_device_stats *stats,
unsigned int collisions;
collisions = (tdes0 & TDES0_COLLISION_COUNT_MASK) >> 3;
- stats->collisions += collisions;
+ x->tx_collision += collisions;
}
ret = tx_err;
}
@@ -70,8 +66,7 @@ static int ndesc_get_tx_len(struct dma_desc *p)
* and, if required, updates the multicast statistics.
* In case of success, it returns good_frame because the GMAC device
* is supposed to be able to compute the csum in HW. */
-static int ndesc_get_rx_status(struct net_device_stats *stats,
- struct stmmac_extra_stats *x,
+static int ndesc_get_rx_status(struct stmmac_extra_stats *x,
struct dma_desc *p)
{
int ret = good_frame;
@@ -81,7 +76,7 @@ static int ndesc_get_rx_status(struct net_device_stats *stats,
return dma_own;
if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) {
- stats->rx_length_errors++;
+ x->rx_length++;
return discard_frame;
}
@@ -96,11 +91,9 @@ static int ndesc_get_rx_status(struct net_device_stats *stats,
x->ipc_csum_error++;
if (unlikely(rdes0 & RDES0_COLLISION)) {
x->rx_collision++;
- stats->collisions++;
}
if (unlikely(rdes0 & RDES0_CRC_ERROR)) {
x->rx_crc_errors++;
- stats->rx_crc_errors++;
}
ret = discard_frame;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 07ea5ab0a60b..4ce5eaaae513 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -77,6 +77,7 @@ struct stmmac_tx_queue {
dma_addr_t dma_tx_phy;
dma_addr_t tx_tail_addr;
u32 mss;
+ struct stmmac_txq_stats txq_stats;
};
struct stmmac_rx_buffer {
@@ -121,6 +122,7 @@ struct stmmac_rx_queue {
unsigned int len;
unsigned int error;
} state;
+ struct stmmac_rxq_stats rxq_stats;
};
struct stmmac_channel {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 2ae73ab842d4..b7ac7abecdd3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -89,14 +89,6 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
/* Tx/Rx IRQ Events */
STMMAC_STAT(rx_early_irq),
STMMAC_STAT(threshold),
- STMMAC_STAT(tx_pkt_n),
- STMMAC_STAT(rx_pkt_n),
- STMMAC_STAT(normal_irq_n),
- STMMAC_STAT(rx_normal_irq_n),
- STMMAC_STAT(napi_poll),
- STMMAC_STAT(tx_normal_irq_n),
- STMMAC_STAT(tx_clean),
- STMMAC_STAT(tx_set_ic_bit),
STMMAC_STAT(irq_receive_pmt_irq_n),
/* MMC info */
STMMAC_STAT(mmc_tx_irq_n),
@@ -163,9 +155,6 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
STMMAC_STAT(mtl_rx_fifo_ctrl_active),
STMMAC_STAT(mac_rx_frame_ctrl_fifo),
STMMAC_STAT(mac_gmii_rx_proto_engine),
- /* TSO */
- STMMAC_STAT(tx_tso_frames),
- STMMAC_STAT(tx_tso_nfrags),
/* EST */
STMMAC_STAT(mtl_est_cgce),
STMMAC_STAT(mtl_est_hlbs),
@@ -175,6 +164,23 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
};
#define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats)
+/* statistics collected in queue which will be summed up for all TX or RX
+ * queues, or summed up for both TX and RX queues(napi_poll, normal_irq_n).
+ */
+static const char stmmac_qstats_string[][ETH_GSTRING_LEN] = {
+ "rx_pkt_n",
+ "rx_normal_irq_n",
+ "tx_pkt_n",
+ "tx_normal_irq_n",
+ "tx_clean",
+ "tx_set_ic_bit",
+ "tx_tso_frames",
+ "tx_tso_nfrags",
+ "normal_irq_n",
+ "napi_poll",
+};
+#define STMMAC_QSTATS ARRAY_SIZE(stmmac_qstats_string)
+
/* HW MAC Management counters (if supported) */
#define STMMAC_MMC_STAT(m) \
{ #m, sizeof_field(struct stmmac_counters, m), \
@@ -535,23 +541,44 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
{
u32 tx_cnt = priv->plat->tx_queues_to_use;
u32 rx_cnt = priv->plat->rx_queues_to_use;
+ unsigned int start;
int q, stat;
+ u64 *pos;
char *p;
+ pos = data;
for (q = 0; q < tx_cnt; q++) {
- p = (char *)priv + offsetof(struct stmmac_priv,
- xstats.txq_stats[q].tx_pkt_n);
+ struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[q];
+ struct stmmac_txq_stats snapshot;
+
+ data = pos;
+ do {
+ start = u64_stats_fetch_begin(&tx_q->txq_stats.syncp);
+ snapshot = tx_q->txq_stats;
+ } while (u64_stats_fetch_retry(&tx_q->txq_stats.syncp, start));
+
+ p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n);
for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
- *data++ = (*(unsigned long *)p);
- p += sizeof(unsigned long);
+ *data++ += (*(u64 *)p);
+ p += sizeof(u64);
}
}
+
+ pos = data;
for (q = 0; q < rx_cnt; q++) {
- p = (char *)priv + offsetof(struct stmmac_priv,
- xstats.rxq_stats[q].rx_pkt_n);
+ struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[q];
+ struct stmmac_rxq_stats snapshot;
+
+ data = pos;
+ do {
+ start = u64_stats_fetch_begin(&rx_q->rxq_stats.syncp);
+ snapshot = rx_q->rxq_stats;
+ } while (u64_stats_fetch_retry(&rx_q->rxq_stats.syncp, start));
+
+ p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n);
for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
- *data++ = (*(unsigned long *)p);
- p += sizeof(unsigned long);
+ *data++ += (*(u64 *)p);
+ p += sizeof(u64);
}
}
}
@@ -562,8 +589,10 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
struct stmmac_priv *priv = netdev_priv(dev);
u32 rx_queues_count = priv->plat->rx_queues_to_use;
u32 tx_queues_count = priv->plat->tx_queues_to_use;
+ u64 napi_poll = 0, normal_irq_n = 0;
+ int i, j = 0, pos, ret;
unsigned long count;
- int i, j = 0, ret;
+ unsigned int start;
if (priv->dma_cap.asp) {
for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) {
@@ -574,8 +603,7 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
}
/* Update the DMA HW counters for dwmac10/100 */
- ret = stmmac_dma_diagnostic_fr(priv, &dev->stats, (void *) &priv->xstats,
- priv->ioaddr);
+ ret = stmmac_dma_diagnostic_fr(priv, &priv->xstats, priv->ioaddr);
if (ret) {
/* If supported, for new GMAC chips expose the MMC counters */
if (priv->dma_cap.rmon) {
@@ -606,6 +634,48 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
data[j++] = (stmmac_gstrings_stats[i].sizeof_stat ==
sizeof(u64)) ? (*(u64 *)p) : (*(u32 *)p);
}
+
+ pos = j;
+ for (i = 0; i < rx_queues_count; i++) {
+ struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[i];
+ struct stmmac_rxq_stats snapshot;
+
+ j = pos;
+ do {
+ start = u64_stats_fetch_begin(&rx_q->rxq_stats.syncp);
+ snapshot = rx_q->rxq_stats;
+ } while (u64_stats_fetch_retry(&rx_q->rxq_stats.syncp, start));
+
+ data[j++] += snapshot.rx_pkt_n;
+ data[j++] += snapshot.rx_normal_irq_n;
+ normal_irq_n += snapshot.rx_normal_irq_n;
+ napi_poll += snapshot.napi_poll;
+ }
+
+ pos = j;
+ for (i = 0; i < tx_queues_count; i++) {
+ struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[i];
+ struct stmmac_txq_stats snapshot;
+
+ j = pos;
+ do {
+ start = u64_stats_fetch_begin(&tx_q->txq_stats.syncp);
+ snapshot = tx_q->txq_stats;
+ } while (u64_stats_fetch_retry(&tx_q->txq_stats.syncp, start));
+
+ data[j++] += snapshot.tx_pkt_n;
+ data[j++] += snapshot.tx_normal_irq_n;
+ normal_irq_n += snapshot.tx_normal_irq_n;
+ data[j++] += snapshot.tx_clean;
+ data[j++] += snapshot.tx_set_ic_bit;
+ data[j++] += snapshot.tx_tso_frames;
+ data[j++] += snapshot.tx_tso_nfrags;
+ napi_poll += snapshot.napi_poll;
+ }
+ normal_irq_n += priv->xstats.rx_early_irq;
+ data[j++] = normal_irq_n;
+ data[j++] = napi_poll;
+
stmmac_get_per_qstats(priv, &data[j]);
}
@@ -618,7 +688,7 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset)
switch (sset) {
case ETH_SS_STATS:
- len = STMMAC_STATS_LEN +
+ len = STMMAC_STATS_LEN + STMMAC_QSTATS +
STMMAC_TXQ_STATS * tx_cnt +
STMMAC_RXQ_STATS * rx_cnt;
@@ -691,8 +761,11 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data)
p += ETH_GSTRING_LEN;
}
for (i = 0; i < STMMAC_STATS_LEN; i++) {
- memcpy(p, stmmac_gstrings_stats[i].stat_string,
- ETH_GSTRING_LEN);
+ memcpy(p, stmmac_gstrings_stats[i].stat_string, ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < STMMAC_QSTATS; i++) {
+ memcpy(p, stmmac_qstats_string[i], ETH_GSTRING_LEN);
p += ETH_GSTRING_LEN;
}
stmmac_get_qstats_string(priv, p);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 8b50f03056b7..7e0fa024e0ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -60,6 +60,48 @@ static void config_sub_second_increment(void __iomem *ioaddr,
*ssinc = data;
}
+static void correct_latency(struct stmmac_priv *priv)
+{
+ void __iomem *ioaddr = priv->ptpaddr;
+ u32 reg_tsic, reg_tsicsns;
+ u32 reg_tsec, reg_tsecsns;
+ u64 scaled_ns;
+ u32 val;
+
+ /* MAC-internal ingress latency */
+ scaled_ns = readl(ioaddr + PTP_TS_INGR_LAT);
+
+ /* See section 11.7.2.5.3.1 "Ingress Correction" on page 4001 of
+ * i.MX8MP Applications Processor Reference Manual Rev. 1, 06/2021
+ */
+ val = readl(ioaddr + PTP_TCR);
+ if (val & PTP_TCR_TSCTRLSSR)
+ /* nanoseconds field is in decimal format with granularity of 1ns/bit */
+ scaled_ns = ((u64)NSEC_PER_SEC << 16) - scaled_ns;
+ else
+ /* nanoseconds field is in binary format with granularity of ~0.466ns/bit */
+ scaled_ns = ((1ULL << 31) << 16) -
+ DIV_U64_ROUND_CLOSEST(scaled_ns * PSEC_PER_NSEC, 466U);
+
+ reg_tsic = scaled_ns >> 16;
+ reg_tsicsns = scaled_ns & 0xff00;
+
+ /* set bit 31 for 2's compliment */
+ reg_tsic |= BIT(31);
+
+ writel(reg_tsic, ioaddr + PTP_TS_INGR_CORR_NS);
+ writel(reg_tsicsns, ioaddr + PTP_TS_INGR_CORR_SNS);
+
+ /* MAC-internal egress latency */
+ scaled_ns = readl(ioaddr + PTP_TS_EGR_LAT);
+
+ reg_tsec = scaled_ns >> 16;
+ reg_tsecsns = scaled_ns & 0xff00;
+
+ writel(reg_tsec, ioaddr + PTP_TS_EGR_CORR_NS);
+ writel(reg_tsecsns, ioaddr + PTP_TS_EGR_CORR_SNS);
+}
+
static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec)
{
u32 value;
@@ -180,7 +222,7 @@ static void timestamp_interrupt(struct stmmac_priv *priv)
u64 ptp_time;
int i;
- if (priv->plat->int_snapshot_en) {
+ if (priv->plat->flags & STMMAC_FLAG_INT_SNAPSHOT_EN) {
wake_up(&priv->tstamp_busy_wait);
return;
}
@@ -195,7 +237,7 @@ static void timestamp_interrupt(struct stmmac_priv *priv)
*/
ts_status = readl(priv->ioaddr + GMAC_TIMESTAMP_STATUS);
- if (!priv->plat->ext_snapshot_en)
+ if (priv->plat->flags & STMMAC_FLAG_EXT_SNAPSHOT_EN)
return;
num_snapshot = (ts_status & GMAC_TIMESTAMP_ATSNS_MASK) >>
@@ -221,4 +263,5 @@ const struct stmmac_hwtimestamp stmmac_ptp = {
.get_systime = get_systime,
.get_ptptime = get_ptptime,
.timestamp_interrupt = timestamp_interrupt,
+ .correct_latency = correct_latency,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 4727f7be4f86..19e46e8f626a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -325,7 +325,7 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv)
priv->clk_csr = STMMAC_CSR_250_300M;
}
- if (priv->plat->has_sun8i) {
+ if (priv->plat->flags & STMMAC_FLAG_HAS_SUN8I) {
if (clk_rate > 160000000)
priv->clk_csr = 0x03;
else if (clk_rate > 80000000)
@@ -421,7 +421,7 @@ static int stmmac_enable_eee_mode(struct stmmac_priv *priv)
/* Check and enter in LPI mode */
if (!priv->tx_path_in_lpi_mode)
stmmac_set_eee_mode(priv, priv->hw,
- priv->plat->en_tx_lpi_clockgating);
+ priv->plat->flags & STMMAC_FLAG_EN_TX_LPI_CLOCKGATING);
return 0;
}
@@ -909,6 +909,8 @@ static int stmmac_init_ptp(struct stmmac_priv *priv)
priv->hwts_tx_en = 0;
priv->hwts_rx_en = 0;
+ stmmac_correct_latency(priv, priv);
+
return 0;
}
@@ -991,7 +993,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
u32 old_ctrl, ctrl;
- if (priv->plat->serdes_up_after_phy_linkup && priv->plat->serdes_powerup)
+ if ((priv->plat->flags & STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP) &&
+ priv->plat->serdes_powerup)
priv->plat->serdes_powerup(priv->dev, priv->plat->bsp_priv);
old_ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
@@ -1084,7 +1087,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
stmmac_mac_set(priv, priv->ioaddr, true);
if (phy && priv->dma_cap.eee) {
priv->eee_active =
- phy_init_eee(phy, !priv->plat->rx_clk_runs_in_lpi) >= 0;
+ phy_init_eee(phy, !(priv->plat->flags &
+ STMMAC_FLAG_RX_CLK_RUNS_IN_LPI)) >= 0;
priv->eee_enabled = stmmac_eee_init(priv);
priv->tx_lpi_enabled = priv->eee_enabled;
stmmac_set_eee_pls(priv, priv->hw, true);
@@ -1092,6 +1096,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
if (priv->dma_cap.fpesel)
stmmac_fpe_link_state_handle(priv, true);
+
+ stmmac_correct_latency(priv, priv);
}
static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
@@ -2432,6 +2438,8 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
struct dma_desc *tx_desc = NULL;
struct xdp_desc xdp_desc;
bool work_done = true;
+ u32 tx_set_ic_bit = 0;
+ unsigned long flags;
/* Avoids TX time-out as we are sharing with slow path */
txq_trans_cond_update(nq);
@@ -2492,7 +2500,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
if (set_ic) {
tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, tx_desc);
- priv->xstats.tx_set_ic_bit++;
+ tx_set_ic_bit++;
}
stmmac_prepare_tx_desc(priv, tx_desc, 1, xdp_desc.len,
@@ -2504,6 +2512,9 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size);
entry = tx_q->cur_tx;
}
+ flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+ tx_q->txq_stats.tx_set_ic_bit += tx_set_ic_bit;
+ u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
if (tx_desc) {
stmmac_flush_tx_descriptors(priv, queue);
@@ -2545,11 +2556,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
unsigned int bytes_compl = 0, pkts_compl = 0;
unsigned int entry, xmits = 0, count = 0;
+ u32 tx_packets = 0, tx_errors = 0;
+ unsigned long flags;
__netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue));
- priv->xstats.tx_clean++;
-
tx_q->xsk_frames_done = 0;
entry = tx_q->dirty_tx;
@@ -2580,8 +2591,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
else
p = tx_q->dma_tx + entry;
- status = stmmac_tx_status(priv, &priv->dev->stats,
- &priv->xstats, p, priv->ioaddr);
+ status = stmmac_tx_status(priv, &priv->xstats, p, priv->ioaddr);
/* Check if the descriptor is owned by the DMA */
if (unlikely(status & tx_dma_own))
break;
@@ -2597,13 +2607,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
if (likely(!(status & tx_not_ls))) {
/* ... verify the status error condition */
if (unlikely(status & tx_err)) {
- priv->dev->stats.tx_errors++;
+ tx_errors++;
if (unlikely(status & tx_err_bump_tc))
stmmac_bump_dma_threshold(priv, queue);
} else {
- priv->dev->stats.tx_packets++;
- priv->xstats.tx_pkt_n++;
- priv->xstats.txq_stats[queue].tx_pkt_n++;
+ tx_packets++;
}
if (skb)
stmmac_get_tx_hwtstamp(priv, p, skb);
@@ -2707,6 +2715,14 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]),
HRTIMER_MODE_REL);
+ flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+ tx_q->txq_stats.tx_packets += tx_packets;
+ tx_q->txq_stats.tx_pkt_n += tx_packets;
+ tx_q->txq_stats.tx_clean++;
+ u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
+
+ priv->xstats.tx_errors += tx_errors;
+
__netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
/* Combine decisions from TX clean and XSK TX */
@@ -2734,7 +2750,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
tx_q->dma_tx_phy, chan);
stmmac_start_tx_dma(priv, chan);
- priv->dev->stats.tx_errors++;
+ priv->xstats.tx_errors++;
netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, chan));
}
@@ -3710,7 +3726,7 @@ static int stmmac_request_irq(struct net_device *dev)
int ret;
/* Request the IRQ lines */
- if (priv->plat->multi_msi_en)
+ if (priv->plat->flags & STMMAC_FLAG_MULTI_MSI_EN)
ret = stmmac_request_irq_multi_msi(dev);
else
ret = stmmac_request_irq_single(dev);
@@ -3827,10 +3843,6 @@ static int __stmmac_open(struct net_device *dev,
}
}
- /* Extra statistics */
- memset(&priv->xstats, 0, sizeof(struct stmmac_extra_stats));
- priv->xstats.threshold = tc;
-
priv->rx_copybreak = STMMAC_RX_COPYBREAK;
buf_sz = dma_conf->dma_buf_sz;
@@ -3838,7 +3850,8 @@ static int __stmmac_open(struct net_device *dev,
stmmac_reset_queues_param(priv);
- if (!priv->plat->serdes_up_after_phy_linkup && priv->plat->serdes_powerup) {
+ if (!(priv->plat->flags & STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP) &&
+ priv->plat->serdes_powerup) {
ret = priv->plat->serdes_powerup(dev, priv->plat->bsp_priv);
if (ret < 0) {
netdev_err(priv->dev, "%s: Serdes powerup failed\n",
@@ -4110,6 +4123,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
struct stmmac_tx_queue *tx_q;
bool has_vlan, set_ic;
u8 proto_hdr_len, hdr;
+ unsigned long flags;
u32 pay_len, mss;
dma_addr_t des;
int i;
@@ -4258,7 +4272,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, desc);
- priv->xstats.tx_set_ic_bit++;
}
/* We've used all descriptors we need for this skb, however,
@@ -4274,9 +4287,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
}
- dev->stats.tx_bytes += skb->len;
- priv->xstats.tx_tso_frames++;
- priv->xstats.tx_tso_nfrags += nfrags;
+ flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+ tx_q->txq_stats.tx_bytes += skb->len;
+ tx_q->txq_stats.tx_tso_frames++;
+ tx_q->txq_stats.tx_tso_nfrags += nfrags;
+ if (set_ic)
+ tx_q->txq_stats.tx_set_ic_bit++;
+ u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
if (priv->sarc_type)
stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -4326,7 +4343,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
dma_map_err:
dev_err(priv->device, "Tx dma map failed\n");
dev_kfree_skb(skb);
- priv->dev->stats.tx_dropped++;
+ priv->xstats.tx_dropped++;
return NETDEV_TX_OK;
}
@@ -4352,6 +4369,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
struct stmmac_tx_queue *tx_q;
bool has_vlan, set_ic;
int entry, first_tx;
+ unsigned long flags;
dma_addr_t des;
tx_q = &priv->dma_conf.tx_queue[queue];
@@ -4480,7 +4498,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, desc);
- priv->xstats.tx_set_ic_bit++;
}
/* We've used all descriptors we need for this skb, however,
@@ -4507,7 +4524,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
}
- dev->stats.tx_bytes += skb->len;
+ flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+ tx_q->txq_stats.tx_bytes += skb->len;
+ if (set_ic)
+ tx_q->txq_stats.tx_set_ic_bit++;
+ u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
if (priv->sarc_type)
stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -4569,7 +4590,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
dma_map_err:
netdev_err(priv->dev, "Tx DMA map failed\n");
dev_kfree_skb(skb);
- priv->dev->stats.tx_dropped++;
+ priv->xstats.tx_dropped++;
return NETDEV_TX_OK;
}
@@ -4770,9 +4791,12 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
set_ic = false;
if (set_ic) {
+ unsigned long flags;
tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, tx_desc);
- priv->xstats.tx_set_ic_bit++;
+ flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+ tx_q->txq_stats.tx_set_ic_bit++;
+ u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
}
stmmac_enable_dma_transmission(priv, priv->ioaddr);
@@ -4917,16 +4941,18 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
struct dma_desc *p, struct dma_desc *np,
struct xdp_buff *xdp)
{
+ struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue];
struct stmmac_channel *ch = &priv->channel[queue];
unsigned int len = xdp->data_end - xdp->data;
enum pkt_hash_types hash_type;
int coe = priv->hw->rx_csum;
+ unsigned long flags;
struct sk_buff *skb;
u32 hash;
skb = stmmac_construct_skb_zc(ch, xdp);
if (!skb) {
- priv->dev->stats.rx_dropped++;
+ priv->xstats.rx_dropped++;
return;
}
@@ -4945,8 +4971,10 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
skb_record_rx_queue(skb, queue);
napi_gro_receive(&ch->rxtx_napi, skb);
- priv->dev->stats.rx_packets++;
- priv->dev->stats.rx_bytes += len;
+ flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
+ rx_q->rxq_stats.rx_pkt_n++;
+ rx_q->rxq_stats.rx_bytes += len;
+ u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
}
static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
@@ -5023,9 +5051,11 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
unsigned int count = 0, error = 0, len = 0;
int dirty = stmmac_rx_dirty(priv, queue);
unsigned int next_entry = rx_q->cur_rx;
+ u32 rx_errors = 0, rx_dropped = 0;
unsigned int desc_size;
struct bpf_prog *prog;
bool failure = false;
+ unsigned long flags;
int xdp_status = 0;
int status = 0;
@@ -5081,8 +5111,7 @@ read_again:
p = rx_q->dma_rx + entry;
/* read the status of the incoming frame */
- status = stmmac_rx_status(priv, &priv->dev->stats,
- &priv->xstats, p);
+ status = stmmac_rx_status(priv, &priv->xstats, p);
/* check if managed by the DMA otherwise go ahead */
if (unlikely(status & dma_own))
break;
@@ -5104,8 +5133,7 @@ read_again:
break;
if (priv->extend_desc)
- stmmac_rx_extended_status(priv, &priv->dev->stats,
- &priv->xstats,
+ stmmac_rx_extended_status(priv, &priv->xstats,
rx_q->dma_erx + entry);
if (unlikely(status == discard_frame)) {
xsk_buff_free(buf->xdp);
@@ -5113,7 +5141,7 @@ read_again:
dirty++;
error = 1;
if (!priv->hwts_rx_en)
- priv->dev->stats.rx_errors++;
+ rx_errors++;
}
if (unlikely(error && (status & rx_not_ls)))
@@ -5161,7 +5189,7 @@ read_again:
break;
case STMMAC_XDP_CONSUMED:
xsk_buff_free(buf->xdp);
- priv->dev->stats.rx_dropped++;
+ rx_dropped++;
break;
case STMMAC_XDP_TX:
case STMMAC_XDP_REDIRECT:
@@ -5182,8 +5210,12 @@ read_again:
stmmac_finalize_xdp_rx(priv, xdp_status);
- priv->xstats.rx_pkt_n += count;
- priv->xstats.rxq_stats[queue].rx_pkt_n += count;
+ flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
+ rx_q->rxq_stats.rx_pkt_n += count;
+ u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
+
+ priv->xstats.rx_dropped += rx_dropped;
+ priv->xstats.rx_errors += rx_errors;
if (xsk_uses_need_wakeup(rx_q->xsk_pool)) {
if (failure || stmmac_rx_dirty(priv, queue) > 0)
@@ -5207,6 +5239,7 @@ read_again:
*/
static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
{
+ u32 rx_errors = 0, rx_dropped = 0, rx_bytes = 0, rx_packets = 0;
struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue];
struct stmmac_channel *ch = &priv->channel[queue];
unsigned int count = 0, error = 0, len = 0;
@@ -5216,6 +5249,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
unsigned int desc_size;
struct sk_buff *skb = NULL;
struct stmmac_xdp_buff ctx;
+ unsigned long flags;
int xdp_status = 0;
int buf_sz;
@@ -5271,8 +5305,7 @@ read_again:
p = rx_q->dma_rx + entry;
/* read the status of the incoming frame */
- status = stmmac_rx_status(priv, &priv->dev->stats,
- &priv->xstats, p);
+ status = stmmac_rx_status(priv, &priv->xstats, p);
/* check if managed by the DMA otherwise go ahead */
if (unlikely(status & dma_own))
break;
@@ -5289,14 +5322,13 @@ read_again:
prefetch(np);
if (priv->extend_desc)
- stmmac_rx_extended_status(priv, &priv->dev->stats,
- &priv->xstats, rx_q->dma_erx + entry);
+ stmmac_rx_extended_status(priv, &priv->xstats, rx_q->dma_erx + entry);
if (unlikely(status == discard_frame)) {
page_pool_recycle_direct(rx_q->page_pool, buf->page);
buf->page = NULL;
error = 1;
if (!priv->hwts_rx_en)
- priv->dev->stats.rx_errors++;
+ rx_errors++;
}
if (unlikely(error && (status & rx_not_ls)))
@@ -5364,7 +5396,7 @@ read_again:
virt_to_head_page(ctx.xdp.data),
sync_len, true);
buf->page = NULL;
- priv->dev->stats.rx_dropped++;
+ rx_dropped++;
/* Clear skb as it was set as
* status by XDP program.
@@ -5393,7 +5425,7 @@ read_again:
skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
if (!skb) {
- priv->dev->stats.rx_dropped++;
+ rx_dropped++;
count++;
goto drain_data;
}
@@ -5413,7 +5445,7 @@ read_again:
priv->dma_conf.dma_buf_sz);
/* Data payload appended into SKB */
- page_pool_release_page(rx_q->page_pool, buf->page);
+ skb_mark_for_recycle(skb);
buf->page = NULL;
}
@@ -5425,7 +5457,7 @@ read_again:
priv->dma_conf.dma_buf_sz);
/* Data payload appended into SKB */
- page_pool_release_page(rx_q->page_pool, buf->sec_page);
+ skb_mark_for_recycle(skb);
buf->sec_page = NULL;
}
@@ -5453,8 +5485,8 @@ drain_data:
napi_gro_receive(&ch->rx_napi, skb);
skb = NULL;
- priv->dev->stats.rx_packets++;
- priv->dev->stats.rx_bytes += len;
+ rx_packets++;
+ rx_bytes += len;
count++;
}
@@ -5469,8 +5501,14 @@ drain_data:
stmmac_rx_refill(priv, queue);
- priv->xstats.rx_pkt_n += count;
- priv->xstats.rxq_stats[queue].rx_pkt_n += count;
+ flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
+ rx_q->rxq_stats.rx_packets += rx_packets;
+ rx_q->rxq_stats.rx_bytes += rx_bytes;
+ rx_q->rxq_stats.rx_pkt_n += count;
+ u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
+
+ priv->xstats.rx_dropped += rx_dropped;
+ priv->xstats.rx_errors += rx_errors;
return count;
}
@@ -5480,10 +5518,15 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
struct stmmac_channel *ch =
container_of(napi, struct stmmac_channel, rx_napi);
struct stmmac_priv *priv = ch->priv_data;
+ struct stmmac_rx_queue *rx_q;
u32 chan = ch->index;
+ unsigned long flags;
int work_done;
- priv->xstats.napi_poll++;
+ rx_q = &priv->dma_conf.rx_queue[chan];
+ flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
+ rx_q->rxq_stats.napi_poll++;
+ u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
work_done = stmmac_rx(priv, budget, chan);
if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -5502,10 +5545,15 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
struct stmmac_channel *ch =
container_of(napi, struct stmmac_channel, tx_napi);
struct stmmac_priv *priv = ch->priv_data;
+ struct stmmac_tx_queue *tx_q;
u32 chan = ch->index;
+ unsigned long flags;
int work_done;
- priv->xstats.napi_poll++;
+ tx_q = &priv->dma_conf.tx_queue[chan];
+ flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+ tx_q->txq_stats.napi_poll++;
+ u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
work_done = stmmac_tx_clean(priv, budget, chan);
work_done = min(work_done, budget);
@@ -5527,9 +5575,20 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
container_of(napi, struct stmmac_channel, rxtx_napi);
struct stmmac_priv *priv = ch->priv_data;
int rx_done, tx_done, rxtx_done;
+ struct stmmac_rx_queue *rx_q;
+ struct stmmac_tx_queue *tx_q;
u32 chan = ch->index;
+ unsigned long flags;
+
+ rx_q = &priv->dma_conf.rx_queue[chan];
+ flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
+ rx_q->rxq_stats.napi_poll++;
+ u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
- priv->xstats.napi_poll++;
+ tx_q = &priv->dma_conf.tx_queue[chan];
+ flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
+ tx_q->txq_stats.napi_poll++;
+ u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
tx_done = stmmac_tx_clean(priv, budget, chan);
tx_done = min(tx_done, budget);
@@ -5677,7 +5736,7 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev,
features &= ~NETIF_F_CSUM_MASK;
/* Disable tso if asked by ethtool */
- if ((priv->plat->tso_en) && (priv->dma_cap.tsoen)) {
+ if ((priv->plat->flags & STMMAC_FLAG_TSO_EN) && (priv->dma_cap.tsoen)) {
if (features & NETIF_F_TSO)
priv->tso = true;
else
@@ -5798,7 +5857,8 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv)
}
/* PCS link status */
- if (priv->hw->pcs && !priv->plat->has_integrated_pcs) {
+ if (priv->hw->pcs &&
+ !(priv->plat->flags & STMMAC_FLAG_HAS_INTEGRATED_PCS)) {
if (priv->xstats.pcs_link)
netif_carrier_on(priv->dev);
else
@@ -5951,7 +6011,7 @@ static void stmmac_poll_controller(struct net_device *dev)
if (test_bit(STMMAC_DOWN, &priv->state))
return;
- if (priv->plat->multi_msi_en) {
+ if (priv->plat->flags & STMMAC_FLAG_MULTI_MSI_EN) {
for (i = 0; i < priv->plat->rx_queues_to_use; i++)
stmmac_msi_intr_rx(0, &priv->dma_conf.rx_queue[i]);
@@ -6788,6 +6848,56 @@ int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags)
return 0;
}
+static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ struct stmmac_priv *priv = netdev_priv(dev);
+ u32 tx_cnt = priv->plat->tx_queues_to_use;
+ u32 rx_cnt = priv->plat->rx_queues_to_use;
+ unsigned int start;
+ int q;
+
+ for (q = 0; q < tx_cnt; q++) {
+ struct stmmac_txq_stats *txq_stats = &priv->dma_conf.tx_queue[q].txq_stats;
+ u64 tx_packets;
+ u64 tx_bytes;
+
+ do {
+ start = u64_stats_fetch_begin(&txq_stats->syncp);
+ tx_packets = txq_stats->tx_packets;
+ tx_bytes = txq_stats->tx_bytes;
+ } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
+
+ stats->tx_packets += tx_packets;
+ stats->tx_bytes += tx_bytes;
+ }
+
+ for (q = 0; q < rx_cnt; q++) {
+ struct stmmac_rxq_stats *rxq_stats = &priv->dma_conf.rx_queue[q].rxq_stats;
+ u64 rx_packets;
+ u64 rx_bytes;
+
+ do {
+ start = u64_stats_fetch_begin(&rxq_stats->syncp);
+ rx_packets = rxq_stats->rx_packets;
+ rx_bytes = rxq_stats->rx_bytes;
+ } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
+
+ stats->rx_packets += rx_packets;
+ stats->rx_bytes += rx_bytes;
+ }
+
+ stats->rx_dropped = priv->xstats.rx_dropped;
+ stats->rx_errors = priv->xstats.rx_errors;
+ stats->tx_dropped = priv->xstats.tx_dropped;
+ stats->tx_errors = priv->xstats.tx_errors;
+ stats->tx_carrier_errors = priv->xstats.tx_losscarrier + priv->xstats.tx_carrier;
+ stats->collisions = priv->xstats.tx_collision + priv->xstats.rx_collision;
+ stats->rx_length_errors = priv->xstats.rx_length;
+ stats->rx_crc_errors = priv->xstats.rx_crc_errors;
+ stats->rx_over_errors = priv->xstats.rx_overflow_cntr;
+ stats->rx_missed_errors = priv->xstats.rx_missed_cntr;
+}
+
static const struct net_device_ops stmmac_netdev_ops = {
.ndo_open = stmmac_open,
.ndo_start_xmit = stmmac_xmit,
@@ -6798,6 +6908,7 @@ static const struct net_device_ops stmmac_netdev_ops = {
.ndo_set_rx_mode = stmmac_set_rx_mode,
.ndo_tx_timeout = stmmac_tx_timeout,
.ndo_eth_ioctl = stmmac_ioctl,
+ .ndo_get_stats64 = stmmac_get_stats64,
.ndo_setup_tc = stmmac_setup_tc,
.ndo_select_queue = stmmac_select_queue,
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -6855,7 +6966,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
int ret;
/* dwmac-sun8i only work in chain mode */
- if (priv->plat->has_sun8i)
+ if (priv->plat->flags & STMMAC_FLAG_HAS_SUN8I)
chain_mode = 1;
priv->chain_mode = chain_mode;
@@ -6876,7 +6987,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
*/
priv->plat->enh_desc = priv->dma_cap.enh_desc;
priv->plat->pmt = priv->dma_cap.pmt_remote_wake_up &&
- !priv->plat->use_phy_wol;
+ !(priv->plat->flags & STMMAC_FLAG_USE_PHY_WOL);
priv->hw->pmt = priv->plat->pmt;
if (priv->dma_cap.hash_tb_sz) {
priv->hw->multicast_filter_bins =
@@ -6920,7 +7031,8 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
if (priv->dma_cap.tsoen)
dev_info(priv->device, "TSO supported\n");
- priv->hw->vlan_fail_q_en = priv->plat->vlan_fail_q_en;
+ priv->hw->vlan_fail_q_en =
+ (priv->plat->flags & STMMAC_FLAG_VLAN_FAIL_Q_EN);
priv->hw->vlan_fail_q = priv->plat->vlan_fail_q;
/* Run HW quirks, if any */
@@ -7160,12 +7272,18 @@ int stmmac_dvr_probe(struct device *device,
priv->device = device;
priv->dev = ndev;
+ for (i = 0; i < MTL_MAX_RX_QUEUES; i++)
+ u64_stats_init(&priv->dma_conf.rx_queue[i].rxq_stats.syncp);
+ for (i = 0; i < MTL_MAX_TX_QUEUES; i++)
+ u64_stats_init(&priv->dma_conf.tx_queue[i].txq_stats.syncp);
+
stmmac_set_ethtool_ops(ndev);
priv->pause = pause;
priv->plat = plat_dat;
priv->ioaddr = res->addr;
priv->dev->base_addr = (unsigned long)res->addr;
- priv->plat->dma_cfg->multi_msi_en = priv->plat->multi_msi_en;
+ priv->plat->dma_cfg->multi_msi_en =
+ (priv->plat->flags & STMMAC_FLAG_MULTI_MSI_EN);
priv->dev->irq = res->irq;
priv->wol_irq = res->wol_irq;
@@ -7249,7 +7367,7 @@ int stmmac_dvr_probe(struct device *device,
ndev->hw_features |= NETIF_F_HW_TC;
}
- if ((priv->plat->tso_en) && (priv->dma_cap.tsoen)) {
+ if ((priv->plat->flags & STMMAC_FLAG_TSO_EN) && (priv->dma_cap.tsoen)) {
ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
if (priv->plat->has_gmac4)
ndev->hw_features |= NETIF_F_GSO_UDP_L4;
@@ -7257,7 +7375,8 @@ int stmmac_dvr_probe(struct device *device,
dev_info(priv->device, "TSO feature enabled\n");
}
- if (priv->dma_cap.sphen && !priv->plat->sph_disable) {
+ if (priv->dma_cap.sphen &&
+ !(priv->plat->flags & STMMAC_FLAG_SPH_DISABLE)) {
ndev->hw_features |= NETIF_F_GRO;
priv->sph_cap = true;
priv->sph = priv->sph_cap;
@@ -7315,6 +7434,8 @@ int stmmac_dvr_probe(struct device *device,
#endif
priv->msg_enable = netif_msg_init(debug, default_msg_level);
+ priv->xstats.threshold = tc;
+
/* Initialize RSS */
rxq = priv->plat->rx_queues_to_use;
netdev_rss_key_fill(priv->rss.key, sizeof(priv->rss.key));
@@ -7621,7 +7742,8 @@ int stmmac_resume(struct device *dev)
stmmac_mdio_reset(priv->mii);
}
- if (!priv->plat->serdes_up_after_phy_linkup && priv->plat->serdes_powerup) {
+ if (!(priv->plat->flags & STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP) &&
+ priv->plat->serdes_powerup) {
ret = priv->plat->serdes_powerup(ndev,
priv->plat->bsp_priv);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 644bb54f5f02..352b01678c22 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -77,7 +77,7 @@ static int snps_gmac5_default_data(struct pci_dev *pdev,
plat->clk_csr = 5;
plat->has_gmac4 = 1;
plat->force_sf_dma_mode = 1;
- plat->tso_en = 1;
+ plat->flags |= STMMAC_FLAG_TSO_EN;
plat->pmt = 1;
/* Set default value for multicast hash bins */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 231152ee5a32..23d53ea04b24 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -466,8 +466,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
plat->force_sf_dma_mode =
of_property_read_bool(np, "snps,force_sf_dma_mode");
- plat->en_tx_lpi_clockgating =
- of_property_read_bool(np, "snps,en-tx-lpi-clockgating");
+ if (of_property_read_bool(np, "snps,en-tx-lpi-clockgating"))
+ plat->flags |= STMMAC_FLAG_EN_TX_LPI_CLOCKGATING;
/* Set the maxmtu to a default of JUMBO_LEN in case the
* parameter is not present in the device tree.
@@ -525,7 +525,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
plat->has_gmac4 = 1;
plat->has_gmac = 0;
plat->pmt = 1;
- plat->tso_en = of_property_read_bool(np, "snps,tso");
+ if (of_property_read_bool(np, "snps,tso"))
+ plat->flags |= STMMAC_FLAG_TSO_EN;
}
if (of_device_is_compatible(np, "snps,dwmac-3.610") ||
@@ -538,7 +539,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
if (of_device_is_compatible(np, "snps,dwxgmac")) {
plat->has_xgmac = 1;
plat->pmt = 1;
- plat->tso_en = of_property_read_bool(np, "snps,tso");
+ if (of_property_read_bool(np, "snps,tso"))
+ plat->flags |= STMMAC_FLAG_TSO_EN;
}
dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg),
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index b4388ca8d211..3d7825cb30bb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -192,7 +192,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
write_unlock_irqrestore(&priv->ptp_lock, flags);
break;
case PTP_CLK_REQ_EXTTS:
- priv->plat->ext_snapshot_en = on;
+ if (on)
+ priv->plat->flags |= STMMAC_FLAG_EXT_SNAPSHOT_EN;
+ else
+ priv->plat->flags &= ~STMMAC_FLAG_EXT_SNAPSHOT_EN;
mutex_lock(&priv->aux_ts_lock);
acr_value = readl(ptpaddr + PTP_ACR);
acr_value &= ~PTP_ACR_MASK;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
index bf619295d079..d1fe4b46f162 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
@@ -26,6 +26,12 @@
#define PTP_ACR 0x40 /* Auxiliary Control Reg */
#define PTP_ATNR 0x48 /* Auxiliary Timestamp - Nanoseconds Reg */
#define PTP_ATSR 0x4c /* Auxiliary Timestamp - Seconds Reg */
+#define PTP_TS_INGR_CORR_NS 0x58 /* Ingress timestamp correction nanoseconds */
+#define PTP_TS_EGR_CORR_NS 0x5C /* Egress timestamp correction nanoseconds*/
+#define PTP_TS_INGR_CORR_SNS 0x60 /* Ingress timestamp correction subnanoseconds */
+#define PTP_TS_EGR_CORR_SNS 0x64 /* Egress timestamp correction subnanoseconds */
+#define PTP_TS_INGR_LAT 0x68 /* MAC internal Ingress Latency */
+#define PTP_TS_EGR_LAT 0x6c /* MAC internal Egress Latency */
#define PTP_STNSUR_ADDSUB_SHIFT 31
#define PTP_DIGITAL_ROLLOVER_MODE 0x3B9ACA00 /* 10e9-1 ns */
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 6321178fc814..85dc16faca54 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -432,71 +432,6 @@ out:
EXPORT_SYMBOL(wx_read_ee_hostif_buffer);
/**
- * wx_calculate_checksum - Calculate checksum for buffer
- * @buffer: pointer to EEPROM
- * @length: size of EEPROM to calculate a checksum for
- * Calculates the checksum for some buffer on a specified length. The
- * checksum calculated is returned.
- **/
-static u8 wx_calculate_checksum(u8 *buffer, u32 length)
-{
- u8 sum = 0;
- u32 i;
-
- if (!buffer)
- return 0;
-
- for (i = 0; i < length; i++)
- sum += buffer[i];
-
- return (u8)(0 - sum);
-}
-
-/**
- * wx_reset_hostif - send reset cmd to fw
- * @wx: pointer to hardware structure
- *
- * Sends reset cmd to firmware through the manageability
- * block.
- **/
-int wx_reset_hostif(struct wx *wx)
-{
- struct wx_hic_reset reset_cmd;
- int ret_val = 0;
- int i;
-
- reset_cmd.hdr.cmd = FW_RESET_CMD;
- reset_cmd.hdr.buf_len = FW_RESET_LEN;
- reset_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
- reset_cmd.lan_id = wx->bus.func;
- reset_cmd.reset_type = (u16)wx->reset_type;
- reset_cmd.hdr.checksum = 0;
- reset_cmd.hdr.checksum = wx_calculate_checksum((u8 *)&reset_cmd,
- (FW_CEM_HDR_LEN +
- reset_cmd.hdr.buf_len));
-
- for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
- ret_val = wx_host_interface_command(wx, (u32 *)&reset_cmd,
- sizeof(reset_cmd),
- WX_HI_COMMAND_TIMEOUT,
- true);
- if (ret_val != 0)
- continue;
-
- if (reset_cmd.hdr.cmd_or_resp.ret_status ==
- FW_CEM_RESP_STATUS_SUCCESS)
- ret_val = 0;
- else
- ret_val = -EFAULT;
-
- break;
- }
-
- return ret_val;
-}
-EXPORT_SYMBOL(wx_reset_hostif);
-
-/**
* wx_init_eeprom_params - Initialize EEPROM params
* @wx: pointer to hardware structure
*
@@ -1501,7 +1436,7 @@ static void wx_restore_vlan(struct wx *wx)
*
* Configure the Rx unit of the MAC after a reset.
**/
-static void wx_configure_rx(struct wx *wx)
+void wx_configure_rx(struct wx *wx)
{
u32 psrtype, i;
int ret;
@@ -1544,6 +1479,7 @@ static void wx_configure_rx(struct wx *wx)
wx_enable_rx(wx);
wx_enable_sec_rx_path(wx);
}
+EXPORT_SYMBOL(wx_configure_rx);
static void wx_configure_isb(struct wx *wx)
{
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
index 1f93ca32c921..0b3447bc6f2f 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
@@ -14,7 +14,6 @@ int wx_host_interface_command(struct wx *wx, u32 *buffer,
int wx_read_ee_hostif(struct wx *wx, u16 offset, u16 *data);
int wx_read_ee_hostif_buffer(struct wx *wx,
u16 offset, u16 words, u16 *data);
-int wx_reset_hostif(struct wx *wx);
void wx_init_eeprom_params(struct wx *wx);
void wx_get_mac_addr(struct wx *wx, u8 *mac_addr);
void wx_init_rx_addrs(struct wx *wx);
@@ -25,6 +24,7 @@ void wx_disable_rx(struct wx *wx);
void wx_set_rx_mode(struct net_device *netdev);
int wx_change_mtu(struct net_device *netdev, int new_mtu);
void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring);
+void wx_configure_rx(struct wx *wx);
void wx_configure(struct wx *wx);
void wx_start_hw(struct wx *wx);
int wx_disable_pcie_master(struct wx *wx);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 29dfb561887d..1de88a33a698 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -160,6 +160,10 @@
#define WX_PSR_LAN_FLEX_DW_H(_i) (0x15C04 + ((_i) * 16))
#define WX_PSR_LAN_FLEX_MSK(_i) (0x15C08 + ((_i) * 16))
+#define WX_PSR_WKUP_CTL 0x15B80
+/* Wake Up Filter Control Bit */
+#define WX_PSR_WKUP_CTL_MAG BIT(1) /* Magic Packet Wakeup Enable */
+
/* vlan tbl */
#define WX_PSR_VLAN_TBL(_i) (0x16000 + ((_i) * 4))
@@ -846,7 +850,7 @@ struct wx {
int duplex;
struct phy_device *phydev;
- bool wol_enabled;
+ bool wol_hw_supported;
bool ncsi_enabled;
bool gpio_ctrl;
raw_spinlock_t gpio_lock;
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
index 5b25834baf38..ec0e869e9aac 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
@@ -6,14 +6,49 @@
#include <linux/netdevice.h>
#include "../libwx/wx_ethtool.h"
+#include "../libwx/wx_type.h"
#include "ngbe_ethtool.h"
+static void ngbe_get_wol(struct net_device *netdev,
+ struct ethtool_wolinfo *wol)
+{
+ struct wx *wx = netdev_priv(netdev);
+
+ if (!wx->wol_hw_supported)
+ return;
+ wol->supported = WAKE_MAGIC;
+ wol->wolopts = 0;
+ if (wx->wol & WX_PSR_WKUP_CTL_MAG)
+ wol->wolopts |= WAKE_MAGIC;
+}
+
+static int ngbe_set_wol(struct net_device *netdev,
+ struct ethtool_wolinfo *wol)
+{
+ struct wx *wx = netdev_priv(netdev);
+ struct pci_dev *pdev = wx->pdev;
+
+ if (!wx->wol_hw_supported)
+ return -EOPNOTSUPP;
+
+ wx->wol = 0;
+ if (wol->wolopts & WAKE_MAGIC)
+ wx->wol = WX_PSR_WKUP_CTL_MAG;
+ netdev->wol_enabled = !!(wx->wol);
+ wr32(wx, WX_PSR_WKUP_CTL, wx->wol);
+ device_set_wakeup_enable(&pdev->dev, netdev->wol_enabled);
+
+ return 0;
+}
+
static const struct ethtool_ops ngbe_ethtool_ops = {
.get_drvinfo = wx_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_link_ksettings = phy_ethtool_get_link_ksettings,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.nway_reset = phy_ethtool_nway_reset,
+ .get_wol = ngbe_get_wol,
+ .set_wol = ngbe_set_wol,
};
void ngbe_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index c99a5d3de72e..2b431db6085a 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -62,7 +62,7 @@ static void ngbe_init_type_code(struct wx *wx)
em_mac_type_rgmii :
em_mac_type_mdi;
- wx->wol_enabled = (wol_mask == NGBE_WOL_SUP) ? 1 : 0;
+ wx->wol_hw_supported = (wol_mask == NGBE_WOL_SUP) ? 1 : 0;
wx->ncsi_enabled = (ncsi_mask == NGBE_NCSI_MASK ||
type_mask == NGBE_SUBID_OCP_CARD) ? 1 : 0;
@@ -440,14 +440,26 @@ static void ngbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
struct wx *wx = pci_get_drvdata(pdev);
struct net_device *netdev;
+ u32 wufc = wx->wol;
netdev = wx->netdev;
+ rtnl_lock();
netif_device_detach(netdev);
- rtnl_lock();
if (netif_running(netdev))
- ngbe_down(wx);
+ ngbe_close(netdev);
+ wx_clear_interrupt_scheme(wx);
rtnl_unlock();
+
+ if (wufc) {
+ wx_set_rx_mode(netdev);
+ wx_configure_rx(wx);
+ wr32(wx, NGBE_PSR_WKUP_CTL, wufc);
+ } else {
+ wr32(wx, NGBE_PSR_WKUP_CTL, 0);
+ }
+ pci_wake_from_d3(pdev, !!wufc);
+ *enable_wake = !!wufc;
wx_control_hw(wx, false);
pci_disable_device(pdev);
@@ -621,12 +633,11 @@ static int ngbe_probe(struct pci_dev *pdev,
}
wx->wol = 0;
- if (wx->wol_enabled)
+ if (wx->wol_hw_supported)
wx->wol = NGBE_PSR_WKUP_CTL_MAG;
- wx->wol_enabled = !!(wx->wol);
+ netdev->wol_enabled = !!(wx->wol);
wr32(wx, NGBE_PSR_WKUP_CTL, wx->wol);
-
device_set_wakeup_enable(&pdev->dev, wx->wol);
/* Save off EEPROM version number and Option Rom version which
@@ -712,11 +723,52 @@ static void ngbe_remove(struct pci_dev *pdev)
pci_disable_device(pdev);
}
+static int ngbe_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ bool wake;
+
+ ngbe_dev_shutdown(pdev, &wake);
+ device_set_wakeup_enable(&pdev->dev, wake);
+
+ return 0;
+}
+
+static int ngbe_resume(struct pci_dev *pdev)
+{
+ struct net_device *netdev;
+ struct wx *wx;
+ u32 err;
+
+ wx = pci_get_drvdata(pdev);
+ netdev = wx->netdev;
+
+ err = pci_enable_device_mem(pdev);
+ if (err) {
+ wx_err(wx, "Cannot enable PCI device from suspend\n");
+ return err;
+ }
+ pci_set_master(pdev);
+ device_wakeup_disable(&pdev->dev);
+
+ ngbe_reset_hw(wx);
+ rtnl_lock();
+ err = wx_init_interrupt_scheme(wx);
+ if (!err && netif_running(netdev))
+ err = ngbe_open(netdev);
+ if (!err)
+ netif_device_attach(netdev);
+ rtnl_unlock();
+
+ return 0;
+}
+
static struct pci_driver ngbe_driver = {
.name = ngbe_driver_name,
.id_table = ngbe_pci_tbl,
.probe = ngbe_probe,
.remove = ngbe_remove,
+ .suspend = ngbe_suspend,
+ .resume = ngbe_resume,
.shutdown = ngbe_shutdown,
};
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
index c9ddbbc3fa4f..cc2f325a52f7 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
@@ -236,6 +236,7 @@ static void ngbe_phy_fixup(struct wx *wx)
phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
+ phydev->mac_managed_pm = true;
if (wx->mac_type != em_mac_type_mdi)
return;
/* disable EEE, internal phy does not support eee */
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c
index 0772eb14eabf..6e130d1f7a7b 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c
@@ -257,16 +257,16 @@ static void txgbe_reset_misc(struct wx *wx)
int txgbe_reset_hw(struct wx *wx)
{
int status;
+ u32 val;
/* Call adapter stop to disable tx/rx and clear interrupts */
status = wx_stop_adapter(wx);
if (status != 0)
return status;
- if (!(((wx->subsystem_device_id & WX_NCSI_MASK) == WX_NCSI_SUP) ||
- ((wx->subsystem_device_id & WX_WOL_MASK) == WX_WOL_SUP)))
- wx_reset_hostif(wx);
-
+ val = WX_MIS_RST_LAN_RST(wx->bus.func);
+ wr32(wx, WX_MIS_RST, val | rd32(wx, WX_MIS_RST));
+ WX_WRITE_FLUSH(wx);
usleep_range(10, 100);
status = wx_check_flash_load(wx, TXGBE_SPI_ILDR_STATUS_LAN_SW_RST(wx->bus.func));
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index acb20ad4e37e..144ec626230d 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -243,7 +243,8 @@ static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4,
fl4->flowi4_oif = sk->sk_bound_dev_if;
fl4->daddr = daddr;
fl4->saddr = saddr;
- fl4->flowi4_tos = RT_CONN_FLAGS(sk);
+ fl4->flowi4_tos = ip_sock_rt_tos(sk);
+ fl4->flowi4_scope = ip_sock_rt_scope(sk);
fl4->flowi4_proto = sk->sk_protocol;
return ip_route_output_key(sock_net(sk), fl4);
diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c
index bfc9be23c973..6b26a0803696 100644
--- a/drivers/net/mdio/mdio-bcm-unimac.c
+++ b/drivers/net/mdio/mdio-bcm-unimac.c
@@ -334,6 +334,8 @@ static SIMPLE_DEV_PM_OPS(unimac_mdio_pm_ops,
unimac_mdio_suspend, unimac_mdio_resume);
static const struct of_device_id unimac_mdio_ids[] = {
+ { .compatible = "brcm,asp-v2.1-mdio", },
+ { .compatible = "brcm,asp-v2.0-mdio", },
{ .compatible = "brcm,genet-mdio-v5", },
{ .compatible = "brcm,genet-mdio-v4", },
{ .compatible = "brcm,genet-mdio-v3", },
diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c
index 7aafc221b5cf..683e8f8319ab 100644
--- a/drivers/net/mdio/mdio-xgene.c
+++ b/drivers/net/mdio/mdio-xgene.c
@@ -79,7 +79,7 @@ EXPORT_SYMBOL(xgene_mdio_wr_mac);
int xgene_mdio_rgmii_read(struct mii_bus *bus, int phy_id, int reg)
{
- struct xgene_mdio_pdata *pdata = (struct xgene_mdio_pdata *)bus->priv;
+ struct xgene_mdio_pdata *pdata = bus->priv;
u32 data, done;
u8 wait = 10;
@@ -105,7 +105,7 @@ EXPORT_SYMBOL(xgene_mdio_rgmii_read);
int xgene_mdio_rgmii_write(struct mii_bus *bus, int phy_id, int reg, u16 data)
{
- struct xgene_mdio_pdata *pdata = (struct xgene_mdio_pdata *)bus->priv;
+ struct xgene_mdio_pdata *pdata = bus->priv;
u32 val, done;
u8 wait = 10;
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 4f4f79532c6c..87f18aedd3bd 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -36,6 +36,7 @@
#include <linux/inet.h>
#include <linux/configfs.h>
#include <linux/etherdevice.h>
+#include <linux/utsname.h>
MODULE_AUTHOR("Maintainer: Matt Mackall <mpm@selenic.com>");
MODULE_DESCRIPTION("Console driver for network interfaces");
@@ -84,6 +85,8 @@ static struct console netconsole_ext;
* Also, other parameters of a target may be modified at
* runtime only when it is disabled (enabled == 0).
* @extended: Denotes whether console is extended or not.
+ * @release: Denotes whether kernel release version should be prepended
+ * to the message. Depends on extended console.
* @np: The netpoll structure for this target.
* Contains the other userspace visible parameters:
* dev_name (read-write)
@@ -101,6 +104,7 @@ struct netconsole_target {
#endif
bool enabled;
bool extended;
+ bool release;
struct netpoll np;
};
@@ -188,6 +192,15 @@ static struct netconsole_target *alloc_param_target(char *target_config)
target_config++;
}
+ if (*target_config == 'r') {
+ if (!nt->extended) {
+ pr_err("Netconsole configuration error. Release feature requires extended log message");
+ goto fail;
+ }
+ nt->release = true;
+ target_config++;
+ }
+
/* Parse parameters and setup netpoll */
err = netpoll_parse_options(&nt->np, target_config);
if (err)
@@ -222,6 +235,7 @@ static void free_param_target(struct netconsole_target *nt)
* |
* <target>/
* | enabled
+ * | release
* | dev_name
* | local_port
* | remote_port
@@ -246,27 +260,32 @@ static struct netconsole_target *to_target(struct config_item *item)
static ssize_t enabled_show(struct config_item *item, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", to_target(item)->enabled);
+ return sysfs_emit(buf, "%d\n", to_target(item)->enabled);
}
static ssize_t extended_show(struct config_item *item, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", to_target(item)->extended);
+ return sysfs_emit(buf, "%d\n", to_target(item)->extended);
+}
+
+static ssize_t release_show(struct config_item *item, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", to_target(item)->release);
}
static ssize_t dev_name_show(struct config_item *item, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%s\n", to_target(item)->np.dev_name);
+ return sysfs_emit(buf, "%s\n", to_target(item)->np.dev_name);
}
static ssize_t local_port_show(struct config_item *item, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", to_target(item)->np.local_port);
+ return sysfs_emit(buf, "%d\n", to_target(item)->np.local_port);
}
static ssize_t remote_port_show(struct config_item *item, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", to_target(item)->np.remote_port);
+ return sysfs_emit(buf, "%d\n", to_target(item)->np.remote_port);
}
static ssize_t local_ip_show(struct config_item *item, char *buf)
@@ -274,9 +293,9 @@ static ssize_t local_ip_show(struct config_item *item, char *buf)
struct netconsole_target *nt = to_target(item);
if (nt->np.ipv6)
- return snprintf(buf, PAGE_SIZE, "%pI6c\n", &nt->np.local_ip.in6);
+ return sysfs_emit(buf, "%pI6c\n", &nt->np.local_ip.in6);
else
- return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.local_ip);
+ return sysfs_emit(buf, "%pI4\n", &nt->np.local_ip);
}
static ssize_t remote_ip_show(struct config_item *item, char *buf)
@@ -284,9 +303,9 @@ static ssize_t remote_ip_show(struct config_item *item, char *buf)
struct netconsole_target *nt = to_target(item);
if (nt->np.ipv6)
- return snprintf(buf, PAGE_SIZE, "%pI6c\n", &nt->np.remote_ip.in6);
+ return sysfs_emit(buf, "%pI6c\n", &nt->np.remote_ip.in6);
else
- return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.remote_ip);
+ return sysfs_emit(buf, "%pI4\n", &nt->np.remote_ip);
}
static ssize_t local_mac_show(struct config_item *item, char *buf)
@@ -294,12 +313,12 @@ static ssize_t local_mac_show(struct config_item *item, char *buf)
struct net_device *dev = to_target(item)->np.dev;
static const u8 bcast[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
- return snprintf(buf, PAGE_SIZE, "%pM\n", dev ? dev->dev_addr : bcast);
+ return sysfs_emit(buf, "%pM\n", dev ? dev->dev_addr : bcast);
}
static ssize_t remote_mac_show(struct config_item *item, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%pM\n", to_target(item)->np.remote_mac);
+ return sysfs_emit(buf, "%pM\n", to_target(item)->np.remote_mac);
}
/*
@@ -314,17 +333,15 @@ static ssize_t enabled_store(struct config_item *item,
{
struct netconsole_target *nt = to_target(item);
unsigned long flags;
- int enabled;
+ bool enabled;
int err;
mutex_lock(&dynamic_netconsole_mutex);
- err = kstrtoint(buf, 10, &enabled);
- if (err < 0)
+ err = kstrtobool(buf, &enabled);
+ if (err)
goto out_unlock;
err = -EINVAL;
- if (enabled < 0 || enabled > 1)
- goto out_unlock;
if ((bool)enabled == nt->enabled) {
pr_info("network logging has already %s\n",
nt->enabled ? "started" : "stopped");
@@ -332,6 +349,11 @@ static ssize_t enabled_store(struct config_item *item,
}
if (enabled) { /* true */
+ if (nt->release && !nt->extended) {
+ pr_err("Not enabling netconsole. Release feature requires extended log message");
+ goto out_unlock;
+ }
+
if (nt->extended && !console_is_registered(&netconsole_ext))
register_console(&netconsole_ext);
@@ -366,11 +388,11 @@ out_unlock:
return err;
}
-static ssize_t extended_store(struct config_item *item, const char *buf,
- size_t count)
+static ssize_t release_store(struct config_item *item, const char *buf,
+ size_t count)
{
struct netconsole_target *nt = to_target(item);
- int extended;
+ bool release;
int err;
mutex_lock(&dynamic_netconsole_mutex);
@@ -381,14 +403,38 @@ static ssize_t extended_store(struct config_item *item, const char *buf,
goto out_unlock;
}
- err = kstrtoint(buf, 10, &extended);
- if (err < 0)
+ err = kstrtobool(buf, &release);
+ if (err)
goto out_unlock;
- if (extended < 0 || extended > 1) {
+
+ nt->release = release;
+
+ mutex_unlock(&dynamic_netconsole_mutex);
+ return strnlen(buf, count);
+out_unlock:
+ mutex_unlock(&dynamic_netconsole_mutex);
+ return err;
+}
+
+static ssize_t extended_store(struct config_item *item, const char *buf,
+ size_t count)
+{
+ struct netconsole_target *nt = to_target(item);
+ bool extended;
+ int err;
+
+ mutex_lock(&dynamic_netconsole_mutex);
+ if (nt->enabled) {
+ pr_err("target (%s) is enabled, disable to update parameters\n",
+ config_item_name(&nt->item));
err = -EINVAL;
goto out_unlock;
}
+ err = kstrtobool(buf, &extended);
+ if (err)
+ goto out_unlock;
+
nt->extended = extended;
mutex_unlock(&dynamic_netconsole_mutex);
@@ -576,10 +622,12 @@ CONFIGFS_ATTR(, local_ip);
CONFIGFS_ATTR(, remote_ip);
CONFIGFS_ATTR_RO(, local_mac);
CONFIGFS_ATTR(, remote_mac);
+CONFIGFS_ATTR(, release);
static struct configfs_attribute *netconsole_target_attrs[] = {
&attr_enabled,
&attr_extended,
+ &attr_release,
&attr_dev_name,
&attr_local_port,
&attr_remote_port,
@@ -772,9 +820,23 @@ static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg,
const char *header, *body;
int offset = 0;
int header_len, body_len;
+ const char *msg_ready = msg;
+ const char *release;
+ int release_len = 0;
- if (msg_len <= MAX_PRINT_CHUNK) {
- netpoll_send_udp(&nt->np, msg, msg_len);
+ if (nt->release) {
+ release = init_utsname()->release;
+ release_len = strlen(release) + 1;
+ }
+
+ if (msg_len + release_len <= MAX_PRINT_CHUNK) {
+ /* No fragmentation needed */
+ if (nt->release) {
+ scnprintf(buf, MAX_PRINT_CHUNK, "%s,%s", release, msg);
+ msg_len += release_len;
+ msg_ready = buf;
+ }
+ netpoll_send_udp(&nt->np, msg_ready, msg_len);
return;
}
@@ -792,7 +854,10 @@ static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg,
* Transfer multiple chunks with the following extra header.
* "ncfrag=<byte-offset>/<total-bytes>"
*/
- memcpy(buf, header, header_len);
+ if (nt->release)
+ scnprintf(buf, MAX_PRINT_CHUNK, "%s,", release);
+ memcpy(buf + release_len, header, header_len);
+ header_len += release_len;
while (offset < body_len) {
int this_header = header_len;
diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile
index 5735e5b1a2cb..f8de93bc5f5b 100644
--- a/drivers/net/netdevsim/Makefile
+++ b/drivers/net/netdevsim/Makefile
@@ -17,3 +17,7 @@ endif
ifneq ($(CONFIG_PSAMPLE),)
netdevsim-objs += psample.o
endif
+
+ifneq ($(CONFIG_MACSEC),)
+netdevsim-objs += macsec.o
+endif
diff --git a/drivers/net/netdevsim/macsec.c b/drivers/net/netdevsim/macsec.c
new file mode 100644
index 000000000000..0d5f50430dd3
--- /dev/null
+++ b/drivers/net/netdevsim/macsec.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <net/macsec.h>
+#include "netdevsim.h"
+
+static inline u64 sci_to_cpu(sci_t sci)
+{
+ return be64_to_cpu((__force __be64)sci);
+}
+
+static int nsim_macsec_find_secy(struct netdevsim *ns, sci_t sci)
+{
+ int i;
+
+ for (i = 0; i < NSIM_MACSEC_MAX_SECY_COUNT; i++) {
+ if (ns->macsec.nsim_secy[i].sci == sci)
+ return i;
+ }
+
+ return -1;
+}
+
+static int nsim_macsec_find_rxsc(struct nsim_secy *ns_secy, sci_t sci)
+{
+ int i;
+
+ for (i = 0; i < NSIM_MACSEC_MAX_RXSC_COUNT; i++) {
+ if (ns_secy->nsim_rxsc[i].sci == sci)
+ return i;
+ }
+
+ return -1;
+}
+
+static int nsim_macsec_add_secy(struct macsec_context *ctx)
+{
+ struct netdevsim *ns = netdev_priv(ctx->netdev);
+ int idx;
+
+ if (ns->macsec.nsim_secy_count == NSIM_MACSEC_MAX_SECY_COUNT)
+ return -ENOSPC;
+
+ for (idx = 0; idx < NSIM_MACSEC_MAX_SECY_COUNT; idx++) {
+ if (!ns->macsec.nsim_secy[idx].used)
+ break;
+ }
+
+ if (idx == NSIM_MACSEC_MAX_SECY_COUNT) {
+ netdev_err(ctx->netdev, "%s: nsim_secy_count not full but all SecYs used\n",
+ __func__);
+ return -ENOSPC;
+ }
+
+ netdev_dbg(ctx->netdev, "%s: adding new secy with sci %08llx at index %d\n",
+ __func__, sci_to_cpu(ctx->secy->sci), idx);
+ ns->macsec.nsim_secy[idx].used = true;
+ ns->macsec.nsim_secy[idx].nsim_rxsc_count = 0;
+ ns->macsec.nsim_secy[idx].sci = ctx->secy->sci;
+ ns->macsec.nsim_secy_count++;
+
+ return 0;
+}
+
+static int nsim_macsec_upd_secy(struct macsec_context *ctx)
+{
+ struct netdevsim *ns = netdev_priv(ctx->netdev);
+ int idx;
+
+ idx = nsim_macsec_find_secy(ns, ctx->secy->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in secy table\n",
+ __func__, sci_to_cpu(ctx->secy->sci));
+ return -ENOENT;
+ }
+
+ netdev_dbg(ctx->netdev, "%s: updating secy with sci %08llx at index %d\n",
+ __func__, sci_to_cpu(ctx->secy->sci), idx);
+
+ return 0;
+}
+
+static int nsim_macsec_del_secy(struct macsec_context *ctx)
+{
+ struct netdevsim *ns = netdev_priv(ctx->netdev);
+ int idx;
+
+ idx = nsim_macsec_find_secy(ns, ctx->secy->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in secy table\n",
+ __func__, sci_to_cpu(ctx->secy->sci));
+ return -ENOENT;
+ }
+
+ netdev_dbg(ctx->netdev, "%s: removing SecY with SCI %08llx at index %d\n",
+ __func__, sci_to_cpu(ctx->secy->sci), idx);
+
+ ns->macsec.nsim_secy[idx].used = false;
+ memset(&ns->macsec.nsim_secy[idx], 0, sizeof(ns->macsec.nsim_secy[idx]));
+ ns->macsec.nsim_secy_count--;
+
+ return 0;
+}
+
+static int nsim_macsec_add_rxsc(struct macsec_context *ctx)
+{
+ struct netdevsim *ns = netdev_priv(ctx->netdev);
+ struct nsim_secy *secy;
+ int idx;
+
+ idx = nsim_macsec_find_secy(ns, ctx->secy->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in secy table\n",
+ __func__, sci_to_cpu(ctx->secy->sci));
+ return -ENOENT;
+ }
+ secy = &ns->macsec.nsim_secy[idx];
+
+ if (secy->nsim_rxsc_count == NSIM_MACSEC_MAX_RXSC_COUNT)
+ return -ENOSPC;
+
+ for (idx = 0; idx < NSIM_MACSEC_MAX_RXSC_COUNT; idx++) {
+ if (!secy->nsim_rxsc[idx].used)
+ break;
+ }
+
+ if (idx == NSIM_MACSEC_MAX_RXSC_COUNT)
+ netdev_err(ctx->netdev, "%s: nsim_rxsc_count not full but all RXSCs used\n",
+ __func__);
+
+ netdev_dbg(ctx->netdev, "%s: adding new rxsc with sci %08llx at index %d\n",
+ __func__, sci_to_cpu(ctx->rx_sc->sci), idx);
+ secy->nsim_rxsc[idx].used = true;
+ secy->nsim_rxsc[idx].sci = ctx->rx_sc->sci;
+ secy->nsim_rxsc_count++;
+
+ return 0;
+}
+
+static int nsim_macsec_upd_rxsc(struct macsec_context *ctx)
+{
+ struct netdevsim *ns = netdev_priv(ctx->netdev);
+ struct nsim_secy *secy;
+ int idx;
+
+ idx = nsim_macsec_find_secy(ns, ctx->secy->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in secy table\n",
+ __func__, sci_to_cpu(ctx->secy->sci));
+ return -ENOENT;
+ }
+ secy = &ns->macsec.nsim_secy[idx];
+
+ idx = nsim_macsec_find_rxsc(secy, ctx->rx_sc->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in RXSC table\n",
+ __func__, sci_to_cpu(ctx->rx_sc->sci));
+ return -ENOENT;
+ }
+
+ netdev_dbg(ctx->netdev, "%s: updating RXSC with sci %08llx at index %d\n",
+ __func__, sci_to_cpu(ctx->rx_sc->sci), idx);
+
+ return 0;
+}
+
+static int nsim_macsec_del_rxsc(struct macsec_context *ctx)
+{
+ struct netdevsim *ns = netdev_priv(ctx->netdev);
+ struct nsim_secy *secy;
+ int idx;
+
+ idx = nsim_macsec_find_secy(ns, ctx->secy->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in secy table\n",
+ __func__, sci_to_cpu(ctx->secy->sci));
+ return -ENOENT;
+ }
+ secy = &ns->macsec.nsim_secy[idx];
+
+ idx = nsim_macsec_find_rxsc(secy, ctx->rx_sc->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in RXSC table\n",
+ __func__, sci_to_cpu(ctx->rx_sc->sci));
+ return -ENOENT;
+ }
+
+ netdev_dbg(ctx->netdev, "%s: removing RXSC with sci %08llx at index %d\n",
+ __func__, sci_to_cpu(ctx->rx_sc->sci), idx);
+
+ secy->nsim_rxsc[idx].used = false;
+ memset(&secy->nsim_rxsc[idx], 0, sizeof(secy->nsim_rxsc[idx]));
+ secy->nsim_rxsc_count--;
+
+ return 0;
+}
+
+static int nsim_macsec_add_rxsa(struct macsec_context *ctx)
+{
+ struct netdevsim *ns = netdev_priv(ctx->netdev);
+ struct nsim_secy *secy;
+ int idx;
+
+ idx = nsim_macsec_find_secy(ns, ctx->secy->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in secy table\n",
+ __func__, sci_to_cpu(ctx->secy->sci));
+ return -ENOENT;
+ }
+ secy = &ns->macsec.nsim_secy[idx];
+
+ idx = nsim_macsec_find_rxsc(secy, ctx->sa.rx_sa->sc->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in RXSC table\n",
+ __func__, sci_to_cpu(ctx->sa.rx_sa->sc->sci));
+ return -ENOENT;
+ }
+
+ netdev_dbg(ctx->netdev, "%s: RXSC with sci %08llx, AN %u\n",
+ __func__, sci_to_cpu(ctx->sa.rx_sa->sc->sci), ctx->sa.assoc_num);
+
+ return 0;
+}
+
+static int nsim_macsec_upd_rxsa(struct macsec_context *ctx)
+{
+ struct netdevsim *ns = netdev_priv(ctx->netdev);
+ struct nsim_secy *secy;
+ int idx;
+
+ idx = nsim_macsec_find_secy(ns, ctx->secy->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in secy table\n",
+ __func__, sci_to_cpu(ctx->secy->sci));
+ return -ENOENT;
+ }
+ secy = &ns->macsec.nsim_secy[idx];
+
+ idx = nsim_macsec_find_rxsc(secy, ctx->sa.rx_sa->sc->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in RXSC table\n",
+ __func__, sci_to_cpu(ctx->sa.rx_sa->sc->sci));
+ return -ENOENT;
+ }
+
+ netdev_dbg(ctx->netdev, "%s: RXSC with sci %08llx, AN %u\n",
+ __func__, sci_to_cpu(ctx->sa.rx_sa->sc->sci), ctx->sa.assoc_num);
+
+ return 0;
+}
+
+static int nsim_macsec_del_rxsa(struct macsec_context *ctx)
+{
+ struct netdevsim *ns = netdev_priv(ctx->netdev);
+ struct nsim_secy *secy;
+ int idx;
+
+ idx = nsim_macsec_find_secy(ns, ctx->secy->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in secy table\n",
+ __func__, sci_to_cpu(ctx->secy->sci));
+ return -ENOENT;
+ }
+ secy = &ns->macsec.nsim_secy[idx];
+
+ idx = nsim_macsec_find_rxsc(secy, ctx->sa.rx_sa->sc->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in RXSC table\n",
+ __func__, sci_to_cpu(ctx->sa.rx_sa->sc->sci));
+ return -ENOENT;
+ }
+
+ netdev_dbg(ctx->netdev, "%s: RXSC with sci %08llx, AN %u\n",
+ __func__, sci_to_cpu(ctx->sa.rx_sa->sc->sci), ctx->sa.assoc_num);
+
+ return 0;
+}
+
+static int nsim_macsec_add_txsa(struct macsec_context *ctx)
+{
+ struct netdevsim *ns = netdev_priv(ctx->netdev);
+ int idx;
+
+ idx = nsim_macsec_find_secy(ns, ctx->secy->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in secy table\n",
+ __func__, sci_to_cpu(ctx->secy->sci));
+ return -ENOENT;
+ }
+
+ netdev_dbg(ctx->netdev, "%s: SECY with sci %08llx, AN %u\n",
+ __func__, sci_to_cpu(ctx->secy->sci), ctx->sa.assoc_num);
+
+ return 0;
+}
+
+static int nsim_macsec_upd_txsa(struct macsec_context *ctx)
+{
+ struct netdevsim *ns = netdev_priv(ctx->netdev);
+ int idx;
+
+ idx = nsim_macsec_find_secy(ns, ctx->secy->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in secy table\n",
+ __func__, sci_to_cpu(ctx->secy->sci));
+ return -ENOENT;
+ }
+
+ netdev_dbg(ctx->netdev, "%s: SECY with sci %08llx, AN %u\n",
+ __func__, sci_to_cpu(ctx->secy->sci), ctx->sa.assoc_num);
+
+ return 0;
+}
+
+static int nsim_macsec_del_txsa(struct macsec_context *ctx)
+{
+ struct netdevsim *ns = netdev_priv(ctx->netdev);
+ int idx;
+
+ idx = nsim_macsec_find_secy(ns, ctx->secy->sci);
+ if (idx < 0) {
+ netdev_err(ctx->netdev, "%s: sci %08llx not found in secy table\n",
+ __func__, sci_to_cpu(ctx->secy->sci));
+ return -ENOENT;
+ }
+
+ netdev_dbg(ctx->netdev, "%s: SECY with sci %08llx, AN %u\n",
+ __func__, sci_to_cpu(ctx->secy->sci), ctx->sa.assoc_num);
+
+ return 0;
+}
+
+static const struct macsec_ops nsim_macsec_ops = {
+ .mdo_add_secy = nsim_macsec_add_secy,
+ .mdo_upd_secy = nsim_macsec_upd_secy,
+ .mdo_del_secy = nsim_macsec_del_secy,
+ .mdo_add_rxsc = nsim_macsec_add_rxsc,
+ .mdo_upd_rxsc = nsim_macsec_upd_rxsc,
+ .mdo_del_rxsc = nsim_macsec_del_rxsc,
+ .mdo_add_rxsa = nsim_macsec_add_rxsa,
+ .mdo_upd_rxsa = nsim_macsec_upd_rxsa,
+ .mdo_del_rxsa = nsim_macsec_del_rxsa,
+ .mdo_add_txsa = nsim_macsec_add_txsa,
+ .mdo_upd_txsa = nsim_macsec_upd_txsa,
+ .mdo_del_txsa = nsim_macsec_del_txsa,
+};
+
+void nsim_macsec_init(struct netdevsim *ns)
+{
+ ns->netdev->macsec_ops = &nsim_macsec_ops;
+ ns->netdev->features |= NETIF_F_HW_MACSEC;
+ memset(&ns->macsec, 0, sizeof(ns->macsec));
+}
+
+void nsim_macsec_teardown(struct netdevsim *ns)
+{
+}
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 35fa1ca98671..0c8daeb0d62b 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -304,6 +304,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns)
if (err)
goto err_utn_destroy;
+ nsim_macsec_init(ns);
nsim_ipsec_init(ns);
err = register_netdevice(ns->netdev);
@@ -314,6 +315,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns)
err_ipsec_teardown:
nsim_ipsec_teardown(ns);
+ nsim_macsec_teardown(ns);
nsim_bpf_uninit(ns);
err_utn_destroy:
rtnl_unlock();
@@ -374,6 +376,7 @@ void nsim_destroy(struct netdevsim *ns)
rtnl_lock();
unregister_netdevice(dev);
if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
+ nsim_macsec_teardown(ns);
nsim_ipsec_teardown(ns);
nsim_bpf_uninit(ns);
}
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 7d8ed8d8df5c..7be98b7dcca9 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -23,6 +23,7 @@
#include <net/devlink.h>
#include <net/udp_tunnel.h>
#include <net/xdp.h>
+#include <net/macsec.h>
#define DRV_NAME "netdevsim"
@@ -52,6 +53,25 @@ struct nsim_ipsec {
u32 ok;
};
+#define NSIM_MACSEC_MAX_SECY_COUNT 3
+#define NSIM_MACSEC_MAX_RXSC_COUNT 1
+struct nsim_rxsc {
+ sci_t sci;
+ bool used;
+};
+
+struct nsim_secy {
+ sci_t sci;
+ struct nsim_rxsc nsim_rxsc[NSIM_MACSEC_MAX_RXSC_COUNT];
+ u8 nsim_rxsc_count;
+ bool used;
+};
+
+struct nsim_macsec {
+ struct nsim_secy nsim_secy[NSIM_MACSEC_MAX_SECY_COUNT];
+ u8 nsim_secy_count;
+};
+
struct nsim_ethtool_pauseparam {
bool rx;
bool tx;
@@ -93,6 +113,7 @@ struct netdevsim {
bool bpf_map_accept;
struct nsim_ipsec ipsec;
+ struct nsim_macsec macsec;
struct {
u32 inject_error;
u32 sleep;
@@ -366,6 +387,19 @@ static inline bool nsim_ipsec_tx(struct netdevsim *ns, struct sk_buff *skb)
}
#endif
+#if IS_ENABLED(CONFIG_MACSEC)
+void nsim_macsec_init(struct netdevsim *ns);
+void nsim_macsec_teardown(struct netdevsim *ns);
+#else
+static inline void nsim_macsec_init(struct netdevsim *ns)
+{
+}
+
+static inline void nsim_macsec_teardown(struct netdevsim *ns)
+{
+}
+#endif
+
struct nsim_bus_dev {
struct device dev;
struct list_head list;
diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c
index 323bec5e57f8..e5d642c67a2c 100644
--- a/drivers/net/pcs/pcs-rzn1-miic.c
+++ b/drivers/net/pcs/pcs-rzn1-miic.c
@@ -12,6 +12,7 @@
#include <linux/of_platform.h>
#include <linux/pcs-rzn1-miic.h>
#include <linux/phylink.h>
+#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <dt-bindings/net/pcs-rzn1-miic.h>
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 78e6981650d9..67aaeb75301f 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -217,6 +217,12 @@ config MARVELL_10G_PHY
help
Support for the Marvell Alaska MV88X3310 and compatible PHYs.
+config MARVELL_88Q2XXX_PHY
+ tristate "Marvell 88Q2XXX PHY"
+ help
+ Support for the Marvell 88Q2XXX 100/1000BASE-T1 Automotive Ethernet
+ PHYs.
+
config MARVELL_88X2222_PHY
tristate "Marvell 88X2222 PHY"
help
@@ -344,6 +350,7 @@ config ROCKCHIP_PHY
config SMSC_PHY
tristate "SMSC PHYs"
+ select CRC16
help
Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 2fe51ea83bab..35142780fc9d 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -66,6 +66,7 @@ obj-$(CONFIG_LSI_ET1011C_PHY) += et1011c.o
obj-$(CONFIG_LXT_PHY) += lxt.o
obj-$(CONFIG_MARVELL_10G_PHY) += marvell10g.o
obj-$(CONFIG_MARVELL_PHY) += marvell.o
+obj-$(CONFIG_MARVELL_88Q2XXX_PHY) += marvell-88q2xxx.o
obj-$(CONFIG_MARVELL_88X2222_PHY) += marvell-88x2222.o
obj-$(CONFIG_MAXLINEAR_GPHY) += mxl-gpy.o
obj-$(CONFIG_MEDIATEK_GE_PHY) += mediatek-ge.o
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index c1f307d90518..13c4121fa309 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -272,6 +272,13 @@
#define QCA808X_CDT_STATUS_STAT_OPEN 2
#define QCA808X_CDT_STATUS_STAT_SHORT 3
+/* QCA808X 1G chip type */
+#define QCA808X_PHY_MMD7_CHIP_TYPE 0x901d
+#define QCA808X_PHY_CHIP_TYPE_1G BIT(0)
+
+#define QCA8081_PHY_SERDES_MMD1_FIFO_CTRL 0x9072
+#define QCA8081_PHY_FIFO_RSTN BIT(11)
+
MODULE_DESCRIPTION("Qualcomm Atheros AR803x and QCA808X PHY driver");
MODULE_AUTHOR("Matus Ujhelyi");
MODULE_LICENSE("GPL");
@@ -897,15 +904,6 @@ static int at803x_get_features(struct phy_device *phydev)
if (err)
return err;
- if (phydev->drv->phy_id == QCA8081_PHY_ID) {
- err = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_NG_EXTABLE);
- if (err < 0)
- return err;
-
- linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->supported,
- err & MDIO_PMA_NG_EXTABLE_2_5GBT);
- }
-
if (phydev->drv->phy_id != ATH8031_PHY_ID)
return 0;
@@ -1734,24 +1732,30 @@ static int qca808x_phy_fast_retrain_config(struct phy_device *phydev)
return 0;
}
-static int qca808x_phy_ms_random_seed_set(struct phy_device *phydev)
+static int qca808x_phy_ms_seed_enable(struct phy_device *phydev, bool enable)
{
- u16 seed_value = get_random_u32_below(QCA808X_MASTER_SLAVE_SEED_RANGE);
+ u16 seed_value;
+
+ if (!enable)
+ return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_LOCAL_SEED,
+ QCA808X_MASTER_SLAVE_SEED_ENABLE, 0);
+ seed_value = get_random_u32_below(QCA808X_MASTER_SLAVE_SEED_RANGE);
return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_LOCAL_SEED,
- QCA808X_MASTER_SLAVE_SEED_CFG,
- FIELD_PREP(QCA808X_MASTER_SLAVE_SEED_CFG, seed_value));
+ QCA808X_MASTER_SLAVE_SEED_CFG | QCA808X_MASTER_SLAVE_SEED_ENABLE,
+ FIELD_PREP(QCA808X_MASTER_SLAVE_SEED_CFG, seed_value) |
+ QCA808X_MASTER_SLAVE_SEED_ENABLE);
}
-static int qca808x_phy_ms_seed_enable(struct phy_device *phydev, bool enable)
+static bool qca808x_is_prefer_master(struct phy_device *phydev)
{
- u16 seed_enable = 0;
-
- if (enable)
- seed_enable = QCA808X_MASTER_SLAVE_SEED_ENABLE;
+ return (phydev->master_slave_get == MASTER_SLAVE_CFG_MASTER_FORCE) ||
+ (phydev->master_slave_get == MASTER_SLAVE_CFG_MASTER_PREFERRED);
+}
- return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_LOCAL_SEED,
- QCA808X_MASTER_SLAVE_SEED_ENABLE, seed_enable);
+static bool qca808x_has_fast_retrain_or_slave_seed(struct phy_device *phydev)
+{
+ return linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->supported);
}
static int qca808x_config_init(struct phy_device *phydev)
@@ -1770,20 +1774,25 @@ static int qca808x_config_init(struct phy_device *phydev)
if (ret)
return ret;
- /* Config the fast retrain for the link 2500M */
- ret = qca808x_phy_fast_retrain_config(phydev);
- if (ret)
- return ret;
+ if (qca808x_has_fast_retrain_or_slave_seed(phydev)) {
+ /* Config the fast retrain for the link 2500M */
+ ret = qca808x_phy_fast_retrain_config(phydev);
+ if (ret)
+ return ret;
- /* Configure lower ramdom seed to make phy linked as slave mode */
- ret = qca808x_phy_ms_random_seed_set(phydev);
- if (ret)
- return ret;
+ ret = genphy_read_master_slave(phydev);
+ if (ret < 0)
+ return ret;
- /* Enable seed */
- ret = qca808x_phy_ms_seed_enable(phydev, true);
- if (ret)
- return ret;
+ if (!qca808x_is_prefer_master(phydev)) {
+ /* Enable seed and configure lower ramdom seed to make phy
+ * linked as slave mode.
+ */
+ ret = qca808x_phy_ms_seed_enable(phydev, true);
+ if (ret)
+ return ret;
+ }
+ }
/* Configure adc threshold as 100mv for the link 10M */
return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_ADC_THRESHOLD,
@@ -1816,17 +1825,21 @@ static int qca808x_read_status(struct phy_device *phydev)
phydev->interface = PHY_INTERFACE_MODE_SGMII;
} else {
/* generate seed as a lower random value to make PHY linked as SLAVE easily,
- * except for master/slave configuration fault detected.
+ * except for master/slave configuration fault detected or the master mode
+ * preferred.
+ *
* the reason for not putting this code into the function link_change_notify is
* the corner case where the link partner is also the qca8081 PHY and the seed
* value is configured as the same value, the link can't be up and no link change
* occurs.
*/
- if (phydev->master_slave_state == MASTER_SLAVE_STATE_ERR) {
- qca808x_phy_ms_seed_enable(phydev, false);
- } else {
- qca808x_phy_ms_random_seed_set(phydev);
- qca808x_phy_ms_seed_enable(phydev, true);
+ if (qca808x_has_fast_retrain_or_slave_seed(phydev)) {
+ if (phydev->master_slave_state == MASTER_SLAVE_STATE_ERR ||
+ qca808x_is_prefer_master(phydev)) {
+ qca808x_phy_ms_seed_enable(phydev, false);
+ } else {
+ qca808x_phy_ms_seed_enable(phydev, true);
+ }
}
}
@@ -1841,7 +1854,10 @@ static int qca808x_soft_reset(struct phy_device *phydev)
if (ret < 0)
return ret;
- return qca808x_phy_ms_seed_enable(phydev, true);
+ if (qca808x_has_fast_retrain_or_slave_seed(phydev))
+ ret = qca808x_phy_ms_seed_enable(phydev, true);
+
+ return ret;
}
static bool qca808x_cdt_fault_length_valid(int cdt_code)
@@ -1991,6 +2007,44 @@ static int qca808x_cable_test_get_status(struct phy_device *phydev, bool *finish
return 0;
}
+static int qca808x_get_features(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = genphy_c45_pma_read_abilities(phydev);
+ if (ret)
+ return ret;
+
+ /* The autoneg ability is not existed in bit3 of MMD7.1,
+ * but it is supported by qca808x PHY, so we add it here
+ * manually.
+ */
+ linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->supported);
+
+ /* As for the qca8081 1G version chip, the 2500baseT ability is also
+ * existed in the bit0 of MMD1.21, we need to remove it manually if
+ * it is the qca8081 1G chip according to the bit0 of MMD7.0x901d.
+ */
+ ret = phy_read_mmd(phydev, MDIO_MMD_AN, QCA808X_PHY_MMD7_CHIP_TYPE);
+ if (ret < 0)
+ return ret;
+
+ if (QCA808X_PHY_CHIP_TYPE_1G & ret)
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->supported);
+
+ return 0;
+}
+
+static void qca808x_link_change_notify(struct phy_device *phydev)
+{
+ /* Assert interface sgmii fifo on link down, deassert it on link up,
+ * the interface device address is always phy address added by 1.
+ */
+ mdiobus_c45_modify_changed(phydev->mdio.bus, phydev->mdio.addr + 1,
+ MDIO_MMD_PMAPMD, QCA8081_PHY_SERDES_MMD1_FIFO_CTRL,
+ QCA8081_PHY_FIFO_RSTN, phydev->link ? QCA8081_PHY_FIFO_RSTN : 0);
+}
+
static struct phy_driver at803x_driver[] = {
{
/* Qualcomm Atheros AR8035 */
@@ -2160,7 +2214,7 @@ static struct phy_driver at803x_driver[] = {
.set_tunable = at803x_set_tunable,
.set_wol = at803x_set_wol,
.get_wol = at803x_get_wol,
- .get_features = at803x_get_features,
+ .get_features = qca808x_get_features,
.config_aneg = at803x_config_aneg,
.suspend = genphy_suspend,
.resume = genphy_resume,
@@ -2169,6 +2223,7 @@ static struct phy_driver at803x_driver[] = {
.soft_reset = qca808x_soft_reset,
.cable_test_start = qca808x_cable_test_start,
.cable_test_get_status = qca808x_cable_test_get_status,
+ .link_change_notify = qca808x_link_change_notify,
}, };
module_phy_driver(at803x_driver);
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index f8c17a253f8b..8478b081c058 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -913,6 +913,7 @@ static struct phy_driver bcm7xxx_driver[] = {
BCM7XXX_28NM_GPHY(PHY_ID_BCM7278, "Broadcom BCM7278"),
BCM7XXX_28NM_GPHY(PHY_ID_BCM7364, "Broadcom BCM7364"),
BCM7XXX_28NM_GPHY(PHY_ID_BCM7366, "Broadcom BCM7366"),
+ BCM7XXX_16NM_EPHY(PHY_ID_BCM74165, "Broadcom BCM74165"),
BCM7XXX_28NM_GPHY(PHY_ID_BCM74371, "Broadcom BCM74371"),
BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"),
BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"),
diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c
new file mode 100644
index 000000000000..1c3ff77de56b
--- /dev/null
+++ b/drivers/net/phy/marvell-88q2xxx.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Marvell 88Q2XXX automotive 100BASE-T1/1000BASE-T1 PHY driver
+ */
+#include <linux/ethtool_netlink.h>
+#include <linux/marvell_phy.h>
+#include <linux/phy.h>
+
+#define MDIO_MMD_AN_MV_STAT 32769
+#define MDIO_MMD_AN_MV_STAT_ANEG 0x0100
+#define MDIO_MMD_AN_MV_STAT_LOCAL_RX 0x1000
+#define MDIO_MMD_AN_MV_STAT_REMOTE_RX 0x2000
+#define MDIO_MMD_AN_MV_STAT_LOCAL_MASTER 0x4000
+#define MDIO_MMD_AN_MV_STAT_MS_CONF_FAULT 0x8000
+
+#define MDIO_MMD_PCS_MV_100BT1_STAT1 33032
+#define MDIO_MMD_PCS_MV_100BT1_STAT1_IDLE_ERROR 0x00FF
+#define MDIO_MMD_PCS_MV_100BT1_STAT1_JABBER 0x0100
+#define MDIO_MMD_PCS_MV_100BT1_STAT1_LINK 0x0200
+#define MDIO_MMD_PCS_MV_100BT1_STAT1_LOCAL_RX 0x1000
+#define MDIO_MMD_PCS_MV_100BT1_STAT1_REMOTE_RX 0x2000
+#define MDIO_MMD_PCS_MV_100BT1_STAT1_LOCAL_MASTER 0x4000
+
+#define MDIO_MMD_PCS_MV_100BT1_STAT2 33033
+#define MDIO_MMD_PCS_MV_100BT1_STAT2_JABBER 0x0001
+#define MDIO_MMD_PCS_MV_100BT1_STAT2_POL 0x0002
+#define MDIO_MMD_PCS_MV_100BT1_STAT2_LINK 0x0004
+#define MDIO_MMD_PCS_MV_100BT1_STAT2_ANGE 0x0008
+
+static int mv88q2xxx_soft_reset(struct phy_device *phydev)
+{
+ int ret;
+ int val;
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_PCS_1000BT1_CTRL, MDIO_PCS_1000BT1_CTRL_RESET);
+ if (ret < 0)
+ return ret;
+
+ return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_PCS,
+ MDIO_PCS_1000BT1_CTRL, val,
+ !(val & MDIO_PCS_1000BT1_CTRL_RESET),
+ 50000, 600000, true);
+}
+
+static int mv88q2xxx_read_link_gbit(struct phy_device *phydev)
+{
+ int ret;
+ bool link = false;
+
+ /* Read vendor specific Auto-Negotiation status register to get local
+ * and remote receiver status according to software initialization
+ * guide.
+ */
+ ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_MMD_AN_MV_STAT);
+ if (ret < 0) {
+ return ret;
+ } else if ((ret & MDIO_MMD_AN_MV_STAT_LOCAL_RX) &&
+ (ret & MDIO_MMD_AN_MV_STAT_REMOTE_RX)) {
+ /* The link state is latched low so that momentary link
+ * drops can be detected. Do not double-read the status
+ * in polling mode to detect such short link drops except
+ * the link was already down.
+ */
+ if (!phy_polling_mode(phydev) || !phydev->link) {
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_1000BT1_STAT);
+ if (ret < 0)
+ return ret;
+ else if (ret & MDIO_PCS_1000BT1_STAT_LINK)
+ link = true;
+ }
+
+ if (!link) {
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_1000BT1_STAT);
+ if (ret < 0)
+ return ret;
+ else if (ret & MDIO_PCS_1000BT1_STAT_LINK)
+ link = true;
+ }
+ }
+
+ phydev->link = link;
+
+ return 0;
+}
+
+static int mv88q2xxx_read_link_100m(struct phy_device *phydev)
+{
+ int ret;
+
+ /* The link state is latched low so that momentary link
+ * drops can be detected. Do not double-read the status
+ * in polling mode to detect such short link drops except
+ * the link was already down. In case we are not polling,
+ * we always read the realtime status.
+ */
+ if (!phy_polling_mode(phydev) || !phydev->link) {
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_100BT1_STAT1);
+ if (ret < 0)
+ return ret;
+ else if (ret & MDIO_MMD_PCS_MV_100BT1_STAT1_LINK)
+ goto out;
+ }
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_100BT1_STAT1);
+ if (ret < 0)
+ return ret;
+
+out:
+ /* Check if we have link and if the remote and local receiver are ok */
+ if ((ret & MDIO_MMD_PCS_MV_100BT1_STAT1_LINK) &&
+ (ret & MDIO_MMD_PCS_MV_100BT1_STAT1_LOCAL_RX) &&
+ (ret & MDIO_MMD_PCS_MV_100BT1_STAT1_REMOTE_RX))
+ phydev->link = true;
+ else
+ phydev->link = false;
+
+ return 0;
+}
+
+static int mv88q2xxx_read_link(struct phy_device *phydev)
+{
+ int ret;
+
+ /* The 88Q2XXX PHYs do not have the PMA/PMD status register available,
+ * therefore we need to read the link status from the vendor specific
+ * registers depending on the speed.
+ */
+ if (phydev->speed == SPEED_1000)
+ ret = mv88q2xxx_read_link_gbit(phydev);
+ else
+ ret = mv88q2xxx_read_link_100m(phydev);
+
+ return ret;
+}
+
+static int mv88q2xxx_read_status(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = mv88q2xxx_read_link(phydev);
+ if (ret < 0)
+ return ret;
+
+ return genphy_c45_read_pma(phydev);
+}
+
+static int mv88q2xxx_get_features(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = genphy_c45_pma_read_abilities(phydev);
+ if (ret)
+ return ret;
+
+ /* We need to read the baset1 extended abilities manually because the
+ * PHY does not signalize it has the extended abilities register
+ * available.
+ */
+ ret = genphy_c45_pma_baset1_read_abilities(phydev);
+ if (ret)
+ return ret;
+
+ /* The PHY signalizes it supports autonegotiation. Unfortunately, so
+ * far it was not possible to get a link even when following the init
+ * sequence provided by Marvell. Disable it for now until a proper
+ * workaround is found or a new PHY revision is released.
+ */
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->supported);
+
+ return 0;
+}
+
+static int mv88q2xxx_config_aneg(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = genphy_c45_config_aneg(phydev);
+ if (ret)
+ return ret;
+
+ return mv88q2xxx_soft_reset(phydev);
+}
+
+static int mv88q2xxx_config_init(struct phy_device *phydev)
+{
+ int ret;
+
+ /* The 88Q2XXX PHYs do have the extended ability register available, but
+ * register MDIO_PMA_EXTABLE where they should signalize it does not
+ * work according to specification. Therefore, we force it here.
+ */
+ phydev->pma_extable = MDIO_PMA_EXTABLE_BT1;
+
+ /* Read the current PHY configuration */
+ ret = genphy_c45_read_pma(phydev);
+ if (ret)
+ return ret;
+
+ return mv88q2xxx_config_aneg(phydev);
+}
+
+static int mv88q2xxxx_get_sqi(struct phy_device *phydev)
+{
+ int ret;
+
+ if (phydev->speed == SPEED_100) {
+ /* Read the SQI from the vendor specific receiver status
+ * register
+ */
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS, 0x8230);
+ if (ret < 0)
+ return ret;
+
+ ret = ret >> 12;
+ } else {
+ /* Read from vendor specific registers, they are not documented
+ * but can be found in the Software Initialization Guide. Only
+ * revisions >= A0 are supported.
+ */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, 0xFC5D, 0x00FF, 0x00AC);
+ if (ret < 0)
+ return ret;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_PCS, 0xfc88);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret & 0x0F;
+}
+
+static int mv88q2xxxx_get_sqi_max(struct phy_device *phydev)
+{
+ return 15;
+}
+
+static struct phy_driver mv88q2xxx_driver[] = {
+ {
+ .phy_id = MARVELL_PHY_ID_88Q2110,
+ .phy_id_mask = MARVELL_PHY_ID_MASK,
+ .name = "mv88q2110",
+ .get_features = mv88q2xxx_get_features,
+ .config_aneg = mv88q2xxx_config_aneg,
+ .config_init = mv88q2xxx_config_init,
+ .read_status = mv88q2xxx_read_status,
+ .soft_reset = mv88q2xxx_soft_reset,
+ .set_loopback = genphy_c45_loopback,
+ .get_sqi = mv88q2xxxx_get_sqi,
+ .get_sqi_max = mv88q2xxxx_get_sqi_max,
+ },
+};
+
+module_phy_driver(mv88q2xxx_driver);
+
+static struct mdio_device_id __maybe_unused mv88q2xxx_tbl[] = {
+ { MARVELL_PHY_ID_88Q2110, MARVELL_PHY_ID_MASK },
+ { /*sentinel*/ }
+};
+MODULE_DEVICE_TABLE(mdio, mv88q2xxx_tbl);
+
+MODULE_DESCRIPTION("Marvell 88Q2XXX 100/1000BASE-T1 Automotive Ethernet PHY driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c
index f83cae64585d..e3aa30dad2e6 100644
--- a/drivers/net/phy/marvell-88x2222.c
+++ b/drivers/net/phy/marvell-88x2222.c
@@ -14,7 +14,6 @@
#include <linux/mdio.h>
#include <linux/marvell_phy.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/of_gpio.h>
#include <linux/sfp.h>
#include <linux/netdevice.h>
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 8b3618d3da4a..25dcaa49ab8b 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -107,16 +107,21 @@ int mdiobus_unregister_device(struct mdio_device *mdiodev)
}
EXPORT_SYMBOL(mdiobus_unregister_device);
-struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr)
+static struct mdio_device *mdiobus_find_device(struct mii_bus *bus, int addr)
{
bool addr_valid = addr >= 0 && addr < ARRAY_SIZE(bus->mdio_map);
- struct mdio_device *mdiodev;
if (WARN_ONCE(!addr_valid, "addr %d out of range\n", addr))
return NULL;
- mdiodev = bus->mdio_map[addr];
+ return bus->mdio_map[addr];
+}
+
+struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr)
+{
+ struct mdio_device *mdiodev;
+ mdiodev = mdiobus_find_device(bus, addr);
if (!mdiodev)
return NULL;
@@ -129,7 +134,7 @@ EXPORT_SYMBOL(mdiobus_get_phy);
bool mdiobus_is_registered_device(struct mii_bus *bus, int addr)
{
- return bus->mdio_map[addr];
+ return mdiobus_find_device(bus, addr) != NULL;
}
EXPORT_SYMBOL(mdiobus_is_registered_device);
@@ -1210,6 +1215,26 @@ int mdiobus_c45_write_nested(struct mii_bus *bus, int addr, int devad,
}
EXPORT_SYMBOL(mdiobus_c45_write_nested);
+/*
+ * __mdiobus_modify - Convenience function for modifying a given mdio device
+ * register
+ * @bus: the mii_bus struct
+ * @addr: the phy address
+ * @regnum: register number to write
+ * @mask: bit mask of bits to clear
+ * @set: bit mask of bits to set
+ */
+int __mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask,
+ u16 set)
+{
+ int err;
+
+ err = __mdiobus_modify_changed(bus, addr, regnum, mask, set);
+
+ return err < 0 ? err : 0;
+}
+EXPORT_SYMBOL_GPL(__mdiobus_modify);
+
/**
* mdiobus_modify - Convenience function for modifying a given mdio device
* register
@@ -1224,10 +1249,10 @@ int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set)
int err;
mutex_lock(&bus->mdio_lock);
- err = __mdiobus_modify_changed(bus, addr, regnum, mask, set);
+ err = __mdiobus_modify(bus, addr, regnum, mask, set);
mutex_unlock(&bus->mdio_lock);
- return err < 0 ? err : 0;
+ return err;
}
EXPORT_SYMBOL_GPL(mdiobus_modify);
diff --git a/drivers/net/phy/mediatek-ge-soc.c b/drivers/net/phy/mediatek-ge-soc.c
index 95369171a7ba..da512fab0eb0 100644
--- a/drivers/net/phy/mediatek-ge-soc.c
+++ b/drivers/net/phy/mediatek-ge-soc.c
@@ -2,8 +2,6 @@
#include <linux/bitfield.h>
#include <linux/module.h>
#include <linux/nvmem-consumer.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
#include <linux/pinctrl/consumer.h>
#include <linux/phy.h>
diff --git a/drivers/net/phy/motorcomm.c b/drivers/net/phy/motorcomm.c
index 2fa5a90e073b..7a11fdb687cc 100644
--- a/drivers/net/phy/motorcomm.c
+++ b/drivers/net/phy/motorcomm.c
@@ -163,6 +163,10 @@
#define YT8521_CHIP_CONFIG_REG 0xA001
#define YT8521_CCR_SW_RST BIT(15)
+#define YT8531_RGMII_LDO_VOL_MASK GENMASK(5, 4)
+#define YT8531_LDO_VOL_3V3 0x0
+#define YT8531_LDO_VOL_1V8 0x2
+
/* 1b0 disable 1.9ns rxc clock delay *default*
* 1b1 enable 1.9ns rxc clock delay
*/
@@ -236,6 +240,12 @@
*/
#define YTPHY_WCR_TYPE_PULSE BIT(0)
+#define YTPHY_PAD_DRIVE_STRENGTH_REG 0xA010
+#define YT8531_RGMII_RXC_DS_MASK GENMASK(15, 13)
+#define YT8531_RGMII_RXD_DS_HI_MASK BIT(12) /* Bit 2 of rxd_ds */
+#define YT8531_RGMII_RXD_DS_LOW_MASK GENMASK(5, 4) /* Bit 1/0 of rxd_ds */
+#define YT8531_RGMII_RX_DS_DEFAULT 0x3
+
#define YTPHY_SYNCE_CFG_REG 0xA012
#define YT8521_SCR_SYNCE_ENABLE BIT(5)
/* 1b0 output 25m clock
@@ -835,6 +845,110 @@ static int ytphy_rgmii_clk_delay_config_with_lock(struct phy_device *phydev)
}
/**
+ * struct ytphy_ldo_vol_map - map a current value to a register value
+ * @vol: ldo voltage
+ * @ds: value in the register
+ * @cur: value in device configuration
+ */
+struct ytphy_ldo_vol_map {
+ u32 vol;
+ u32 ds;
+ u32 cur;
+};
+
+static const struct ytphy_ldo_vol_map yt8531_ldo_vol[] = {
+ {.vol = YT8531_LDO_VOL_1V8, .ds = 0, .cur = 1200},
+ {.vol = YT8531_LDO_VOL_1V8, .ds = 1, .cur = 2100},
+ {.vol = YT8531_LDO_VOL_1V8, .ds = 2, .cur = 2700},
+ {.vol = YT8531_LDO_VOL_1V8, .ds = 3, .cur = 2910},
+ {.vol = YT8531_LDO_VOL_1V8, .ds = 4, .cur = 3110},
+ {.vol = YT8531_LDO_VOL_1V8, .ds = 5, .cur = 3600},
+ {.vol = YT8531_LDO_VOL_1V8, .ds = 6, .cur = 3970},
+ {.vol = YT8531_LDO_VOL_1V8, .ds = 7, .cur = 4350},
+ {.vol = YT8531_LDO_VOL_3V3, .ds = 0, .cur = 3070},
+ {.vol = YT8531_LDO_VOL_3V3, .ds = 1, .cur = 4080},
+ {.vol = YT8531_LDO_VOL_3V3, .ds = 2, .cur = 4370},
+ {.vol = YT8531_LDO_VOL_3V3, .ds = 3, .cur = 4680},
+ {.vol = YT8531_LDO_VOL_3V3, .ds = 4, .cur = 5020},
+ {.vol = YT8531_LDO_VOL_3V3, .ds = 5, .cur = 5450},
+ {.vol = YT8531_LDO_VOL_3V3, .ds = 6, .cur = 5740},
+ {.vol = YT8531_LDO_VOL_3V3, .ds = 7, .cur = 6140},
+};
+
+static u32 yt8531_get_ldo_vol(struct phy_device *phydev)
+{
+ u32 val;
+
+ val = ytphy_read_ext_with_lock(phydev, YT8521_CHIP_CONFIG_REG);
+ val = FIELD_GET(YT8531_RGMII_LDO_VOL_MASK, val);
+
+ return val <= YT8531_LDO_VOL_1V8 ? val : YT8531_LDO_VOL_1V8;
+}
+
+static int yt8531_get_ds_map(struct phy_device *phydev, u32 cur)
+{
+ u32 vol;
+ int i;
+
+ vol = yt8531_get_ldo_vol(phydev);
+ for (i = 0; i < ARRAY_SIZE(yt8531_ldo_vol); i++) {
+ if (yt8531_ldo_vol[i].vol == vol && yt8531_ldo_vol[i].cur == cur)
+ return yt8531_ldo_vol[i].ds;
+ }
+
+ return -EINVAL;
+}
+
+static int yt8531_set_ds(struct phy_device *phydev)
+{
+ struct device_node *node = phydev->mdio.dev.of_node;
+ u32 ds_field_low, ds_field_hi, val;
+ int ret, ds;
+
+ /* set rgmii rx clk driver strength */
+ if (!of_property_read_u32(node, "motorcomm,rx-clk-drv-microamp", &val)) {
+ ds = yt8531_get_ds_map(phydev, val);
+ if (ds < 0)
+ return dev_err_probe(&phydev->mdio.dev, ds,
+ "No matching current value was found.\n");
+ } else {
+ ds = YT8531_RGMII_RX_DS_DEFAULT;
+ }
+
+ ret = ytphy_modify_ext_with_lock(phydev,
+ YTPHY_PAD_DRIVE_STRENGTH_REG,
+ YT8531_RGMII_RXC_DS_MASK,
+ FIELD_PREP(YT8531_RGMII_RXC_DS_MASK, ds));
+ if (ret < 0)
+ return ret;
+
+ /* set rgmii rx data driver strength */
+ if (!of_property_read_u32(node, "motorcomm,rx-data-drv-microamp", &val)) {
+ ds = yt8531_get_ds_map(phydev, val);
+ if (ds < 0)
+ return dev_err_probe(&phydev->mdio.dev, ds,
+ "No matching current value was found.\n");
+ } else {
+ ds = YT8531_RGMII_RX_DS_DEFAULT;
+ }
+
+ ds_field_hi = FIELD_GET(BIT(2), ds);
+ ds_field_hi = FIELD_PREP(YT8531_RGMII_RXD_DS_HI_MASK, ds_field_hi);
+
+ ds_field_low = FIELD_GET(GENMASK(1, 0), ds);
+ ds_field_low = FIELD_PREP(YT8531_RGMII_RXD_DS_LOW_MASK, ds_field_low);
+
+ ret = ytphy_modify_ext_with_lock(phydev,
+ YTPHY_PAD_DRIVE_STRENGTH_REG,
+ YT8531_RGMII_RXD_DS_LOW_MASK | YT8531_RGMII_RXD_DS_HI_MASK,
+ ds_field_low | ds_field_hi);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/**
* yt8521_probe() - read chip config then set suitable polling_mode
* @phydev: a pointer to a &struct phy_device
*
@@ -1518,6 +1632,10 @@ static int yt8531_config_init(struct phy_device *phydev)
return ret;
}
+ ret = yt8531_set_ds(phydev);
+ if (ret < 0)
+ return ret;
+
return 0;
}
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 93ed07223377..8e6fd4962c48 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -108,7 +108,7 @@ EXPORT_SYMBOL_GPL(genphy_c45_pma_baset1_setup_master_slave);
*/
int genphy_c45_pma_setup_forced(struct phy_device *phydev)
{
- int ctrl1, ctrl2, ret;
+ int bt1_ctrl, ctrl1, ctrl2, ret;
/* Half duplex is not supported */
if (phydev->duplex != DUPLEX_FULL)
@@ -176,6 +176,15 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev)
ret = genphy_c45_pma_baset1_setup_master_slave(phydev);
if (ret < 0)
return ret;
+
+ bt1_ctrl = 0;
+ if (phydev->speed == SPEED_1000)
+ bt1_ctrl = MDIO_PMA_PMD_BT1_CTRL_STRAP_B1000;
+
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_PMD_BT1_CTRL,
+ MDIO_PMA_PMD_BT1_CTRL_STRAP, bt1_ctrl);
+ if (ret < 0)
+ return ret;
}
return genphy_c45_an_disable_aneg(phydev);
@@ -873,6 +882,44 @@ int genphy_c45_an_config_eee_aneg(struct phy_device *phydev)
}
/**
+ * genphy_c45_pma_baset1_read_abilities - read supported baset1 link modes from PMA
+ * @phydev: target phy_device struct
+ *
+ * Read the supported link modes from the extended BASE-T1 ability register
+ */
+int genphy_c45_pma_baset1_read_abilities(struct phy_device *phydev)
+{
+ int val;
+
+ val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_PMD_BT1);
+ if (val < 0)
+ return val;
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT,
+ phydev->supported,
+ val & MDIO_PMA_PMD_BT1_B10L_ABLE);
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT,
+ phydev->supported,
+ val & MDIO_PMA_PMD_BT1_B100_ABLE);
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT,
+ phydev->supported,
+ val & MDIO_PMA_PMD_BT1_B1000_ABLE);
+
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_T1_STAT);
+ if (val < 0)
+ return val;
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+ phydev->supported,
+ val & MDIO_AN_STAT1_ABLE);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(genphy_c45_pma_baset1_read_abilities);
+
+/**
* genphy_c45_pma_read_abilities - read supported link modes from PMA
* @phydev: target phy_device struct
*
@@ -968,21 +1015,9 @@ int genphy_c45_pma_read_abilities(struct phy_device *phydev)
}
if (val & MDIO_PMA_EXTABLE_BT1) {
- val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_PMD_BT1);
+ val = genphy_c45_pma_baset1_read_abilities(phydev);
if (val < 0)
return val;
-
- linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT,
- phydev->supported,
- val & MDIO_PMA_PMD_BT1_B10L_ABLE);
-
- val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_T1_STAT);
- if (val < 0)
- return val;
-
- linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
- phydev->supported,
- val & MDIO_AN_STAT1_ABLE);
}
}
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index d0aaa5cad853..4f1c8bb199e9 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -34,6 +34,10 @@ enum {
PHYLINK_DISABLE_STOPPED,
PHYLINK_DISABLE_LINK,
PHYLINK_DISABLE_MAC_WOL,
+
+ PCS_STATE_DOWN = 0,
+ PCS_STATE_STARTING,
+ PCS_STATE_STARTED,
};
/**
@@ -72,6 +76,7 @@ struct phylink {
struct phylink_link_state phy_state;
struct work_struct resolve;
unsigned int pcs_neg_mode;
+ unsigned int pcs_state;
bool mac_link_dropped;
bool using_mac_select_pcs;
@@ -993,6 +998,40 @@ static void phylink_resolve_an_pause(struct phylink_link_state *state)
}
}
+static void phylink_pcs_pre_config(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ if (pcs && pcs->ops->pcs_pre_config)
+ pcs->ops->pcs_pre_config(pcs, interface);
+}
+
+static int phylink_pcs_post_config(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ int err = 0;
+
+ if (pcs && pcs->ops->pcs_post_config)
+ err = pcs->ops->pcs_post_config(pcs, interface);
+
+ return err;
+}
+
+static void phylink_pcs_disable(struct phylink_pcs *pcs)
+{
+ if (pcs && pcs->ops->pcs_disable)
+ pcs->ops->pcs_disable(pcs);
+}
+
+static int phylink_pcs_enable(struct phylink_pcs *pcs)
+{
+ int err = 0;
+
+ if (pcs && pcs->ops->pcs_enable)
+ err = pcs->ops->pcs_enable(pcs);
+
+ return err;
+}
+
static int phylink_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
const struct phylink_link_state *state,
bool permit_pause_to_mac)
@@ -1027,30 +1066,33 @@ static void phylink_pcs_poll_start(struct phylink *pl)
static void phylink_mac_config(struct phylink *pl,
const struct phylink_link_state *state)
{
+ struct phylink_link_state st = *state;
+
+ /* Stop drivers incorrectly using these */
+ linkmode_zero(st.lp_advertising);
+ st.speed = SPEED_UNKNOWN;
+ st.duplex = DUPLEX_UNKNOWN;
+ st.an_complete = false;
+ st.link = false;
+
phylink_dbg(pl,
- "%s: mode=%s/%s/%s/%s/%s adv=%*pb pause=%02x link=%u\n",
+ "%s: mode=%s/%s/%s adv=%*pb pause=%02x\n",
__func__, phylink_an_mode_str(pl->cur_link_an_mode),
- phy_modes(state->interface),
- phy_speed_to_str(state->speed),
- phy_duplex_to_str(state->duplex),
- phy_rate_matching_to_str(state->rate_matching),
- __ETHTOOL_LINK_MODE_MASK_NBITS, state->advertising,
- state->pause, state->link);
+ phy_modes(st.interface),
+ phy_rate_matching_to_str(st.rate_matching),
+ __ETHTOOL_LINK_MODE_MASK_NBITS, st.advertising,
+ st.pause);
- pl->mac_ops->mac_config(pl->config, pl->cur_link_an_mode, state);
+ pl->mac_ops->mac_config(pl->config, pl->cur_link_an_mode, &st);
}
-static void phylink_mac_pcs_an_restart(struct phylink *pl)
+static void phylink_pcs_an_restart(struct phylink *pl)
{
- if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
- pl->link_config.advertising) &&
+ if (pl->pcs && linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+ pl->link_config.advertising) &&
phy_interface_mode_is_8023z(pl->link_config.interface) &&
- phylink_autoneg_inband(pl->cur_link_an_mode)) {
- if (pl->pcs)
- pl->pcs->ops->pcs_an_restart(pl->pcs);
- else if (pl->config->legacy_pre_march2020)
- pl->mac_ops->mac_an_restart(pl->config);
- }
+ phylink_autoneg_inband(pl->cur_link_an_mode))
+ pl->pcs->ops->pcs_an_restart(pl->pcs);
}
static void phylink_major_config(struct phylink *pl, bool restart,
@@ -1095,11 +1137,28 @@ static void phylink_major_config(struct phylink *pl, bool restart,
/* If we have a new PCS, switch to the new PCS after preparing the MAC
* for the change.
*/
- if (pcs_changed)
+ if (pcs_changed) {
+ phylink_pcs_disable(pl->pcs);
+
+ if (pl->pcs)
+ pl->pcs->phylink = NULL;
+
+ pcs->phylink = pl;
+
pl->pcs = pcs;
+ }
+
+ if (pl->pcs)
+ phylink_pcs_pre_config(pl->pcs, state->interface);
phylink_mac_config(pl, state);
+ if (pl->pcs)
+ phylink_pcs_post_config(pl->pcs, state->interface);
+
+ if (pl->pcs_state == PCS_STATE_STARTING || pcs_changed)
+ phylink_pcs_enable(pl->pcs);
+
neg_mode = pl->cur_link_an_mode;
if (pl->pcs && pl->pcs->neg_mode)
neg_mode = pl->pcs_neg_mode;
@@ -1113,7 +1172,7 @@ static void phylink_major_config(struct phylink *pl, bool restart,
restart = true;
if (restart)
- phylink_mac_pcs_an_restart(pl);
+ phylink_pcs_an_restart(pl);
if (pl->mac_ops->mac_finish) {
err = pl->mac_ops->mac_finish(pl->config, pl->cur_link_an_mode,
@@ -1146,13 +1205,6 @@ static int phylink_change_inband_advert(struct phylink *pl)
if (test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state))
return 0;
- if (!pl->pcs && pl->config->legacy_pre_march2020) {
- /* Legacy method */
- phylink_mac_config(pl, &pl->link_config);
- phylink_mac_pcs_an_restart(pl);
- return 0;
- }
-
phylink_dbg(pl, "%s: mode=%s/%s adv=%*pb pause=%02x\n", __func__,
phylink_an_mode_str(pl->cur_link_an_mode),
phy_modes(pl->link_config.interface),
@@ -1178,7 +1230,7 @@ static int phylink_change_inband_advert(struct phylink *pl)
return ret;
if (ret > 0)
- phylink_mac_pcs_an_restart(pl);
+ phylink_pcs_an_restart(pl);
return 0;
}
@@ -1205,9 +1257,6 @@ static void phylink_mac_pcs_get_state(struct phylink *pl,
if (pl->pcs)
pl->pcs->ops->pcs_get_state(pl->pcs, state);
- else if (pl->mac_ops->mac_pcs_get_state &&
- pl->config->legacy_pre_march2020)
- pl->mac_ops->mac_pcs_get_state(pl->config, state);
else
state->link = 0;
}
@@ -1440,13 +1489,6 @@ static void phylink_resolve(struct work_struct *w)
}
phylink_major_config(pl, false, &link_state);
pl->link_config.interface = link_state.interface;
- } else if (!pl->pcs && pl->config->legacy_pre_march2020) {
- /* The interface remains unchanged, only the speed,
- * duplex or pause settings have changed. Call the
- * old mac_config() method to configure the MAC/PCS
- * only if we do not have a legacy MAC driver.
- */
- phylink_mac_config(pl, &link_state);
}
}
@@ -1586,6 +1628,7 @@ struct phylink *phylink_create(struct phylink_config *config,
pl->link_config.pause = MLO_PAUSE_AN;
pl->link_config.speed = SPEED_UNKNOWN;
pl->link_config.duplex = DUPLEX_UNKNOWN;
+ pl->pcs_state = PCS_STATE_DOWN;
pl->mac_ops = mac_ops;
__set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
timer_setup(&pl->link_poll, phylink_fixed_poll, 0);
@@ -1939,6 +1982,14 @@ void phylink_disconnect_phy(struct phylink *pl)
}
EXPORT_SYMBOL_GPL(phylink_disconnect_phy);
+static void phylink_link_changed(struct phylink *pl, bool up, const char *what)
+{
+ if (!up)
+ pl->mac_link_dropped = true;
+ phylink_run_resolve(pl);
+ phylink_dbg(pl, "%s link %s\n", what, up ? "up" : "down");
+}
+
/**
* phylink_mac_change() - notify phylink of a change in MAC state
* @pl: a pointer to a &struct phylink returned from phylink_create()
@@ -1949,13 +2000,30 @@ EXPORT_SYMBOL_GPL(phylink_disconnect_phy);
*/
void phylink_mac_change(struct phylink *pl, bool up)
{
- if (!up)
- pl->mac_link_dropped = true;
- phylink_run_resolve(pl);
- phylink_dbg(pl, "mac link %s\n", up ? "up" : "down");
+ phylink_link_changed(pl, up, "mac");
}
EXPORT_SYMBOL_GPL(phylink_mac_change);
+/**
+ * phylink_pcs_change() - notify phylink of a change to PCS link state
+ * @pcs: pointer to &struct phylink_pcs
+ * @up: indicates whether the link is currently up.
+ *
+ * The PCS driver should call this when the state of its link changes
+ * (e.g. link failure, new negotiation results, etc.) Note: it should
+ * not determine "up" by reading the BMSR. If in doubt about the link
+ * state at interrupt time, then pass true if pcs_get_state() returns
+ * the latched link-down state, otherwise pass false.
+ */
+void phylink_pcs_change(struct phylink_pcs *pcs, bool up)
+{
+ struct phylink *pl = pcs->phylink;
+
+ if (pl)
+ phylink_link_changed(pl, up, "pcs");
+}
+EXPORT_SYMBOL_GPL(phylink_pcs_change);
+
static irqreturn_t phylink_link_handler(int irq, void *data)
{
struct phylink *pl = data;
@@ -1987,6 +2055,8 @@ void phylink_start(struct phylink *pl)
if (pl->netdev)
netif_carrier_off(pl->netdev);
+ pl->pcs_state = PCS_STATE_STARTING;
+
/* Apply the link configuration to the MAC when starting. This allows
* a fixed-link to start with the correct parameters, and also
* ensures that we set the appropriate advertisement for Serdes links.
@@ -1997,6 +2067,8 @@ void phylink_start(struct phylink *pl)
*/
phylink_mac_initial_config(pl, true);
+ pl->pcs_state = PCS_STATE_STARTED;
+
phylink_enable_and_run_resolve(pl, PHYLINK_DISABLE_STOPPED);
if (pl->cfg_link_an_mode == MLO_AN_FIXED && pl->link_gpio) {
@@ -2015,15 +2087,9 @@ void phylink_start(struct phylink *pl)
poll = true;
}
- switch (pl->cfg_link_an_mode) {
- case MLO_AN_FIXED:
+ if (pl->cfg_link_an_mode == MLO_AN_FIXED)
poll |= pl->config->poll_fixed_state;
- break;
- case MLO_AN_INBAND:
- if (pl->pcs)
- poll |= pl->pcs->poll;
- break;
- }
+
if (poll)
mod_timer(&pl->link_poll, jiffies + HZ);
if (pl->phydev)
@@ -2060,6 +2126,10 @@ void phylink_stop(struct phylink *pl)
}
phylink_run_resolve_and_disable(pl, PHYLINK_DISABLE_STOPPED);
+
+ pl->pcs_state = PCS_STATE_DOWN;
+
+ phylink_pcs_disable(pl->pcs);
}
EXPORT_SYMBOL_GPL(phylink_stop);
@@ -2449,7 +2519,7 @@ int phylink_ethtool_nway_reset(struct phylink *pl)
if (pl->phydev)
ret = phy_restart_aneg(pl->phydev);
- phylink_mac_pcs_an_restart(pl);
+ phylink_pcs_an_restart(pl);
return ret;
}
@@ -3433,7 +3503,7 @@ static void phylink_decode_usgmii_word(struct phylink_link_state *state,
*
* Parse the Clause 37 or Cisco SGMII link partner negotiation word into
* the phylink @state structure. This is suitable to be used for implementing
- * the mac_pcs_get_state() member of the struct phylink_mac_ops structure if
+ * the pcs_get_state() member of the struct phylink_pcs_ops structure if
* accessing @bmsr and @lpa cannot be done with MDIO directly.
*/
void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state,
@@ -3483,7 +3553,7 @@ EXPORT_SYMBOL_GPL(phylink_mii_c22_pcs_decode_state);
* Read the MAC PCS state from the MII device configured in @config and
* parse the Clause 37 or Cisco SGMII link partner negotiation word into
* the phylink @state structure. This is suitable to be directly plugged
- * into the mac_pcs_get_state() member of the struct phylink_mac_ops
+ * into the pcs_get_state() member of the struct phylink_pcs_ops
* structure.
*/
void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs,
@@ -3594,8 +3664,8 @@ EXPORT_SYMBOL_GPL(phylink_mii_c22_pcs_config);
* clause 37 negotiation.
*
* Restart the clause 37 negotiation with the link partner. This is
- * suitable to be directly plugged into the mac_pcs_get_state() member
- * of the struct phylink_mac_ops structure.
+ * suitable to be directly plugged into the pcs_get_state() member
+ * of the struct phylink_pcs_ops structure.
*/
void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs)
{
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 692930750215..c88edb19d2e7 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -20,6 +20,8 @@
#include <linux/of.h>
#include <linux/phy.h>
#include <linux/netdevice.h>
+#include <linux/crc16.h>
+#include <linux/etherdevice.h>
#include <linux/smscphy.h>
/* Vendor-specific PHY Definitions */
@@ -51,6 +53,7 @@ struct smsc_phy_priv {
unsigned int edpd_enable:1;
unsigned int edpd_mode_set_by_user:1;
unsigned int edpd_max_wait_ms;
+ bool wol_arp;
};
static int smsc_phy_ack_interrupt(struct phy_device *phydev)
@@ -258,6 +261,243 @@ int lan87xx_read_status(struct phy_device *phydev)
}
EXPORT_SYMBOL_GPL(lan87xx_read_status);
+static int lan874x_phy_config_init(struct phy_device *phydev)
+{
+ u16 val;
+ int rc;
+
+ /* Setup LED2/nINT/nPME pin to function as nPME. May need user option
+ * to use LED1/nINT/nPME.
+ */
+ val = MII_LAN874X_PHY_PME2_SET;
+
+ /* The bits MII_LAN874X_PHY_WOL_PFDA_FR, MII_LAN874X_PHY_WOL_WUFR,
+ * MII_LAN874X_PHY_WOL_MPR, and MII_LAN874X_PHY_WOL_BCAST_FR need to
+ * be cleared to de-assert PME signal after a WoL event happens, but
+ * using PME auto clear gets around that.
+ */
+ val |= MII_LAN874X_PHY_PME_SELF_CLEAR;
+ rc = phy_write_mmd(phydev, MDIO_MMD_PCS, MII_LAN874X_PHY_MMD_WOL_WUCSR,
+ val);
+ if (rc < 0)
+ return rc;
+
+ /* set nPME self clear delay time */
+ rc = phy_write_mmd(phydev, MDIO_MMD_PCS, MII_LAN874X_PHY_MMD_MCFGR,
+ MII_LAN874X_PHY_PME_SELF_CLEAR_DELAY);
+ if (rc < 0)
+ return rc;
+
+ return smsc_phy_config_init(phydev);
+}
+
+static void lan874x_get_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol)
+{
+ struct smsc_phy_priv *priv = phydev->priv;
+ int rc;
+
+ wol->supported = (WAKE_UCAST | WAKE_BCAST | WAKE_MAGIC |
+ WAKE_ARP | WAKE_MCAST);
+ wol->wolopts = 0;
+
+ rc = phy_read_mmd(phydev, MDIO_MMD_PCS, MII_LAN874X_PHY_MMD_WOL_WUCSR);
+ if (rc < 0)
+ return;
+
+ if (rc & MII_LAN874X_PHY_WOL_PFDAEN)
+ wol->wolopts |= WAKE_UCAST;
+
+ if (rc & MII_LAN874X_PHY_WOL_BCSTEN)
+ wol->wolopts |= WAKE_BCAST;
+
+ if (rc & MII_LAN874X_PHY_WOL_MPEN)
+ wol->wolopts |= WAKE_MAGIC;
+
+ if (rc & MII_LAN874X_PHY_WOL_WUEN) {
+ if (priv->wol_arp)
+ wol->wolopts |= WAKE_ARP;
+ else
+ wol->wolopts |= WAKE_MCAST;
+ }
+}
+
+static u16 smsc_crc16(const u8 *buffer, size_t len)
+{
+ return bitrev16(crc16(0xFFFF, buffer, len));
+}
+
+static int lan874x_chk_wol_pattern(const u8 pattern[], const u16 *mask,
+ u8 len, u8 *data, u8 *datalen)
+{
+ size_t i, j, k;
+ int ret = 0;
+ u16 bits;
+
+ /* Pattern filtering can match up to 128 bytes of frame data. There
+ * are 8 registers to program the 16-bit masks, where each bit means
+ * the byte will be compared. The frame data will then go through a
+ * CRC16 calculation for hardware comparison. This helper function
+ * makes sure only relevant frame data are included in this
+ * calculation. It provides a warning when the masks and expected
+ * data size do not match.
+ */
+ i = 0;
+ k = 0;
+ while (len > 0) {
+ bits = *mask;
+ for (j = 0; j < 16; j++, i++, len--) {
+ /* No more pattern. */
+ if (!len) {
+ /* The rest of bitmap is not empty. */
+ if (bits)
+ ret = i + 1;
+ break;
+ }
+ if (bits & 1)
+ data[k++] = pattern[i];
+ bits >>= 1;
+ }
+ mask++;
+ }
+ *datalen = k;
+ return ret;
+}
+
+static int lan874x_set_wol_pattern(struct phy_device *phydev, u16 val,
+ const u8 data[], u8 datalen,
+ const u16 *mask, u8 masklen)
+{
+ u16 crc, reg;
+ int rc;
+
+ /* Starting pattern offset is set before calling this function. */
+ val |= MII_LAN874X_PHY_WOL_FILTER_EN;
+ rc = phy_write_mmd(phydev, MDIO_MMD_PCS,
+ MII_LAN874X_PHY_MMD_WOL_WUF_CFGA, val);
+ if (rc < 0)
+ return rc;
+
+ crc = smsc_crc16(data, datalen);
+ rc = phy_write_mmd(phydev, MDIO_MMD_PCS,
+ MII_LAN874X_PHY_MMD_WOL_WUF_CFGB, crc);
+ if (rc < 0)
+ return rc;
+
+ masklen = (masklen + 15) & ~0xf;
+ reg = MII_LAN874X_PHY_MMD_WOL_WUF_MASK7;
+ while (masklen >= 16) {
+ rc = phy_write_mmd(phydev, MDIO_MMD_PCS, reg, *mask);
+ if (rc < 0)
+ return rc;
+ reg--;
+ mask++;
+ masklen -= 16;
+ }
+
+ /* Clear out the rest of mask registers. */
+ while (reg != MII_LAN874X_PHY_MMD_WOL_WUF_MASK0) {
+ phy_write_mmd(phydev, MDIO_MMD_PCS, reg, 0);
+ reg--;
+ }
+ return rc;
+}
+
+static int lan874x_set_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol)
+{
+ struct net_device *ndev = phydev->attached_dev;
+ struct smsc_phy_priv *priv = phydev->priv;
+ u16 val, val_wucsr;
+ u8 data[128];
+ u8 datalen;
+ int rc;
+
+ /* lan874x has only one WoL filter pattern */
+ if ((wol->wolopts & (WAKE_ARP | WAKE_MCAST)) ==
+ (WAKE_ARP | WAKE_MCAST)) {
+ phydev_info(phydev,
+ "lan874x WoL supports one of ARP|MCAST at a time\n");
+ return -EOPNOTSUPP;
+ }
+
+ rc = phy_read_mmd(phydev, MDIO_MMD_PCS, MII_LAN874X_PHY_MMD_WOL_WUCSR);
+ if (rc < 0)
+ return rc;
+
+ val_wucsr = rc;
+
+ if (wol->wolopts & WAKE_UCAST)
+ val_wucsr |= MII_LAN874X_PHY_WOL_PFDAEN;
+ else
+ val_wucsr &= ~MII_LAN874X_PHY_WOL_PFDAEN;
+
+ if (wol->wolopts & WAKE_BCAST)
+ val_wucsr |= MII_LAN874X_PHY_WOL_BCSTEN;
+ else
+ val_wucsr &= ~MII_LAN874X_PHY_WOL_BCSTEN;
+
+ if (wol->wolopts & WAKE_MAGIC)
+ val_wucsr |= MII_LAN874X_PHY_WOL_MPEN;
+ else
+ val_wucsr &= ~MII_LAN874X_PHY_WOL_MPEN;
+
+ /* Need to use pattern matching */
+ if (wol->wolopts & (WAKE_ARP | WAKE_MCAST))
+ val_wucsr |= MII_LAN874X_PHY_WOL_WUEN;
+ else
+ val_wucsr &= ~MII_LAN874X_PHY_WOL_WUEN;
+
+ if (wol->wolopts & WAKE_ARP) {
+ const u8 pattern[2] = { 0x08, 0x06 };
+ const u16 mask[1] = { 0x0003 };
+
+ rc = lan874x_chk_wol_pattern(pattern, mask, 2, data,
+ &datalen);
+ if (rc)
+ phydev_dbg(phydev, "pattern not valid at %d\n", rc);
+
+ /* Need to match broadcast destination address and provided
+ * data pattern at offset 12.
+ */
+ val = 12 | MII_LAN874X_PHY_WOL_FILTER_BCSTEN;
+ rc = lan874x_set_wol_pattern(phydev, val, data, datalen, mask,
+ 2);
+ if (rc < 0)
+ return rc;
+ priv->wol_arp = true;
+ }
+
+ if (wol->wolopts & WAKE_MCAST) {
+ /* Need to match multicast destination address. */
+ val = MII_LAN874X_PHY_WOL_FILTER_MCASTTEN;
+ rc = lan874x_set_wol_pattern(phydev, val, data, 0, NULL, 0);
+ if (rc < 0)
+ return rc;
+ priv->wol_arp = false;
+ }
+
+ if (wol->wolopts & (WAKE_MAGIC | WAKE_UCAST)) {
+ const u8 *mac = (const u8 *)ndev->dev_addr;
+ int i, reg;
+
+ reg = MII_LAN874X_PHY_MMD_WOL_RX_ADDRC;
+ for (i = 0; i < 6; i += 2, reg--) {
+ rc = phy_write_mmd(phydev, MDIO_MMD_PCS, reg,
+ ((mac[i + 1] << 8) | mac[i]));
+ if (rc < 0)
+ return rc;
+ }
+ }
+
+ rc = phy_write_mmd(phydev, MDIO_MMD_PCS, MII_LAN874X_PHY_MMD_WOL_WUCSR,
+ val_wucsr);
+ if (rc < 0)
+ return rc;
+
+ return 0;
+}
+
static int smsc_get_sset_count(struct phy_device *phydev)
{
return ARRAY_SIZE(smsc_hw_stats);
@@ -533,7 +773,7 @@ static struct phy_driver smsc_phy_driver[] = {
/* basic functions */
.read_status = lan87xx_read_status,
- .config_init = smsc_phy_config_init,
+ .config_init = lan874x_phy_config_init,
.soft_reset = smsc_phy_reset,
/* IRQ related */
@@ -548,6 +788,10 @@ static struct phy_driver smsc_phy_driver[] = {
.get_tunable = smsc_phy_get_tunable,
.set_tunable = smsc_phy_set_tunable,
+ /* WoL */
+ .set_wol = lan874x_set_wol,
+ .get_wol = lan874x_get_wol,
+
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
@@ -566,7 +810,7 @@ static struct phy_driver smsc_phy_driver[] = {
/* basic functions */
.read_status = lan87xx_read_status,
- .config_init = smsc_phy_config_init,
+ .config_init = lan874x_phy_config_init,
.soft_reset = smsc_phy_reset,
/* IRQ related */
@@ -581,6 +825,10 @@ static struct phy_driver smsc_phy_driver[] = {
.get_tunable = smsc_phy_get_tunable,
.set_tunable = smsc_phy_set_tunable,
+ /* WoL */
+ .set_wol = lan874x_set_wol,
+ .get_wol = lan874x_get_wol,
+
.suspend = genphy_suspend,
.resume = genphy_resume,
} };
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 3b79c603b936..ba8b6bd8233c 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -968,7 +968,7 @@ abort:
***********************************************************************/
static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
{
- struct sock *sk = (struct sock *)chan->private;
+ struct sock *sk = chan->private;
return __pppoe_xmit(sk, skb);
}
@@ -976,7 +976,7 @@ static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
struct net_device_path *path,
const struct ppp_channel *chan)
{
- struct sock *sk = (struct sock *)chan->private;
+ struct sock *sk = chan->private;
struct pppox_sock *po = pppox_sk(sk);
struct net_device *dev = po->pppoe_dev;
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 32183f24e63f..6833ef0c7930 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -129,10 +129,10 @@ static void del_chan(struct pppox_sock *sock)
spin_unlock(&chan_lock);
}
-static struct rtable *pptp_route_output(struct pppox_sock *po,
+static struct rtable *pptp_route_output(const struct pppox_sock *po,
struct flowi4 *fl4)
{
- struct sock *sk = &po->sk;
+ const struct sock *sk = &po->sk;
struct net *net;
net = sock_net(sk);
@@ -148,7 +148,7 @@ static struct rtable *pptp_route_output(struct pppox_sock *po,
static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
{
- struct sock *sk = (struct sock *) chan->private;
+ struct sock *sk = chan->private;
struct pppox_sock *po = pppox_sk(sk);
struct net *net = sock_net(sk);
struct pptp_opt *opt = &po->proto.pptp;
@@ -575,7 +575,7 @@ out:
static int pptp_ppp_ioctl(struct ppp_channel *chan, unsigned int cmd,
unsigned long arg)
{
- struct sock *sk = (struct sock *) chan->private;
+ struct sock *sk = chan->private;
struct pppox_sock *po = pppox_sk(sk);
struct pptp_opt *opt = &po->proto.pptp;
void __user *argp = (void __user *)arg;
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index c9a9373733c0..2bddcdf482a7 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -2720,6 +2720,45 @@ drop:
dev_kfree_skb(skb);
}
+static netdev_tx_t vxlan_xmit_nhid(struct sk_buff *skb, struct net_device *dev,
+ u32 nhid, __be32 vni)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_rdst nh_rdst;
+ struct nexthop *nh;
+ bool do_xmit;
+ u32 hash;
+
+ memset(&nh_rdst, 0, sizeof(struct vxlan_rdst));
+ hash = skb_get_hash(skb);
+
+ rcu_read_lock();
+ nh = nexthop_find_by_id(dev_net(dev), nhid);
+ if (unlikely(!nh || !nexthop_is_fdb(nh) || !nexthop_is_multipath(nh))) {
+ rcu_read_unlock();
+ goto drop;
+ }
+ do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst);
+ rcu_read_unlock();
+
+ if (vxlan->cfg.saddr.sa.sa_family != nh_rdst.remote_ip.sa.sa_family)
+ goto drop;
+
+ if (likely(do_xmit))
+ vxlan_xmit_one(skb, dev, vni, &nh_rdst, false);
+ else
+ goto drop;
+
+ return NETDEV_TX_OK;
+
+drop:
+ dev->stats.tx_dropped++;
+ vxlan_vnifilter_count(netdev_priv(dev), vni, NULL,
+ VXLAN_VNI_STATS_TX_DROPS, 0);
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
/* Transmit local packets over Vxlan
*
* Outer IP header inherits ECN and DF from inner header.
@@ -2735,6 +2774,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
struct vxlan_fdb *f;
struct ethhdr *eth;
__be32 vni = 0;
+ u32 nhid = 0;
info = skb_tunnel_info(skb);
@@ -2744,6 +2784,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&
info->mode & IP_TUNNEL_INFO_TX) {
vni = tunnel_id_to_key32(info->key.tun_id);
+ nhid = info->key.nhid;
} else {
if (info && info->mode & IP_TUNNEL_INFO_TX)
vxlan_xmit_one(skb, dev, vni, NULL, false);
@@ -2771,6 +2812,9 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
#endif
}
+ if (nhid)
+ return vxlan_xmit_nhid(skb, dev, nhid, vni);
+
if (vxlan->cfg.flags & VXLAN_F_MDB) {
struct vxlan_mdb_entry *mdb_entry;
diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
index 7162bf38a8c9..cc70360364b7 100644
--- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
+++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
@@ -1066,13 +1066,18 @@ static void t7xx_hw_info_init(struct cldma_ctrl *md_ctrl)
struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info;
u32 phy_ao_base, phy_pd_base;
- if (md_ctrl->hif_id != CLDMA_ID_MD)
- return;
-
- phy_ao_base = CLDMA1_AO_BASE;
- phy_pd_base = CLDMA1_PD_BASE;
- hw_info->phy_interrupt_id = CLDMA1_INT;
hw_info->hw_mode = MODE_BIT_64;
+
+ if (md_ctrl->hif_id == CLDMA_ID_MD) {
+ phy_ao_base = CLDMA1_AO_BASE;
+ phy_pd_base = CLDMA1_PD_BASE;
+ hw_info->phy_interrupt_id = CLDMA1_INT;
+ } else {
+ phy_ao_base = CLDMA0_AO_BASE;
+ phy_pd_base = CLDMA0_PD_BASE;
+ hw_info->phy_interrupt_id = CLDMA0_INT;
+ }
+
hw_info->ap_ao_base = t7xx_pcie_addr_transfer(pbase->pcie_ext_reg_base,
pbase->pcie_dev_reg_trsl_addr, phy_ao_base);
hw_info->ap_pdn_base = t7xx_pcie_addr_transfer(pbase->pcie_ext_reg_base,
diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.h b/drivers/net/wwan/t7xx/t7xx_hif_cldma.h
index 47a35e552da7..4410bac6993a 100644
--- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.h
+++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.h
@@ -34,7 +34,7 @@
/**
* enum cldma_id - Identifiers for CLDMA HW units.
* @CLDMA_ID_MD: Modem control channel.
- * @CLDMA_ID_AP: Application Processor control channel (not used at the moment).
+ * @CLDMA_ID_AP: Application Processor control channel.
* @CLDMA_NUM: Number of CLDMA HW units available.
*/
enum cldma_id {
diff --git a/drivers/net/wwan/t7xx/t7xx_mhccif.h b/drivers/net/wwan/t7xx/t7xx_mhccif.h
index 209b386bc088..20c50dce9fc3 100644
--- a/drivers/net/wwan/t7xx/t7xx_mhccif.h
+++ b/drivers/net/wwan/t7xx/t7xx_mhccif.h
@@ -25,6 +25,7 @@
D2H_INT_EXCEPTION_CLEARQ_DONE | \
D2H_INT_EXCEPTION_ALLQ_RESET | \
D2H_INT_PORT_ENUM | \
+ D2H_INT_ASYNC_AP_HK | \
D2H_INT_ASYNC_MD_HK)
void t7xx_mhccif_mask_set(struct t7xx_pci_dev *t7xx_dev, u32 val);
diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.c b/drivers/net/wwan/t7xx/t7xx_modem_ops.c
index 7d0f5e4f0a78..24e7d491468e 100644
--- a/drivers/net/wwan/t7xx/t7xx_modem_ops.c
+++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.c
@@ -44,6 +44,7 @@
#include "t7xx_state_monitor.h"
#define RT_ID_MD_PORT_ENUM 0
+#define RT_ID_AP_PORT_ENUM 1
/* Modem feature query identification code - "ICCC" */
#define MD_FEATURE_QUERY_ID 0x49434343
@@ -298,6 +299,7 @@ static void t7xx_md_exception(struct t7xx_modem *md, enum hif_ex_stage stage)
}
t7xx_cldma_exception(md->md_ctrl[CLDMA_ID_MD], stage);
+ t7xx_cldma_exception(md->md_ctrl[CLDMA_ID_AP], stage);
if (stage == HIF_EX_INIT)
t7xx_mhccif_h2d_swint_trigger(t7xx_dev, H2D_CH_EXCEPTION_ACK);
@@ -426,7 +428,7 @@ static int t7xx_parse_host_rt_data(struct t7xx_fsm_ctl *ctl, struct t7xx_sys_inf
if (ft_spt_st != MTK_FEATURE_MUST_BE_SUPPORTED)
return -EINVAL;
- if (i == RT_ID_MD_PORT_ENUM)
+ if (i == RT_ID_MD_PORT_ENUM || i == RT_ID_AP_PORT_ENUM)
t7xx_port_enum_msg_handler(ctl->md, rt_feature->data);
}
@@ -456,12 +458,12 @@ static int t7xx_core_reset(struct t7xx_modem *md)
return 0;
}
-static void t7xx_core_hk_handler(struct t7xx_modem *md, struct t7xx_fsm_ctl *ctl,
+static void t7xx_core_hk_handler(struct t7xx_modem *md, struct t7xx_sys_info *core_info,
+ struct t7xx_fsm_ctl *ctl,
enum t7xx_fsm_event_state event_id,
enum t7xx_fsm_event_state err_detect)
{
struct t7xx_fsm_event *event = NULL, *event_next;
- struct t7xx_sys_info *core_info = &md->core_md;
struct device *dev = &md->t7xx_dev->pdev->dev;
unsigned long flags;
int ret;
@@ -531,19 +533,33 @@ static void t7xx_md_hk_wq(struct work_struct *work)
t7xx_cldma_start(md->md_ctrl[CLDMA_ID_MD]);
t7xx_fsm_broadcast_state(ctl, MD_STATE_WAITING_FOR_HS2);
md->core_md.handshake_ongoing = true;
- t7xx_core_hk_handler(md, ctl, FSM_EVENT_MD_HS2, FSM_EVENT_MD_HS2_EXIT);
+ t7xx_core_hk_handler(md, &md->core_md, ctl, FSM_EVENT_MD_HS2, FSM_EVENT_MD_HS2_EXIT);
+}
+
+static void t7xx_ap_hk_wq(struct work_struct *work)
+{
+ struct t7xx_modem *md = container_of(work, struct t7xx_modem, ap_handshake_work);
+ struct t7xx_fsm_ctl *ctl = md->fsm_ctl;
+
+ /* Clear the HS2 EXIT event appended in t7xx_core_reset(). */
+ t7xx_fsm_clr_event(ctl, FSM_EVENT_AP_HS2_EXIT);
+ t7xx_cldma_stop(md->md_ctrl[CLDMA_ID_AP]);
+ t7xx_cldma_switch_cfg(md->md_ctrl[CLDMA_ID_AP]);
+ t7xx_cldma_start(md->md_ctrl[CLDMA_ID_AP]);
+ md->core_ap.handshake_ongoing = true;
+ t7xx_core_hk_handler(md, &md->core_ap, ctl, FSM_EVENT_AP_HS2, FSM_EVENT_AP_HS2_EXIT);
}
void t7xx_md_event_notify(struct t7xx_modem *md, enum md_event_id evt_id)
{
struct t7xx_fsm_ctl *ctl = md->fsm_ctl;
- void __iomem *mhccif_base;
unsigned int int_sta;
unsigned long flags;
switch (evt_id) {
case FSM_PRE_START:
- t7xx_mhccif_mask_clr(md->t7xx_dev, D2H_INT_PORT_ENUM);
+ t7xx_mhccif_mask_clr(md->t7xx_dev, D2H_INT_PORT_ENUM | D2H_INT_ASYNC_MD_HK |
+ D2H_INT_ASYNC_AP_HK);
break;
case FSM_START:
@@ -556,16 +572,26 @@ void t7xx_md_event_notify(struct t7xx_modem *md, enum md_event_id evt_id)
ctl->exp_flg = true;
md->exp_id &= ~D2H_INT_EXCEPTION_INIT;
md->exp_id &= ~D2H_INT_ASYNC_MD_HK;
+ md->exp_id &= ~D2H_INT_ASYNC_AP_HK;
} else if (ctl->exp_flg) {
md->exp_id &= ~D2H_INT_ASYNC_MD_HK;
- } else if (md->exp_id & D2H_INT_ASYNC_MD_HK) {
- queue_work(md->handshake_wq, &md->handshake_work);
- md->exp_id &= ~D2H_INT_ASYNC_MD_HK;
- mhccif_base = md->t7xx_dev->base_addr.mhccif_rc_base;
- iowrite32(D2H_INT_ASYNC_MD_HK, mhccif_base + REG_EP2RC_SW_INT_ACK);
- t7xx_mhccif_mask_set(md->t7xx_dev, D2H_INT_ASYNC_MD_HK);
+ md->exp_id &= ~D2H_INT_ASYNC_AP_HK;
} else {
- t7xx_mhccif_mask_clr(md->t7xx_dev, D2H_INT_ASYNC_MD_HK);
+ void __iomem *mhccif_base = md->t7xx_dev->base_addr.mhccif_rc_base;
+
+ if (md->exp_id & D2H_INT_ASYNC_MD_HK) {
+ queue_work(md->handshake_wq, &md->handshake_work);
+ md->exp_id &= ~D2H_INT_ASYNC_MD_HK;
+ iowrite32(D2H_INT_ASYNC_MD_HK, mhccif_base + REG_EP2RC_SW_INT_ACK);
+ t7xx_mhccif_mask_set(md->t7xx_dev, D2H_INT_ASYNC_MD_HK);
+ }
+
+ if (md->exp_id & D2H_INT_ASYNC_AP_HK) {
+ queue_work(md->handshake_wq, &md->ap_handshake_work);
+ md->exp_id &= ~D2H_INT_ASYNC_AP_HK;
+ iowrite32(D2H_INT_ASYNC_AP_HK, mhccif_base + REG_EP2RC_SW_INT_ACK);
+ t7xx_mhccif_mask_set(md->t7xx_dev, D2H_INT_ASYNC_AP_HK);
+ }
}
spin_unlock_irqrestore(&md->exp_lock, flags);
@@ -578,6 +604,7 @@ void t7xx_md_event_notify(struct t7xx_modem *md, enum md_event_id evt_id)
case FSM_READY:
t7xx_mhccif_mask_set(md->t7xx_dev, D2H_INT_ASYNC_MD_HK);
+ t7xx_mhccif_mask_set(md->t7xx_dev, D2H_INT_ASYNC_AP_HK);
break;
default:
@@ -629,6 +656,12 @@ static struct t7xx_modem *t7xx_md_alloc(struct t7xx_pci_dev *t7xx_dev)
md->core_md.feature_set[RT_ID_MD_PORT_ENUM] &= ~FEATURE_MSK;
md->core_md.feature_set[RT_ID_MD_PORT_ENUM] |=
FIELD_PREP(FEATURE_MSK, MTK_FEATURE_MUST_BE_SUPPORTED);
+
+ INIT_WORK(&md->ap_handshake_work, t7xx_ap_hk_wq);
+ md->core_ap.feature_set[RT_ID_AP_PORT_ENUM] &= ~FEATURE_MSK;
+ md->core_ap.feature_set[RT_ID_AP_PORT_ENUM] |=
+ FIELD_PREP(FEATURE_MSK, MTK_FEATURE_MUST_BE_SUPPORTED);
+
return md;
}
@@ -640,6 +673,7 @@ int t7xx_md_reset(struct t7xx_pci_dev *t7xx_dev)
md->exp_id = 0;
t7xx_fsm_reset(md);
t7xx_cldma_reset(md->md_ctrl[CLDMA_ID_MD]);
+ t7xx_cldma_reset(md->md_ctrl[CLDMA_ID_AP]);
t7xx_port_proxy_reset(md->port_prox);
md->md_init_finish = true;
return t7xx_core_reset(md);
@@ -669,6 +703,10 @@ int t7xx_md_init(struct t7xx_pci_dev *t7xx_dev)
if (ret)
goto err_destroy_hswq;
+ ret = t7xx_cldma_alloc(CLDMA_ID_AP, t7xx_dev);
+ if (ret)
+ goto err_destroy_hswq;
+
ret = t7xx_fsm_init(md);
if (ret)
goto err_destroy_hswq;
@@ -681,12 +719,16 @@ int t7xx_md_init(struct t7xx_pci_dev *t7xx_dev)
if (ret)
goto err_uninit_ccmni;
- ret = t7xx_port_proxy_init(md);
+ ret = t7xx_cldma_init(md->md_ctrl[CLDMA_ID_AP]);
if (ret)
goto err_uninit_md_cldma;
+ ret = t7xx_port_proxy_init(md);
+ if (ret)
+ goto err_uninit_ap_cldma;
+
ret = t7xx_fsm_append_cmd(md->fsm_ctl, FSM_CMD_START, 0);
- if (ret) /* fsm_uninit flushes cmd queue */
+ if (ret) /* t7xx_fsm_uninit() flushes cmd queue */
goto err_uninit_proxy;
t7xx_md_sys_sw_init(t7xx_dev);
@@ -696,6 +738,9 @@ int t7xx_md_init(struct t7xx_pci_dev *t7xx_dev)
err_uninit_proxy:
t7xx_port_proxy_uninit(md->port_prox);
+err_uninit_ap_cldma:
+ t7xx_cldma_exit(md->md_ctrl[CLDMA_ID_AP]);
+
err_uninit_md_cldma:
t7xx_cldma_exit(md->md_ctrl[CLDMA_ID_MD]);
@@ -722,6 +767,7 @@ void t7xx_md_exit(struct t7xx_pci_dev *t7xx_dev)
t7xx_fsm_append_cmd(md->fsm_ctl, FSM_CMD_PRE_STOP, FSM_CMD_FLAG_WAIT_FOR_COMPLETION);
t7xx_port_proxy_uninit(md->port_prox);
+ t7xx_cldma_exit(md->md_ctrl[CLDMA_ID_AP]);
t7xx_cldma_exit(md->md_ctrl[CLDMA_ID_MD]);
t7xx_ccmni_exit(t7xx_dev);
t7xx_fsm_uninit(md);
diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.h b/drivers/net/wwan/t7xx/t7xx_modem_ops.h
index 7469ed636ae8..abe633cf7adc 100644
--- a/drivers/net/wwan/t7xx/t7xx_modem_ops.h
+++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.h
@@ -66,10 +66,12 @@ struct t7xx_modem {
struct cldma_ctrl *md_ctrl[CLDMA_NUM];
struct t7xx_pci_dev *t7xx_dev;
struct t7xx_sys_info core_md;
+ struct t7xx_sys_info core_ap;
bool md_init_finish;
bool rgu_irq_asserted;
struct workqueue_struct *handshake_wq;
struct work_struct handshake_work;
+ struct work_struct ap_handshake_work;
struct t7xx_fsm_ctl *fsm_ctl;
struct port_proxy *port_prox;
unsigned int exp_id;
diff --git a/drivers/net/wwan/t7xx/t7xx_port.h b/drivers/net/wwan/t7xx/t7xx_port.h
index 8ea9079af997..4ae8a00a8532 100644
--- a/drivers/net/wwan/t7xx/t7xx_port.h
+++ b/drivers/net/wwan/t7xx/t7xx_port.h
@@ -36,9 +36,13 @@
/* Channel ID and Message ID definitions.
* The channel number consists of peer_id(15:12) , channel_id(11:0)
* peer_id:
- * 0:reserved, 1: to sAP, 2: to MD
+ * 0:reserved, 1: to AP, 2: to MD
*/
enum port_ch {
+ /* to AP */
+ PORT_CH_AP_CONTROL_RX = 0x1000,
+ PORT_CH_AP_CONTROL_TX = 0x1001,
+
/* to MD */
PORT_CH_CONTROL_RX = 0x2000,
PORT_CH_CONTROL_TX = 0x2001,
diff --git a/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c b/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c
index 68430b130a67..ae632ef96698 100644
--- a/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c
+++ b/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c
@@ -167,8 +167,12 @@ static int control_msg_handler(struct t7xx_port *port, struct sk_buff *skb)
case CTL_ID_HS2_MSG:
skb_pull(skb, sizeof(*ctrl_msg_h));
- if (port_conf->rx_ch == PORT_CH_CONTROL_RX) {
- ret = t7xx_fsm_append_event(ctl, FSM_EVENT_MD_HS2, skb->data,
+ if (port_conf->rx_ch == PORT_CH_CONTROL_RX ||
+ port_conf->rx_ch == PORT_CH_AP_CONTROL_RX) {
+ int event = port_conf->rx_ch == PORT_CH_CONTROL_RX ?
+ FSM_EVENT_MD_HS2 : FSM_EVENT_AP_HS2;
+
+ ret = t7xx_fsm_append_event(ctl, event, skb->data,
le32_to_cpu(ctrl_msg_h->data_length));
if (ret)
dev_err(port->dev, "Failed to append Handshake 2 event");
diff --git a/drivers/net/wwan/t7xx/t7xx_port_proxy.c b/drivers/net/wwan/t7xx/t7xx_port_proxy.c
index 894b1d11b2c9..274846d39fbf 100644
--- a/drivers/net/wwan/t7xx/t7xx_port_proxy.c
+++ b/drivers/net/wwan/t7xx/t7xx_port_proxy.c
@@ -48,7 +48,7 @@
i < (proxy)->port_count; \
i++, (p) = &(proxy)->ports[i])
-static const struct t7xx_port_conf t7xx_md_port_conf[] = {
+static const struct t7xx_port_conf t7xx_port_conf[] = {
{
.tx_ch = PORT_CH_UART2_TX,
.rx_ch = PORT_CH_UART2_RX,
@@ -89,6 +89,14 @@ static const struct t7xx_port_conf t7xx_md_port_conf[] = {
.path_id = CLDMA_ID_MD,
.ops = &ctl_port_ops,
.name = "t7xx_ctrl",
+ }, {
+ .tx_ch = PORT_CH_AP_CONTROL_TX,
+ .rx_ch = PORT_CH_AP_CONTROL_RX,
+ .txq_index = Q_IDX_CTRL,
+ .rxq_index = Q_IDX_CTRL,
+ .path_id = CLDMA_ID_AP,
+ .ops = &ctl_port_ops,
+ .name = "t7xx_ap_ctrl",
},
};
@@ -428,6 +436,9 @@ static void t7xx_proxy_init_all_ports(struct t7xx_modem *md)
if (port_conf->tx_ch == PORT_CH_CONTROL_TX)
md->core_md.ctl_port = port;
+ if (port_conf->tx_ch == PORT_CH_AP_CONTROL_TX)
+ md->core_ap.ctl_port = port;
+
port->t7xx_dev = md->t7xx_dev;
port->dev = &md->t7xx_dev->pdev->dev;
spin_lock_init(&port->port_update_lock);
@@ -442,7 +453,7 @@ static void t7xx_proxy_init_all_ports(struct t7xx_modem *md)
static int t7xx_proxy_alloc(struct t7xx_modem *md)
{
- unsigned int port_count = ARRAY_SIZE(t7xx_md_port_conf);
+ unsigned int port_count = ARRAY_SIZE(t7xx_port_conf);
struct device *dev = &md->t7xx_dev->pdev->dev;
struct port_proxy *port_prox;
int i;
@@ -456,7 +467,7 @@ static int t7xx_proxy_alloc(struct t7xx_modem *md)
port_prox->dev = dev;
for (i = 0; i < port_count; i++)
- port_prox->ports[i].port_conf = &t7xx_md_port_conf[i];
+ port_prox->ports[i].port_conf = &t7xx_port_conf[i];
port_prox->port_count = port_count;
t7xx_proxy_init_all_ports(md);
@@ -481,6 +492,7 @@ int t7xx_port_proxy_init(struct t7xx_modem *md)
if (ret)
return ret;
+ t7xx_cldma_set_recv_skb(md->md_ctrl[CLDMA_ID_AP], t7xx_port_proxy_recv_skb);
t7xx_cldma_set_recv_skb(md->md_ctrl[CLDMA_ID_MD], t7xx_port_proxy_recv_skb);
return 0;
}
diff --git a/drivers/net/wwan/t7xx/t7xx_reg.h b/drivers/net/wwan/t7xx/t7xx_reg.h
index 7c1b81091a0f..c41d7d094c08 100644
--- a/drivers/net/wwan/t7xx/t7xx_reg.h
+++ b/drivers/net/wwan/t7xx/t7xx_reg.h
@@ -56,7 +56,7 @@
#define D2H_INT_RESUME_ACK BIT(12)
#define D2H_INT_SUSPEND_ACK_AP BIT(13)
#define D2H_INT_RESUME_ACK_AP BIT(14)
-#define D2H_INT_ASYNC_SAP_HK BIT(15)
+#define D2H_INT_ASYNC_AP_HK BIT(15)
#define D2H_INT_ASYNC_MD_HK BIT(16)
/* Register base */
diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.c b/drivers/net/wwan/t7xx/t7xx_state_monitor.c
index 0bcca08ff2bd..80edb8e75a6a 100644
--- a/drivers/net/wwan/t7xx/t7xx_state_monitor.c
+++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.c
@@ -285,8 +285,9 @@ static int fsm_routine_starting(struct t7xx_fsm_ctl *ctl)
t7xx_fsm_broadcast_state(ctl, MD_STATE_WAITING_FOR_HS1);
t7xx_md_event_notify(md, FSM_START);
- wait_event_interruptible_timeout(ctl->async_hk_wq, md->core_md.ready || ctl->exp_flg,
- HZ * 60);
+ wait_event_interruptible_timeout(ctl->async_hk_wq,
+ (md->core_md.ready && md->core_ap.ready) ||
+ ctl->exp_flg, HZ * 60);
dev = &md->t7xx_dev->pdev->dev;
if (ctl->exp_flg)
@@ -299,6 +300,13 @@ static int fsm_routine_starting(struct t7xx_fsm_ctl *ctl)
fsm_routine_exception(ctl, NULL, EXCEPTION_HS_TIMEOUT);
return -ETIMEDOUT;
+ } else if (!md->core_ap.ready) {
+ dev_err(dev, "AP handshake timeout\n");
+ if (md->core_ap.handshake_ongoing)
+ t7xx_fsm_append_event(ctl, FSM_EVENT_AP_HS2_EXIT, NULL, 0);
+
+ fsm_routine_exception(ctl, NULL, EXCEPTION_HS_TIMEOUT);
+ return -ETIMEDOUT;
}
t7xx_pci_pm_init_late(md->t7xx_dev);
@@ -335,6 +343,7 @@ static void fsm_routine_start(struct t7xx_fsm_ctl *ctl, struct t7xx_fsm_command
return;
}
+ t7xx_cldma_hif_hw_init(md->md_ctrl[CLDMA_ID_AP]);
t7xx_cldma_hif_hw_init(md->md_ctrl[CLDMA_ID_MD]);
fsm_finish_command(ctl, cmd, fsm_routine_starting(ctl));
}
diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.h b/drivers/net/wwan/t7xx/t7xx_state_monitor.h
index b1af0259d4c5..b6e76f3903c8 100644
--- a/drivers/net/wwan/t7xx/t7xx_state_monitor.h
+++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.h
@@ -38,10 +38,12 @@ enum t7xx_fsm_state {
enum t7xx_fsm_event_state {
FSM_EVENT_INVALID,
FSM_EVENT_MD_HS2,
+ FSM_EVENT_AP_HS2,
FSM_EVENT_MD_EX,
FSM_EVENT_MD_EX_REC_OK,
FSM_EVENT_MD_EX_PASS,
FSM_EVENT_MD_HS2_EXIT,
+ FSM_EVENT_AP_HS2_EXIT,
FSM_EVENT_MAX
};
diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index 350154e4c2b5..a52859d024f0 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -12,7 +12,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/timex.h>
#include <linux/slab.h>
#include <linux/clk.h>
diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig
index 9c67b97faba2..74760c1a163b 100644
--- a/drivers/s390/net/Kconfig
+++ b/drivers/s390/net/Kconfig
@@ -5,12 +5,11 @@ menu "S/390 network device drivers"
config LCS
def_tristate m
prompt "Lan Channel Station Interface"
- depends on CCW && NETDEVICES && (ETHERNET || FDDI)
+ depends on CCW && NETDEVICES && ETHERNET
help
Select this option if you want to use LCS networking on IBM System z.
- This device driver supports FDDI (IEEE 802.7) and Ethernet.
To compile as a module, choose M. The module name is lcs.
- If you do not know what it is, it's safe to choose Y.
+ If you do not use LCS, choose N.
config CTCM
def_tristate m
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index 9fd8e6f07a03..a1f2acd6fb8f 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -17,7 +17,6 @@
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
-#include <linux/fddidevice.h>
#include <linux/inetdevice.h>
#include <linux/in.h>
#include <linux/igmp.h>
@@ -36,10 +35,6 @@
#include "lcs.h"
-#if !defined(CONFIG_ETHERNET) && !defined(CONFIG_FDDI)
-#error Cannot compile lcs.c without some net devices switched on.
-#endif
-
/*
* initialization string for output
*/
@@ -1601,19 +1596,11 @@ lcs_startlan_auto(struct lcs_card *card)
int rc;
LCS_DBF_TEXT(2, trace, "strtauto");
-#ifdef CONFIG_ETHERNET
card->lan_type = LCS_FRAME_TYPE_ENET;
rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP);
if (rc == 0)
return 0;
-#endif
-#ifdef CONFIG_FDDI
- card->lan_type = LCS_FRAME_TYPE_FDDI;
- rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP);
- if (rc == 0)
- return 0;
-#endif
return -EIO;
}
@@ -1806,22 +1793,16 @@ lcs_get_frames_cb(struct lcs_channel *channel, struct lcs_buffer *buffer)
card->stats.rx_errors++;
return;
}
- /* What kind of frame is it? */
- if (lcs_hdr->type == LCS_FRAME_TYPE_CONTROL) {
- /* Control frame. */
+ if (lcs_hdr->type == LCS_FRAME_TYPE_CONTROL)
lcs_get_control(card, (struct lcs_cmd *) lcs_hdr);
- } else if (lcs_hdr->type == LCS_FRAME_TYPE_ENET ||
- lcs_hdr->type == LCS_FRAME_TYPE_TR ||
- lcs_hdr->type == LCS_FRAME_TYPE_FDDI) {
- /* Normal network packet. */
+ else if (lcs_hdr->type == LCS_FRAME_TYPE_ENET)
lcs_get_skb(card, (char *)(lcs_hdr + 1),
lcs_hdr->offset - offset -
sizeof(struct lcs_header));
- } else {
- /* Unknown frame type. */
- ; // FIXME: error message ?
- }
- /* Proceed to next frame. */
+ else
+ dev_info_once(&card->dev->dev,
+ "Unknown frame type %d\n",
+ lcs_hdr->type);
offset = lcs_hdr->offset;
lcs_hdr->offset = LCS_ILLEGAL_OFFSET;
lcs_hdr = (struct lcs_header *) (buffer->data + offset);
@@ -2140,18 +2121,10 @@ lcs_new_device(struct ccwgroup_device *ccwgdev)
goto netdev_out;
}
switch (card->lan_type) {
-#ifdef CONFIG_ETHERNET
case LCS_FRAME_TYPE_ENET:
card->lan_type_trans = eth_type_trans;
dev = alloc_etherdev(0);
break;
-#endif
-#ifdef CONFIG_FDDI
- case LCS_FRAME_TYPE_FDDI:
- card->lan_type_trans = fddi_type_trans;
- dev = alloc_fddidev(0);
- break;
-#endif
default:
LCS_DBF_TEXT(3, setup, "errinit");
pr_err(" Initialization failed\n");
diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c
index db110cc442b1..691978cddab7 100644
--- a/drivers/w1/w1_netlink.c
+++ b/drivers/w1/w1_netlink.c
@@ -65,7 +65,8 @@ static void w1_unref_block(struct w1_cb_block *block)
u16 len = w1_reply_len(block);
if (len) {
cn_netlink_send_mult(block->first_cn, len,
- block->portid, 0, GFP_KERNEL);
+ block->portid, 0,
+ GFP_KERNEL, NULL, NULL);
}
kfree(block);
}
@@ -83,7 +84,8 @@ static void w1_reply_make_space(struct w1_cb_block *block, u16 space)
{
u16 len = w1_reply_len(block);
if (len + space >= block->maxlen) {
- cn_netlink_send_mult(block->first_cn, len, block->portid, 0, GFP_KERNEL);
+ cn_netlink_send_mult(block->first_cn, len, block->portid,
+ 0, GFP_KERNEL, NULL, NULL);
block->first_cn->len = 0;
block->cn = NULL;
block->msg = NULL;
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 57e9e109257e..8506690dbb9c 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -199,9 +199,9 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
+ if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) { \
typeof(sk) __sk = sk_to_full_sk(sk); \
- if (sk_fullsock(__sk) && \
+ if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \
cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \
__ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
CGROUP_INET_EGRESS); \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f58895830ada..ceaa8c23287f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -228,6 +228,18 @@ struct btf_record {
struct btf_field fields[];
};
+/* Non-opaque version of bpf_rb_node in uapi/linux/bpf.h */
+struct bpf_rb_node_kern {
+ struct rb_node rb_node;
+ void *owner;
+} __attribute__((aligned(8)));
+
+/* Non-opaque version of bpf_list_node in uapi/linux/bpf.h */
+struct bpf_list_node_kern {
+ struct list_head list_head;
+ void *owner;
+} __attribute__((aligned(8)));
+
struct bpf_map {
/* The first two cachelines with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
@@ -275,6 +287,7 @@ struct bpf_map {
} owner;
bool bypass_spec_v1;
bool frozen; /* write-once; write-protected by freeze_mutex */
+ s64 __percpu *elem_count;
};
static inline const char *btf_field_type_name(enum btf_field_type type)
@@ -2040,6 +2053,35 @@ bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
}
#endif
+static inline int
+bpf_map_init_elem_count(struct bpf_map *map)
+{
+ size_t size = sizeof(*map->elem_count), align = size;
+ gfp_t flags = GFP_USER | __GFP_NOWARN;
+
+ map->elem_count = bpf_map_alloc_percpu(map, size, align, flags);
+ if (!map->elem_count)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static inline void
+bpf_map_free_elem_count(struct bpf_map *map)
+{
+ free_percpu(map->elem_count);
+}
+
+static inline void bpf_map_inc_elem_count(struct bpf_map *map)
+{
+ this_cpu_inc(*map->elem_count);
+}
+
+static inline void bpf_map_dec_elem_count(struct bpf_map *map)
+{
+ this_cpu_dec(*map->elem_count);
+}
+
extern int sysctl_unprivileged_bpf_disabled;
static inline bool bpf_allow_ptr_leaks(void)
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index 3929be5743f4..d644bbb298af 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -27,10 +27,12 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
/* kmalloc/kfree equivalent: */
void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size);
void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
+void bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr);
/* kmem_cache_alloc/free equivalent: */
void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma);
void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr);
+void bpf_mem_cache_free_rcu(struct bpf_mem_alloc *ma, void *ptr);
void bpf_mem_cache_raw_free(void *ptr);
void *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags);
diff --git a/include/linux/bpf_mprog.h b/include/linux/bpf_mprog.h
new file mode 100644
index 000000000000..2b429488f840
--- /dev/null
+++ b/include/linux/bpf_mprog.h
@@ -0,0 +1,327 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Isovalent */
+#ifndef __BPF_MPROG_H
+#define __BPF_MPROG_H
+
+#include <linux/bpf.h>
+
+/* bpf_mprog framework:
+ *
+ * bpf_mprog is a generic layer for multi-program attachment. In-kernel users
+ * of the bpf_mprog don't need to care about the dependency resolution
+ * internals, they can just consume it with few API calls. Currently available
+ * dependency directives are BPF_F_{BEFORE,AFTER} which enable insertion of
+ * a BPF program or BPF link relative to an existing BPF program or BPF link
+ * inside the multi-program array as well as prepend and append behavior if
+ * no relative object was specified, see corresponding selftests for concrete
+ * examples (e.g. tc_links and tc_opts test cases of test_progs).
+ *
+ * Usage of bpf_mprog_{attach,detach,query}() core APIs with pseudo code:
+ *
+ * Attach case:
+ *
+ * struct bpf_mprog_entry *entry, *entry_new;
+ * int ret;
+ *
+ * // bpf_mprog user-side lock
+ * // fetch active @entry from attach location
+ * [...]
+ * ret = bpf_mprog_attach(entry, &entry_new, [...]);
+ * if (!ret) {
+ * if (entry != entry_new) {
+ * // swap @entry to @entry_new at attach location
+ * // ensure there are no inflight users of @entry:
+ * synchronize_rcu();
+ * }
+ * bpf_mprog_commit(entry);
+ * } else {
+ * // error path, bail out, propagate @ret
+ * }
+ * // bpf_mprog user-side unlock
+ *
+ * Detach case:
+ *
+ * struct bpf_mprog_entry *entry, *entry_new;
+ * int ret;
+ *
+ * // bpf_mprog user-side lock
+ * // fetch active @entry from attach location
+ * [...]
+ * ret = bpf_mprog_detach(entry, &entry_new, [...]);
+ * if (!ret) {
+ * // all (*) marked is optional and depends on the use-case
+ * // whether bpf_mprog_bundle should be freed or not
+ * if (!bpf_mprog_total(entry_new)) (*)
+ * entry_new = NULL (*)
+ * // swap @entry to @entry_new at attach location
+ * // ensure there are no inflight users of @entry:
+ * synchronize_rcu();
+ * bpf_mprog_commit(entry);
+ * if (!entry_new) (*)
+ * // free bpf_mprog_bundle (*)
+ * } else {
+ * // error path, bail out, propagate @ret
+ * }
+ * // bpf_mprog user-side unlock
+ *
+ * Query case:
+ *
+ * struct bpf_mprog_entry *entry;
+ * int ret;
+ *
+ * // bpf_mprog user-side lock
+ * // fetch active @entry from attach location
+ * [...]
+ * ret = bpf_mprog_query(attr, uattr, entry);
+ * // bpf_mprog user-side unlock
+ *
+ * Data/fast path:
+ *
+ * struct bpf_mprog_entry *entry;
+ * struct bpf_mprog_fp *fp;
+ * struct bpf_prog *prog;
+ * int ret = [...];
+ *
+ * rcu_read_lock();
+ * // fetch active @entry from attach location
+ * [...]
+ * bpf_mprog_foreach_prog(entry, fp, prog) {
+ * ret = bpf_prog_run(prog, [...]);
+ * // process @ret from program
+ * }
+ * [...]
+ * rcu_read_unlock();
+ *
+ * bpf_mprog locking considerations:
+ *
+ * bpf_mprog_{attach,detach,query}() must be protected by an external lock
+ * (like RTNL in case of tcx).
+ *
+ * bpf_mprog_entry pointer can be an __rcu annotated pointer (in case of tcx
+ * the netdevice has tcx_ingress and tcx_egress __rcu pointer) which gets
+ * updated via rcu_assign_pointer() pointing to the active bpf_mprog_entry of
+ * the bpf_mprog_bundle.
+ *
+ * Fast path accesses the active bpf_mprog_entry within RCU critical section
+ * (in case of tcx it runs in NAPI which provides RCU protection there,
+ * other users might need explicit rcu_read_lock()). The bpf_mprog_commit()
+ * assumes that for the old bpf_mprog_entry there are no inflight users
+ * anymore.
+ *
+ * The READ_ONCE()/WRITE_ONCE() pairing for bpf_mprog_fp's prog access is for
+ * the replacement case where we don't swap the bpf_mprog_entry.
+ */
+
+#define bpf_mprog_foreach_tuple(entry, fp, cp, t) \
+ for (fp = &entry->fp_items[0], cp = &entry->parent->cp_items[0];\
+ ({ \
+ t.prog = READ_ONCE(fp->prog); \
+ t.link = cp->link; \
+ t.prog; \
+ }); \
+ fp++, cp++)
+
+#define bpf_mprog_foreach_prog(entry, fp, p) \
+ for (fp = &entry->fp_items[0]; \
+ (p = READ_ONCE(fp->prog)); \
+ fp++)
+
+#define BPF_MPROG_MAX 64
+
+struct bpf_mprog_fp {
+ struct bpf_prog *prog;
+};
+
+struct bpf_mprog_cp {
+ struct bpf_link *link;
+};
+
+struct bpf_mprog_entry {
+ struct bpf_mprog_fp fp_items[BPF_MPROG_MAX];
+ struct bpf_mprog_bundle *parent;
+};
+
+struct bpf_mprog_bundle {
+ struct bpf_mprog_entry a;
+ struct bpf_mprog_entry b;
+ struct bpf_mprog_cp cp_items[BPF_MPROG_MAX];
+ struct bpf_prog *ref;
+ atomic64_t revision;
+ u32 count;
+};
+
+struct bpf_tuple {
+ struct bpf_prog *prog;
+ struct bpf_link *link;
+};
+
+static inline struct bpf_mprog_entry *
+bpf_mprog_peer(const struct bpf_mprog_entry *entry)
+{
+ if (entry == &entry->parent->a)
+ return &entry->parent->b;
+ else
+ return &entry->parent->a;
+}
+
+static inline void bpf_mprog_bundle_init(struct bpf_mprog_bundle *bundle)
+{
+ BUILD_BUG_ON(sizeof(bundle->a.fp_items[0]) > sizeof(u64));
+ BUILD_BUG_ON(ARRAY_SIZE(bundle->a.fp_items) !=
+ ARRAY_SIZE(bundle->cp_items));
+
+ memset(bundle, 0, sizeof(*bundle));
+ atomic64_set(&bundle->revision, 1);
+ bundle->a.parent = bundle;
+ bundle->b.parent = bundle;
+}
+
+static inline void bpf_mprog_inc(struct bpf_mprog_entry *entry)
+{
+ entry->parent->count++;
+}
+
+static inline void bpf_mprog_dec(struct bpf_mprog_entry *entry)
+{
+ entry->parent->count--;
+}
+
+static inline int bpf_mprog_max(void)
+{
+ return ARRAY_SIZE(((struct bpf_mprog_entry *)NULL)->fp_items) - 1;
+}
+
+static inline int bpf_mprog_total(struct bpf_mprog_entry *entry)
+{
+ int total = entry->parent->count;
+
+ WARN_ON_ONCE(total > bpf_mprog_max());
+ return total;
+}
+
+static inline bool bpf_mprog_exists(struct bpf_mprog_entry *entry,
+ struct bpf_prog *prog)
+{
+ const struct bpf_mprog_fp *fp;
+ const struct bpf_prog *tmp;
+
+ bpf_mprog_foreach_prog(entry, fp, tmp) {
+ if (tmp == prog)
+ return true;
+ }
+ return false;
+}
+
+static inline void bpf_mprog_mark_for_release(struct bpf_mprog_entry *entry,
+ struct bpf_tuple *tuple)
+{
+ WARN_ON_ONCE(entry->parent->ref);
+ if (!tuple->link)
+ entry->parent->ref = tuple->prog;
+}
+
+static inline void bpf_mprog_complete_release(struct bpf_mprog_entry *entry)
+{
+ /* In the non-link case prog deletions can only drop the reference
+ * to the prog after the bpf_mprog_entry got swapped and the
+ * bpf_mprog ensured that there are no inflight users anymore.
+ *
+ * Paired with bpf_mprog_mark_for_release().
+ */
+ if (entry->parent->ref) {
+ bpf_prog_put(entry->parent->ref);
+ entry->parent->ref = NULL;
+ }
+}
+
+static inline void bpf_mprog_revision_new(struct bpf_mprog_entry *entry)
+{
+ atomic64_inc(&entry->parent->revision);
+}
+
+static inline void bpf_mprog_commit(struct bpf_mprog_entry *entry)
+{
+ bpf_mprog_complete_release(entry);
+ bpf_mprog_revision_new(entry);
+}
+
+static inline u64 bpf_mprog_revision(struct bpf_mprog_entry *entry)
+{
+ return atomic64_read(&entry->parent->revision);
+}
+
+static inline void bpf_mprog_entry_copy(struct bpf_mprog_entry *dst,
+ struct bpf_mprog_entry *src)
+{
+ memcpy(dst->fp_items, src->fp_items, sizeof(src->fp_items));
+}
+
+static inline void bpf_mprog_entry_grow(struct bpf_mprog_entry *entry, int idx)
+{
+ int total = bpf_mprog_total(entry);
+
+ memmove(entry->fp_items + idx + 1,
+ entry->fp_items + idx,
+ (total - idx) * sizeof(struct bpf_mprog_fp));
+
+ memmove(entry->parent->cp_items + idx + 1,
+ entry->parent->cp_items + idx,
+ (total - idx) * sizeof(struct bpf_mprog_cp));
+}
+
+static inline void bpf_mprog_entry_shrink(struct bpf_mprog_entry *entry, int idx)
+{
+ /* Total array size is needed in this case to enure the NULL
+ * entry is copied at the end.
+ */
+ int total = ARRAY_SIZE(entry->fp_items);
+
+ memmove(entry->fp_items + idx,
+ entry->fp_items + idx + 1,
+ (total - idx - 1) * sizeof(struct bpf_mprog_fp));
+
+ memmove(entry->parent->cp_items + idx,
+ entry->parent->cp_items + idx + 1,
+ (total - idx - 1) * sizeof(struct bpf_mprog_cp));
+}
+
+static inline void bpf_mprog_read(struct bpf_mprog_entry *entry, u32 idx,
+ struct bpf_mprog_fp **fp,
+ struct bpf_mprog_cp **cp)
+{
+ *fp = &entry->fp_items[idx];
+ *cp = &entry->parent->cp_items[idx];
+}
+
+static inline void bpf_mprog_write(struct bpf_mprog_fp *fp,
+ struct bpf_mprog_cp *cp,
+ struct bpf_tuple *tuple)
+{
+ WRITE_ONCE(fp->prog, tuple->prog);
+ cp->link = tuple->link;
+}
+
+int bpf_mprog_attach(struct bpf_mprog_entry *entry,
+ struct bpf_mprog_entry **entry_new,
+ struct bpf_prog *prog_new, struct bpf_link *link,
+ struct bpf_prog *prog_old,
+ u32 flags, u32 id_or_fd, u64 revision);
+
+int bpf_mprog_detach(struct bpf_mprog_entry *entry,
+ struct bpf_mprog_entry **entry_new,
+ struct bpf_prog *prog, struct bpf_link *link,
+ u32 flags, u32 id_or_fd, u64 revision);
+
+int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr,
+ struct bpf_mprog_entry *entry);
+
+static inline bool bpf_mprog_supported(enum bpf_prog_type type)
+{
+ switch (type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ return true;
+ default:
+ return false;
+ }
+}
+#endif /* __BPF_MPROG_H */
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 5d732f48f787..c55810a43541 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -44,6 +44,7 @@
#define PHY_ID_BCM7366 0x600d8490
#define PHY_ID_BCM7346 0x600d8650
#define PHY_ID_BCM7362 0x600d84b0
+#define PHY_ID_BCM74165 0x359052c0
#define PHY_ID_BCM7425 0x600d86b0
#define PHY_ID_BCM7429 0x600d8730
#define PHY_ID_BCM7435 0x600d8750
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 00950cc03bff..a3462a9b8e18 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -267,5 +267,6 @@ MAX_BTF_TRACING_TYPE,
extern u32 btf_tracing_ids[];
extern u32 bpf_cgroup_btf_id[];
extern u32 bpf_local_storage_map_btf_id[];
+extern u32 btf_bpf_map_id[];
#endif
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 487350bb19c3..cec2d99ae902 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -90,13 +90,19 @@ void cn_del_callback(const struct cb_id *id);
* If @group is not zero, then message will be delivered
* to the specified group.
* @gfp_mask: GFP mask.
+ * @filter: Filter function to be used at netlink layer.
+ * @filter_data:Filter data to be supplied to the filter function
*
* It can be safely called from softirq context, but may silently
* fail under strong memory pressure.
*
* If there are no listeners for given group %-ESRCH can be returned.
*/
-int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 group, gfp_t gfp_mask);
+int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid,
+ u32 group, gfp_t gfp_mask,
+ int (*filter)(struct sock *dsk, struct sk_buff *skb,
+ void *data),
+ void *filter_data);
/**
* cn_netlink_send - Sends message to the specified groups.
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index db0f4fcfdaf4..e3b3b0fa2a8f 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -85,12 +85,10 @@ extern void icmpv6_param_prob_reason(struct sk_buff *skb,
struct flowi6;
struct in6_addr;
-extern void icmpv6_flow_init(struct sock *sk,
- struct flowi6 *fl6,
- u8 type,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- int oif);
+
+void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr, int oif);
static inline void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 839247a4f48e..0295b47c10a3 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -33,6 +33,7 @@ struct ipv6_devconf {
__s32 accept_ra_defrtr;
__u32 ra_defrtr_metric;
__s32 accept_ra_min_hop_limit;
+ __s32 accept_ra_min_rtr_lft;
__s32 accept_ra_pinfo;
__s32 ignore_routes_with_linkdown;
#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -199,14 +200,7 @@ struct inet6_cork {
u8 tclass;
};
-/**
- * struct ipv6_pinfo - ipv6 private area
- *
- * In the struct sock hierarchy (tcp6_sock, upd6_sock, etc)
- * this _must_ be the last member, so that inet6_sk_generic
- * is able to calculate its offset from the base struct sock
- * by using the struct proto->slab_obj_size member. -acme
- */
+/* struct ipv6_pinfo - ipv6 private area */
struct ipv6_pinfo {
struct in6_addr saddr;
struct in6_pktinfo sticky_pktinfo;
@@ -306,19 +300,19 @@ struct raw6_sock {
__u32 offset; /* checksum offset */
struct icmp6_filter filter;
__u32 ip6mr_table;
- /* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */
+
struct ipv6_pinfo inet6;
};
struct udp6_sock {
struct udp_sock udp;
- /* ipv6_pinfo has to be the last member of udp6_sock, see inet6_sk_generic */
+
struct ipv6_pinfo inet6;
};
struct tcp6_sock {
struct tcp_sock tcp;
- /* ipv6_pinfo has to be the last member of tcp6_sock, see inet6_sk_generic */
+
struct ipv6_pinfo inet6;
};
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 7308a1a7599b..4f2621e87634 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -316,7 +316,7 @@ LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority)
LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk)
LSM_HOOK(void, LSM_RET_VOID, sk_clone_security, const struct sock *sk,
struct sock *newsk)
-LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, struct sock *sk, u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, const struct sock *sk, u32 *secid)
LSM_HOOK(void, LSM_RET_VOID, sock_graft, struct sock *sk, struct socket *parent)
LSM_HOOK(int, 0, inet_conn_request, const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 0f06c2287b52..9b54c4f0677f 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -25,6 +25,7 @@
#define MARVELL_PHY_ID_88X3310 0x002b09a0
#define MARVELL_PHY_ID_88E2110 0x002b09b0
#define MARVELL_PHY_ID_88X2222 0x01410f10
+#define MARVELL_PHY_ID_88Q2110 0x002b0980
/* Marvel 88E1111 in Finisar SFP module with modified PHY ID */
#define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index c1b7008826e5..8fa23bdcedbf 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -537,6 +537,8 @@ static inline void mii_c73_mod_linkmode(unsigned long *adv, u16 *lpa)
int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
+int __mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask,
+ u16 set);
int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum,
u16 mask, u16 set);
@@ -564,6 +566,30 @@ int mdiobus_c45_modify(struct mii_bus *bus, int addr, int devad, u32 regnum,
int mdiobus_c45_modify_changed(struct mii_bus *bus, int addr, int devad,
u32 regnum, u16 mask, u16 set);
+static inline int __mdiodev_read(struct mdio_device *mdiodev, u32 regnum)
+{
+ return __mdiobus_read(mdiodev->bus, mdiodev->addr, regnum);
+}
+
+static inline int __mdiodev_write(struct mdio_device *mdiodev, u32 regnum,
+ u16 val)
+{
+ return __mdiobus_write(mdiodev->bus, mdiodev->addr, regnum, val);
+}
+
+static inline int __mdiodev_modify(struct mdio_device *mdiodev, u32 regnum,
+ u16 mask, u16 set)
+{
+ return __mdiobus_modify(mdiodev->bus, mdiodev->addr, regnum, mask, set);
+}
+
+static inline int __mdiodev_modify_changed(struct mdio_device *mdiodev,
+ u32 regnum, u16 mask, u16 set)
+{
+ return __mdiobus_modify_changed(mdiodev->bus, mdiodev->addr, regnum,
+ mask, set);
+}
+
static inline int mdiodev_read(struct mdio_device *mdiodev, u32 regnum)
{
return mdiobus_read(mdiodev->bus, mdiodev->addr, regnum);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 33344a71c3e3..b3ad6b9852ec 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -464,10 +464,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 reformat_add_esp_trasport[0x1];
u8 reformat_l2_to_l3_esp_tunnel[0x1];
- u8 reserved_at_42[0x1];
+ u8 reformat_add_esp_transport_over_udp[0x1];
u8 reformat_del_esp_trasport[0x1];
u8 reformat_l3_esp_tunnel_to_l2[0x1];
- u8 reserved_at_45[0x1];
+ u8 reformat_del_esp_transport_over_udp[0x1];
u8 execute_aso[0x1];
u8 reserved_at_47[0x19];
@@ -6665,9 +6665,12 @@ enum mlx5_reformat_ctx_type {
MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5,
MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL = 0x6,
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV4 = 0x7,
MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8,
MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2 = 0x9,
+ MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP = 0xa,
MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb,
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV6 = 0xc,
MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf,
MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10,
MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b828c7a75be2..11652e464f5d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1930,8 +1930,7 @@ enum netdev_ml_priv_type {
*
* @rx_handler: handler for received packets
* @rx_handler_data: XXX: need comments on this one
- * @miniq_ingress: ingress/clsact qdisc specific data for
- * ingress processing
+ * @tcx_ingress: BPF & clsact qdisc specific data for ingress processing
* @ingress_queue: XXX: need comments on this one
* @nf_hooks_ingress: netfilter hooks executed for ingress packets
* @broadcast: hw bcast address
@@ -1952,8 +1951,7 @@ enum netdev_ml_priv_type {
* @xps_maps: all CPUs/RXQs maps for XPS device
*
* @xps_maps: XXX: need comments on this one
- * @miniq_egress: clsact qdisc specific data for
- * egress processing
+ * @tcx_egress: BPF & clsact qdisc specific data for egress processing
* @nf_hooks_egress: netfilter hooks executed for egress packets
* @qdisc_hash: qdisc hash table
* @watchdog_timeo: Represents the timeout that is used by
@@ -2045,6 +2043,8 @@ enum netdev_ml_priv_type {
* receive offload (GRO)
* @gro_ipv4_max_size: Maximum size of aggregated packet in generic
* receive offload (GRO), for IPv4.
+ * @xdp_zc_max_segs: Maximum number of segments supported by AF_XDP
+ * zero copy driver
*
* @dev_addr_shadow: Copy of @dev_addr to catch direct writes.
* @linkwatch_dev_tracker: refcount tracker used by linkwatch.
@@ -2250,11 +2250,11 @@ struct net_device {
#define GRO_MAX_SIZE (8 * 65535u)
unsigned int gro_max_size;
unsigned int gro_ipv4_max_size;
+ unsigned int xdp_zc_max_segs;
rx_handler_func_t __rcu *rx_handler;
void __rcu *rx_handler_data;
-
-#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc __rcu *miniq_ingress;
+#ifdef CONFIG_NET_XGRESS
+ struct bpf_mprog_entry __rcu *tcx_ingress;
#endif
struct netdev_queue __rcu *ingress_queue;
#ifdef CONFIG_NETFILTER_INGRESS
@@ -2282,8 +2282,8 @@ struct net_device {
#ifdef CONFIG_XPS
struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
#endif
-#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc __rcu *miniq_egress;
+#ifdef CONFIG_NET_XGRESS
+ struct bpf_mprog_entry __rcu *tcx_egress;
#endif
#ifdef CONFIG_NETFILTER_EGRESS
struct nf_hook_entries __rcu *nf_hooks_egress;
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 9eec3f4f5351..75d7de34c908 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -50,6 +50,7 @@ struct netlink_kernel_cfg {
struct mutex *cb_mutex;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
+ void (*release) (struct sock *sk, unsigned long *groups);
};
struct sock *__netlink_kernel_create(struct net *net, int unit,
@@ -227,6 +228,11 @@ bool netlink_strict_get_check(struct sk_buff *skb);
int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
__u32 group, gfp_t allocation);
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
+ __u32 portid, __u32 group, gfp_t allocation,
+ int (*filter)(struct sock *dsk,
+ struct sk_buff *skb, void *data),
+ void *filter_data);
int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code);
int netlink_register_notifier(struct notifier_block *nb);
int netlink_unregister_notifier(struct notifier_block *nb);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 11c1e91563d4..b254848a9c99 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1826,6 +1826,7 @@ int genphy_c45_an_config_aneg(struct phy_device *phydev);
int genphy_c45_an_disable_aneg(struct phy_device *phydev);
int genphy_c45_read_mdix(struct phy_device *phydev);
int genphy_c45_pma_read_abilities(struct phy_device *phydev);
+int genphy_c45_pma_baset1_read_abilities(struct phy_device *phydev);
int genphy_c45_read_eee_abilities(struct phy_device *phydev);
int genphy_c45_pma_baset1_read_master_slave(struct phy_device *phydev);
int genphy_c45_read_status(struct phy_device *phydev);
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 1817940a3418..789c516c6b4a 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -9,6 +9,7 @@ struct device_node;
struct ethtool_cmd;
struct fwnode_handle;
struct net_device;
+struct phylink;
enum {
MLO_PAUSE_NONE,
@@ -200,8 +201,6 @@ enum phylink_op_type {
* struct phylink_config - PHYLINK configuration structure
* @dev: a pointer to a struct device associated with the MAC
* @type: operation type of PHYLINK instance
- * @legacy_pre_march2020: driver has not been updated for March 2020 updates
- * (See commit 7cceb599d15d ("net: phylink: avoid mac_config calls")
* @poll_fixed_state: if true, starts link_poll,
* if MAC link is at %MLO_AN_FIXED mode.
* @mac_managed_pm: if true, indicate the MAC driver is responsible for PHY PM.
@@ -215,7 +214,6 @@ enum phylink_op_type {
struct phylink_config {
struct device *dev;
enum phylink_op_type type;
- bool legacy_pre_march2020;
bool poll_fixed_state;
bool mac_managed_pm;
bool ovr_an_inband;
@@ -229,11 +227,9 @@ struct phylink_config {
* struct phylink_mac_ops - MAC operations structure.
* @validate: Validate and update the link configuration.
* @mac_select_pcs: Select a PCS for the interface mode.
- * @mac_pcs_get_state: Read the current link state from the hardware.
* @mac_prepare: prepare for a major reconfiguration of the interface.
* @mac_config: configure the MAC for the selected mode and state.
* @mac_finish: finish a major reconfiguration of the interface.
- * @mac_an_restart: restart 802.3z BaseX autonegotiation.
* @mac_link_down: take the link down.
* @mac_link_up: allow the link to come up.
*
@@ -245,15 +241,12 @@ struct phylink_mac_ops {
struct phylink_link_state *state);
struct phylink_pcs *(*mac_select_pcs)(struct phylink_config *config,
phy_interface_t interface);
- void (*mac_pcs_get_state)(struct phylink_config *config,
- struct phylink_link_state *state);
int (*mac_prepare)(struct phylink_config *config, unsigned int mode,
phy_interface_t iface);
void (*mac_config)(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state);
int (*mac_finish)(struct phylink_config *config, unsigned int mode,
phy_interface_t iface);
- void (*mac_an_restart)(struct phylink_config *config);
void (*mac_link_down)(struct phylink_config *config, unsigned int mode,
phy_interface_t interface);
void (*mac_link_up)(struct phylink_config *config,
@@ -314,25 +307,6 @@ struct phylink_pcs *mac_select_pcs(struct phylink_config *config,
phy_interface_t interface);
/**
- * mac_pcs_get_state() - Read the current inband link state from the hardware
- * @config: a pointer to a &struct phylink_config.
- * @state: a pointer to a &struct phylink_link_state.
- *
- * Read the current inband link state from the MAC PCS, reporting the
- * current speed in @state->speed, duplex mode in @state->duplex, pause
- * mode in @state->pause using the %MLO_PAUSE_RX and %MLO_PAUSE_TX bits,
- * negotiation completion state in @state->an_complete, and link up state
- * in @state->link. If possible, @state->lp_advertising should also be
- * populated.
- *
- * Note: This is a legacy method. This function will not be called unless
- * legacy_pre_march2020 is set in &struct phylink_config and there is no
- * PCS attached.
- */
-void mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state);
-
-/**
* mac_prepare() - prepare to change the PHY interface mode
* @config: a pointer to a &struct phylink_config.
* @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND.
@@ -368,17 +342,9 @@ int mac_prepare(struct phylink_config *config, unsigned int mode,
* guaranteed to be correct, and so any mac_config() implementation must
* never reference these fields.
*
- * Note: For legacy March 2020 drivers (drivers with legacy_pre_march2020 set
- * in their &phylnk_config and which don't have a PCS), this function will be
- * called on each link up event, and to also change the in-band advert. For
- * non-legacy drivers, it will only be called to reconfigure the MAC for a
- * "major" change in e.g. interface mode. It will not be called for changes
- * in speed, duplex or pause modes or to change the in-band advertisement.
- * In any case, it is strongly preferred that speed, duplex and pause settings
- * are handled in the mac_link_up() method and not in this method.
- *
- * (this requires a rewrite - please refer to mac_link_up() for situations
- * where the PCS and MAC are not tightly integrated.)
+ * This will only be called to reconfigure the MAC for a "major" change in
+ * e.g. interface mode. It will not be called for changes in speed, duplex
+ * or pause modes or to change the in-band advertisement.
*
* In all negotiation modes, as defined by @mode, @state->pause indicates the
* pause settings which should be applied as follows. If %MLO_PAUSE_AN is not
@@ -410,7 +376,7 @@ int mac_prepare(struct phylink_config *config, unsigned int mode,
* 1000base-X or Cisco SGMII mode depending on the @state->interface
* mode). In both cases, link state management (whether the link
* is up or not) is performed by the MAC, and reported via the
- * mac_pcs_get_state() callback. Changes in link state must be made
+ * pcs_get_state() callback. Changes in link state must be made
* by calling phylink_mac_change().
*
* Interface mode specific details are mentioned below.
@@ -459,16 +425,6 @@ int mac_finish(struct phylink_config *config, unsigned int mode,
phy_interface_t iface);
/**
- * mac_an_restart() - restart 802.3z BaseX autonegotiation
- * @config: a pointer to a &struct phylink_config.
- *
- * Note: This is a legacy method. This function will not be called unless
- * legacy_pre_march2020 is set in &struct phylink_config and there is no
- * PCS attached.
- */
-void mac_an_restart(struct phylink_config *config);
-
-/**
* mac_link_down() - take the link down
* @config: a pointer to a &struct phylink_config.
* @mode: link autonegotiation mode
@@ -520,14 +476,19 @@ struct phylink_pcs_ops;
/**
* struct phylink_pcs - PHYLINK PCS instance
* @ops: a pointer to the &struct phylink_pcs_ops structure
+ * @phylink: pointer to &struct phylink_config
* @neg_mode: provide PCS neg mode via "mode" argument
* @poll: poll the PCS for link changes
*
* This structure is designed to be embedded within the PCS private data,
* and will be passed between phylink and the PCS.
+ *
+ * The @phylink member is private to phylink and must not be touched by
+ * the PCS driver.
*/
struct phylink_pcs {
const struct phylink_pcs_ops *ops;
+ struct phylink *phylink;
bool neg_mode;
bool poll;
};
@@ -535,6 +496,10 @@ struct phylink_pcs {
/**
* struct phylink_pcs_ops - MAC PCS operations structure.
* @pcs_validate: validate the link configuration.
+ * @pcs_enable: enable the PCS.
+ * @pcs_disable: disable the PCS.
+ * @pcs_pre_config: pre-mac_config method (for errata)
+ * @pcs_post_config: post-mac_config method (for arrata)
* @pcs_get_state: read the current MAC PCS link state from the hardware.
* @pcs_config: configure the MAC PCS for the selected mode and state.
* @pcs_an_restart: restart 802.3z BaseX autonegotiation.
@@ -544,6 +509,12 @@ struct phylink_pcs {
struct phylink_pcs_ops {
int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported,
const struct phylink_link_state *state);
+ int (*pcs_enable)(struct phylink_pcs *pcs);
+ void (*pcs_disable)(struct phylink_pcs *pcs);
+ void (*pcs_pre_config)(struct phylink_pcs *pcs,
+ phy_interface_t interface);
+ int (*pcs_post_config)(struct phylink_pcs *pcs,
+ phy_interface_t interface);
void (*pcs_get_state)(struct phylink_pcs *pcs,
struct phylink_link_state *state);
int (*pcs_config)(struct phylink_pcs *pcs, unsigned int neg_mode,
@@ -574,6 +545,18 @@ int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
const struct phylink_link_state *state);
/**
+ * pcs_enable() - enable the PCS.
+ * @pcs: a pointer to a &struct phylink_pcs.
+ */
+int pcs_enable(struct phylink_pcs *pcs);
+
+/**
+ * pcs_disable() - disable the PCS.
+ * @pcs: a pointer to a &struct phylink_pcs.
+ */
+void pcs_disable(struct phylink_pcs *pcs);
+
+/**
* pcs_get_state() - Read the current inband link state from the hardware
* @pcs: a pointer to a &struct phylink_pcs.
* @state: a pointer to a &struct phylink_link_state.
@@ -585,8 +568,8 @@ int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
* in @state->link. If possible, @state->lp_advertising should also be
* populated.
*
- * When present, this overrides mac_pcs_get_state() in &struct
- * phylink_mac_ops.
+ * When present, this overrides pcs_get_state() in &struct
+ * phylink_pcs_ops.
*/
void pcs_get_state(struct phylink_pcs *pcs,
struct phylink_link_state *state);
@@ -677,6 +660,7 @@ int phylink_fwnode_phy_connect(struct phylink *pl,
void phylink_disconnect_phy(struct phylink *);
void phylink_mac_change(struct phylink *, bool up);
+void phylink_pcs_change(struct phylink_pcs *, bool up);
void phylink_start(struct phylink *);
void phylink_stop(struct phylink *);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 7f17acf29dda..7b949292908a 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -138,6 +138,8 @@ static inline int rcu_needs_cpu(void)
return 0;
}
+static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
+
/*
* Take advantage of the fact that there is only one CPU, which
* allows us to ignore virtualization-based context switches.
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 56bccb5a8fde..126f6b418f6a 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -21,6 +21,7 @@ void rcu_softirq_qs(void);
void rcu_note_context_switch(bool preempt);
int rcu_needs_cpu(void);
void rcu_cpu_stall_reset(void);
+void rcu_request_urgent_qs_task(struct task_struct *t);
/*
* Note a virtualization-based context switch. This is simply a
diff --git a/include/linux/security.h b/include/linux/security.h
index 32828502f09e..994cf099d9ac 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1439,7 +1439,8 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u
int security_sk_alloc(struct sock *sk, int family, gfp_t priority);
void security_sk_free(struct sock *sk);
void security_sk_clone(const struct sock *sk, struct sock *newsk);
-void security_sk_classify_flow(struct sock *sk, struct flowi_common *flic);
+void security_sk_classify_flow(const struct sock *sk,
+ struct flowi_common *flic);
void security_req_classify_flow(const struct request_sock *req,
struct flowi_common *flic);
void security_sock_graft(struct sock*sk, struct socket *parent);
@@ -1597,7 +1598,7 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk)
{
}
-static inline void security_sk_classify_flow(struct sock *sk,
+static inline void security_sk_classify_flow(const struct sock *sk,
struct flowi_common *flic)
{
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 91ed66952580..16a49ba534e4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -441,8 +441,6 @@ static inline bool skb_frag_must_loop(struct page *p)
copied += p_len, p++, p_off = 0, \
p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \
-#define HAVE_HW_TIME_STAMP
-
/**
* struct skb_shared_hwtstamps - hardware time stamps
* @hwtstamp: hardware time stamp transformed into duration
@@ -944,7 +942,7 @@ struct sk_buff {
__u8 __mono_tc_offset[0];
/* public: */
__u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
__u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
__u8 tc_skip_classify:1;
#endif
@@ -993,7 +991,7 @@ struct sk_buff {
__u8 csum_not_inet:1;
#endif
-#ifdef CONFIG_NET_SCHED
+#if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS)
__u16 tc_index; /* traffic control index */
#endif
@@ -4023,7 +4021,7 @@ __skb_header_pointer(const struct sk_buff *skb, int offset, int len,
if (likely(hlen - offset >= len))
return (void *)data + offset;
- if (!skb || !buffer || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0))
+ if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0))
return NULL;
return buffer;
@@ -4036,6 +4034,14 @@ skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer)
skb_headlen(skb), buffer);
}
+static inline void * __must_check
+skb_pointer_if_linear(const struct sk_buff *skb, int offset, int len)
+{
+ if (likely(skb_headlen(skb) - offset >= len))
+ return skb->data + offset;
+ return NULL;
+}
+
/**
* skb_needs_linearize - check if we need to linearize a given skb
* depending on the given device features.
diff --git a/include/linux/smscphy.h b/include/linux/smscphy.h
index e1c88627755a..1a6a851d2cf8 100644
--- a/include/linux/smscphy.h
+++ b/include/linux/smscphy.h
@@ -38,4 +38,38 @@ int smsc_phy_set_tunable(struct phy_device *phydev,
struct ethtool_tunable *tuna, const void *data);
int smsc_phy_probe(struct phy_device *phydev);
+#define MII_LAN874X_PHY_MMD_WOL_WUCSR 0x8010
+#define MII_LAN874X_PHY_MMD_WOL_WUF_CFGA 0x8011
+#define MII_LAN874X_PHY_MMD_WOL_WUF_CFGB 0x8012
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK0 0x8021
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK1 0x8022
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK2 0x8023
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK3 0x8024
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK4 0x8025
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK5 0x8026
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK6 0x8027
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK7 0x8028
+#define MII_LAN874X_PHY_MMD_WOL_RX_ADDRA 0x8061
+#define MII_LAN874X_PHY_MMD_WOL_RX_ADDRB 0x8062
+#define MII_LAN874X_PHY_MMD_WOL_RX_ADDRC 0x8063
+#define MII_LAN874X_PHY_MMD_MCFGR 0x8064
+
+#define MII_LAN874X_PHY_PME1_SET (2 << 13)
+#define MII_LAN874X_PHY_PME2_SET (2 << 11)
+#define MII_LAN874X_PHY_PME_SELF_CLEAR BIT(9)
+#define MII_LAN874X_PHY_WOL_PFDA_FR BIT(7)
+#define MII_LAN874X_PHY_WOL_WUFR BIT(6)
+#define MII_LAN874X_PHY_WOL_MPR BIT(5)
+#define MII_LAN874X_PHY_WOL_BCAST_FR BIT(4)
+#define MII_LAN874X_PHY_WOL_PFDAEN BIT(3)
+#define MII_LAN874X_PHY_WOL_WUEN BIT(2)
+#define MII_LAN874X_PHY_WOL_MPEN BIT(1)
+#define MII_LAN874X_PHY_WOL_BCSTEN BIT(0)
+
+#define MII_LAN874X_PHY_WOL_FILTER_EN BIT(15)
+#define MII_LAN874X_PHY_WOL_FILTER_MCASTTEN BIT(9)
+#define MII_LAN874X_PHY_WOL_FILTER_BCSTEN BIT(8)
+
+#define MII_LAN874X_PHY_PME_SELF_CLEAR_DELAY 0x1000 /* 81 milliseconds */
+
#endif /* __LINUX_SMSCPHY_H__ */
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 06090538fe2d..ef67dba775d0 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -204,6 +204,19 @@ struct dwmac4_addrs {
u32 mtl_low_cred_offset;
};
+#define STMMAC_FLAG_HAS_INTEGRATED_PCS BIT(0)
+#define STMMAC_FLAG_SPH_DISABLE BIT(1)
+#define STMMAC_FLAG_USE_PHY_WOL BIT(2)
+#define STMMAC_FLAG_HAS_SUN8I BIT(3)
+#define STMMAC_FLAG_TSO_EN BIT(4)
+#define STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP BIT(5)
+#define STMMAC_FLAG_VLAN_FAIL_Q_EN BIT(6)
+#define STMMAC_FLAG_MULTI_MSI_EN BIT(7)
+#define STMMAC_FLAG_EXT_SNAPSHOT_EN BIT(8)
+#define STMMAC_FLAG_INT_SNAPSHOT_EN BIT(9)
+#define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI BIT(10)
+#define STMMAC_FLAG_EN_TX_LPI_CLOCKGATING BIT(11)
+
struct plat_stmmacenet_data {
int bus_id;
int phy_addr;
@@ -266,22 +279,14 @@ struct plat_stmmacenet_data {
struct reset_control *stmmac_ahb_rst;
struct stmmac_axi *axi;
int has_gmac4;
- bool has_sun8i;
- bool tso_en;
int rss_en;
int mac_port_sel_speed;
- bool en_tx_lpi_clockgating;
- bool rx_clk_runs_in_lpi;
int has_xgmac;
- bool vlan_fail_q_en;
u8 vlan_fail_q;
unsigned int eee_usecs_rate;
struct pci_dev *pdev;
int int_snapshot_num;
int ext_snapshot_num;
- bool int_snapshot_en;
- bool ext_snapshot_en;
- bool multi_msi_en;
int msi_mac_vec;
int msi_wol_vec;
int msi_lpi_vec;
@@ -289,10 +294,7 @@ struct plat_stmmacenet_data {
int msi_sfty_ue_vec;
int msi_rx_base_vec;
int msi_tx_base_vec;
- bool use_phy_wol;
- bool sph_disable;
- bool serdes_up_after_phy_linkup;
const struct dwmac4_addrs *dwmac4_addrs;
- bool has_integrated_pcs;
+ unsigned int flags;
};
#endif
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 91a37c99ba66..d16abdb3541a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -172,6 +172,8 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
return (struct tcp_request_sock *)req;
}
+#define TCP_RMEM_TO_WIN_SCALE 8
+
struct tcp_sock {
/* inet_connection_sock has to be the first member of tcp_sock */
struct inet_connection_sock inet_conn;
@@ -238,7 +240,7 @@ struct tcp_sock {
u32 window_clamp; /* Maximal window to advertise */
u32 rcv_ssthresh; /* Current window clamp */
-
+ u8 scaling_ratio; /* see tcp_win_from_space() */
/* Information of the most recently (s)acked skb */
struct tcp_rack {
u64 mstamp; /* (Re)sent time of the skb */
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 3930e676436c..e66d04dbe56a 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -867,7 +867,8 @@ extern int perf_uprobe_init(struct perf_event *event,
extern void perf_uprobe_destroy(struct perf_event *event);
extern int bpf_get_uprobe_info(const struct perf_event *event,
u32 *fd_type, const char **filename,
- u64 *probe_offset, bool perf_type_tracepoint);
+ u64 *probe_offset, u64 *probe_addr,
+ bool perf_type_tracepoint);
#endif
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index a2b953b57689..f291a3b0f9e5 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -30,6 +30,7 @@
FN(TCP_OVERWINDOW) \
FN(TCP_OFOMERGE) \
FN(TCP_RFC7323_PAWS) \
+ FN(TCP_OLD_SEQUENCE) \
FN(TCP_INVALID_SEQUENCE) \
FN(TCP_RESET) \
FN(TCP_INVALID_SYN) \
@@ -188,6 +189,8 @@ enum skb_drop_reason {
* LINUX_MIB_PAWSESTABREJECTED
*/
SKB_DROP_REASON_TCP_RFC7323_PAWS,
+ /** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
+ SKB_DROP_REASON_TCP_OLD_SEQUENCE,
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
/** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */
diff --git a/include/net/dsa.h b/include/net/dsa.h
index d309ee7ed04b..0b9c6aa27047 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -873,8 +873,6 @@ struct dsa_switch_ops {
struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds,
int port,
phy_interface_t iface);
- int (*phylink_mac_link_state)(struct dsa_switch *ds, int port,
- struct phylink_link_state *state);
int (*phylink_mac_prepare)(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface);
@@ -884,7 +882,6 @@ struct dsa_switch_ops {
int (*phylink_mac_finish)(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface);
- void (*phylink_mac_an_restart)(struct dsa_switch *ds, int port);
void (*phylink_mac_link_down)(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index ed4b6ad3fcac..e8750b4ef7e1 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -52,6 +52,7 @@ struct ip_tunnel_key {
u8 tos; /* TOS for IPv4, TC for IPv6 */
u8 ttl; /* TTL for IPv4, HL for IPv6 */
__be32 label; /* Flow Label for IPv6 */
+ u32 nhid;
__be16 tp_src;
__be16 tp_dst;
__u8 flow_flags;
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index cf0d81be5a96..165e7a03b8e9 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -100,7 +100,7 @@ nf_ct_expect_find_get(struct net *net,
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
- const struct nf_conntrack_tuple *tuple);
+ const struct nf_conntrack_tuple *tuple, bool unlink);
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
u32 portid, int report);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index f00374718159..7a41c4791536 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -152,7 +152,7 @@ struct netns_ipv4 {
u8 sysctl_tcp_abort_on_overflow;
u8 sysctl_tcp_fack; /* obsolete */
int sysctl_tcp_max_reordering;
- int sysctl_tcp_adv_win_scale;
+ int sysctl_tcp_adv_win_scale; /* obsolete */
u8 sysctl_tcp_dsack;
u8 sysctl_tcp_app_win;
u8 sysctl_tcp_frto;
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 126f9e294389..f1d5cc1fa13b 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -18,9 +18,8 @@
*
* API keeps track of in-flight pages, in-order to let API user know
* when it is safe to dealloactor page_pool object. Thus, API users
- * must make sure to call page_pool_release_page() when a page is
- * "leaving" the page_pool. Or call page_pool_put_page() where
- * appropiate. For maintaining correct accounting.
+ * must call page_pool_put_page() where appropriate and only attach
+ * the page to a page_pool-aware objects, like skbs marked for recycling.
*
* API user must only call page_pool_put_page() once on a page, as it
* will either recycle the page, or in case of elevated refcnt, it
@@ -251,7 +250,6 @@ void page_pool_unlink_napi(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
struct xdp_mem_info *mem);
-void page_pool_release_page(struct page_pool *pool, struct page *page);
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count);
#else
@@ -268,10 +266,6 @@ static inline void page_pool_use_xdp_mem(struct page_pool *pool,
struct xdp_mem_info *mem)
{
}
-static inline void page_pool_release_page(struct page_pool *pool,
- struct page *page)
-{
-}
static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count)
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a2ea45c7b53e..139cd09828af 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -866,6 +866,7 @@ struct tc_htb_qopt_offload {
u32 parent_classid;
u16 classid;
u16 qid;
+ u32 quantum;
u64 rate;
u64 ceil;
u8 prio;
diff --git a/include/net/route.h b/include/net/route.h
index 5a5c726472bd..d8d150155195 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -163,7 +163,7 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
}
static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi4 *fl4,
- struct sock *sk,
+ const struct sock *sk,
__be32 daddr, __be32 saddr,
__be16 dport, __be16 sport,
__u8 proto, __u8 tos, int oif)
@@ -309,7 +309,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst,
__be32 src, int oif, u8 protocol,
__be16 sport, __be16 dport,
- struct sock *sk)
+ const struct sock *sk)
{
struct net *net = sock_net(sk);
struct rtable *rt;
@@ -330,7 +330,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst,
static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable *rt,
__be16 orig_sport, __be16 orig_dport,
__be16 sport, __be16 dport,
- struct sock *sk)
+ const struct sock *sk)
{
if (sport != orig_sport || dport != orig_dport) {
fl4->fl4_dport = dport;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e92f73bb3198..15be2d96b06d 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -703,7 +703,7 @@ int skb_do_redirect(struct sk_buff *);
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
return skb->tc_at_ingress;
#else
return false;
diff --git a/include/net/sock.h b/include/net/sock.h
index 2eb916d1ff64..7ae44bf866af 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1339,6 +1339,7 @@ struct proto {
struct kmem_cache *slab;
unsigned int obj_size;
+ unsigned int ipv6_pinfo_offset;
slab_flags_t slab_flags;
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index ca0312b78294..4d324e2a2eef 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -231,6 +231,7 @@ enum switchdev_notifier_type {
SWITCHDEV_BRPORT_OFFLOADED,
SWITCHDEV_BRPORT_UNOFFLOADED,
+ SWITCHDEV_BRPORT_REPLAY,
};
struct switchdev_notifier_info {
@@ -299,6 +300,11 @@ void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
const void *ctx,
struct notifier_block *atomic_nb,
struct notifier_block *blocking_nb);
+int switchdev_bridge_port_replay(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack);
void switchdev_deferred_process(void);
int switchdev_port_attr_set(struct net_device *dev,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0ca972ebd3dd..6ebf54992ffe 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -350,7 +350,6 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
bool force_schedule);
-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
static inline void tcp_dec_quickack_mode(struct sock *sk,
const unsigned int pkts)
{
@@ -606,7 +605,6 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
unsigned int mss_now, gfp_t gfp);
void tcp_send_probe0(struct sock *);
-void tcp_send_partial(struct sock *);
int tcp_write_wakeup(struct sock *, int mib);
void tcp_send_fin(struct sock *sk);
void tcp_send_active_reset(struct sock *sk, gfp_t priority);
@@ -1432,13 +1430,39 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
__u32 *window_clamp, int wscale_ok,
__u8 *rcv_wscale, __u32 init_rcv_wnd);
+static inline int __tcp_win_from_space(u8 scaling_ratio, int space)
+{
+ s64 scaled_space = (s64)space * scaling_ratio;
+
+ return scaled_space >> TCP_RMEM_TO_WIN_SCALE;
+}
+
static inline int tcp_win_from_space(const struct sock *sk, int space)
{
- int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
+ return __tcp_win_from_space(tcp_sk(sk)->scaling_ratio, space);
+}
+
+/* inverse of __tcp_win_from_space() */
+static inline int __tcp_space_from_win(u8 scaling_ratio, int win)
+{
+ u64 val = (u64)win << TCP_RMEM_TO_WIN_SCALE;
- return tcp_adv_win_scale <= 0 ?
- (space>>(-tcp_adv_win_scale)) :
- space - (space>>tcp_adv_win_scale);
+ do_div(val, scaling_ratio);
+ return val;
+}
+
+static inline int tcp_space_from_win(const struct sock *sk, int win)
+{
+ return __tcp_space_from_win(tcp_sk(sk)->scaling_ratio, win);
+}
+
+static inline void tcp_scaling_ratio_init(struct sock *sk)
+{
+ /* Assume a conservative default of 1200 bytes of payload per 4K page.
+ * This may be adjusted later in tcp_measure_rcv_mss().
+ */
+ tcp_sk(sk)->scaling_ratio = (1200 << TCP_RMEM_TO_WIN_SCALE) /
+ SKB_TRUESIZE(4096);
}
/* Note: caller must be prepared to deal with negative returns */
diff --git a/include/net/tcx.h b/include/net/tcx.h
new file mode 100644
index 000000000000..264f147953ba
--- /dev/null
+++ b/include/net/tcx.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Isovalent */
+#ifndef __NET_TCX_H
+#define __NET_TCX_H
+
+#include <linux/bpf.h>
+#include <linux/bpf_mprog.h>
+
+#include <net/sch_generic.h>
+
+struct mini_Qdisc;
+
+struct tcx_entry {
+ struct mini_Qdisc __rcu *miniq;
+ struct bpf_mprog_bundle bundle;
+ bool miniq_active;
+ struct rcu_head rcu;
+};
+
+struct tcx_link {
+ struct bpf_link link;
+ struct net_device *dev;
+ u32 location;
+};
+
+static inline void tcx_set_ingress(struct sk_buff *skb, bool ingress)
+{
+#ifdef CONFIG_NET_XGRESS
+ skb->tc_at_ingress = ingress;
+#endif
+}
+
+#ifdef CONFIG_NET_XGRESS
+static inline struct tcx_entry *tcx_entry(struct bpf_mprog_entry *entry)
+{
+ struct bpf_mprog_bundle *bundle = entry->parent;
+
+ return container_of(bundle, struct tcx_entry, bundle);
+}
+
+static inline struct tcx_link *tcx_link(struct bpf_link *link)
+{
+ return container_of(link, struct tcx_link, link);
+}
+
+static inline const struct tcx_link *tcx_link_const(const struct bpf_link *link)
+{
+ return tcx_link((struct bpf_link *)link);
+}
+
+void tcx_inc(void);
+void tcx_dec(void);
+
+static inline void tcx_entry_sync(void)
+{
+ /* bpf_mprog_entry got a/b swapped, therefore ensure that
+ * there are no inflight users on the old one anymore.
+ */
+ synchronize_rcu();
+}
+
+static inline void
+tcx_entry_update(struct net_device *dev, struct bpf_mprog_entry *entry,
+ bool ingress)
+{
+ ASSERT_RTNL();
+ if (ingress)
+ rcu_assign_pointer(dev->tcx_ingress, entry);
+ else
+ rcu_assign_pointer(dev->tcx_egress, entry);
+}
+
+static inline struct bpf_mprog_entry *
+tcx_entry_fetch(struct net_device *dev, bool ingress)
+{
+ ASSERT_RTNL();
+ if (ingress)
+ return rcu_dereference_rtnl(dev->tcx_ingress);
+ else
+ return rcu_dereference_rtnl(dev->tcx_egress);
+}
+
+static inline struct bpf_mprog_entry *tcx_entry_create(void)
+{
+ struct tcx_entry *tcx = kzalloc(sizeof(*tcx), GFP_KERNEL);
+
+ if (tcx) {
+ bpf_mprog_bundle_init(&tcx->bundle);
+ return &tcx->bundle.a;
+ }
+ return NULL;
+}
+
+static inline void tcx_entry_free(struct bpf_mprog_entry *entry)
+{
+ kfree_rcu(tcx_entry(entry), rcu);
+}
+
+static inline struct bpf_mprog_entry *
+tcx_entry_fetch_or_create(struct net_device *dev, bool ingress, bool *created)
+{
+ struct bpf_mprog_entry *entry = tcx_entry_fetch(dev, ingress);
+
+ *created = false;
+ if (!entry) {
+ entry = tcx_entry_create();
+ if (!entry)
+ return NULL;
+ *created = true;
+ }
+ return entry;
+}
+
+static inline void tcx_skeys_inc(bool ingress)
+{
+ tcx_inc();
+ if (ingress)
+ net_inc_ingress_queue();
+ else
+ net_inc_egress_queue();
+}
+
+static inline void tcx_skeys_dec(bool ingress)
+{
+ if (ingress)
+ net_dec_ingress_queue();
+ else
+ net_dec_egress_queue();
+ tcx_dec();
+}
+
+static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry,
+ const bool active)
+{
+ ASSERT_RTNL();
+ tcx_entry(entry)->miniq_active = active;
+}
+
+static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry)
+{
+ ASSERT_RTNL();
+ return bpf_mprog_total(entry) || tcx_entry(entry)->miniq_active;
+}
+
+static inline enum tcx_action_base tcx_action_code(struct sk_buff *skb,
+ int code)
+{
+ switch (code) {
+ case TCX_PASS:
+ skb->tc_index = qdisc_skb_cb(skb)->tc_classid;
+ fallthrough;
+ case TCX_DROP:
+ case TCX_REDIRECT:
+ return code;
+ case TCX_NEXT:
+ default:
+ return TCX_NEXT;
+ }
+}
+#endif /* CONFIG_NET_XGRESS */
+
+#if defined(CONFIG_NET_XGRESS) && defined(CONFIG_BPF_SYSCALL)
+int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
+void tcx_uninstall(struct net_device *dev, bool ingress);
+
+int tcx_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr);
+
+static inline void dev_tcx_uninstall(struct net_device *dev)
+{
+ ASSERT_RTNL();
+ tcx_uninstall(dev, true);
+ tcx_uninstall(dev, false);
+}
+#else
+static inline int tcx_prog_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int tcx_link_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int tcx_prog_detach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int tcx_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return -EINVAL;
+}
+
+static inline void dev_tcx_uninstall(struct net_device *dev)
+{
+}
+#endif /* CONFIG_NET_XGRESS && CONFIG_BPF_SYSCALL */
+#endif /* __NET_TCX_H */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index e96a1151ec75..1617af380162 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -52,6 +52,7 @@ struct xdp_sock {
struct xsk_buff_pool *pool;
u16 queue_id;
bool zc;
+ bool sg;
enum {
XSK_READY = 0,
XSK_BOUND,
@@ -67,6 +68,12 @@ struct xdp_sock {
u64 rx_dropped;
u64 rx_queue_full;
+ /* When __xsk_generic_xmit() must return before it sees the EOP descriptor for the current
+ * packet, the partially built skb is saved here so that packet building can resume in next
+ * call of __xsk_generic_xmit().
+ */
+ struct sk_buff *skb;
+
struct list_head map_list;
/* Protects map_list */
spinlock_t map_list_lock;
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index c243f906ebed..1f6fc8c7a84c 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -89,6 +89,11 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
return xp_alloc(pool);
}
+static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
+{
+ return !xp_mb_desc(desc);
+}
+
/* Returns as many entries as possible up to max. 0 <= N <= max. */
static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
@@ -103,10 +108,45 @@ static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
static inline void xsk_buff_free(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+ struct list_head *xskb_list = &xskb->pool->xskb_list;
+ struct xdp_buff_xsk *pos, *tmp;
+
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+ list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
+ list_del(&pos->xskb_list_node);
+ xp_free(pos);
+ }
+
+ xdp_get_shared_info_from_buff(xdp)->nr_frags = 0;
+out:
xp_free(xskb);
}
+static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
+{
+ struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp);
+
+ list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list);
+}
+
+static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
+{
+ struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
+ struct xdp_buff *ret = NULL;
+ struct xdp_buff_xsk *frag;
+
+ frag = list_first_entry_or_null(&xskb->pool->xskb_list,
+ struct xdp_buff_xsk, xskb_list_node);
+ if (frag) {
+ list_del(&frag->xskb_list_node);
+ ret = &frag->xdp;
+ }
+
+ return ret;
+}
+
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
{
xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
@@ -241,6 +281,11 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
return NULL;
}
+static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
+{
+ return false;
+}
+
static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
return 0;
@@ -255,6 +300,15 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
{
}
+static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
+{
+}
+
+static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
+{
+ return NULL;
+}
+
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
{
}
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index a8d7b8a3688a..b0bdff26fc88 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -29,6 +29,7 @@ struct xdp_buff_xsk {
struct xsk_buff_pool *pool;
u64 orig_addr;
struct list_head free_list_node;
+ struct list_head xskb_list_node;
};
#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
@@ -54,6 +55,7 @@ struct xsk_buff_pool {
struct xdp_umem *umem;
struct work_struct work;
struct list_head free_list;
+ struct list_head xskb_list;
u32 heads_cnt;
u16 queue_id;
@@ -184,6 +186,11 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
!(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK);
}
+static inline bool xp_mb_desc(struct xdp_desc *desc)
+{
+ return desc->options & XDP_PKT_CONTD;
+}
+
static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr)
{
return addr & pool->chunk_mask;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 60a9d59beeab..739c15906a65 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1036,6 +1036,8 @@ enum bpf_attach_type {
BPF_LSM_CGROUP,
BPF_STRUCT_OPS,
BPF_NETFILTER,
+ BPF_TCX_INGRESS,
+ BPF_TCX_EGRESS,
__MAX_BPF_ATTACH_TYPE
};
@@ -1053,10 +1055,20 @@ enum bpf_link_type {
BPF_LINK_TYPE_KPROBE_MULTI = 8,
BPF_LINK_TYPE_STRUCT_OPS = 9,
BPF_LINK_TYPE_NETFILTER = 10,
-
+ BPF_LINK_TYPE_TCX = 11,
MAX_BPF_LINK_TYPE,
};
+enum bpf_perf_event_type {
+ BPF_PERF_EVENT_UNSPEC = 0,
+ BPF_PERF_EVENT_UPROBE = 1,
+ BPF_PERF_EVENT_URETPROBE = 2,
+ BPF_PERF_EVENT_KPROBE = 3,
+ BPF_PERF_EVENT_KRETPROBE = 4,
+ BPF_PERF_EVENT_TRACEPOINT = 5,
+ BPF_PERF_EVENT_EVENT = 6,
+};
+
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
*
* NONE(default): No further bpf programs allowed in the subtree.
@@ -1103,7 +1115,12 @@ enum bpf_link_type {
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
#define BPF_F_ALLOW_MULTI (1U << 1)
+/* Generic attachment flags. */
#define BPF_F_REPLACE (1U << 2)
+#define BPF_F_BEFORE (1U << 3)
+#define BPF_F_AFTER (1U << 4)
+#define BPF_F_ID (1U << 5)
+#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will perform strict alignment checking as if the kernel
@@ -1434,14 +1451,19 @@ union bpf_attr {
};
struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
- __u32 target_fd; /* container object to attach to */
- __u32 attach_bpf_fd; /* eBPF program to attach */
+ union {
+ __u32 target_fd; /* target object to attach to or ... */
+ __u32 target_ifindex; /* target ifindex */
+ };
+ __u32 attach_bpf_fd;
__u32 attach_type;
__u32 attach_flags;
- __u32 replace_bpf_fd; /* previously attached eBPF
- * program to replace if
- * BPF_F_REPLACE is used
- */
+ __u32 replace_bpf_fd;
+ union {
+ __u32 relative_fd;
+ __u32 relative_id;
+ };
+ __u64 expected_revision;
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
@@ -1487,16 +1509,26 @@ union bpf_attr {
} info;
struct { /* anonymous struct used by BPF_PROG_QUERY command */
- __u32 target_fd; /* container object to query */
+ union {
+ __u32 target_fd; /* target object to query or ... */
+ __u32 target_ifindex; /* target ifindex */
+ };
__u32 attach_type;
__u32 query_flags;
__u32 attach_flags;
__aligned_u64 prog_ids;
- __u32 prog_cnt;
+ union {
+ __u32 prog_cnt;
+ __u32 count;
+ };
+ __u32 :32;
/* output: per-program attach_flags.
* not allowed to be set during effective query.
*/
__aligned_u64 prog_attach_flags;
+ __aligned_u64 link_ids;
+ __aligned_u64 link_attach_flags;
+ __u64 revision;
} query;
struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
@@ -1539,13 +1571,13 @@ union bpf_attr {
__u32 map_fd; /* struct_ops to attach */
};
union {
- __u32 target_fd; /* object to attach to */
- __u32 target_ifindex; /* target ifindex */
+ __u32 target_fd; /* target object to attach to or ... */
+ __u32 target_ifindex; /* target ifindex */
};
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
union {
- __u32 target_btf_id; /* btf_id of target to attach to */
+ __u32 target_btf_id; /* btf_id of target to attach to */
struct {
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
@@ -1579,6 +1611,13 @@ union bpf_attr {
__s32 priority;
__u32 flags;
} netfilter;
+ struct {
+ union {
+ __u32 relative_fd;
+ __u32 relative_id;
+ };
+ __u64 expected_revision;
+ } tcx;
};
} link_create;
@@ -6187,6 +6226,19 @@ struct bpf_sock_tuple {
};
};
+/* (Simplified) user return codes for tcx prog type.
+ * A valid tcx program must return one of these defined values. All other
+ * return codes are reserved for future use. Must remain compatible with
+ * their TC_ACT_* counter-parts. For compatibility in behavior, unknown
+ * return codes are mapped to TCX_NEXT.
+ */
+enum tcx_action_base {
+ TCX_NEXT = -1,
+ TCX_PASS = 0,
+ TCX_DROP = 2,
+ TCX_REDIRECT = 7,
+};
+
struct bpf_xdp_sock {
__u32 queue_id;
};
@@ -6439,6 +6491,40 @@ struct bpf_link_info {
__s32 priority;
__u32 flags;
} netfilter;
+ struct {
+ __aligned_u64 addrs;
+ __u32 count; /* in/out: kprobe_multi function count */
+ __u32 flags;
+ } kprobe_multi;
+ struct {
+ __u32 type; /* enum bpf_perf_event_type */
+ __u32 :32;
+ union {
+ struct {
+ __aligned_u64 file_name; /* in/out */
+ __u32 name_len;
+ __u32 offset; /* offset from file_name */
+ } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
+ struct {
+ __aligned_u64 func_name; /* in/out */
+ __u32 name_len;
+ __u32 offset; /* offset from func_name */
+ __u64 addr;
+ } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
+ struct {
+ __aligned_u64 tp_name; /* in/out */
+ __u32 name_len;
+ } tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */
+ struct {
+ __u64 config;
+ __u32 type;
+ } event; /* BPF_PERF_EVENT_EVENT */
+ };
+ } perf_event;
+ struct {
+ __u32 ifindex;
+ __u32 attach_type;
+ } tcx;
};
} __attribute__((aligned(8)));
@@ -7012,6 +7098,7 @@ struct bpf_list_head {
struct bpf_list_node {
__u64 :64;
__u64 :64;
+ __u64 :64;
} __attribute__((aligned(8)));
struct bpf_rb_root {
@@ -7023,6 +7110,7 @@ struct bpf_rb_node {
__u64 :64;
__u64 :64;
__u64 :64;
+ __u64 :64;
} __attribute__((aligned(8)));
struct bpf_refcount {
diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h
index db210625cee8..f2afb7cc4926 100644
--- a/include/uapi/linux/cn_proc.h
+++ b/include/uapi/linux/cn_proc.h
@@ -30,6 +30,49 @@ enum proc_cn_mcast_op {
PROC_CN_MCAST_IGNORE = 2
};
+#define PROC_EVENT_ALL (PROC_EVENT_FORK | PROC_EVENT_EXEC | PROC_EVENT_UID | \
+ PROC_EVENT_GID | PROC_EVENT_SID | PROC_EVENT_PTRACE | \
+ PROC_EVENT_COMM | PROC_EVENT_NONZERO_EXIT | \
+ PROC_EVENT_COREDUMP | PROC_EVENT_EXIT)
+
+/*
+ * If you add an entry in proc_cn_event, make sure you add it in
+ * PROC_EVENT_ALL above as well.
+ */
+enum proc_cn_event {
+ /* Use successive bits so the enums can be used to record
+ * sets of events as well
+ */
+ PROC_EVENT_NONE = 0x00000000,
+ PROC_EVENT_FORK = 0x00000001,
+ PROC_EVENT_EXEC = 0x00000002,
+ PROC_EVENT_UID = 0x00000004,
+ PROC_EVENT_GID = 0x00000040,
+ PROC_EVENT_SID = 0x00000080,
+ PROC_EVENT_PTRACE = 0x00000100,
+ PROC_EVENT_COMM = 0x00000200,
+ /* "next" should be 0x00000400 */
+ /* "last" is the last process event: exit,
+ * while "next to last" is coredumping event
+ * before that is report only if process dies
+ * with non-zero exit status
+ */
+ PROC_EVENT_NONZERO_EXIT = 0x20000000,
+ PROC_EVENT_COREDUMP = 0x40000000,
+ PROC_EVENT_EXIT = 0x80000000
+};
+
+struct proc_input {
+ enum proc_cn_mcast_op mcast_op;
+ enum proc_cn_event event_type;
+};
+
+static inline enum proc_cn_event valid_event(enum proc_cn_event ev_type)
+{
+ ev_type &= PROC_EVENT_ALL;
+ return ev_type;
+}
+
/*
* From the user's point of view, the process
* ID is the thread group ID and thread ID is the internal
@@ -44,24 +87,7 @@ enum proc_cn_mcast_op {
*/
struct proc_event {
- enum what {
- /* Use successive bits so the enums can be used to record
- * sets of events as well
- */
- PROC_EVENT_NONE = 0x00000000,
- PROC_EVENT_FORK = 0x00000001,
- PROC_EVENT_EXEC = 0x00000002,
- PROC_EVENT_UID = 0x00000004,
- PROC_EVENT_GID = 0x00000040,
- PROC_EVENT_SID = 0x00000080,
- PROC_EVENT_PTRACE = 0x00000100,
- PROC_EVENT_COMM = 0x00000200,
- /* "next" should be 0x00000400 */
- /* "last" is the last process event: exit,
- * while "next to last" is coredumping event */
- PROC_EVENT_COREDUMP = 0x40000000,
- PROC_EVENT_EXIT = 0x80000000
- } what;
+ enum proc_cn_event what;
__u32 cpu;
__u64 __attribute__((aligned(8))) timestamp_ns;
/* Number of nano seconds since system boot */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 0f6a0fe09bdb..ce3117df9cec 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -570,6 +570,7 @@ enum {
IFLA_BRPORT_MCAST_N_GROUPS,
IFLA_BRPORT_MCAST_MAX_GROUPS,
IFLA_BRPORT_NEIGH_VLAN_SUPPRESS,
+ IFLA_BRPORT_BACKUP_NHID,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index a78a8096f4ce..8d48863472b9 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -25,6 +25,12 @@
* application.
*/
#define XDP_USE_NEED_WAKEUP (1 << 3)
+/* By setting this option, userspace application indicates that it can
+ * handle multiple descriptors per packet thus enabling AF_XDP to split
+ * multi-buffer XDP frames into multiple Rx descriptors. Without this set
+ * such frames will be dropped.
+ */
+#define XDP_USE_SG (1 << 4)
/* Flags for xsk_umem_config flags */
#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
@@ -108,4 +114,11 @@ struct xdp_desc {
/* UMEM descriptor is __u64 */
+/* Flag indicating that the packet continues with the buffer pointed out by the
+ * next frame in the ring. The end of the packet is signalled by setting this
+ * bit to zero. For single buffer packets, every descriptor has 'options' set
+ * to 0 and this maintains backward compatibility.
+ */
+#define XDP_PKT_CONTD (1 << 0)
+
#endif /* _LINUX_IF_XDP_H */
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index ac56605fe9bc..8b6bcbf6ed4a 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -198,6 +198,7 @@ enum {
DEVCONF_IOAM6_ID_WIDE,
DEVCONF_NDISC_EVICT_NOCARRIER,
DEVCONF_ACCEPT_UNTRACKED_NA,
+ DEVCONF_ACCEPT_RA_MIN_RTR_LFT,
DEVCONF_MAX
};
diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index b826598d1e94..d03863da180e 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -82,6 +82,8 @@
#define MDIO_AN_10BT1_AN_CTRL 526 /* 10BASE-T1 AN control register */
#define MDIO_AN_10BT1_AN_STAT 527 /* 10BASE-T1 AN status register */
#define MDIO_PMA_PMD_BT1_CTRL 2100 /* BASE-T1 PMA/PMD control register */
+#define MDIO_PCS_1000BT1_CTRL 2304 /* 1000BASE-T1 PCS control register */
+#define MDIO_PCS_1000BT1_STAT 2305 /* 1000BASE-T1 PCS status register */
/* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. */
#define MDIO_PMA_LASI_RXCTRL 0x9000 /* RX_ALARM control */
@@ -332,6 +334,8 @@
#define MDIO_PCS_10T1L_CTRL_RESET 0x8000 /* PCS reset */
/* BASE-T1 PMA/PMD extended ability register. */
+#define MDIO_PMA_PMD_BT1_B100_ABLE 0x0001 /* 100BASE-T1 Ability */
+#define MDIO_PMA_PMD_BT1_B1000_ABLE 0x0002 /* 1000BASE-T1 Ability */
#define MDIO_PMA_PMD_BT1_B10L_ABLE 0x0004 /* 10BASE-T1L Ability */
/* BASE-T1 auto-negotiation advertisement register [15:0] */
@@ -373,7 +377,19 @@
#define MDIO_AN_10BT1_AN_STAT_LPA_EEE_T1L 0x4000 /* 10BASE-T1L LP EEE ability advertisement */
/* BASE-T1 PMA/PMD control register */
-#define MDIO_PMA_PMD_BT1_CTRL_CFG_MST 0x4000 /* MASTER-SLAVE config value */
+#define MDIO_PMA_PMD_BT1_CTRL_STRAP 0x000F /* Type selection (Strap) */
+#define MDIO_PMA_PMD_BT1_CTRL_STRAP_B1000 0x0001 /* Select 1000BASE-T1 */
+#define MDIO_PMA_PMD_BT1_CTRL_CFG_MST 0x4000 /* MASTER-SLAVE config value */
+
+/* 1000BASE-T1 PCS control register */
+#define MDIO_PCS_1000BT1_CTRL_LOW_POWER 0x0800 /* Low power mode */
+#define MDIO_PCS_1000BT1_CTRL_DISABLE_TX 0x4000 /* Global PMA transmit disable */
+#define MDIO_PCS_1000BT1_CTRL_RESET 0x8000 /* Software reset value */
+
+/* 1000BASE-T1 PCS status register */
+#define MDIO_PCS_1000BT1_STAT_LINK 0x0004 /* PCS Link is up */
+#define MDIO_PCS_1000BT1_STAT_FAULT 0x0080 /* There is a fault condition */
+
/* EEE Supported/Advertisement/LP Advertisement registers.
*
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index 639524b59930..bf71698a1e82 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -41,6 +41,7 @@ enum {
NETDEV_A_DEV_IFINDEX = 1,
NETDEV_A_DEV_PAD,
NETDEV_A_DEV_XDP_FEATURES,
+ NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
__NETDEV_A_DEV_MAX,
NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1)
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index 2dfe1079f772..6a906ff93006 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -31,6 +31,7 @@ config BPF_SYSCALL
select TASKS_TRACE_RCU
select BINARY_PRINTF
select NET_SOCK_MSG if NET
+ select NET_XGRESS if NET
select PAGE_POOL if NET
default n
help
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 1d3892168d32..f526b7573e97 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -12,7 +12,7 @@ obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
-obj-$(CONFIG_BPF_SYSCALL) += disasm.o
+obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o memalloc.o
obj-$(CONFIG_BPF_JIT) += dispatcher.o
@@ -21,6 +21,7 @@ obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
obj-$(CONFIG_BPF_SYSCALL) += offload.o
obj-$(CONFIG_BPF_SYSCALL) += net_namespace.o
+obj-$(CONFIG_BPF_SYSCALL) += tcx.o
endif
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 817204d53372..ef9581a580e2 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6133,8 +6133,9 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
const char *tname, *mname, *tag_value;
u32 vlen, elem_id, mid;
- *flag = 0;
again:
+ if (btf_type_is_modifier(t))
+ t = btf_type_skip_modifiers(btf, t->type, NULL);
tname = __btf_name_by_offset(btf, t->name_off);
if (!btf_type_is_struct(t)) {
bpf_log(log, "Type '%s' is not a struct\n", tname);
@@ -6142,6 +6143,14 @@ again:
}
vlen = btf_type_vlen(t);
+ if (BTF_INFO_KIND(t->info) == BTF_KIND_UNION && vlen != 1 && !(*flag & PTR_UNTRUSTED))
+ /*
+ * walking unions yields untrusted pointers
+ * with exception of __bpf_md_ptr and other
+ * unions with a single member
+ */
+ *flag |= PTR_UNTRUSTED;
+
if (off + size > t->size) {
/* If the last element is a variable size array, we may
* need to relax the rule.
@@ -6302,15 +6311,6 @@ error:
* of this field or inside of this struct
*/
if (btf_type_is_struct(mtype)) {
- if (BTF_INFO_KIND(mtype->info) == BTF_KIND_UNION &&
- btf_type_vlen(mtype) != 1)
- /*
- * walking unions yields untrusted pointers
- * with exception of __bpf_md_ptr and other
- * unions with a single member
- */
- *flag |= PTR_UNTRUSTED;
-
/* our field must be inside that union or struct */
t = mtype;
@@ -6368,7 +6368,7 @@ error:
* that also allows using an array of int as a scratch
* space. e.g. skb->cb[].
*/
- if (off + size > mtrue_end) {
+ if (off + size > mtrue_end && !(*flag & PTR_UNTRUSTED)) {
bpf_log(log,
"access beyond the end of member %s (mend:%u) in struct %s with off %u size %u\n",
mname, mtrue_end, tname, off, size);
@@ -6476,7 +6476,7 @@ bool btf_struct_ids_match(struct bpf_verifier_log *log,
bool strict)
{
const struct btf_type *type;
- enum bpf_type_flag flag;
+ enum bpf_type_flag flag = 0;
int err;
/* Are we already done? */
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index 938a60ff4295..6983af8e093c 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -9,7 +9,6 @@
/**
* struct bpf_cpumask - refcounted BPF cpumask wrapper structure
* @cpumask: The actual cpumask embedded in the struct.
- * @rcu: The RCU head used to free the cpumask with RCU safety.
* @usage: Object reference counter. When the refcount goes to 0, the
* memory is released back to the BPF allocator, which provides
* RCU safety.
@@ -25,7 +24,6 @@
*/
struct bpf_cpumask {
cpumask_t cpumask;
- struct rcu_head rcu;
refcount_t usage;
};
@@ -82,16 +80,6 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask)
return cpumask;
}
-static void cpumask_free_cb(struct rcu_head *head)
-{
- struct bpf_cpumask *cpumask;
-
- cpumask = container_of(head, struct bpf_cpumask, rcu);
- migrate_disable();
- bpf_mem_cache_free(&bpf_cpumask_ma, cpumask);
- migrate_enable();
-}
-
/**
* bpf_cpumask_release() - Release a previously acquired BPF cpumask.
* @cpumask: The cpumask being released.
@@ -102,8 +90,12 @@ static void cpumask_free_cb(struct rcu_head *head)
*/
__bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask)
{
- if (refcount_dec_and_test(&cpumask->usage))
- call_rcu(&cpumask->rcu, cpumask_free_cb);
+ if (!refcount_dec_and_test(&cpumask->usage))
+ return;
+
+ migrate_disable();
+ bpf_mem_cache_free_rcu(&bpf_cpumask_ma, cpumask);
+ migrate_enable();
}
/**
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 56d3da7d0bc6..a8c7e1c5abfa 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -302,6 +302,7 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
struct htab_elem *l;
if (node) {
+ bpf_map_inc_elem_count(&htab->map);
l = container_of(node, struct htab_elem, lru_node);
memcpy(l->key, key, htab->map.key_size);
return l;
@@ -510,12 +511,16 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
htab->n_buckets > U32_MAX / sizeof(struct bucket))
goto free_htab;
+ err = bpf_map_init_elem_count(&htab->map);
+ if (err)
+ goto free_htab;
+
err = -ENOMEM;
htab->buckets = bpf_map_area_alloc(htab->n_buckets *
sizeof(struct bucket),
htab->map.numa_node);
if (!htab->buckets)
- goto free_htab;
+ goto free_elem_count;
for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
htab->map_locked[i] = bpf_map_alloc_percpu(&htab->map,
@@ -593,6 +598,8 @@ free_map_locked:
bpf_map_area_free(htab->buckets);
bpf_mem_alloc_destroy(&htab->pcpu_ma);
bpf_mem_alloc_destroy(&htab->ma);
+free_elem_count:
+ bpf_map_free_elem_count(&htab->map);
free_htab:
lockdep_unregister_key(&htab->lockdep_key);
bpf_map_area_free(htab);
@@ -804,6 +811,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
if (l == tgt_l) {
hlist_nulls_del_rcu(&l->hash_node);
check_and_free_fields(htab, l);
+ bpf_map_dec_elem_count(&htab->map);
break;
}
@@ -900,6 +908,8 @@ static bool is_map_full(struct bpf_htab *htab)
static void inc_elem_count(struct bpf_htab *htab)
{
+ bpf_map_inc_elem_count(&htab->map);
+
if (htab->use_percpu_counter)
percpu_counter_add_batch(&htab->pcount, 1, PERCPU_COUNTER_BATCH);
else
@@ -908,6 +918,8 @@ static void inc_elem_count(struct bpf_htab *htab)
static void dec_elem_count(struct bpf_htab *htab)
{
+ bpf_map_dec_elem_count(&htab->map);
+
if (htab->use_percpu_counter)
percpu_counter_add_batch(&htab->pcount, -1, PERCPU_COUNTER_BATCH);
else
@@ -920,6 +932,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
htab_put_fd_value(htab, l);
if (htab_is_prealloc(htab)) {
+ bpf_map_dec_elem_count(&htab->map);
check_and_free_fields(htab, l);
__pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
@@ -1000,6 +1013,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
if (!l)
return ERR_PTR(-E2BIG);
l_new = container_of(l, struct htab_elem, fnode);
+ bpf_map_inc_elem_count(&htab->map);
}
} else {
if (is_map_full(htab))
@@ -1168,6 +1182,7 @@ err:
static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
{
check_and_free_fields(htab, elem);
+ bpf_map_dec_elem_count(&htab->map);
bpf_lru_push_free(&htab->lru, &elem->lru_node);
}
@@ -1357,8 +1372,10 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
err:
htab_unlock_bucket(htab, b, hash, flags);
err_lock_bucket:
- if (l_new)
+ if (l_new) {
+ bpf_map_dec_elem_count(&htab->map);
bpf_lru_push_free(&htab->lru, &l_new->lru_node);
+ }
return ret;
}
@@ -1523,6 +1540,7 @@ static void htab_map_free(struct bpf_map *map)
prealloc_destroy(htab);
}
+ bpf_map_free_elem_count(map);
free_percpu(htab->extra_elems);
bpf_map_area_free(htab->buckets);
bpf_mem_alloc_destroy(&htab->pcpu_ma);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 9e80efa59a5d..56ce5008aedd 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1942,23 +1942,29 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta
return (void *)p__refcounted_kptr;
}
-static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head,
+static int __bpf_list_add(struct bpf_list_node_kern *node,
+ struct bpf_list_head *head,
bool tail, struct btf_record *rec, u64 off)
{
- struct list_head *n = (void *)node, *h = (void *)head;
+ struct list_head *n = &node->list_head, *h = (void *)head;
/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
* called on its fields, so init here
*/
if (unlikely(!h->next))
INIT_LIST_HEAD(h);
- if (!list_empty(n)) {
+
+ /* node->owner != NULL implies !list_empty(n), no need to separately
+ * check the latter
+ */
+ if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
/* Only called from BPF prog, no need to migrate_disable */
__bpf_obj_drop_impl((void *)n - off, rec);
return -EINVAL;
}
tail ? list_add_tail(n, h) : list_add(n, h);
+ WRITE_ONCE(node->owner, head);
return 0;
}
@@ -1967,25 +1973,26 @@ __bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head,
struct bpf_list_node *node,
void *meta__ign, u64 off)
{
+ struct bpf_list_node_kern *n = (void *)node;
struct btf_struct_meta *meta = meta__ign;
- return __bpf_list_add(node, head, false,
- meta ? meta->record : NULL, off);
+ return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
}
__bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
struct bpf_list_node *node,
void *meta__ign, u64 off)
{
+ struct bpf_list_node_kern *n = (void *)node;
struct btf_struct_meta *meta = meta__ign;
- return __bpf_list_add(node, head, true,
- meta ? meta->record : NULL, off);
+ return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
}
static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
{
struct list_head *n, *h = (void *)head;
+ struct bpf_list_node_kern *node;
/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
* called on its fields, so init here
@@ -1994,8 +2001,14 @@ static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tai
INIT_LIST_HEAD(h);
if (list_empty(h))
return NULL;
+
n = tail ? h->prev : h->next;
+ node = container_of(n, struct bpf_list_node_kern, list_head);
+ if (WARN_ON_ONCE(READ_ONCE(node->owner) != head))
+ return NULL;
+
list_del_init(n);
+ WRITE_ONCE(node->owner, NULL);
return (struct bpf_list_node *)n;
}
@@ -2012,29 +2025,38 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
struct bpf_rb_node *node)
{
+ struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
struct rb_root_cached *r = (struct rb_root_cached *)root;
- struct rb_node *n = (struct rb_node *)node;
+ struct rb_node *n = &node_internal->rb_node;
- if (RB_EMPTY_NODE(n))
+ /* node_internal->owner != root implies either RB_EMPTY_NODE(n) or
+ * n is owned by some other tree. No need to check RB_EMPTY_NODE(n)
+ */
+ if (READ_ONCE(node_internal->owner) != root)
return NULL;
rb_erase_cached(n, r);
RB_CLEAR_NODE(n);
+ WRITE_ONCE(node_internal->owner, NULL);
return (struct bpf_rb_node *)n;
}
/* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
* program
*/
-static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+static int __bpf_rbtree_add(struct bpf_rb_root *root,
+ struct bpf_rb_node_kern *node,
void *less, struct btf_record *rec, u64 off)
{
struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
- struct rb_node *parent = NULL, *n = (struct rb_node *)node;
+ struct rb_node *parent = NULL, *n = &node->rb_node;
bpf_callback_t cb = (bpf_callback_t)less;
bool leftmost = true;
- if (!RB_EMPTY_NODE(n)) {
+ /* node->owner != NULL implies !RB_EMPTY_NODE(n), no need to separately
+ * check the latter
+ */
+ if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
/* Only called from BPF prog, no need to migrate_disable */
__bpf_obj_drop_impl((void *)n - off, rec);
return -EINVAL;
@@ -2052,6 +2074,7 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
rb_link_node(n, parent, link);
rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost);
+ WRITE_ONCE(node->owner, root);
return 0;
}
@@ -2060,8 +2083,9 @@ __bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node
void *meta__ign, u64 off)
{
struct btf_struct_meta *meta = meta__ign;
+ struct bpf_rb_node_kern *n = (void *)node;
- return __bpf_rbtree_add(root, node, (void *)less, meta ? meta->record : NULL, off);
+ return __bpf_rbtree_add(root, n, (void *)less, meta ? meta->record : NULL, off);
}
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
@@ -2239,7 +2263,10 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
case BPF_DYNPTR_TYPE_RINGBUF:
return ptr->data + ptr->offset + offset;
case BPF_DYNPTR_TYPE_SKB:
- return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
+ if (buffer__opt)
+ return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
+ else
+ return skb_pointer_if_linear(ptr->data, ptr->offset + offset, len);
case BPF_DYNPTR_TYPE_XDP:
{
void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index b0fa190b0979..6fc9dae9edc8 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -78,8 +78,7 @@ static const struct seq_operations bpf_map_seq_ops = {
.show = bpf_map_seq_show,
};
-BTF_ID_LIST(btf_bpf_map_id)
-BTF_ID(struct, bpf_map)
+BTF_ID_LIST_GLOBAL_SINGLE(btf_bpf_map_id, struct, bpf_map)
static const struct bpf_iter_seq_info bpf_map_seq_info = {
.seq_ops = &bpf_map_seq_ops,
@@ -93,7 +92,7 @@ static struct bpf_iter_reg bpf_map_reg_info = {
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__bpf_map, map),
- PTR_TO_BTF_ID_OR_NULL },
+ PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
},
.seq_info = &bpf_map_seq_info,
};
@@ -193,3 +192,40 @@ static int __init bpf_map_iter_init(void)
}
late_initcall(bpf_map_iter_init);
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
+
+__bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map)
+{
+ s64 *pcount;
+ s64 ret = 0;
+ int cpu;
+
+ if (!map || !map->elem_count)
+ return 0;
+
+ for_each_possible_cpu(cpu) {
+ pcount = per_cpu_ptr(map->elem_count, cpu);
+ ret += READ_ONCE(*pcount);
+ }
+ return ret;
+}
+
+__diag_pop();
+
+BTF_SET8_START(bpf_map_iter_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_map_sum_elem_count, KF_TRUSTED_ARGS)
+BTF_SET8_END(bpf_map_iter_kfunc_ids)
+
+static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_map_iter_kfunc_ids,
+};
+
+static int init_subsystem(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_map_iter_kfunc_set);
+}
+late_initcall(init_subsystem);
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 0668bcd7c926..51d6389e5152 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -98,11 +98,23 @@ struct bpf_mem_cache {
int free_cnt;
int low_watermark, high_watermark, batch;
int percpu_size;
+ bool draining;
+ struct bpf_mem_cache *tgt;
- struct rcu_head rcu;
+ /* list of objects to be freed after RCU GP */
struct llist_head free_by_rcu;
+ struct llist_node *free_by_rcu_tail;
struct llist_head waiting_for_gp;
+ struct llist_node *waiting_for_gp_tail;
+ struct rcu_head rcu;
atomic_t call_rcu_in_progress;
+ struct llist_head free_llist_extra_rcu;
+
+ /* list of objects to be freed after RCU tasks trace GP */
+ struct llist_head free_by_rcu_ttrace;
+ struct llist_head waiting_for_gp_ttrace;
+ struct rcu_head rcu_ttrace;
+ atomic_t call_rcu_ttrace_in_progress;
};
struct bpf_mem_caches {
@@ -153,59 +165,83 @@ static struct mem_cgroup *get_memcg(const struct bpf_mem_cache *c)
#endif
}
+static void inc_active(struct bpf_mem_cache *c, unsigned long *flags)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ /* In RT irq_work runs in per-cpu kthread, so disable
+ * interrupts to avoid preemption and interrupts and
+ * reduce the chance of bpf prog executing on this cpu
+ * when active counter is busy.
+ */
+ local_irq_save(*flags);
+ /* alloc_bulk runs from irq_work which will not preempt a bpf
+ * program that does unit_alloc/unit_free since IRQs are
+ * disabled there. There is no race to increment 'active'
+ * counter. It protects free_llist from corruption in case NMI
+ * bpf prog preempted this loop.
+ */
+ WARN_ON_ONCE(local_inc_return(&c->active) != 1);
+}
+
+static void dec_active(struct bpf_mem_cache *c, unsigned long flags)
+{
+ local_dec(&c->active);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_restore(flags);
+}
+
+static void add_obj_to_free_list(struct bpf_mem_cache *c, void *obj)
+{
+ unsigned long flags;
+
+ inc_active(c, &flags);
+ __llist_add(obj, &c->free_llist);
+ c->free_cnt++;
+ dec_active(c, flags);
+}
+
/* Mostly runs from irq_work except __init phase. */
static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node)
{
struct mem_cgroup *memcg = NULL, *old_memcg;
- unsigned long flags;
void *obj;
int i;
- memcg = get_memcg(c);
- old_memcg = set_active_memcg(memcg);
for (i = 0; i < cnt; i++) {
/*
- * free_by_rcu is only manipulated by irq work refill_work().
- * IRQ works on the same CPU are called sequentially, so it is
- * safe to use __llist_del_first() here. If alloc_bulk() is
- * invoked by the initial prefill, there will be no running
- * refill_work(), so __llist_del_first() is fine as well.
- *
- * In most cases, objects on free_by_rcu are from the same CPU.
- * If some objects come from other CPUs, it doesn't incur any
- * harm because NUMA_NO_NODE means the preference for current
- * numa node and it is not a guarantee.
+ * For every 'c' llist_del_first(&c->free_by_rcu_ttrace); is
+ * done only by one CPU == current CPU. Other CPUs might
+ * llist_add() and llist_del_all() in parallel.
*/
- obj = __llist_del_first(&c->free_by_rcu);
- if (!obj) {
- /* Allocate, but don't deplete atomic reserves that typical
- * GFP_ATOMIC would do. irq_work runs on this cpu and kmalloc
- * will allocate from the current numa node which is what we
- * want here.
- */
- obj = __alloc(c, node, GFP_NOWAIT | __GFP_NOWARN | __GFP_ACCOUNT);
- if (!obj)
- break;
- }
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- /* In RT irq_work runs in per-cpu kthread, so disable
- * interrupts to avoid preemption and interrupts and
- * reduce the chance of bpf prog executing on this cpu
- * when active counter is busy.
- */
- local_irq_save(flags);
- /* alloc_bulk runs from irq_work which will not preempt a bpf
- * program that does unit_alloc/unit_free since IRQs are
- * disabled there. There is no race to increment 'active'
- * counter. It protects free_llist from corruption in case NMI
- * bpf prog preempted this loop.
+ obj = llist_del_first(&c->free_by_rcu_ttrace);
+ if (!obj)
+ break;
+ add_obj_to_free_list(c, obj);
+ }
+ if (i >= cnt)
+ return;
+
+ for (; i < cnt; i++) {
+ obj = llist_del_first(&c->waiting_for_gp_ttrace);
+ if (!obj)
+ break;
+ add_obj_to_free_list(c, obj);
+ }
+ if (i >= cnt)
+ return;
+
+ memcg = get_memcg(c);
+ old_memcg = set_active_memcg(memcg);
+ for (; i < cnt; i++) {
+ /* Allocate, but don't deplete atomic reserves that typical
+ * GFP_ATOMIC would do. irq_work runs on this cpu and kmalloc
+ * will allocate from the current numa node which is what we
+ * want here.
*/
- WARN_ON_ONCE(local_inc_return(&c->active) != 1);
- __llist_add(obj, &c->free_llist);
- c->free_cnt++;
- local_dec(&c->active);
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- local_irq_restore(flags);
+ obj = __alloc(c, node, GFP_NOWAIT | __GFP_NOWARN | __GFP_ACCOUNT);
+ if (!obj)
+ break;
+ add_obj_to_free_list(c, obj);
}
set_active_memcg(old_memcg);
mem_cgroup_put(memcg);
@@ -222,20 +258,24 @@ static void free_one(void *obj, bool percpu)
kfree(obj);
}
-static void free_all(struct llist_node *llnode, bool percpu)
+static int free_all(struct llist_node *llnode, bool percpu)
{
struct llist_node *pos, *t;
+ int cnt = 0;
- llist_for_each_safe(pos, t, llnode)
+ llist_for_each_safe(pos, t, llnode) {
free_one(pos, percpu);
+ cnt++;
+ }
+ return cnt;
}
static void __free_rcu(struct rcu_head *head)
{
- struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu);
+ struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu_ttrace);
- free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size);
- atomic_set(&c->call_rcu_in_progress, 0);
+ free_all(llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size);
+ atomic_set(&c->call_rcu_ttrace_in_progress, 0);
}
static void __free_rcu_tasks_trace(struct rcu_head *head)
@@ -254,60 +294,128 @@ static void enque_to_free(struct bpf_mem_cache *c, void *obj)
struct llist_node *llnode = obj;
/* bpf_mem_cache is a per-cpu object. Freeing happens in irq_work.
- * Nothing races to add to free_by_rcu list.
+ * Nothing races to add to free_by_rcu_ttrace list.
*/
- __llist_add(llnode, &c->free_by_rcu);
+ llist_add(llnode, &c->free_by_rcu_ttrace);
}
-static void do_call_rcu(struct bpf_mem_cache *c)
+static void do_call_rcu_ttrace(struct bpf_mem_cache *c)
{
struct llist_node *llnode, *t;
- if (atomic_xchg(&c->call_rcu_in_progress, 1))
+ if (atomic_xchg(&c->call_rcu_ttrace_in_progress, 1)) {
+ if (unlikely(READ_ONCE(c->draining))) {
+ llnode = llist_del_all(&c->free_by_rcu_ttrace);
+ free_all(llnode, !!c->percpu_size);
+ }
return;
+ }
+
+ WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp_ttrace));
+ llist_for_each_safe(llnode, t, llist_del_all(&c->free_by_rcu_ttrace))
+ llist_add(llnode, &c->waiting_for_gp_ttrace);
+
+ if (unlikely(READ_ONCE(c->draining))) {
+ __free_rcu(&c->rcu_ttrace);
+ return;
+ }
- WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp));
- llist_for_each_safe(llnode, t, __llist_del_all(&c->free_by_rcu))
- /* There is no concurrent __llist_add(waiting_for_gp) access.
- * It doesn't race with llist_del_all either.
- * But there could be two concurrent llist_del_all(waiting_for_gp):
- * from __free_rcu() and from drain_mem_cache().
- */
- __llist_add(llnode, &c->waiting_for_gp);
/* Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
* If RCU Tasks Trace grace period implies RCU grace period, free
* these elements directly, else use call_rcu() to wait for normal
* progs to finish and finally do free_one() on each element.
*/
- call_rcu_tasks_trace(&c->rcu, __free_rcu_tasks_trace);
+ call_rcu_tasks_trace(&c->rcu_ttrace, __free_rcu_tasks_trace);
}
static void free_bulk(struct bpf_mem_cache *c)
{
+ struct bpf_mem_cache *tgt = c->tgt;
struct llist_node *llnode, *t;
unsigned long flags;
int cnt;
+ WARN_ON_ONCE(tgt->unit_size != c->unit_size);
+
do {
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- local_irq_save(flags);
- WARN_ON_ONCE(local_inc_return(&c->active) != 1);
+ inc_active(c, &flags);
llnode = __llist_del_first(&c->free_llist);
if (llnode)
cnt = --c->free_cnt;
else
cnt = 0;
- local_dec(&c->active);
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- local_irq_restore(flags);
+ dec_active(c, flags);
if (llnode)
- enque_to_free(c, llnode);
+ enque_to_free(tgt, llnode);
} while (cnt > (c->high_watermark + c->low_watermark) / 2);
/* and drain free_llist_extra */
llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist_extra))
- enque_to_free(c, llnode);
- do_call_rcu(c);
+ enque_to_free(tgt, llnode);
+ do_call_rcu_ttrace(tgt);
+}
+
+static void __free_by_rcu(struct rcu_head *head)
+{
+ struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu);
+ struct bpf_mem_cache *tgt = c->tgt;
+ struct llist_node *llnode;
+
+ llnode = llist_del_all(&c->waiting_for_gp);
+ if (!llnode)
+ goto out;
+
+ llist_add_batch(llnode, c->waiting_for_gp_tail, &tgt->free_by_rcu_ttrace);
+
+ /* Objects went through regular RCU GP. Send them to RCU tasks trace */
+ do_call_rcu_ttrace(tgt);
+out:
+ atomic_set(&c->call_rcu_in_progress, 0);
+}
+
+static void check_free_by_rcu(struct bpf_mem_cache *c)
+{
+ struct llist_node *llnode, *t;
+ unsigned long flags;
+
+ /* drain free_llist_extra_rcu */
+ if (unlikely(!llist_empty(&c->free_llist_extra_rcu))) {
+ inc_active(c, &flags);
+ llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist_extra_rcu))
+ if (__llist_add(llnode, &c->free_by_rcu))
+ c->free_by_rcu_tail = llnode;
+ dec_active(c, flags);
+ }
+
+ if (llist_empty(&c->free_by_rcu))
+ return;
+
+ if (atomic_xchg(&c->call_rcu_in_progress, 1)) {
+ /*
+ * Instead of kmalloc-ing new rcu_head and triggering 10k
+ * call_rcu() to hit rcutree.qhimark and force RCU to notice
+ * the overload just ask RCU to hurry up. There could be many
+ * objects in free_by_rcu list.
+ * This hint reduces memory consumption for an artificial
+ * benchmark from 2 Gbyte to 150 Mbyte.
+ */
+ rcu_request_urgent_qs_task(current);
+ return;
+ }
+
+ WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp));
+
+ inc_active(c, &flags);
+ WRITE_ONCE(c->waiting_for_gp.first, __llist_del_all(&c->free_by_rcu));
+ c->waiting_for_gp_tail = c->free_by_rcu_tail;
+ dec_active(c, flags);
+
+ if (unlikely(READ_ONCE(c->draining))) {
+ free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size);
+ atomic_set(&c->call_rcu_in_progress, 0);
+ } else {
+ call_rcu_hurry(&c->rcu, __free_by_rcu);
+ }
}
static void bpf_mem_refill(struct irq_work *work)
@@ -324,6 +432,8 @@ static void bpf_mem_refill(struct irq_work *work)
alloc_bulk(c, c->batch, NUMA_NO_NODE);
else if (cnt > c->high_watermark)
free_bulk(c);
+
+ check_free_by_rcu(c);
}
static void notrace irq_work_raise(struct bpf_mem_cache *c)
@@ -406,6 +516,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
c->unit_size = unit_size;
c->objcg = objcg;
c->percpu_size = percpu_size;
+ c->tgt = c;
prefill_mem_cache(c, cpu);
}
ma->cache = pc;
@@ -428,6 +539,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
c = &cc->cache[i];
c->unit_size = sizes[i];
c->objcg = objcg;
+ c->tgt = c;
prefill_mem_cache(c, cpu);
}
}
@@ -441,19 +553,57 @@ static void drain_mem_cache(struct bpf_mem_cache *c)
/* No progs are using this bpf_mem_cache, but htab_map_free() called
* bpf_mem_cache_free() for all remaining elements and they can be in
- * free_by_rcu or in waiting_for_gp lists, so drain those lists now.
+ * free_by_rcu_ttrace or in waiting_for_gp_ttrace lists, so drain those lists now.
*
- * Except for waiting_for_gp list, there are no concurrent operations
+ * Except for waiting_for_gp_ttrace list, there are no concurrent operations
* on these lists, so it is safe to use __llist_del_all().
*/
- free_all(__llist_del_all(&c->free_by_rcu), percpu);
- free_all(llist_del_all(&c->waiting_for_gp), percpu);
+ free_all(llist_del_all(&c->free_by_rcu_ttrace), percpu);
+ free_all(llist_del_all(&c->waiting_for_gp_ttrace), percpu);
free_all(__llist_del_all(&c->free_llist), percpu);
free_all(__llist_del_all(&c->free_llist_extra), percpu);
+ free_all(__llist_del_all(&c->free_by_rcu), percpu);
+ free_all(__llist_del_all(&c->free_llist_extra_rcu), percpu);
+ free_all(llist_del_all(&c->waiting_for_gp), percpu);
+}
+
+static void check_mem_cache(struct bpf_mem_cache *c)
+{
+ WARN_ON_ONCE(!llist_empty(&c->free_by_rcu_ttrace));
+ WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp_ttrace));
+ WARN_ON_ONCE(!llist_empty(&c->free_llist));
+ WARN_ON_ONCE(!llist_empty(&c->free_llist_extra));
+ WARN_ON_ONCE(!llist_empty(&c->free_by_rcu));
+ WARN_ON_ONCE(!llist_empty(&c->free_llist_extra_rcu));
+ WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp));
+}
+
+static void check_leaked_objs(struct bpf_mem_alloc *ma)
+{
+ struct bpf_mem_caches *cc;
+ struct bpf_mem_cache *c;
+ int cpu, i;
+
+ if (ma->cache) {
+ for_each_possible_cpu(cpu) {
+ c = per_cpu_ptr(ma->cache, cpu);
+ check_mem_cache(c);
+ }
+ }
+ if (ma->caches) {
+ for_each_possible_cpu(cpu) {
+ cc = per_cpu_ptr(ma->caches, cpu);
+ for (i = 0; i < NUM_CACHES; i++) {
+ c = &cc->cache[i];
+ check_mem_cache(c);
+ }
+ }
+ }
}
static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma)
{
+ check_leaked_objs(ma);
free_percpu(ma->cache);
free_percpu(ma->caches);
ma->cache = NULL;
@@ -462,8 +612,8 @@ static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma)
static void free_mem_alloc(struct bpf_mem_alloc *ma)
{
- /* waiting_for_gp lists was drained, but __free_rcu might
- * still execute. Wait for it now before we freeing percpu caches.
+ /* waiting_for_gp[_ttrace] lists were drained, but RCU callbacks
+ * might still execute. Wait for them.
*
* rcu_barrier_tasks_trace() doesn't imply synchronize_rcu_tasks_trace(),
* but rcu_barrier_tasks_trace() and rcu_barrier() below are only used
@@ -472,7 +622,8 @@ static void free_mem_alloc(struct bpf_mem_alloc *ma)
* rcu_trace_implies_rcu_gp(), it will be OK to skip rcu_barrier() by
* using rcu_trace_implies_rcu_gp() as well.
*/
- rcu_barrier_tasks_trace();
+ rcu_barrier(); /* wait for __free_by_rcu */
+ rcu_barrier_tasks_trace(); /* wait for __free_rcu */
if (!rcu_trace_implies_rcu_gp())
rcu_barrier();
free_mem_alloc_no_barrier(ma);
@@ -498,7 +649,7 @@ static void destroy_mem_alloc(struct bpf_mem_alloc *ma, int rcu_in_progress)
return;
}
- copy = kmalloc(sizeof(*ma), GFP_KERNEL);
+ copy = kmemdup(ma, sizeof(*ma), GFP_KERNEL);
if (!copy) {
/* Slow path with inline barrier-s */
free_mem_alloc(ma);
@@ -506,10 +657,7 @@ static void destroy_mem_alloc(struct bpf_mem_alloc *ma, int rcu_in_progress)
}
/* Defer barriers into worker to let the rest of map memory to be freed */
- copy->cache = ma->cache;
- ma->cache = NULL;
- copy->caches = ma->caches;
- ma->caches = NULL;
+ memset(ma, 0, sizeof(*ma));
INIT_WORK(&copy->work, free_mem_alloc_deferred);
queue_work(system_unbound_wq, &copy->work);
}
@@ -524,17 +672,10 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
rcu_in_progress = 0;
for_each_possible_cpu(cpu) {
c = per_cpu_ptr(ma->cache, cpu);
- /*
- * refill_work may be unfinished for PREEMPT_RT kernel
- * in which irq work is invoked in a per-CPU RT thread.
- * It is also possible for kernel with
- * arch_irq_work_has_interrupt() being false and irq
- * work is invoked in timer interrupt. So waiting for
- * the completion of irq work to ease the handling of
- * concurrency.
- */
+ WRITE_ONCE(c->draining, true);
irq_work_sync(&c->refill_work);
drain_mem_cache(c);
+ rcu_in_progress += atomic_read(&c->call_rcu_ttrace_in_progress);
rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
}
/* objcg is the same across cpus */
@@ -548,8 +689,10 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
cc = per_cpu_ptr(ma->caches, cpu);
for (i = 0; i < NUM_CACHES; i++) {
c = &cc->cache[i];
+ WRITE_ONCE(c->draining, true);
irq_work_sync(&c->refill_work);
drain_mem_cache(c);
+ rcu_in_progress += atomic_read(&c->call_rcu_ttrace_in_progress);
rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
}
}
@@ -581,8 +724,10 @@ static void notrace *unit_alloc(struct bpf_mem_cache *c)
local_irq_save(flags);
if (local_inc_return(&c->active) == 1) {
llnode = __llist_del_first(&c->free_llist);
- if (llnode)
+ if (llnode) {
cnt = --c->free_cnt;
+ *(struct bpf_mem_cache **)llnode = c;
+ }
}
local_dec(&c->active);
local_irq_restore(flags);
@@ -606,6 +751,12 @@ static void notrace unit_free(struct bpf_mem_cache *c, void *ptr)
BUILD_BUG_ON(LLIST_NODE_SZ > 8);
+ /*
+ * Remember bpf_mem_cache that allocated this object.
+ * The hint is not accurate.
+ */
+ c->tgt = *(struct bpf_mem_cache **)llnode;
+
local_irq_save(flags);
if (local_inc_return(&c->active) == 1) {
__llist_add(llnode, &c->free_llist);
@@ -627,6 +778,27 @@ static void notrace unit_free(struct bpf_mem_cache *c, void *ptr)
irq_work_raise(c);
}
+static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr)
+{
+ struct llist_node *llnode = ptr - LLIST_NODE_SZ;
+ unsigned long flags;
+
+ c->tgt = *(struct bpf_mem_cache **)llnode;
+
+ local_irq_save(flags);
+ if (local_inc_return(&c->active) == 1) {
+ if (__llist_add(llnode, &c->free_by_rcu))
+ c->free_by_rcu_tail = llnode;
+ } else {
+ llist_add(llnode, &c->free_llist_extra_rcu);
+ }
+ local_dec(&c->active);
+ local_irq_restore(flags);
+
+ if (!atomic_read(&c->call_rcu_in_progress))
+ irq_work_raise(c);
+}
+
/* Called from BPF program or from sys_bpf syscall.
* In both cases migration is disabled.
*/
@@ -660,6 +832,20 @@ void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr)
unit_free(this_cpu_ptr(ma->caches)->cache + idx, ptr);
}
+void notrace bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr)
+{
+ int idx;
+
+ if (!ptr)
+ return;
+
+ idx = bpf_mem_cache_idx(ksize(ptr - LLIST_NODE_SZ));
+ if (idx < 0)
+ return;
+
+ unit_free_rcu(this_cpu_ptr(ma->caches)->cache + idx, ptr);
+}
+
void notrace *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma)
{
void *ret;
@@ -676,6 +862,14 @@ void notrace bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr)
unit_free(this_cpu_ptr(ma->cache), ptr);
}
+void notrace bpf_mem_cache_free_rcu(struct bpf_mem_alloc *ma, void *ptr)
+{
+ if (!ptr)
+ return;
+
+ unit_free_rcu(this_cpu_ptr(ma->cache), ptr);
+}
+
/* Directly does a kfree() without putting 'ptr' back to the free_llist
* for reuse and without waiting for a rcu_tasks_trace gp.
* The caller must first go through the rcu_tasks_trace gp for 'ptr'
diff --git a/kernel/bpf/mprog.c b/kernel/bpf/mprog.c
new file mode 100644
index 000000000000..f7816d2bc3e4
--- /dev/null
+++ b/kernel/bpf/mprog.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include <linux/bpf.h>
+#include <linux/bpf_mprog.h>
+
+static int bpf_mprog_link(struct bpf_tuple *tuple,
+ u32 id_or_fd, u32 flags,
+ enum bpf_prog_type type)
+{
+ struct bpf_link *link = ERR_PTR(-EINVAL);
+ bool id = flags & BPF_F_ID;
+
+ if (id)
+ link = bpf_link_by_id(id_or_fd);
+ else if (id_or_fd)
+ link = bpf_link_get_from_fd(id_or_fd);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+ if (type && link->prog->type != type) {
+ bpf_link_put(link);
+ return -EINVAL;
+ }
+
+ tuple->link = link;
+ tuple->prog = link->prog;
+ return 0;
+}
+
+static int bpf_mprog_prog(struct bpf_tuple *tuple,
+ u32 id_or_fd, u32 flags,
+ enum bpf_prog_type type)
+{
+ struct bpf_prog *prog = ERR_PTR(-EINVAL);
+ bool id = flags & BPF_F_ID;
+
+ if (id)
+ prog = bpf_prog_by_id(id_or_fd);
+ else if (id_or_fd)
+ prog = bpf_prog_get(id_or_fd);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+ if (type && prog->type != type) {
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
+
+ tuple->link = NULL;
+ tuple->prog = prog;
+ return 0;
+}
+
+static int bpf_mprog_tuple_relative(struct bpf_tuple *tuple,
+ u32 id_or_fd, u32 flags,
+ enum bpf_prog_type type)
+{
+ bool link = flags & BPF_F_LINK;
+ bool id = flags & BPF_F_ID;
+
+ memset(tuple, 0, sizeof(*tuple));
+ if (link)
+ return bpf_mprog_link(tuple, id_or_fd, flags, type);
+ /* If no relevant flag is set and no id_or_fd was passed, then
+ * tuple link/prog is just NULLed. This is the case when before/
+ * after selects first/last position without passing fd.
+ */
+ if (!id && !id_or_fd)
+ return 0;
+ return bpf_mprog_prog(tuple, id_or_fd, flags, type);
+}
+
+static void bpf_mprog_tuple_put(struct bpf_tuple *tuple)
+{
+ if (tuple->link)
+ bpf_link_put(tuple->link);
+ else if (tuple->prog)
+ bpf_prog_put(tuple->prog);
+}
+
+/* The bpf_mprog_{replace,delete}() operate on exact idx position with the
+ * one exception that for deletion we support delete from front/back. In
+ * case of front idx is -1, in case of back idx is bpf_mprog_total(entry).
+ * Adjustment to first and last entry is trivial. The bpf_mprog_insert()
+ * we have to deal with the following cases:
+ *
+ * idx + before:
+ *
+ * Insert P4 before P3: idx for old array is 1, idx for new array is 2,
+ * hence we adjust target idx for the new array, so that memmove copies
+ * P1 and P2 to the new entry, and we insert P4 into idx 2. Inserting
+ * before P1 would have old idx -1 and new idx 0.
+ *
+ * +--+--+--+ +--+--+--+--+ +--+--+--+--+
+ * |P1|P2|P3| ==> |P1|P2| |P3| ==> |P1|P2|P4|P3|
+ * +--+--+--+ +--+--+--+--+ +--+--+--+--+
+ *
+ * idx + after:
+ *
+ * Insert P4 after P2: idx for old array is 2, idx for new array is 2.
+ * Again, memmove copies P1 and P2 to the new entry, and we insert P4
+ * into idx 2. Inserting after P3 would have both old/new idx at 4 aka
+ * bpf_mprog_total(entry).
+ *
+ * +--+--+--+ +--+--+--+--+ +--+--+--+--+
+ * |P1|P2|P3| ==> |P1|P2| |P3| ==> |P1|P2|P4|P3|
+ * +--+--+--+ +--+--+--+--+ +--+--+--+--+
+ */
+static int bpf_mprog_replace(struct bpf_mprog_entry *entry,
+ struct bpf_mprog_entry **entry_new,
+ struct bpf_tuple *ntuple, int idx)
+{
+ struct bpf_mprog_fp *fp;
+ struct bpf_mprog_cp *cp;
+ struct bpf_prog *oprog;
+
+ bpf_mprog_read(entry, idx, &fp, &cp);
+ oprog = READ_ONCE(fp->prog);
+ bpf_mprog_write(fp, cp, ntuple);
+ if (!ntuple->link) {
+ WARN_ON_ONCE(cp->link);
+ bpf_prog_put(oprog);
+ }
+ *entry_new = entry;
+ return 0;
+}
+
+static int bpf_mprog_insert(struct bpf_mprog_entry *entry,
+ struct bpf_mprog_entry **entry_new,
+ struct bpf_tuple *ntuple, int idx, u32 flags)
+{
+ int total = bpf_mprog_total(entry);
+ struct bpf_mprog_entry *peer;
+ struct bpf_mprog_fp *fp;
+ struct bpf_mprog_cp *cp;
+
+ peer = bpf_mprog_peer(entry);
+ bpf_mprog_entry_copy(peer, entry);
+ if (idx == total)
+ goto insert;
+ else if (flags & BPF_F_BEFORE)
+ idx += 1;
+ bpf_mprog_entry_grow(peer, idx);
+insert:
+ bpf_mprog_read(peer, idx, &fp, &cp);
+ bpf_mprog_write(fp, cp, ntuple);
+ bpf_mprog_inc(peer);
+ *entry_new = peer;
+ return 0;
+}
+
+static int bpf_mprog_delete(struct bpf_mprog_entry *entry,
+ struct bpf_mprog_entry **entry_new,
+ struct bpf_tuple *dtuple, int idx)
+{
+ int total = bpf_mprog_total(entry);
+ struct bpf_mprog_entry *peer;
+
+ peer = bpf_mprog_peer(entry);
+ bpf_mprog_entry_copy(peer, entry);
+ if (idx == -1)
+ idx = 0;
+ else if (idx == total)
+ idx = total - 1;
+ bpf_mprog_entry_shrink(peer, idx);
+ bpf_mprog_dec(peer);
+ bpf_mprog_mark_for_release(peer, dtuple);
+ *entry_new = peer;
+ return 0;
+}
+
+/* In bpf_mprog_pos_*() we evaluate the target position for the BPF
+ * program/link that needs to be replaced, inserted or deleted for
+ * each "rule" independently. If all rules agree on that position
+ * or existing element, then enact replacement, addition or deletion.
+ * If this is not the case, then the request cannot be satisfied and
+ * we bail out with an error.
+ */
+static int bpf_mprog_pos_exact(struct bpf_mprog_entry *entry,
+ struct bpf_tuple *tuple)
+{
+ struct bpf_mprog_fp *fp;
+ struct bpf_mprog_cp *cp;
+ int i;
+
+ for (i = 0; i < bpf_mprog_total(entry); i++) {
+ bpf_mprog_read(entry, i, &fp, &cp);
+ if (tuple->prog == READ_ONCE(fp->prog))
+ return tuple->link == cp->link ? i : -EBUSY;
+ }
+ return -ENOENT;
+}
+
+static int bpf_mprog_pos_before(struct bpf_mprog_entry *entry,
+ struct bpf_tuple *tuple)
+{
+ struct bpf_mprog_fp *fp;
+ struct bpf_mprog_cp *cp;
+ int i;
+
+ for (i = 0; i < bpf_mprog_total(entry); i++) {
+ bpf_mprog_read(entry, i, &fp, &cp);
+ if (tuple->prog == READ_ONCE(fp->prog) &&
+ (!tuple->link || tuple->link == cp->link))
+ return i - 1;
+ }
+ return tuple->prog ? -ENOENT : -1;
+}
+
+static int bpf_mprog_pos_after(struct bpf_mprog_entry *entry,
+ struct bpf_tuple *tuple)
+{
+ struct bpf_mprog_fp *fp;
+ struct bpf_mprog_cp *cp;
+ int i;
+
+ for (i = 0; i < bpf_mprog_total(entry); i++) {
+ bpf_mprog_read(entry, i, &fp, &cp);
+ if (tuple->prog == READ_ONCE(fp->prog) &&
+ (!tuple->link || tuple->link == cp->link))
+ return i + 1;
+ }
+ return tuple->prog ? -ENOENT : bpf_mprog_total(entry);
+}
+
+int bpf_mprog_attach(struct bpf_mprog_entry *entry,
+ struct bpf_mprog_entry **entry_new,
+ struct bpf_prog *prog_new, struct bpf_link *link,
+ struct bpf_prog *prog_old,
+ u32 flags, u32 id_or_fd, u64 revision)
+{
+ struct bpf_tuple rtuple, ntuple = {
+ .prog = prog_new,
+ .link = link,
+ }, otuple = {
+ .prog = prog_old,
+ .link = link,
+ };
+ int ret, idx = -ERANGE, tidx;
+
+ if (revision && revision != bpf_mprog_revision(entry))
+ return -ESTALE;
+ if (bpf_mprog_exists(entry, prog_new))
+ return -EEXIST;
+ ret = bpf_mprog_tuple_relative(&rtuple, id_or_fd,
+ flags & ~BPF_F_REPLACE,
+ prog_new->type);
+ if (ret)
+ return ret;
+ if (flags & BPF_F_REPLACE) {
+ tidx = bpf_mprog_pos_exact(entry, &otuple);
+ if (tidx < 0) {
+ ret = tidx;
+ goto out;
+ }
+ idx = tidx;
+ }
+ if (flags & BPF_F_BEFORE) {
+ tidx = bpf_mprog_pos_before(entry, &rtuple);
+ if (tidx < -1 || (idx >= -1 && tidx != idx)) {
+ ret = tidx < -1 ? tidx : -ERANGE;
+ goto out;
+ }
+ idx = tidx;
+ }
+ if (flags & BPF_F_AFTER) {
+ tidx = bpf_mprog_pos_after(entry, &rtuple);
+ if (tidx < -1 || (idx >= -1 && tidx != idx)) {
+ ret = tidx < 0 ? tidx : -ERANGE;
+ goto out;
+ }
+ idx = tidx;
+ }
+ if (idx < -1) {
+ if (rtuple.prog || flags) {
+ ret = -EINVAL;
+ goto out;
+ }
+ idx = bpf_mprog_total(entry);
+ flags = BPF_F_AFTER;
+ }
+ if (idx >= bpf_mprog_max()) {
+ ret = -ERANGE;
+ goto out;
+ }
+ if (flags & BPF_F_REPLACE)
+ ret = bpf_mprog_replace(entry, entry_new, &ntuple, idx);
+ else
+ ret = bpf_mprog_insert(entry, entry_new, &ntuple, idx, flags);
+out:
+ bpf_mprog_tuple_put(&rtuple);
+ return ret;
+}
+
+static int bpf_mprog_fetch(struct bpf_mprog_entry *entry,
+ struct bpf_tuple *tuple, int idx)
+{
+ int total = bpf_mprog_total(entry);
+ struct bpf_mprog_cp *cp;
+ struct bpf_mprog_fp *fp;
+ struct bpf_prog *prog;
+ struct bpf_link *link;
+
+ if (idx == -1)
+ idx = 0;
+ else if (idx == total)
+ idx = total - 1;
+ bpf_mprog_read(entry, idx, &fp, &cp);
+ prog = READ_ONCE(fp->prog);
+ link = cp->link;
+ /* The deletion request can either be without filled tuple in which
+ * case it gets populated here based on idx, or with filled tuple
+ * where the only thing we end up doing is the WARN_ON_ONCE() assert.
+ * If we hit a BPF link at the given index, it must not be removed
+ * from opts path.
+ */
+ if (link && !tuple->link)
+ return -EBUSY;
+ WARN_ON_ONCE(tuple->prog && tuple->prog != prog);
+ WARN_ON_ONCE(tuple->link && tuple->link != link);
+ tuple->prog = prog;
+ tuple->link = link;
+ return 0;
+}
+
+int bpf_mprog_detach(struct bpf_mprog_entry *entry,
+ struct bpf_mprog_entry **entry_new,
+ struct bpf_prog *prog, struct bpf_link *link,
+ u32 flags, u32 id_or_fd, u64 revision)
+{
+ struct bpf_tuple rtuple, dtuple = {
+ .prog = prog,
+ .link = link,
+ };
+ int ret, idx = -ERANGE, tidx;
+
+ if (flags & BPF_F_REPLACE)
+ return -EINVAL;
+ if (revision && revision != bpf_mprog_revision(entry))
+ return -ESTALE;
+ ret = bpf_mprog_tuple_relative(&rtuple, id_or_fd, flags,
+ prog ? prog->type :
+ BPF_PROG_TYPE_UNSPEC);
+ if (ret)
+ return ret;
+ if (dtuple.prog) {
+ tidx = bpf_mprog_pos_exact(entry, &dtuple);
+ if (tidx < 0) {
+ ret = tidx;
+ goto out;
+ }
+ idx = tidx;
+ }
+ if (flags & BPF_F_BEFORE) {
+ tidx = bpf_mprog_pos_before(entry, &rtuple);
+ if (tidx < -1 || (idx >= -1 && tidx != idx)) {
+ ret = tidx < -1 ? tidx : -ERANGE;
+ goto out;
+ }
+ idx = tidx;
+ }
+ if (flags & BPF_F_AFTER) {
+ tidx = bpf_mprog_pos_after(entry, &rtuple);
+ if (tidx < -1 || (idx >= -1 && tidx != idx)) {
+ ret = tidx < 0 ? tidx : -ERANGE;
+ goto out;
+ }
+ idx = tidx;
+ }
+ if (idx < -1) {
+ if (rtuple.prog || flags) {
+ ret = -EINVAL;
+ goto out;
+ }
+ idx = bpf_mprog_total(entry);
+ flags = BPF_F_AFTER;
+ }
+ if (idx >= bpf_mprog_max()) {
+ ret = -ERANGE;
+ goto out;
+ }
+ ret = bpf_mprog_fetch(entry, &dtuple, idx);
+ if (ret)
+ goto out;
+ ret = bpf_mprog_delete(entry, entry_new, &dtuple, idx);
+out:
+ bpf_mprog_tuple_put(&rtuple);
+ return ret;
+}
+
+int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr,
+ struct bpf_mprog_entry *entry)
+{
+ u32 __user *uprog_flags, *ulink_flags;
+ u32 __user *uprog_id, *ulink_id;
+ struct bpf_mprog_fp *fp;
+ struct bpf_mprog_cp *cp;
+ struct bpf_prog *prog;
+ const u32 flags = 0;
+ int i, ret = 0;
+ u32 id, count;
+ u64 revision;
+
+ if (attr->query.query_flags || attr->query.attach_flags)
+ return -EINVAL;
+ revision = bpf_mprog_revision(entry);
+ count = bpf_mprog_total(entry);
+ if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
+ return -EFAULT;
+ if (copy_to_user(&uattr->query.revision, &revision, sizeof(revision)))
+ return -EFAULT;
+ if (copy_to_user(&uattr->query.count, &count, sizeof(count)))
+ return -EFAULT;
+ uprog_id = u64_to_user_ptr(attr->query.prog_ids);
+ uprog_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
+ ulink_id = u64_to_user_ptr(attr->query.link_ids);
+ ulink_flags = u64_to_user_ptr(attr->query.link_attach_flags);
+ if (attr->query.count == 0 || !uprog_id || !count)
+ return 0;
+ if (attr->query.count < count) {
+ count = attr->query.count;
+ ret = -ENOSPC;
+ }
+ for (i = 0; i < bpf_mprog_max(); i++) {
+ bpf_mprog_read(entry, i, &fp, &cp);
+ prog = READ_ONCE(fp->prog);
+ if (!prog)
+ break;
+ id = prog->aux->id;
+ if (copy_to_user(uprog_id + i, &id, sizeof(id)))
+ return -EFAULT;
+ if (uprog_flags &&
+ copy_to_user(uprog_flags + i, &flags, sizeof(flags)))
+ return -EFAULT;
+ id = cp->link ? cp->link->id : 0;
+ if (ulink_id &&
+ copy_to_user(ulink_id + i, &id, sizeof(id)))
+ return -EFAULT;
+ if (ulink_flags &&
+ copy_to_user(ulink_flags + i, &flags, sizeof(flags)))
+ return -EFAULT;
+ if (i + 1 == count)
+ break;
+ }
+ return ret;
+}
diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile
index 8937dc6bc8d0..b83c2f5e9be1 100644
--- a/kernel/bpf/preload/iterators/Makefile
+++ b/kernel/bpf/preload/iterators/Makefile
@@ -50,7 +50,7 @@ iterators.lskel-%.h: $(OUTPUT)/%/iterators.bpf.o | $(BPFTOOL)
$(OUTPUT)/%/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT)
$(call msg,BPF,$@)
$(Q)mkdir -p $(@D)
- $(Q)$(CLANG) -g -O2 -target bpf -m$* $(INCLUDES) \
+ $(Q)$(CLANG) -g -O2 --target=bpf -m$* $(INCLUDES) \
-c $(filter %.c,$^) -o $@ && \
$(LLVM_STRIP) -g $@
diff --git a/kernel/bpf/preload/iterators/iterators.bpf.c b/kernel/bpf/preload/iterators/iterators.bpf.c
index 03af863314ea..b78968b63fab 100644
--- a/kernel/bpf/preload/iterators/iterators.bpf.c
+++ b/kernel/bpf/preload/iterators/iterators.bpf.c
@@ -73,6 +73,8 @@ static const char *get_name(struct btf *btf, long btf_id, const char *fallback)
return str + name_off;
}
+__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
+
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
{
@@ -84,9 +86,12 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
return 0;
if (seq_num == 0)
- BPF_SEQ_PRINTF(seq, " id name max_entries\n");
+ BPF_SEQ_PRINTF(seq, " id name max_entries cur_entries\n");
+
+ BPF_SEQ_PRINTF(seq, "%4u %-16s %10d %10lld\n",
+ map->id, map->name, map->max_entries,
+ bpf_map_sum_elem_count(map));
- BPF_SEQ_PRINTF(seq, "%4u %-16s%6d\n", map->id, map->name, map->max_entries);
return 0;
}
diff --git a/kernel/bpf/preload/iterators/iterators.lskel-little-endian.h b/kernel/bpf/preload/iterators/iterators.lskel-little-endian.h
index 70f236a82fe1..5b98ab02025e 100644
--- a/kernel/bpf/preload/iterators/iterators.lskel-little-endian.h
+++ b/kernel/bpf/preload/iterators/iterators.lskel-little-endian.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-/* THIS FILE IS AUTOGENERATED! */
+/* THIS FILE IS AUTOGENERATED BY BPFTOOL! */
#ifndef __ITERATORS_BPF_SKEL_H__
#define __ITERATORS_BPF_SKEL_H__
@@ -18,8 +18,6 @@ struct iterators_bpf {
int dump_bpf_map_fd;
int dump_bpf_prog_fd;
} links;
- struct iterators_bpf__rodata {
- } *rodata;
};
static inline int
@@ -68,7 +66,6 @@ iterators_bpf__destroy(struct iterators_bpf *skel)
iterators_bpf__detach(skel);
skel_closenz(skel->progs.dump_bpf_map.prog_fd);
skel_closenz(skel->progs.dump_bpf_prog.prog_fd);
- skel_free_map_data(skel->rodata, skel->maps.rodata.initial_value, 4096);
skel_closenz(skel->maps.rodata.map_fd);
skel_free(skel);
}
@@ -81,15 +78,6 @@ iterators_bpf__open(void)
if (!skel)
goto cleanup;
skel->ctx.sz = (void *)&skel->links - (void *)skel;
- skel->rodata = skel_prep_map_data((void *)"\
-\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
-\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\
-\x25\x2d\x31\x36\x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\
-\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\
-\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0", 4096, 98);
- if (!skel->rodata)
- goto cleanup;
- skel->maps.rodata.initial_value = (__u64) (long) skel->rodata;
return skel;
cleanup:
iterators_bpf__destroy(skel);
@@ -103,7 +91,7 @@ iterators_bpf__load(struct iterators_bpf *skel)
int err;
opts.ctx = (struct bpf_loader_ctx *)skel;
- opts.data_sz = 6056;
+ opts.data_sz = 6208;
opts.data = (void *)"\
\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
@@ -138,190 +126,197 @@ iterators_bpf__load(struct iterators_bpf *skel)
\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x9f\xeb\x01\0\
-\x18\0\0\0\0\0\0\0\x1c\x04\0\0\x1c\x04\0\0\xf9\x04\0\0\0\0\0\0\0\0\0\x02\x02\0\
+\x18\0\0\0\0\0\0\0\x80\x04\0\0\x80\x04\0\0\x31\x05\0\0\0\0\0\0\0\0\0\x02\x02\0\
\0\0\x01\0\0\0\x02\0\0\x04\x10\0\0\0\x13\0\0\0\x03\0\0\0\0\0\0\0\x18\0\0\0\x04\
\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x08\0\0\0\0\0\0\0\0\0\0\x02\x0d\0\0\0\0\0\0\
\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x01\0\0\0\x20\0\0\0\0\0\0\x01\x04\0\0\0\x20\
-\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xa3\0\0\0\x03\0\0\x04\x18\0\0\0\xb1\0\
-\0\0\x09\0\0\0\0\0\0\0\xb5\0\0\0\x0b\0\0\0\x40\0\0\0\xc0\0\0\0\x0b\0\0\0\x80\0\
-\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xc8\0\0\0\0\0\0\x07\0\0\0\0\xd1\0\0\0\0\0\0\
-\x08\x0c\0\0\0\xd7\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\x94\x01\0\0\x03\0\0\x04\
-\x18\0\0\0\x9c\x01\0\0\x0e\0\0\0\0\0\0\0\x9f\x01\0\0\x11\0\0\0\x20\0\0\0\xa4\
-\x01\0\0\x0e\0\0\0\xa0\0\0\0\xb0\x01\0\0\0\0\0\x08\x0f\0\0\0\xb6\x01\0\0\0\0\0\
-\x01\x04\0\0\0\x20\0\0\0\xc3\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\0\0\0\
-\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xc8\x01\0\0\0\0\0\x01\x04\0\0\0\
-\x20\0\0\0\0\0\0\0\0\0\0\x02\x14\0\0\0\x2c\x02\0\0\x02\0\0\x04\x10\0\0\0\x13\0\
-\0\0\x03\0\0\0\0\0\0\0\x3f\x02\0\0\x15\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x18\0\
-\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x13\0\0\0\x44\x02\0\0\x01\0\0\x0c\
-\x16\0\0\0\x90\x02\0\0\x01\0\0\x04\x08\0\0\0\x99\x02\0\0\x19\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\x02\x1a\0\0\0\xea\x02\0\0\x06\0\0\x04\x38\0\0\0\x9c\x01\0\0\x0e\0\0\
-\0\0\0\0\0\x9f\x01\0\0\x11\0\0\0\x20\0\0\0\xf7\x02\0\0\x1b\0\0\0\xc0\0\0\0\x08\
-\x03\0\0\x15\0\0\0\0\x01\0\0\x11\x03\0\0\x1d\0\0\0\x40\x01\0\0\x1b\x03\0\0\x1e\
-\0\0\0\x80\x01\0\0\0\0\0\0\0\0\0\x02\x1c\0\0\0\0\0\0\0\0\0\0\x0a\x10\0\0\0\0\0\
-\0\0\0\0\0\x02\x1f\0\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\x65\x03\0\0\x02\0\0\x04\
-\x08\0\0\0\x73\x03\0\0\x0e\0\0\0\0\0\0\0\x7c\x03\0\0\x0e\0\0\0\x20\0\0\0\x1b\
-\x03\0\0\x03\0\0\x04\x18\0\0\0\x86\x03\0\0\x1b\0\0\0\0\0\0\0\x8e\x03\0\0\x21\0\
-\0\0\x40\0\0\0\x94\x03\0\0\x23\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x22\0\0\0\0\0\
-\0\0\0\0\0\x02\x24\0\0\0\x98\x03\0\0\x01\0\0\x04\x04\0\0\0\xa3\x03\0\0\x0e\0\0\
-\0\0\0\0\0\x0c\x04\0\0\x01\0\0\x04\x04\0\0\0\x15\x04\0\0\x0e\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\0\x8b\x04\0\0\0\0\0\x0e\x25\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x0e\0\0\0\x9f\x04\
-\0\0\0\0\0\x0e\x27\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\
-\x20\0\0\0\xb5\x04\0\0\0\0\0\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\
-\x1c\0\0\0\x12\0\0\0\x11\0\0\0\xca\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\xe1\x04\0\0\0\0\0\x0e\x2d\0\0\
-\0\x01\0\0\0\xe9\x04\0\0\x04\0\0\x0f\x62\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\0\x28\
-\0\0\0\x23\0\0\0\x0e\0\0\0\x2a\0\0\0\x31\0\0\0\x20\0\0\0\x2c\0\0\0\x51\0\0\0\
-\x11\0\0\0\xf1\x04\0\0\x01\0\0\x0f\x04\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\0\0\0\x62\
-\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x6d\x65\x74\
-\x61\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\x74\0\x64\x75\x6d\x70\x5f\x62\x70\
-\x66\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\x30\
-\x3a\x30\0\x2f\x77\x2f\x6e\x65\x74\x2d\x6e\x65\x78\x74\x2f\x6b\x65\x72\x6e\x65\
-\x6c\x2f\x62\x70\x66\x2f\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\x61\
-\x74\x6f\x72\x73\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\
-\x63\0\x09\x73\x74\x72\x75\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\x2a\
-\x73\x65\x71\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\
-\x71\x3b\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\x71\0\
-\x73\x65\x73\x73\x69\x6f\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\x73\
-\x65\x71\x5f\x66\x69\x6c\x65\0\x5f\x5f\x75\x36\x34\0\x75\x6e\x73\x69\x67\x6e\
-\x65\x64\x20\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\0\x30\x3a\x31\0\x09\x73\x74\
-\x72\x75\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\x61\x70\x20\x3d\
-\x20\x63\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\x21\x6d\x61\x70\
-\x29\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x20\x63\
-\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x5f\x6e\x75\x6d\x3b\0\x30\
-\x3a\x32\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x3d\x20\x30\
-\x29\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\
-\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\
-\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\
-\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\x64\0\x6e\x61\x6d\x65\
-\0\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\x75\x33\x32\0\x75\x6e\
-\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\x72\0\x5f\x5f\x41\x52\
-\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\0\x09\x42\x50\x46\
-\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\
-\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x5c\x6e\x22\x2c\x20\x6d\x61\x70\
-\x2d\x3e\x69\x64\x2c\x20\x6d\x61\x70\x2d\x3e\x6e\x61\x6d\x65\x2c\x20\x6d\x61\
-\x70\x2d\x3e\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x29\x3b\0\x7d\0\x62\
-\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x70\x72\
-\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x69\x74\x65\
-\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\
-\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\x72\x6f\x67\x20\x3d\x20\x63\x74\x78\
-\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\x20\x28\x21\x70\x72\x6f\x67\x29\0\
-\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\x78\0\x09\x61\x75\x78\x20\x3d\x20\
-\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\
-\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\
-\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\
-\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x70\x72\x6f\x67\
-\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\x68\x5f\x66\x75\x6e\x63\x5f\x6e\x61\x6d\
-\x65\0\x64\x73\x74\x5f\x70\x72\x6f\x67\0\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\
-\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\
-\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\
-\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\x78\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\
-\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\x62\x74\x66\x29\0\x62\x70\x66\x5f\x66\
-\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\x6e\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\
-\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\x69\x6e\x67\x73\0\x74\x79\x70\x65\x73\
-\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\x61\x64\x65\x72\0\x73\x74\x72\x5f\x6c\
-\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\x3d\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\
-\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\x72\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\
-\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\x2c\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\
-\x29\x2c\x20\x74\x79\x70\x65\x73\x20\x2b\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\
-\x09\x73\x74\x72\x20\x3d\x20\x62\x74\x66\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\
-\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\0\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\
-\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3d\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\
-\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\
-\x30\x3a\x32\x3a\x30\0\x09\x69\x66\x20\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\
-\x3e\x3d\x20\x62\x74\x66\x2d\x3e\x68\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\
-\x29\0\x09\x72\x65\x74\x75\x72\x6e\x20\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\
-\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
-\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
-\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\
-\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\
-\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\
-\x4e\x53\x45\0\x2e\x72\x6f\x64\x61\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x2d\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x02\0\0\
-\0\x04\0\0\0\x62\0\0\0\x01\0\0\0\x80\x04\0\0\0\0\0\0\0\0\0\0\x69\x74\x65\x72\
-\x61\x74\x6f\x72\x2e\x72\x6f\x64\x61\x74\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\x2f\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\
-\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\
-\x73\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x0a\0\x20\x20\x69\
-\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
-\x61\x74\x74\x61\x63\x68\x65\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\
-\x25\x73\x20\x25\x73\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\
-\x79\x12\0\0\0\0\0\0\x79\x26\0\0\0\0\0\0\x79\x17\x08\0\0\0\0\0\x15\x07\x1b\0\0\
-\0\0\0\x79\x11\0\0\0\0\0\0\x79\x11\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\
-\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x23\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\
-\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\xb7\x01\0\0\x04\0\0\0\xbf\x72\0\
-\0\0\0\0\0\x0f\x12\0\0\0\0\0\0\x7b\x2a\xf0\xff\0\0\0\0\x61\x71\x14\0\0\0\0\0\
-\x7b\x1a\xf8\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\
-\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x23\0\0\0\xb7\x03\0\0\x0e\0\0\0\
-\xb7\x05\0\0\x18\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\
-\0\0\0\0\x07\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\x1e\x3c\x01\0\x01\0\0\0\x42\0\0\
-\0\x7b\0\0\0\x24\x3c\x01\0\x02\0\0\0\x42\0\0\0\xee\0\0\0\x1d\x44\x01\0\x03\0\0\
-\0\x42\0\0\0\x0f\x01\0\0\x06\x4c\x01\0\x04\0\0\0\x42\0\0\0\x1a\x01\0\0\x17\x40\
-\x01\0\x05\0\0\0\x42\0\0\0\x1a\x01\0\0\x1d\x40\x01\0\x06\0\0\0\x42\0\0\0\x43\
-\x01\0\0\x06\x58\x01\0\x08\0\0\0\x42\0\0\0\x56\x01\0\0\x03\x5c\x01\0\x0f\0\0\0\
-\x42\0\0\0\xdc\x01\0\0\x02\x64\x01\0\x1f\0\0\0\x42\0\0\0\x2a\x02\0\0\x01\x6c\
-\x01\0\0\0\0\0\x02\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\
-\0\x10\0\0\0\x02\0\0\0\xea\0\0\0\0\0\0\0\x20\0\0\0\x02\0\0\0\x3e\0\0\0\0\0\0\0\
-\x28\0\0\0\x08\0\0\0\x3f\x01\0\0\0\0\0\0\x78\0\0\0\x0d\0\0\0\x3e\0\0\0\0\0\0\0\
-\x88\0\0\0\x0d\0\0\0\xea\0\0\0\0\0\0\0\xa8\0\0\0\x0d\0\0\0\x3f\x01\0\0\0\0\0\0\
-\x1a\0\0\0\x21\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\0\0\0\
-\0\0\0\0\x1c\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\
-\0\0\0\0\0\x0a\0\0\0\x01\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\x10\0\0\0\0\0\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x62\x70\x66\x5f\x6d\
-\x61\x70\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\x79\x12\0\0\0\0\0\0\x79\x26\0\0\
-\0\0\0\0\x79\x12\x08\0\0\0\0\0\x15\x02\x3c\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x79\
-\x27\0\0\0\0\0\0\x79\x11\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\
-\0\x07\x04\0\0\xd0\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\
-\x31\0\0\0\xb7\x03\0\0\x20\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x7b\
-\x6a\xc8\xff\0\0\0\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xb7\x03\0\0\
-\x04\0\0\0\xbf\x79\0\0\0\0\0\0\x0f\x39\0\0\0\0\0\0\x79\x71\x28\0\0\0\0\0\x79\
-\x78\x30\0\0\0\0\0\x15\x08\x18\0\0\0\0\0\xb7\x02\0\0\0\0\0\0\x0f\x21\0\0\0\0\0\
-\0\x61\x11\x04\0\0\0\0\0\x79\x83\x08\0\0\0\0\0\x67\x01\0\0\x03\0\0\0\x0f\x13\0\
-\0\0\0\0\0\x79\x86\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf8\xff\xff\xff\
-\xb7\x02\0\0\x08\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x01\0\0\0\0\0\0\x79\xa3\xf8\xff\
-\0\0\0\0\x0f\x13\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf4\xff\xff\xff\
-\xb7\x02\0\0\x04\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x03\0\0\x04\0\0\0\x61\xa1\xf4\
-\xff\0\0\0\0\x61\x82\x10\0\0\0\0\0\x3d\x21\x02\0\0\0\0\0\x0f\x16\0\0\0\0\0\0\
-\xbf\x69\0\0\0\0\0\0\x7b\x9a\xd8\xff\0\0\0\0\x79\x71\x18\0\0\0\0\0\x7b\x1a\xe0\
-\xff\0\0\0\0\x79\x71\x20\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\x31\0\0\0\0\0\0\x7b\
-\x1a\xe8\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xd0\xff\xff\xff\x79\xa1\
-\xc8\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x51\0\0\0\xb7\x03\0\0\x11\0\0\0\
-\xb7\x05\0\0\x20\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\
-\0\0\0\0\x17\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\x1e\x80\x01\0\x01\0\0\0\x42\0\0\
-\0\x7b\0\0\0\x24\x80\x01\0\x02\0\0\0\x42\0\0\0\x60\x02\0\0\x1f\x88\x01\0\x03\0\
-\0\0\x42\0\0\0\x84\x02\0\0\x06\x94\x01\0\x04\0\0\0\x42\0\0\0\x1a\x01\0\0\x17\
-\x84\x01\0\x05\0\0\0\x42\0\0\0\x9d\x02\0\0\x0e\xa0\x01\0\x06\0\0\0\x42\0\0\0\
-\x1a\x01\0\0\x1d\x84\x01\0\x07\0\0\0\x42\0\0\0\x43\x01\0\0\x06\xa4\x01\0\x09\0\
-\0\0\x42\0\0\0\xaf\x02\0\0\x03\xa8\x01\0\x11\0\0\0\x42\0\0\0\x1f\x03\0\0\x02\
-\xb0\x01\0\x18\0\0\0\x42\0\0\0\x5a\x03\0\0\x06\x04\x01\0\x1b\0\0\0\x42\0\0\0\0\
-\0\0\0\0\0\0\0\x1c\0\0\0\x42\0\0\0\xab\x03\0\0\x0f\x10\x01\0\x1d\0\0\0\x42\0\0\
-\0\xc0\x03\0\0\x2d\x14\x01\0\x1f\0\0\0\x42\0\0\0\xf7\x03\0\0\x0d\x0c\x01\0\x21\
-\0\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x22\0\0\0\x42\0\0\0\xc0\x03\0\0\x02\x14\x01\0\
-\x25\0\0\0\x42\0\0\0\x1e\x04\0\0\x0d\x18\x01\0\x28\0\0\0\x42\0\0\0\0\0\0\0\0\0\
-\0\0\x29\0\0\0\x42\0\0\0\x1e\x04\0\0\x0d\x18\x01\0\x2c\0\0\0\x42\0\0\0\x1e\x04\
-\0\0\x0d\x18\x01\0\x2d\0\0\0\x42\0\0\0\x4c\x04\0\0\x1b\x1c\x01\0\x2e\0\0\0\x42\
-\0\0\0\x4c\x04\0\0\x06\x1c\x01\0\x2f\0\0\0\x42\0\0\0\x6f\x04\0\0\x0d\x24\x01\0\
-\x31\0\0\0\x42\0\0\0\x1f\x03\0\0\x02\xb0\x01\0\x40\0\0\0\x42\0\0\0\x2a\x02\0\0\
-\x01\xc0\x01\0\0\0\0\0\x14\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\
-\0\0\0\0\0\x10\0\0\0\x14\0\0\0\xea\0\0\0\0\0\0\0\x20\0\0\0\x14\0\0\0\x3e\0\0\0\
-\0\0\0\0\x28\0\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x30\0\0\0\x08\0\0\0\x3f\x01\0\0\
-\0\0\0\0\x88\0\0\0\x1a\0\0\0\x3e\0\0\0\0\0\0\0\x98\0\0\0\x1a\0\0\0\xea\0\0\0\0\
-\0\0\0\xb0\0\0\0\x1a\0\0\0\x52\x03\0\0\0\0\0\0\xb8\0\0\0\x1a\0\0\0\x56\x03\0\0\
-\0\0\0\0\xc8\0\0\0\x1f\0\0\0\x84\x03\0\0\0\0\0\0\xe0\0\0\0\x20\0\0\0\xea\0\0\0\
-\0\0\0\0\xf8\0\0\0\x20\0\0\0\x3e\0\0\0\0\0\0\0\x20\x01\0\0\x24\0\0\0\x3e\0\0\0\
-\0\0\0\0\x58\x01\0\0\x1a\0\0\0\xea\0\0\0\0\0\0\0\x68\x01\0\0\x20\0\0\0\x46\x04\
-\0\0\0\0\0\0\x90\x01\0\0\x1a\0\0\0\x3f\x01\0\0\0\0\0\0\xa0\x01\0\0\x1a\0\0\0\
-\x87\x04\0\0\0\0\0\0\xa8\x01\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x1a\0\0\0\x42\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\0\0\0\0\x1c\0\0\
-\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\0\0\0\0\0\x1a\0\
-\0\0\x01\0\0\0\0\0\0\0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x10\0\0\0\0\0\
-\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\
-\0\0\0\0";
- opts.insns_sz = 2216;
+\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xb0\0\0\0\x03\0\0\x04\x18\0\0\0\xbe\0\
+\0\0\x09\0\0\0\0\0\0\0\xc2\0\0\0\x0b\0\0\0\x40\0\0\0\xcd\0\0\0\x0b\0\0\0\x80\0\
+\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xd5\0\0\0\0\0\0\x07\0\0\0\0\xde\0\0\0\0\0\0\
+\x08\x0c\0\0\0\xe4\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\xae\x01\0\0\x03\0\0\x04\
+\x18\0\0\0\xb6\x01\0\0\x0e\0\0\0\0\0\0\0\xb9\x01\0\0\x11\0\0\0\x20\0\0\0\xbe\
+\x01\0\0\x0e\0\0\0\xa0\0\0\0\xca\x01\0\0\0\0\0\x08\x0f\0\0\0\xd0\x01\0\0\0\0\0\
+\x01\x04\0\0\0\x20\0\0\0\xdd\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\0\0\0\
+\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xe2\x01\0\0\0\0\0\x01\x04\0\0\0\
+\x20\0\0\0\0\0\0\0\x01\0\0\x0d\x14\0\0\0\x26\x05\0\0\x04\0\0\0\x2b\x02\0\0\0\0\
+\0\x08\x15\0\0\0\x31\x02\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\x01\x3b\x02\0\0\x01\0\
+\0\x0c\x13\0\0\0\0\0\0\0\0\0\0\x02\x18\0\0\0\x52\x02\0\0\x02\0\0\x04\x10\0\0\0\
+\x13\0\0\0\x03\0\0\0\0\0\0\0\x65\x02\0\0\x19\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\
+\x1c\0\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x17\0\0\0\x6a\x02\0\0\x01\0\
+\0\x0c\x1a\0\0\0\xb6\x02\0\0\x01\0\0\x04\x08\0\0\0\xbf\x02\0\0\x1d\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\x02\x1e\0\0\0\x10\x03\0\0\x06\0\0\x04\x38\0\0\0\xb6\x01\0\0\
+\x0e\0\0\0\0\0\0\0\xb9\x01\0\0\x11\0\0\0\x20\0\0\0\x1d\x03\0\0\x1f\0\0\0\xc0\0\
+\0\0\x2e\x03\0\0\x19\0\0\0\0\x01\0\0\x37\x03\0\0\x21\0\0\0\x40\x01\0\0\x41\x03\
+\0\0\x22\0\0\0\x80\x01\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\0\0\0\0\0\0\0\x0a\x10\0\
+\0\0\0\0\0\0\0\0\0\x02\x23\0\0\0\0\0\0\0\0\0\0\x02\x24\0\0\0\x8b\x03\0\0\x02\0\
+\0\x04\x08\0\0\0\x99\x03\0\0\x0e\0\0\0\0\0\0\0\xa2\x03\0\0\x0e\0\0\0\x20\0\0\0\
+\x41\x03\0\0\x03\0\0\x04\x18\0\0\0\xac\x03\0\0\x1f\0\0\0\0\0\0\0\xb4\x03\0\0\
+\x25\0\0\0\x40\0\0\0\xba\x03\0\0\x27\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x26\0\0\
+\0\0\0\0\0\0\0\0\x02\x28\0\0\0\xbe\x03\0\0\x01\0\0\x04\x04\0\0\0\xc9\x03\0\0\
+\x0e\0\0\0\0\0\0\0\x32\x04\0\0\x01\0\0\x04\x04\0\0\0\x3b\x04\0\0\x0e\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x20\0\0\0\x12\0\0\0\x30\0\0\0\xb1\x04\0\0\0\0\0\
+\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x20\0\0\0\x12\0\0\0\x1a\0\0\0\
+\xc5\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x20\0\0\0\
+\x12\0\0\0\x20\0\0\0\xdb\x04\0\0\0\0\0\x0e\x2d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\
+\0\0\0\0\x20\0\0\0\x12\0\0\0\x11\0\0\0\xf0\x04\0\0\0\0\0\x0e\x2f\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\x07\x05\0\0\0\0\0\x0e\
+\x31\0\0\0\x01\0\0\0\x0f\x05\0\0\x01\0\0\x0f\x04\0\0\0\x36\0\0\0\0\0\0\0\x04\0\
+\0\0\x16\x05\0\0\x04\0\0\x0f\x7b\0\0\0\x2a\0\0\0\0\0\0\0\x30\0\0\0\x2c\0\0\0\
+\x30\0\0\0\x1a\0\0\0\x2e\0\0\0\x4a\0\0\0\x20\0\0\0\x30\0\0\0\x6a\0\0\0\x11\0\0\
+\0\x1e\x05\0\0\x01\0\0\x0f\x04\0\0\0\x32\0\0\0\0\0\0\0\x04\0\0\0\x26\x05\0\0\0\
+\0\0\x0e\x06\0\0\0\x01\0\0\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\
+\x66\x5f\x6d\x61\x70\0\x6d\x65\x74\x61\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\
+\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\
+\x62\x70\x66\x5f\x6d\x61\x70\0\x30\x3a\x30\0\x2f\x68\x6f\x6d\x65\x2f\x61\x73\
+\x70\x73\x6b\x2f\x73\x72\x63\x2f\x62\x70\x66\x2d\x6e\x65\x78\x74\x2f\x6b\x65\
+\x72\x6e\x65\x6c\x2f\x62\x70\x66\x2f\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\
+\x65\x72\x61\x74\x6f\x72\x73\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\
+\x70\x66\x2e\x63\0\x09\x73\x74\x72\x75\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\
+\x65\x20\x2a\x73\x65\x71\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\
+\x3e\x73\x65\x71\x3b\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\
+\x73\x65\x71\0\x73\x65\x73\x73\x69\x6f\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\
+\x75\x6d\0\x73\x65\x71\x5f\x66\x69\x6c\x65\0\x5f\x5f\x75\x36\x34\0\x75\x6e\x73\
+\x69\x67\x6e\x65\x64\x20\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\0\x30\x3a\x31\0\
+\x09\x73\x74\x72\x75\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\x61\
+\x70\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\x21\
+\x6d\x61\x70\x29\0\x30\x3a\x32\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\x5f\
+\x6e\x75\x6d\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\
+\x71\x5f\x6e\x75\x6d\x3b\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\x20\
+\x3d\x3d\x20\x30\x29\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\
+\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\
+\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\
+\x72\x69\x65\x73\x20\x20\x63\x75\x72\x5f\x65\x6e\x74\x72\x69\x65\x73\x5c\x6e\
+\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\x64\0\x6e\x61\x6d\x65\0\x6d\
+\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\x75\x33\x32\0\x75\x6e\x73\
+\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\x72\0\x5f\x5f\x41\x52\x52\
+\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\0\x09\x42\x50\x46\x5f\
+\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\x34\
+\x75\x20\x25\x2d\x31\x36\x73\x20\x20\x25\x31\x30\x64\x20\x20\x20\x25\x31\x30\
+\x6c\x6c\x64\x5c\x6e\x22\x2c\0\x7d\0\x5f\x5f\x73\x36\x34\0\x6c\x6f\x6e\x67\x20\
+\x6c\x6f\x6e\x67\0\x62\x70\x66\x5f\x6d\x61\x70\x5f\x73\x75\x6d\x5f\x65\x6c\x65\
+\x6d\x5f\x63\x6f\x75\x6e\x74\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\
+\x66\x5f\x70\x72\x6f\x67\0\x70\x72\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\
+\x5f\x70\x72\x6f\x67\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\
+\x09\x73\x74\x72\x75\x63\x74\x20\x62\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\
+\x72\x6f\x67\x20\x3d\x20\x63\x74\x78\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\
+\x20\x28\x21\x70\x72\x6f\x67\x29\0\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\
+\x78\0\x09\x61\x75\x78\x20\x3d\x20\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\
+\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\
+\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\
+\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\
+\x3b\0\x62\x70\x66\x5f\x70\x72\x6f\x67\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\
+\x68\x5f\x66\x75\x6e\x63\x5f\x6e\x61\x6d\x65\0\x64\x73\x74\x5f\x70\x72\x6f\x67\
+\0\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\
+\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\x34\x75\
+\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\
+\x78\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\
+\x62\x74\x66\x29\0\x62\x70\x66\x5f\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\
+\x6e\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\
+\x69\x6e\x67\x73\0\x74\x79\x70\x65\x73\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\
+\x61\x64\x65\x72\0\x73\x74\x72\x5f\x6c\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\
+\x3d\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\
+\x72\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\
+\x2c\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\x29\x2c\x20\x74\x79\x70\x65\x73\x20\
+\x2b\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\x09\x73\x74\x72\x20\x3d\x20\x62\x74\
+\x66\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\
+\0\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\
+\x3d\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\
+\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\x30\x3a\x32\x3a\x30\0\x09\x69\x66\
+\x20\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3e\x3d\x20\x62\x74\x66\x2d\x3e\
+\x68\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\x29\0\x09\x72\x65\x74\x75\x72\x6e\
+\x20\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\
+\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\
+\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\
+\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\
+\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\
+\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\x4e\x53\x45\0\x2e\x6b\x73\x79\x6d\
+\x73\0\x2e\x72\x6f\x64\x61\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\x64\x75\x6d\
+\x6d\x79\x5f\x6b\x73\x79\x6d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\xc9\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x02\0\0\0\x04\0\0\0\x7b\0\0\0\x01\0\0\0\
+\x80\0\0\0\0\0\0\0\0\0\0\0\x69\x74\x65\x72\x61\x74\x6f\x72\x2e\x72\x6f\x64\x61\
+\x74\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\x34\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x20\x20\
+\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
+\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x20\x20\x63\x75\x72\x5f\x65\
+\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x20\x25\
+\x31\x30\x64\x20\x20\x20\x25\x31\x30\x6c\x6c\x64\x0a\0\x20\x20\x69\x64\x20\x6e\
+\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\
+\x61\x63\x68\x65\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\
+\x25\x73\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\x79\x12\0\0\0\
+\0\0\0\x79\x26\0\0\0\0\0\0\x79\x17\x08\0\0\0\0\0\x15\x07\x1d\0\0\0\0\0\x79\x21\
+\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xe0\xff\
+\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\
+\x30\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x61\x71\0\0\0\0\0\0\x7b\x1a\
+\xe0\xff\0\0\0\0\xb7\x01\0\0\x04\0\0\0\xbf\x72\0\0\0\0\0\0\x0f\x12\0\0\0\0\0\0\
+\x7b\x2a\xe8\xff\0\0\0\0\x61\x71\x14\0\0\0\0\0\x7b\x1a\xf0\xff\0\0\0\0\xbf\x71\
+\0\0\0\0\0\0\x85\x20\0\0\0\0\0\0\x7b\x0a\xf8\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\
+\x07\x04\0\0\xe0\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\
+\x30\0\0\0\xb7\x03\0\0\x1a\0\0\0\xb7\x05\0\0\x20\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\
+\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\0\0\0\0\x07\0\0\0\0\0\0\0\x42\0\0\0\x88\0\0\0\
+\x1e\x44\x01\0\x01\0\0\0\x42\0\0\0\x88\0\0\0\x24\x44\x01\0\x02\0\0\0\x42\0\0\0\
+\xfb\0\0\0\x1d\x4c\x01\0\x03\0\0\0\x42\0\0\0\x1c\x01\0\0\x06\x54\x01\0\x04\0\0\
+\0\x42\0\0\0\x2b\x01\0\0\x1d\x48\x01\0\x05\0\0\0\x42\0\0\0\x50\x01\0\0\x06\x60\
+\x01\0\x07\0\0\0\x42\0\0\0\x63\x01\0\0\x03\x64\x01\0\x0e\0\0\0\x42\0\0\0\xf6\
+\x01\0\0\x02\x6c\x01\0\x21\0\0\0\x42\0\0\0\x29\x02\0\0\x01\x80\x01\0\0\0\0\0\
+\x02\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\
+\x02\0\0\0\xf7\0\0\0\0\0\0\0\x20\0\0\0\x08\0\0\0\x27\x01\0\0\0\0\0\0\x70\0\0\0\
+\x0d\0\0\0\x3e\0\0\0\0\0\0\0\x80\0\0\0\x0d\0\0\0\xf7\0\0\0\0\0\0\0\xa0\0\0\0\
+\x0d\0\0\0\x27\x01\0\0\0\0\0\0\x1a\0\0\0\x23\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x64\x75\x6d\x70\x5f\x62\
+\x70\x66\x5f\x6d\x61\x70\0\0\0\0\0\0\0\0\x1c\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\
+\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\0\0\0\0\0\x09\0\0\0\x01\0\0\0\0\0\0\0\x07\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x62\x70\x66\x5f\x69\x74\
+\x65\x72\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\0\0\0\0\0\0\0\x62\x70\x66\x5f\x6d\
+\x61\x70\x5f\x73\x75\x6d\x5f\x65\x6c\x65\x6d\x5f\x63\x6f\x75\x6e\x74\0\0\x47\
+\x50\x4c\0\0\0\0\0\x79\x12\0\0\0\0\0\0\x79\x26\0\0\0\0\0\0\x79\x11\x08\0\0\0\0\
+\0\x15\x01\x3b\0\0\0\0\0\x79\x17\0\0\0\0\0\0\x79\x21\x10\0\0\0\0\0\x55\x01\x08\
+\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xd0\xff\xff\xff\xbf\x61\0\0\0\0\0\0\
+\x18\x62\0\0\0\0\0\0\0\0\0\0\x4a\0\0\0\xb7\x03\0\0\x20\0\0\0\xb7\x05\0\0\0\0\0\
+\0\x85\0\0\0\x7e\0\0\0\x7b\x6a\xc8\xff\0\0\0\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xd0\
+\xff\0\0\0\0\xb7\x03\0\0\x04\0\0\0\xbf\x79\0\0\0\0\0\0\x0f\x39\0\0\0\0\0\0\x79\
+\x71\x28\0\0\0\0\0\x79\x78\x30\0\0\0\0\0\x15\x08\x18\0\0\0\0\0\xb7\x02\0\0\0\0\
+\0\0\x0f\x21\0\0\0\0\0\0\x61\x11\x04\0\0\0\0\0\x79\x83\x08\0\0\0\0\0\x67\x01\0\
+\0\x03\0\0\0\x0f\x13\0\0\0\0\0\0\x79\x86\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\
+\x01\0\0\xf8\xff\xff\xff\xb7\x02\0\0\x08\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x01\0\0\
+\0\0\0\0\x79\xa3\xf8\xff\0\0\0\0\x0f\x13\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\
+\x01\0\0\xf4\xff\xff\xff\xb7\x02\0\0\x04\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x03\0\0\
+\x04\0\0\0\x61\xa1\xf4\xff\0\0\0\0\x61\x82\x10\0\0\0\0\0\x3d\x21\x02\0\0\0\0\0\
+\x0f\x16\0\0\0\0\0\0\xbf\x69\0\0\0\0\0\0\x7b\x9a\xd8\xff\0\0\0\0\x79\x71\x18\0\
+\0\0\0\0\x7b\x1a\xe0\xff\0\0\0\0\x79\x71\x20\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\
+\x31\0\0\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xd0\
+\xff\xff\xff\x79\xa1\xc8\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x6a\0\0\0\xb7\
+\x03\0\0\x11\0\0\0\xb7\x05\0\0\x20\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\
+\x95\0\0\0\0\0\0\0\0\0\0\0\x1b\0\0\0\0\0\0\0\x42\0\0\0\x88\0\0\0\x1e\x94\x01\0\
+\x01\0\0\0\x42\0\0\0\x88\0\0\0\x24\x94\x01\0\x02\0\0\0\x42\0\0\0\x86\x02\0\0\
+\x1f\x9c\x01\0\x03\0\0\0\x42\0\0\0\xaa\x02\0\0\x06\xa8\x01\0\x04\0\0\0\x42\0\0\
+\0\xc3\x02\0\0\x0e\xb4\x01\0\x05\0\0\0\x42\0\0\0\x2b\x01\0\0\x1d\x98\x01\0\x06\
+\0\0\0\x42\0\0\0\x50\x01\0\0\x06\xb8\x01\0\x08\0\0\0\x42\0\0\0\xd5\x02\0\0\x03\
+\xbc\x01\0\x10\0\0\0\x42\0\0\0\x45\x03\0\0\x02\xc4\x01\0\x17\0\0\0\x42\0\0\0\
+\x80\x03\0\0\x06\x04\x01\0\x1a\0\0\0\x42\0\0\0\x45\x03\0\0\x02\xc4\x01\0\x1b\0\
+\0\0\x42\0\0\0\xd1\x03\0\0\x0f\x10\x01\0\x1c\0\0\0\x42\0\0\0\xe6\x03\0\0\x2d\
+\x14\x01\0\x1e\0\0\0\x42\0\0\0\x1d\x04\0\0\x0d\x0c\x01\0\x20\0\0\0\x42\0\0\0\
+\x45\x03\0\0\x02\xc4\x01\0\x21\0\0\0\x42\0\0\0\xe6\x03\0\0\x02\x14\x01\0\x24\0\
+\0\0\x42\0\0\0\x44\x04\0\0\x0d\x18\x01\0\x27\0\0\0\x42\0\0\0\x45\x03\0\0\x02\
+\xc4\x01\0\x28\0\0\0\x42\0\0\0\x44\x04\0\0\x0d\x18\x01\0\x2b\0\0\0\x42\0\0\0\
+\x44\x04\0\0\x0d\x18\x01\0\x2c\0\0\0\x42\0\0\0\x72\x04\0\0\x1b\x1c\x01\0\x2d\0\
+\0\0\x42\0\0\0\x72\x04\0\0\x06\x1c\x01\0\x2e\0\0\0\x42\0\0\0\x95\x04\0\0\x0d\
+\x24\x01\0\x30\0\0\0\x42\0\0\0\x45\x03\0\0\x02\xc4\x01\0\x3f\0\0\0\x42\0\0\0\
+\x29\x02\0\0\x01\xd4\x01\0\0\0\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\
+\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x18\0\0\0\xf7\0\0\0\0\0\0\0\x20\0\0\0\x1c\0\0\
+\0\x3e\0\0\0\0\0\0\0\x28\0\0\0\x08\0\0\0\x27\x01\0\0\0\0\0\0\x80\0\0\0\x1e\0\0\
+\0\x3e\0\0\0\0\0\0\0\x90\0\0\0\x1e\0\0\0\xf7\0\0\0\0\0\0\0\xa8\0\0\0\x1e\0\0\0\
+\x78\x03\0\0\0\0\0\0\xb0\0\0\0\x1e\0\0\0\x7c\x03\0\0\0\0\0\0\xc0\0\0\0\x23\0\0\
+\0\xaa\x03\0\0\0\0\0\0\xd8\0\0\0\x24\0\0\0\xf7\0\0\0\0\0\0\0\xf0\0\0\0\x24\0\0\
+\0\x3e\0\0\0\0\0\0\0\x18\x01\0\0\x28\0\0\0\x3e\0\0\0\0\0\0\0\x50\x01\0\0\x1e\0\
+\0\0\xf7\0\0\0\0\0\0\0\x60\x01\0\0\x24\0\0\0\x6c\x04\0\0\0\0\0\0\x88\x01\0\0\
+\x1e\0\0\0\x27\x01\0\0\0\0\0\0\x98\x01\0\0\x1e\0\0\0\xad\x04\0\0\0\0\0\0\xa0\
+\x01\0\0\x1c\0\0\0\x3e\0\0\0\0\0\0\0\x1a\0\0\0\x41\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x64\x75\x6d\x70\x5f\
+\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\0\0\0\0\x1c\0\0\0\0\0\0\0\x08\0\0\0\0\0\
+\0\0\0\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\0\0\0\0\0\x19\0\0\0\x01\0\0\0\0\0\0\0\
+\x12\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x62\x70\x66\x5f\
+\x69\x74\x65\x72\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\0\0\0\0";
+ opts.insns_sz = 2456;
opts.insns = (void *)"\
\xbf\x16\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x78\xff\xff\xff\xb7\x02\0\
\0\x88\0\0\0\xb7\x03\0\0\0\0\0\0\x85\0\0\0\x71\0\0\0\x05\0\x14\0\0\0\0\0\x61\
@@ -331,79 +326,83 @@ iterators_bpf__load(struct iterators_bpf *skel)
\0\0\0\x85\0\0\0\xa8\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\x01\0\0\0\0\
\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xbf\x70\0\0\
\0\0\0\0\x95\0\0\0\0\0\0\0\x61\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\
-\x48\x0e\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\
-\0\0\x44\x0e\0\0\x63\x01\0\0\0\0\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\
-\0\0\0\0\x38\x0e\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x05\0\0\
-\x18\x61\0\0\0\0\0\0\0\0\0\0\x30\x0e\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x12\0\
-\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x30\x0e\0\0\xb7\x03\0\0\x1c\0\0\0\x85\0\0\0\
+\xe8\x0e\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\
+\0\0\xe4\x0e\0\0\x63\x01\0\0\0\0\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\
+\0\0\0\0\xd8\x0e\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x05\0\0\
+\x18\x61\0\0\0\0\0\0\0\0\0\0\xd0\x0e\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x12\0\
+\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\xd0\x0e\0\0\xb7\x03\0\0\x1c\0\0\0\x85\0\0\0\
\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\xd4\xff\0\0\0\0\x63\x7a\x78\xff\0\0\0\0\
-\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x80\x0e\0\0\x63\x01\0\0\0\
+\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x20\x0f\0\0\x63\x01\0\0\0\
\0\0\0\x61\x60\x1c\0\0\0\0\0\x15\0\x03\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\
-\x5c\x0e\0\0\x63\x01\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\
-\0\x50\x0e\0\0\xb7\x03\0\0\x48\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\
+\xfc\x0e\0\0\x63\x01\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\
+\0\xf0\x0e\0\0\xb7\x03\0\0\x48\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\
\xc5\x07\xc3\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x71\0\0\0\0\0\
-\0\x79\x63\x20\0\0\0\0\0\x15\x03\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x98\
-\x0e\0\0\xb7\x02\0\0\x62\0\0\0\x61\x60\x04\0\0\0\0\0\x45\0\x02\0\x01\0\0\0\x85\
+\0\x79\x63\x20\0\0\0\0\0\x15\x03\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x38\
+\x0f\0\0\xb7\x02\0\0\x7b\0\0\0\x61\x60\x04\0\0\0\0\0\x45\0\x02\0\x01\0\0\0\x85\
\0\0\0\x94\0\0\0\x05\0\x01\0\0\0\0\0\x85\0\0\0\x71\0\0\0\x18\x62\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\x63\
-\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\
-\0\0\x10\x0f\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x98\x0e\0\0\
-\x18\x61\0\0\0\0\0\0\0\0\0\0\x18\x0f\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x02\0\
-\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\xb7\x03\0\0\x20\0\0\0\x85\0\0\0\
+\0\0\0\0\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xc0\x0f\0\0\x63\
+\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xb8\x0f\0\0\x18\x61\0\0\0\0\0\0\0\
+\0\0\0\xc8\x0f\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x38\x0f\0\0\
+\x18\x61\0\0\0\0\0\0\0\0\0\0\xd0\x0f\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x02\0\
+\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\xc0\x0f\0\0\xb7\x03\0\0\x20\0\0\0\x85\0\0\0\
\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x9f\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\x63\
-\x01\0\0\0\0\0\0\xb7\x01\0\0\x16\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\
+\0\0\0\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xe0\x0f\0\0\x63\
+\x01\0\0\0\0\0\0\xb7\x01\0\0\x16\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\xe0\x0f\0\0\
\xb7\x03\0\0\x04\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x92\xff\
-\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\
-\x78\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x38\x0f\0\0\x18\
-\x61\0\0\0\0\0\0\0\0\0\0\x70\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\
-\0\0\0\x40\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb8\x11\0\0\x7b\x01\0\0\0\0\0\0\
-\x18\x60\0\0\0\0\0\0\0\0\0\0\x48\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xc8\x11\0\
-\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xe8\x10\0\0\x18\x61\0\0\0\0\
-\0\0\0\0\0\0\xe8\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xe0\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\x60\x08\0\0\
-\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x80\x11\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\
-\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x84\x11\0\0\x63\x01\0\0\0\0\0\0\x79\x60\
-\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x88\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\
-\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb0\x11\0\0\x63\x01\0\0\0\0\0\
-\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xf8\x11\0\0\xb7\x02\0\0\x11\0\0\0\xb7\x03\0\0\
+\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xe8\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\
+\x20\x12\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xf0\x0f\0\0\x18\
+\x61\0\0\0\0\0\0\0\0\0\0\x18\x12\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\
+\0\0\0\x08\x11\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x60\x12\0\0\x7b\x01\0\0\0\0\0\0\
+\x18\x60\0\0\0\0\0\0\0\0\0\0\x10\x11\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x70\x12\0\
+\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xa0\x11\0\0\x18\x61\0\0\0\0\
+\0\0\0\0\0\0\x90\x12\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x88\x12\0\0\x7b\x01\0\0\0\0\0\0\x61\x60\x08\0\0\
+\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x28\x12\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\
+\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x2c\x12\0\0\x63\x01\0\0\0\0\0\0\x79\x60\
+\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x30\x12\0\0\x7b\x01\0\0\0\0\0\0\x61\
+\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x58\x12\0\0\x63\x01\0\0\0\0\0\
+\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xa0\x12\0\0\xb7\x02\0\0\x11\0\0\0\xb7\x03\0\0\
\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\
-\x5c\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x68\x11\0\0\x63\x70\x6c\0\0\0\0\0\
-\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\0\0\0\x18\x62\0\0\
-\0\0\0\0\0\0\0\0\x68\x11\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\
-\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd8\x11\0\0\x61\x01\0\0\0\0\0\0\xd5\
-\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x4a\xff\0\0\
-\0\0\x63\x7a\x80\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x10\x12\0\0\x18\x61\0\
-\0\0\0\0\0\0\0\0\0\x10\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\
-\x18\x12\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\
-\x60\0\0\0\0\0\0\0\0\0\0\x28\x14\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x50\x17\0\0\
-\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x14\0\0\x18\x61\0\0\0\0\0\
-\0\0\0\0\0\x60\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd0\x15\
-\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x80\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x78\x17\0\0\x7b\x01\0\0\0\0\
-\0\0\x61\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x18\x17\0\0\x63\x01\0\0\
-\0\0\0\0\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x1c\x17\0\0\x63\x01\
-\0\0\0\0\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x20\x17\0\0\x7b\
-\x01\0\0\0\0\0\0\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x48\x17\0\
-\0\x63\x01\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x90\x17\0\0\xb7\x02\0\0\x12\
-\0\0\0\xb7\x03\0\0\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\
-\0\0\0\0\0\xc5\x07\x13\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\x63\
-\x70\x6c\0\0\0\0\0\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\
-\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\
-\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x70\x17\0\0\x61\x01\
-\0\0\0\0\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\
-\x07\x01\xff\0\0\0\0\x63\x7a\x84\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\xd5\x01\
-\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa0\x80\xff\0\0\0\0\
-\x63\x06\x28\0\0\0\0\0\x61\xa0\x84\xff\0\0\0\0\x63\x06\x2c\0\0\0\0\0\x18\x61\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\x10\0\0\0\0\0\0\x63\x06\x18\0\0\0\0\0\xb7\0\0\0\
-\0\0\0\0\x95\0\0\0\0\0\0\0";
+\x5c\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x10\x12\0\0\x63\x70\x6c\0\0\0\0\0\
+\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\x18\x68\0\0\0\0\0\0\0\0\0\0\xa8\
+\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb8\x12\0\0\xb7\x02\0\0\x17\0\0\0\xb7\x03\
+\0\0\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\
+\x07\x4d\xff\0\0\0\0\x75\x07\x03\0\0\0\0\0\x62\x08\x04\0\0\0\0\0\x6a\x08\x02\0\
+\0\0\0\0\x05\0\x0a\0\0\0\0\0\x63\x78\x04\0\0\0\0\0\xbf\x79\0\0\0\0\0\0\x77\x09\
+\0\0\x20\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\x63\x90\0\0\0\0\0\0\x55\
+\x09\x02\0\0\0\0\0\x6a\x08\x02\0\0\0\0\0\x05\0\x01\0\0\0\0\0\x6a\x08\x02\0\x40\
+\0\0\0\xb7\x01\0\0\x05\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x10\x12\0\0\xb7\x03\0\
+\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\
+\0\0\x01\0\0\x61\x01\0\0\0\0\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\
+\0\0\0\xa8\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x80\x12\0\0\x61\x01\0\0\0\0\0\0\
+\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x2c\xff\
+\0\0\0\0\x63\x7a\x80\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd0\x12\0\0\x18\
+\x61\0\0\0\0\0\0\0\0\0\0\xa8\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\
+\0\0\0\xd8\x12\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xa0\x17\0\0\x7b\x01\0\0\0\0\0\0\
+\x18\x60\0\0\0\0\0\0\0\0\0\0\xe0\x14\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xe8\x17\0\
+\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xe8\x14\0\0\x18\x61\0\0\0\0\
+\0\0\0\0\0\0\xf8\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x78\
+\x16\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x18\x18\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x10\x18\0\0\x7b\x01\0\0\
+\0\0\0\0\x61\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb0\x17\0\0\x63\x01\
+\0\0\0\0\0\0\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb4\x17\0\0\x63\
+\x01\0\0\0\0\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb8\x17\0\0\
+\x7b\x01\0\0\0\0\0\0\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xe0\
+\x17\0\0\x63\x01\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x28\x18\0\0\xb7\x02\0\
+\0\x12\0\0\0\xb7\x03\0\0\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\
+\x07\0\0\0\0\0\0\xc5\x07\xf5\xfe\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x98\x17\0\
+\0\x63\x70\x6c\0\0\0\0\0\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\
+\0\x05\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x98\x17\0\0\xb7\x03\0\0\x8c\0\0\0\x85\
+\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x08\x18\0\0\
+\x61\x01\0\0\0\0\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\
+\0\0\xc5\x07\xe3\xfe\0\0\0\0\x63\x7a\x84\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\
+\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa0\x80\xff\
+\0\0\0\0\x63\x06\x28\0\0\0\0\0\x61\xa0\x84\xff\0\0\0\0\x63\x06\x2c\0\0\0\0\0\
+\x18\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\x10\0\0\0\0\0\0\x63\x06\x18\0\0\0\0\0\
+\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0";
err = bpf_load_and_run(&opts);
if (err < 0)
return err;
- skel->rodata = skel_finalize_map_data(&skel->maps.rodata.initial_value,
- 4096, PROT_READ, skel->maps.rodata.map_fd);
- if (!skel->rodata)
- return -ENOMEM;
return 0;
}
@@ -422,4 +421,15 @@ iterators_bpf__open_and_load(void)
return skel;
}
+__attribute__((unused)) static void
+iterators_bpf__assert(struct iterators_bpf *s __attribute__((unused)))
+{
+#ifdef __cplusplus
+#define _Static_assert static_assert
+#endif
+#ifdef __cplusplus
+#undef _Static_assert
+#endif
+}
+
#endif /* __ITERATORS_BPF_SKEL_H__ */
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 875ac9b698d9..f045fde632e5 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -23,15 +23,6 @@
#define RINGBUF_MAX_RECORD_SZ (UINT_MAX/4)
-/* Maximum size of ring buffer area is limited by 32-bit page offset within
- * record header, counted in pages. Reserve 8 bits for extensibility, and take
- * into account few extra pages for consumer/producer pages and
- * non-mmap()'able parts. This gives 64GB limit, which seems plenty for single
- * ring buffer.
- */
-#define RINGBUF_MAX_DATA_SZ \
- (((1ULL << 24) - RINGBUF_POS_PAGES - RINGBUF_PGOFF) * PAGE_SIZE)
-
struct bpf_ringbuf {
wait_queue_head_t waitq;
struct irq_work work;
@@ -161,6 +152,17 @@ static void bpf_ringbuf_notify(struct irq_work *work)
wake_up_all(&rb->waitq);
}
+/* Maximum size of ring buffer area is limited by 32-bit page offset within
+ * record header, counted in pages. Reserve 8 bits for extensibility, and
+ * take into account few extra pages for consumer/producer pages and
+ * non-mmap()'able parts, the current maximum size would be:
+ *
+ * (((1ULL << 24) - RINGBUF_POS_PAGES - RINGBUF_PGOFF) * PAGE_SIZE)
+ *
+ * This gives 64GB limit, which seems plenty for single ring buffer. Now
+ * considering that the maximum value of data_sz is (4GB - 1), there
+ * will be no overflow, so just note the size limit in the comments.
+ */
static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
{
struct bpf_ringbuf *rb;
@@ -193,12 +195,6 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
!PAGE_ALIGNED(attr->max_entries))
return ERR_PTR(-EINVAL);
-#ifdef CONFIG_64BIT
- /* on 32-bit arch, it's impossible to overflow record's hdr->pgoff */
- if (attr->max_entries > RINGBUF_MAX_DATA_SZ)
- return ERR_PTR(-E2BIG);
-#endif
-
rb_map = bpf_map_area_alloc(sizeof(*rb_map), NUMA_NO_NODE);
if (!rb_map)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a2aef900519c..7f4e8c357a6a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -37,6 +37,8 @@
#include <linux/trace_events.h>
#include <net/netfilter/nf_bpf_link.h>
+#include <net/tcx.h>
+
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
@@ -3295,6 +3297,25 @@ static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link,
raw_tp_link->btp->tp->name);
}
+static int bpf_copy_to_user(char __user *ubuf, const char *buf, u32 ulen,
+ u32 len)
+{
+ if (ulen >= len + 1) {
+ if (copy_to_user(ubuf, buf, len + 1))
+ return -EFAULT;
+ } else {
+ char zero = '\0';
+
+ if (copy_to_user(ubuf, buf, ulen - 1))
+ return -EFAULT;
+ if (put_user(zero, ubuf + ulen - 1))
+ return -EFAULT;
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
struct bpf_link_info *info)
{
@@ -3313,20 +3334,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
if (!ubuf)
return 0;
- if (ulen >= tp_len + 1) {
- if (copy_to_user(ubuf, tp_name, tp_len + 1))
- return -EFAULT;
- } else {
- char zero = '\0';
-
- if (copy_to_user(ubuf, tp_name, ulen - 1))
- return -EFAULT;
- if (put_user(zero, ubuf + ulen - 1))
- return -EFAULT;
- return -ENOSPC;
- }
-
- return 0;
+ return bpf_copy_to_user(ubuf, tp_name, ulen, tp_len);
}
static const struct bpf_link_ops bpf_raw_tp_link_lops = {
@@ -3358,9 +3366,155 @@ static void bpf_perf_link_dealloc(struct bpf_link *link)
kfree(perf_link);
}
+static int bpf_perf_link_fill_common(const struct perf_event *event,
+ char __user *uname, u32 ulen,
+ u64 *probe_offset, u64 *probe_addr,
+ u32 *fd_type)
+{
+ const char *buf;
+ u32 prog_id;
+ size_t len;
+ int err;
+
+ if (!ulen ^ !uname)
+ return -EINVAL;
+ if (!uname)
+ return 0;
+
+ err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf,
+ probe_offset, probe_addr);
+ if (err)
+ return err;
+
+ if (buf) {
+ len = strlen(buf);
+ err = bpf_copy_to_user(uname, buf, ulen, len);
+ if (err)
+ return err;
+ } else {
+ char zero = '\0';
+
+ if (put_user(zero, uname))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+#ifdef CONFIG_KPROBE_EVENTS
+static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
+ struct bpf_link_info *info)
+{
+ char __user *uname;
+ u64 addr, offset;
+ u32 ulen, type;
+ int err;
+
+ uname = u64_to_user_ptr(info->perf_event.kprobe.func_name);
+ ulen = info->perf_event.kprobe.name_len;
+ err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
+ &type);
+ if (err)
+ return err;
+ if (type == BPF_FD_TYPE_KRETPROBE)
+ info->perf_event.type = BPF_PERF_EVENT_KRETPROBE;
+ else
+ info->perf_event.type = BPF_PERF_EVENT_KPROBE;
+
+ info->perf_event.kprobe.offset = offset;
+ if (!kallsyms_show_value(current_cred()))
+ addr = 0;
+ info->perf_event.kprobe.addr = addr;
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_UPROBE_EVENTS
+static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
+ struct bpf_link_info *info)
+{
+ char __user *uname;
+ u64 addr, offset;
+ u32 ulen, type;
+ int err;
+
+ uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
+ ulen = info->perf_event.uprobe.name_len;
+ err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
+ &type);
+ if (err)
+ return err;
+
+ if (type == BPF_FD_TYPE_URETPROBE)
+ info->perf_event.type = BPF_PERF_EVENT_URETPROBE;
+ else
+ info->perf_event.type = BPF_PERF_EVENT_UPROBE;
+ info->perf_event.uprobe.offset = offset;
+ return 0;
+}
+#endif
+
+static int bpf_perf_link_fill_probe(const struct perf_event *event,
+ struct bpf_link_info *info)
+{
+#ifdef CONFIG_KPROBE_EVENTS
+ if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE)
+ return bpf_perf_link_fill_kprobe(event, info);
+#endif
+#ifdef CONFIG_UPROBE_EVENTS
+ if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE)
+ return bpf_perf_link_fill_uprobe(event, info);
+#endif
+ return -EOPNOTSUPP;
+}
+
+static int bpf_perf_link_fill_tracepoint(const struct perf_event *event,
+ struct bpf_link_info *info)
+{
+ char __user *uname;
+ u32 ulen;
+
+ uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name);
+ ulen = info->perf_event.tracepoint.name_len;
+ info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT;
+ return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL);
+}
+
+static int bpf_perf_link_fill_perf_event(const struct perf_event *event,
+ struct bpf_link_info *info)
+{
+ info->perf_event.event.type = event->attr.type;
+ info->perf_event.event.config = event->attr.config;
+ info->perf_event.type = BPF_PERF_EVENT_EVENT;
+ return 0;
+}
+
+static int bpf_perf_link_fill_link_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ struct bpf_perf_link *perf_link;
+ const struct perf_event *event;
+
+ perf_link = container_of(link, struct bpf_perf_link, link);
+ event = perf_get_event(perf_link->perf_file);
+ if (IS_ERR(event))
+ return PTR_ERR(event);
+
+ switch (event->prog->type) {
+ case BPF_PROG_TYPE_PERF_EVENT:
+ return bpf_perf_link_fill_perf_event(event, info);
+ case BPF_PROG_TYPE_TRACEPOINT:
+ return bpf_perf_link_fill_tracepoint(event, info);
+ case BPF_PROG_TYPE_KPROBE:
+ return bpf_perf_link_fill_probe(event, info);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct bpf_link_ops bpf_perf_link_lops = {
.release = bpf_perf_link_release,
.dealloc = bpf_perf_link_dealloc,
+ .fill_link_info = bpf_perf_link_fill_link_info,
};
static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
@@ -3588,31 +3742,45 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
return BPF_PROG_TYPE_XDP;
case BPF_LSM_CGROUP:
return BPF_PROG_TYPE_LSM;
+ case BPF_TCX_INGRESS:
+ case BPF_TCX_EGRESS:
+ return BPF_PROG_TYPE_SCHED_CLS;
default:
return BPF_PROG_TYPE_UNSPEC;
}
}
-#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
+#define BPF_PROG_ATTACH_LAST_FIELD expected_revision
-#define BPF_F_ATTACH_MASK \
- (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
+#define BPF_F_ATTACH_MASK_BASE \
+ (BPF_F_ALLOW_OVERRIDE | \
+ BPF_F_ALLOW_MULTI | \
+ BPF_F_REPLACE)
+
+#define BPF_F_ATTACH_MASK_MPROG \
+ (BPF_F_REPLACE | \
+ BPF_F_BEFORE | \
+ BPF_F_AFTER | \
+ BPF_F_ID | \
+ BPF_F_LINK)
static int bpf_prog_attach(const union bpf_attr *attr)
{
enum bpf_prog_type ptype;
struct bpf_prog *prog;
+ u32 mask;
int ret;
if (CHECK_ATTR(BPF_PROG_ATTACH))
return -EINVAL;
- if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
- return -EINVAL;
-
ptype = attach_type_to_prog_type(attr->attach_type);
if (ptype == BPF_PROG_TYPE_UNSPEC)
return -EINVAL;
+ mask = bpf_mprog_supported(ptype) ?
+ BPF_F_ATTACH_MASK_MPROG : BPF_F_ATTACH_MASK_BASE;
+ if (attr->attach_flags & ~mask)
+ return -EINVAL;
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
if (IS_ERR(prog))
@@ -3648,6 +3816,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
else
ret = cgroup_bpf_prog_attach(attr, ptype, prog);
break;
+ case BPF_PROG_TYPE_SCHED_CLS:
+ ret = tcx_prog_attach(attr, prog);
+ break;
default:
ret = -EINVAL;
}
@@ -3657,25 +3828,41 @@ static int bpf_prog_attach(const union bpf_attr *attr)
return ret;
}
-#define BPF_PROG_DETACH_LAST_FIELD attach_type
+#define BPF_PROG_DETACH_LAST_FIELD expected_revision
static int bpf_prog_detach(const union bpf_attr *attr)
{
+ struct bpf_prog *prog = NULL;
enum bpf_prog_type ptype;
+ int ret;
if (CHECK_ATTR(BPF_PROG_DETACH))
return -EINVAL;
ptype = attach_type_to_prog_type(attr->attach_type);
+ if (bpf_mprog_supported(ptype)) {
+ if (ptype == BPF_PROG_TYPE_UNSPEC)
+ return -EINVAL;
+ if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG)
+ return -EINVAL;
+ if (attr->attach_bpf_fd) {
+ prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+ }
+ }
switch (ptype) {
case BPF_PROG_TYPE_SK_MSG:
case BPF_PROG_TYPE_SK_SKB:
- return sock_map_prog_detach(attr, ptype);
+ ret = sock_map_prog_detach(attr, ptype);
+ break;
case BPF_PROG_TYPE_LIRC_MODE2:
- return lirc_prog_detach(attr);
+ ret = lirc_prog_detach(attr);
+ break;
case BPF_PROG_TYPE_FLOW_DISSECTOR:
- return netns_bpf_prog_detach(attr, ptype);
+ ret = netns_bpf_prog_detach(attr, ptype);
+ break;
case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK:
@@ -3684,13 +3871,21 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_PROG_TYPE_CGROUP_SYSCTL:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_LSM:
- return cgroup_bpf_prog_detach(attr, ptype);
+ ret = cgroup_bpf_prog_detach(attr, ptype);
+ break;
+ case BPF_PROG_TYPE_SCHED_CLS:
+ ret = tcx_prog_detach(attr, prog);
+ break;
default:
- return -EINVAL;
+ ret = -EINVAL;
}
+
+ if (prog)
+ bpf_prog_put(prog);
+ return ret;
}
-#define BPF_PROG_QUERY_LAST_FIELD query.prog_attach_flags
+#define BPF_PROG_QUERY_LAST_FIELD query.link_attach_flags
static int bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
@@ -3738,6 +3933,9 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_SK_MSG_VERDICT:
case BPF_SK_SKB_VERDICT:
return sock_map_bpf_prog_query(attr, uattr);
+ case BPF_TCX_INGRESS:
+ case BPF_TCX_EGRESS:
+ return tcx_prog_query(attr, uattr);
default:
return -EINVAL;
}
@@ -4700,6 +4898,13 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
goto out;
}
break;
+ case BPF_PROG_TYPE_SCHED_CLS:
+ if (attr->link_create.attach_type != BPF_TCX_INGRESS &&
+ attr->link_create.attach_type != BPF_TCX_EGRESS) {
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
default:
ptype = attach_type_to_prog_type(attr->link_create.attach_type);
if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
@@ -4751,6 +4956,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
case BPF_PROG_TYPE_XDP:
ret = bpf_xdp_link_attach(attr, prog);
break;
+ case BPF_PROG_TYPE_SCHED_CLS:
+ ret = tcx_link_attach(attr, prog);
+ break;
case BPF_PROG_TYPE_NETFILTER:
ret = bpf_nf_link_attach(attr, prog);
break;
diff --git a/kernel/bpf/tcx.c b/kernel/bpf/tcx.c
new file mode 100644
index 000000000000..69a272712b29
--- /dev/null
+++ b/kernel/bpf/tcx.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include <linux/bpf.h>
+#include <linux/bpf_mprog.h>
+#include <linux/netdevice.h>
+
+#include <net/tcx.h>
+
+int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ bool created, ingress = attr->attach_type == BPF_TCX_INGRESS;
+ struct net *net = current->nsproxy->net_ns;
+ struct bpf_mprog_entry *entry, *entry_new;
+ struct bpf_prog *replace_prog = NULL;
+ struct net_device *dev;
+ int ret;
+
+ rtnl_lock();
+ dev = __dev_get_by_index(net, attr->target_ifindex);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out;
+ }
+ if (attr->attach_flags & BPF_F_REPLACE) {
+ replace_prog = bpf_prog_get_type(attr->replace_bpf_fd,
+ prog->type);
+ if (IS_ERR(replace_prog)) {
+ ret = PTR_ERR(replace_prog);
+ replace_prog = NULL;
+ goto out;
+ }
+ }
+ entry = tcx_entry_fetch_or_create(dev, ingress, &created);
+ if (!entry) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = bpf_mprog_attach(entry, &entry_new, prog, NULL, replace_prog,
+ attr->attach_flags, attr->relative_fd,
+ attr->expected_revision);
+ if (!ret) {
+ if (entry != entry_new) {
+ tcx_entry_update(dev, entry_new, ingress);
+ tcx_entry_sync();
+ tcx_skeys_inc(ingress);
+ }
+ bpf_mprog_commit(entry);
+ } else if (created) {
+ tcx_entry_free(entry);
+ }
+out:
+ if (replace_prog)
+ bpf_prog_put(replace_prog);
+ rtnl_unlock();
+ return ret;
+}
+
+int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ bool ingress = attr->attach_type == BPF_TCX_INGRESS;
+ struct net *net = current->nsproxy->net_ns;
+ struct bpf_mprog_entry *entry, *entry_new;
+ struct net_device *dev;
+ int ret;
+
+ rtnl_lock();
+ dev = __dev_get_by_index(net, attr->target_ifindex);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out;
+ }
+ entry = tcx_entry_fetch(dev, ingress);
+ if (!entry) {
+ ret = -ENOENT;
+ goto out;
+ }
+ ret = bpf_mprog_detach(entry, &entry_new, prog, NULL, attr->attach_flags,
+ attr->relative_fd, attr->expected_revision);
+ if (!ret) {
+ if (!tcx_entry_is_active(entry_new))
+ entry_new = NULL;
+ tcx_entry_update(dev, entry_new, ingress);
+ tcx_entry_sync();
+ tcx_skeys_dec(ingress);
+ bpf_mprog_commit(entry);
+ if (!entry_new)
+ tcx_entry_free(entry);
+ }
+out:
+ rtnl_unlock();
+ return ret;
+}
+
+void tcx_uninstall(struct net_device *dev, bool ingress)
+{
+ struct bpf_tuple tuple = {};
+ struct bpf_mprog_entry *entry;
+ struct bpf_mprog_fp *fp;
+ struct bpf_mprog_cp *cp;
+
+ entry = tcx_entry_fetch(dev, ingress);
+ if (!entry)
+ return;
+ tcx_entry_update(dev, NULL, ingress);
+ tcx_entry_sync();
+ bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
+ if (tuple.link)
+ tcx_link(tuple.link)->dev = NULL;
+ else
+ bpf_prog_put(tuple.prog);
+ tcx_skeys_dec(ingress);
+ }
+ WARN_ON_ONCE(tcx_entry(entry)->miniq_active);
+ tcx_entry_free(entry);
+}
+
+int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
+{
+ bool ingress = attr->query.attach_type == BPF_TCX_INGRESS;
+ struct net *net = current->nsproxy->net_ns;
+ struct bpf_mprog_entry *entry;
+ struct net_device *dev;
+ int ret;
+
+ rtnl_lock();
+ dev = __dev_get_by_index(net, attr->query.target_ifindex);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out;
+ }
+ entry = tcx_entry_fetch(dev, ingress);
+ if (!entry) {
+ ret = -ENOENT;
+ goto out;
+ }
+ ret = bpf_mprog_query(attr, uattr, entry);
+out:
+ rtnl_unlock();
+ return ret;
+}
+
+static int tcx_link_prog_attach(struct bpf_link *link, u32 flags, u32 id_or_fd,
+ u64 revision)
+{
+ struct tcx_link *tcx = tcx_link(link);
+ bool created, ingress = tcx->location == BPF_TCX_INGRESS;
+ struct bpf_mprog_entry *entry, *entry_new;
+ struct net_device *dev = tcx->dev;
+ int ret;
+
+ ASSERT_RTNL();
+ entry = tcx_entry_fetch_or_create(dev, ingress, &created);
+ if (!entry)
+ return -ENOMEM;
+ ret = bpf_mprog_attach(entry, &entry_new, link->prog, link, NULL, flags,
+ id_or_fd, revision);
+ if (!ret) {
+ if (entry != entry_new) {
+ tcx_entry_update(dev, entry_new, ingress);
+ tcx_entry_sync();
+ tcx_skeys_inc(ingress);
+ }
+ bpf_mprog_commit(entry);
+ } else if (created) {
+ tcx_entry_free(entry);
+ }
+ return ret;
+}
+
+static void tcx_link_release(struct bpf_link *link)
+{
+ struct tcx_link *tcx = tcx_link(link);
+ bool ingress = tcx->location == BPF_TCX_INGRESS;
+ struct bpf_mprog_entry *entry, *entry_new;
+ struct net_device *dev;
+ int ret = 0;
+
+ rtnl_lock();
+ dev = tcx->dev;
+ if (!dev)
+ goto out;
+ entry = tcx_entry_fetch(dev, ingress);
+ if (!entry) {
+ ret = -ENOENT;
+ goto out;
+ }
+ ret = bpf_mprog_detach(entry, &entry_new, link->prog, link, 0, 0, 0);
+ if (!ret) {
+ if (!tcx_entry_is_active(entry_new))
+ entry_new = NULL;
+ tcx_entry_update(dev, entry_new, ingress);
+ tcx_entry_sync();
+ tcx_skeys_dec(ingress);
+ bpf_mprog_commit(entry);
+ if (!entry_new)
+ tcx_entry_free(entry);
+ tcx->dev = NULL;
+ }
+out:
+ WARN_ON_ONCE(ret);
+ rtnl_unlock();
+}
+
+static int tcx_link_update(struct bpf_link *link, struct bpf_prog *nprog,
+ struct bpf_prog *oprog)
+{
+ struct tcx_link *tcx = tcx_link(link);
+ bool ingress = tcx->location == BPF_TCX_INGRESS;
+ struct bpf_mprog_entry *entry, *entry_new;
+ struct net_device *dev;
+ int ret = 0;
+
+ rtnl_lock();
+ dev = tcx->dev;
+ if (!dev) {
+ ret = -ENOLINK;
+ goto out;
+ }
+ if (oprog && link->prog != oprog) {
+ ret = -EPERM;
+ goto out;
+ }
+ oprog = link->prog;
+ if (oprog == nprog) {
+ bpf_prog_put(nprog);
+ goto out;
+ }
+ entry = tcx_entry_fetch(dev, ingress);
+ if (!entry) {
+ ret = -ENOENT;
+ goto out;
+ }
+ ret = bpf_mprog_attach(entry, &entry_new, nprog, link, oprog,
+ BPF_F_REPLACE | BPF_F_ID,
+ link->prog->aux->id, 0);
+ if (!ret) {
+ WARN_ON_ONCE(entry != entry_new);
+ oprog = xchg(&link->prog, nprog);
+ bpf_prog_put(oprog);
+ bpf_mprog_commit(entry);
+ }
+out:
+ rtnl_unlock();
+ return ret;
+}
+
+static void tcx_link_dealloc(struct bpf_link *link)
+{
+ kfree(tcx_link(link));
+}
+
+static void tcx_link_fdinfo(const struct bpf_link *link, struct seq_file *seq)
+{
+ const struct tcx_link *tcx = tcx_link_const(link);
+ u32 ifindex = 0;
+
+ rtnl_lock();
+ if (tcx->dev)
+ ifindex = tcx->dev->ifindex;
+ rtnl_unlock();
+
+ seq_printf(seq, "ifindex:\t%u\n", ifindex);
+ seq_printf(seq, "attach_type:\t%u (%s)\n",
+ tcx->location,
+ tcx->location == BPF_TCX_INGRESS ? "ingress" : "egress");
+}
+
+static int tcx_link_fill_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ const struct tcx_link *tcx = tcx_link_const(link);
+ u32 ifindex = 0;
+
+ rtnl_lock();
+ if (tcx->dev)
+ ifindex = tcx->dev->ifindex;
+ rtnl_unlock();
+
+ info->tcx.ifindex = ifindex;
+ info->tcx.attach_type = tcx->location;
+ return 0;
+}
+
+static int tcx_link_detach(struct bpf_link *link)
+{
+ tcx_link_release(link);
+ return 0;
+}
+
+static const struct bpf_link_ops tcx_link_lops = {
+ .release = tcx_link_release,
+ .detach = tcx_link_detach,
+ .dealloc = tcx_link_dealloc,
+ .update_prog = tcx_link_update,
+ .show_fdinfo = tcx_link_fdinfo,
+ .fill_link_info = tcx_link_fill_info,
+};
+
+static int tcx_link_init(struct tcx_link *tcx,
+ struct bpf_link_primer *link_primer,
+ const union bpf_attr *attr,
+ struct net_device *dev,
+ struct bpf_prog *prog)
+{
+ bpf_link_init(&tcx->link, BPF_LINK_TYPE_TCX, &tcx_link_lops, prog);
+ tcx->location = attr->link_create.attach_type;
+ tcx->dev = dev;
+ return bpf_link_prime(&tcx->link, link_primer);
+}
+
+int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct bpf_link_primer link_primer;
+ struct net_device *dev;
+ struct tcx_link *tcx;
+ int ret;
+
+ rtnl_lock();
+ dev = __dev_get_by_index(net, attr->link_create.target_ifindex);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out;
+ }
+ tcx = kzalloc(sizeof(*tcx), GFP_USER);
+ if (!tcx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = tcx_link_init(tcx, &link_primer, attr, dev, prog);
+ if (ret) {
+ kfree(tcx);
+ goto out;
+ }
+ ret = tcx_link_prog_attach(&tcx->link, attr->link_create.flags,
+ attr->link_create.tcx.relative_fd,
+ attr->link_create.tcx.expected_revision);
+ if (ret) {
+ tcx->dev = NULL;
+ bpf_link_cleanup(&link_primer);
+ goto out;
+ }
+ ret = bpf_link_settle(&link_primer);
+out:
+ rtnl_unlock();
+ return ret;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 02a021c524ab..71473c19093d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -25,6 +25,7 @@
#include <linux/btf_ids.h>
#include <linux/poison.h>
#include <linux/module.h>
+#include <linux/cpumask.h>
#include "disasm.h"
@@ -5412,12 +5413,25 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
return reg->type == PTR_TO_FLOW_KEYS;
}
+static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
+#ifdef CONFIG_NET
+ [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
+ [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+ [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
+#endif
+ [CONST_PTR_TO_MAP] = btf_bpf_map_id,
+};
+
static bool is_trusted_reg(const struct bpf_reg_state *reg)
{
/* A referenced register is always trusted. */
if (reg->ref_obj_id)
return true;
+ /* Types listed in the reg2btf_ids are always trusted */
+ if (reg2btf_ids[base_type(reg->type)])
+ return true;
+
/* If a register is not referenced, it is trusted if it has the
* MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
* other type modifiers may be safe, but we elect to take an opt-in
@@ -6085,6 +6099,11 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
type_is_rcu_or_null(env, reg, field_name, btf_id)) {
/* __rcu tagged pointers can be NULL */
flag |= MEM_RCU | PTR_MAYBE_NULL;
+
+ /* We always trust them */
+ if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
+ flag & PTR_UNTRUSTED)
+ flag &= ~PTR_UNTRUSTED;
} else if (flag & (MEM_PERCPU | MEM_USER)) {
/* keep as-is */
} else {
@@ -9135,19 +9154,33 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
{
struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
- if (ret_type != RET_INTEGER ||
- (func_id != BPF_FUNC_get_stack &&
- func_id != BPF_FUNC_get_task_stack &&
- func_id != BPF_FUNC_probe_read_str &&
- func_id != BPF_FUNC_probe_read_kernel_str &&
- func_id != BPF_FUNC_probe_read_user_str))
+ if (ret_type != RET_INTEGER)
return;
- ret_reg->smax_value = meta->msize_max_value;
- ret_reg->s32_max_value = meta->msize_max_value;
- ret_reg->smin_value = -MAX_ERRNO;
- ret_reg->s32_min_value = -MAX_ERRNO;
- reg_bounds_sync(ret_reg);
+ switch (func_id) {
+ case BPF_FUNC_get_stack:
+ case BPF_FUNC_get_task_stack:
+ case BPF_FUNC_probe_read_str:
+ case BPF_FUNC_probe_read_kernel_str:
+ case BPF_FUNC_probe_read_user_str:
+ ret_reg->smax_value = meta->msize_max_value;
+ ret_reg->s32_max_value = meta->msize_max_value;
+ ret_reg->smin_value = -MAX_ERRNO;
+ ret_reg->s32_min_value = -MAX_ERRNO;
+ reg_bounds_sync(ret_reg);
+ break;
+ case BPF_FUNC_get_smp_processor_id:
+ ret_reg->umax_value = nr_cpu_ids - 1;
+ ret_reg->u32_max_value = nr_cpu_ids - 1;
+ ret_reg->smax_value = nr_cpu_ids - 1;
+ ret_reg->s32_max_value = nr_cpu_ids - 1;
+ ret_reg->umin_value = 0;
+ ret_reg->u32_min_value = 0;
+ ret_reg->smin_value = 0;
+ ret_reg->s32_min_value = 0;
+ reg_bounds_sync(ret_reg);
+ break;
+ }
}
static int
@@ -10050,15 +10083,6 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
return true;
}
-
-static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
-#ifdef CONFIG_NET
- [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
- [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
- [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
-#endif
-};
-
enum kfunc_ptr_arg_type {
KF_ARG_PTR_TO_CTX,
KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 98c1544cf572..f95cfb5bf2ee 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -493,7 +493,6 @@ static inline void rcu_expedite_gp(void) { }
static inline void rcu_unexpedite_gp(void) { }
static inline void rcu_async_hurry(void) { }
static inline void rcu_async_relax(void) { }
-static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
#else /* #ifdef CONFIG_TINY_RCU */
bool rcu_gp_is_normal(void); /* Internal RCU use. */
bool rcu_gp_is_expedited(void); /* Internal RCU use. */
@@ -508,7 +507,6 @@ void show_rcu_tasks_gp_kthreads(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
static inline void show_rcu_tasks_gp_kthreads(void) {}
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
-void rcu_request_urgent_qs_task(struct task_struct *t);
#endif /* #else #ifdef CONFIG_TINY_RCU */
#define RCU_SCHEDULER_INACTIVE 0
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 5f2dcabad202..c92eb8c6ff08 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2369,9 +2369,13 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
if (is_tracepoint || is_syscall_tp) {
*buf = is_tracepoint ? event->tp_event->tp->name
: event->tp_event->name;
- *fd_type = BPF_FD_TYPE_TRACEPOINT;
- *probe_offset = 0x0;
- *probe_addr = 0x0;
+ /* We allow NULL pointer for tracepoint */
+ if (fd_type)
+ *fd_type = BPF_FD_TYPE_TRACEPOINT;
+ if (probe_offset)
+ *probe_offset = 0x0;
+ if (probe_addr)
+ *probe_addr = 0x0;
} else {
/* kprobe/uprobe */
err = -EOPNOTSUPP;
@@ -2384,7 +2388,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
#ifdef CONFIG_UPROBE_EVENTS
if (flags & TRACE_EVENT_FL_UPROBE)
err = bpf_get_uprobe_info(event, fd_type, buf,
- probe_offset,
+ probe_offset, probe_addr,
event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
}
@@ -2469,6 +2473,7 @@ struct bpf_kprobe_multi_link {
u32 cnt;
u32 mods_cnt;
struct module **mods;
+ u32 flags;
};
struct bpf_kprobe_multi_run_ctx {
@@ -2558,9 +2563,44 @@ static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
kfree(kmulti_link);
}
+static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs);
+ struct bpf_kprobe_multi_link *kmulti_link;
+ u32 ucount = info->kprobe_multi.count;
+ int err = 0, i;
+
+ if (!uaddrs ^ !ucount)
+ return -EINVAL;
+
+ kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
+ info->kprobe_multi.count = kmulti_link->cnt;
+ info->kprobe_multi.flags = kmulti_link->flags;
+
+ if (!uaddrs)
+ return 0;
+ if (ucount < kmulti_link->cnt)
+ err = -ENOSPC;
+ else
+ ucount = kmulti_link->cnt;
+
+ if (kallsyms_show_value(current_cred())) {
+ if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64)))
+ return -EFAULT;
+ } else {
+ for (i = 0; i < ucount; i++) {
+ if (put_user(0, uaddrs + i))
+ return -EFAULT;
+ }
+ }
+ return err;
+}
+
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
.release = bpf_kprobe_multi_link_release,
.dealloc = bpf_kprobe_multi_link_dealloc,
+ .fill_link_info = bpf_kprobe_multi_link_fill_link_info,
};
static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv)
@@ -2874,6 +2914,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
link->addrs = addrs;
link->cookies = cookies;
link->cnt = cnt;
+ link->flags = flags;
if (cookies) {
/*
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 23dba01831f7..17c21c0b2dd1 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1561,15 +1561,10 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
*fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
: BPF_FD_TYPE_KPROBE;
- if (tk->symbol) {
- *symbol = tk->symbol;
- *probe_offset = tk->rp.kp.offset;
- *probe_addr = 0;
- } else {
- *symbol = NULL;
- *probe_offset = 0;
- *probe_addr = (unsigned long)tk->rp.kp.addr;
- }
+ *probe_offset = tk->rp.kp.offset;
+ *probe_addr = kallsyms_show_value(current_cred()) ?
+ (unsigned long)tk->rp.kp.addr : 0;
+ *symbol = tk->symbol;
return 0;
}
#endif /* CONFIG_PERF_EVENTS */
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 688bf579f2f1..555c223c3232 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1418,7 +1418,7 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
const char **filename, u64 *probe_offset,
- bool perf_type_tracepoint)
+ u64 *probe_addr, bool perf_type_tracepoint)
{
const char *pevent = trace_event_name(event->tp_event);
const char *group = event->tp_event->class->system;
@@ -1435,6 +1435,7 @@ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
: BPF_FD_TYPE_UPROBE;
*filename = tu->filename;
*probe_offset = tu->offset;
+ *probe_addr = 0;
return 0;
}
#endif /* CONFIG_PERF_EVENTS */
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index fa0833410ac1..913a7a079239 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -14381,25 +14381,15 @@ static void *generate_test_data(struct bpf_test *test, int sub)
* single fragment to the skb, filled with
* test->frag_data.
*/
- void *ptr;
-
page = alloc_page(GFP_KERNEL);
-
if (!page)
goto err_kfree_skb;
- ptr = kmap(page);
- if (!ptr)
- goto err_free_page;
- memcpy(ptr, test->frag_data, MAX_DATA);
- kunmap(page);
+ memcpy(page_address(page), test->frag_data, MAX_DATA);
skb_add_rx_frag(skb, 0, page, 0, MAX_DATA, MAX_DATA);
}
return skb;
-
-err_free_page:
- __free_page(page);
err_kfree_skb:
kfree_skb(skb);
return NULL;
diff --git a/net/Kconfig b/net/Kconfig
index 2fb25b534df5..d532ec33f1fe 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -52,6 +52,11 @@ config NET_INGRESS
config NET_EGRESS
bool
+config NET_XGRESS
+ select NET_INGRESS
+ select NET_EGRESS
+ bool
+
config NET_REDIRECT
bool
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2321bd2f9964..7d47f53f20c1 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -555,12 +555,23 @@ __bpf_kfunc u32 bpf_fentry_test9(u32 *a)
return *a;
}
+void noinline bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
+{
+}
+
__bpf_kfunc int bpf_modify_return_test(int a, int *b)
{
*b += 1;
return a + *b;
}
+__bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d,
+ void *e, char f, int g)
+{
+ *b += 1;
+ return a + *b + c + d + (long)e + f + g;
+}
+
int noinline bpf_fentry_shadow_test(int a)
{
return a + 1;
@@ -596,6 +607,7 @@ __diag_pop();
BTF_SET8_START(bpf_test_modify_return_ids)
BTF_ID_FLAGS(func, bpf_modify_return_test)
+BTF_ID_FLAGS(func, bpf_modify_return_test2)
BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
BTF_SET8_END(bpf_test_modify_return_ids)
@@ -663,7 +675,11 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
case BPF_MODIFY_RETURN:
ret = bpf_modify_return_test(1, &b);
if (b != 2)
- side_effect = 1;
+ side_effect++;
+ b = 2;
+ ret += bpf_modify_return_test2(1, &b, 3, 4, (void *)5, 6, 7);
+ if (b != 2)
+ side_effect++;
break;
default:
goto out;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 4f5098d33a46..a6e94ceb7c9a 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -234,6 +234,14 @@ static int br_switchdev_blocking_event(struct notifier_block *nb,
br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb,
b->blocking_nb);
break;
+ case SWITCHDEV_BRPORT_REPLAY:
+ brport_info = ptr;
+ b = &brport_info->brport;
+
+ err = br_switchdev_port_replay(p, b->dev, b->ctx, b->atomic_nb,
+ b->blocking_nb, extack);
+ err = notifier_from_errno(err);
+ break;
}
out:
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6116eba1bd89..9d7bc8b96b53 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -154,6 +154,7 @@ void br_forward(const struct net_bridge_port *to,
backup_port = rcu_dereference(to->backup_port);
if (unlikely(!backup_port))
goto out;
+ BR_INPUT_SKB_CB(skb)->backup_nhid = READ_ONCE(to->backup_nhid);
to = backup_port;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 05c5863d2e20..10f0d33d8ccf 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -211,6 +211,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_IN_OPEN */
+ nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT */
+ nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_CNT */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_BACKUP_NHID */
+ 0;
}
@@ -319,6 +320,10 @@ static int br_port_fill_attrs(struct sk_buff *skb,
backup_p->dev->ifindex);
rcu_read_unlock();
+ if (p->backup_nhid &&
+ nla_put_u32(skb, IFLA_BRPORT_BACKUP_NHID, p->backup_nhid))
+ return -EMSGSIZE;
+
return 0;
}
@@ -895,6 +900,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
[IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
[IFLA_BRPORT_NEIGH_VLAN_SUPPRESS] = NLA_POLICY_MAX(NLA_U8, 1),
+ [IFLA_BRPORT_BACKUP_NHID] = { .type = NLA_U32 },
};
/* Change the state of the port and notify spanning tree */
@@ -1065,6 +1071,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
return err;
}
+ if (tb[IFLA_BRPORT_BACKUP_NHID]) {
+ u32 backup_nhid = nla_get_u32(tb[IFLA_BRPORT_BACKUP_NHID]);
+
+ WRITE_ONCE(p->backup_nhid, backup_nhid);
+ }
+
return 0;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a63b32c1638e..51e4ca54b537 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -387,6 +387,7 @@ struct net_bridge_port {
struct net_bridge_vlan_group __rcu *vlgrp;
#endif
struct net_bridge_port __rcu *backup_port;
+ u32 backup_nhid;
/* STP */
u8 priority;
@@ -605,6 +606,8 @@ struct br_input_skb_cb {
*/
unsigned long fwd_hwdoms;
#endif
+
+ u32 backup_nhid;
};
#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
@@ -2115,6 +2118,12 @@ void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
struct notifier_block *atomic_nb,
struct notifier_block *blocking_nb);
+int br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack);
+
bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb);
void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb);
@@ -2165,6 +2174,16 @@ br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
{
}
+static inline int
+br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
{
return false;
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index ba95c4d74a60..ee84e783e1df 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -727,6 +727,8 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
err = br_switchdev_mdb_replay_one(nb, dev,
SWITCHDEV_OBJ_PORT_MDB(obj),
action, ctx, extack);
+ if (err == -EOPNOTSUPP)
+ err = 0;
if (err)
goto out_free_mdb;
}
@@ -759,8 +761,10 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
err = br_switchdev_mdb_replay(br_dev, dev, ctx, true, blocking_nb,
extack);
- if (err && err != -EOPNOTSUPP)
+ if (err) {
+ /* -EOPNOTSUPP not propagated from MDB replay. */
return err;
+ }
err = br_switchdev_fdb_replay(br_dev, ctx, true, atomic_nb);
if (err && err != -EOPNOTSUPP)
@@ -825,3 +829,12 @@ void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
nbp_switchdev_del(p);
}
+
+int br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ return nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack);
+}
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 6399a8a69d07..81833ca7a2c7 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -201,6 +201,21 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
if (err)
return err;
+ if (BR_INPUT_SKB_CB(skb)->backup_nhid) {
+ tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
+ tunnel_id, 0);
+ if (!tunnel_dst)
+ return -ENOMEM;
+
+ tunnel_dst->u.tun_info.mode |= IP_TUNNEL_INFO_TX |
+ IP_TUNNEL_INFO_BRIDGE;
+ tunnel_dst->u.tun_info.key.nhid =
+ BR_INPUT_SKB_CB(skb)->backup_nhid;
+ skb_dst_set(skb, &tunnel_dst->dst);
+
+ return 0;
+ }
+
tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
skb_dst_set(skb, &tunnel_dst->dst);
diff --git a/net/core/dev.c b/net/core/dev.c
index 69a3e544676c..e7ffcfa037f7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -107,6 +107,7 @@
#include <net/pkt_cls.h>
#include <net/checksum.h>
#include <net/xfrm.h>
+#include <net/tcx.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -154,7 +155,6 @@
#include "dev.h"
#include "net-sysfs.h"
-
static DEFINE_SPINLOCK(ptype_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly; /* Taps */
@@ -2384,8 +2384,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
struct xps_map *map = NULL;
int pos;
- if (dev_maps)
- map = xmap_dereference(dev_maps->attr_map[tci]);
+ map = xmap_dereference(dev_maps->attr_map[tci]);
if (!map)
return false;
@@ -3882,69 +3881,198 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
EXPORT_SYMBOL(dev_loopback_xmit);
#ifdef CONFIG_NET_EGRESS
-static struct sk_buff *
-sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+static struct netdev_queue *
+netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
+{
+ int qm = skb_get_queue_mapping(skb);
+
+ return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
+}
+
+static bool netdev_xmit_txqueue_skipped(void)
+{
+ return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+}
+
+void netdev_xmit_skip_txqueue(bool skip)
+{
+ __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+}
+EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
+#endif /* CONFIG_NET_EGRESS */
+
+#ifdef CONFIG_NET_XGRESS
+static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
{
+ int ret = TC_ACT_UNSPEC;
#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
- struct tcf_result cl_res;
+ struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
+ struct tcf_result res;
if (!miniq)
- return skb;
+ return ret;
- /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
- mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
+ mini_qdisc_bstats_cpu_update(miniq, skb);
+ ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
+ /* Only tcf related quirks below. */
+ switch (ret) {
+ case TC_ACT_SHOT:
+ mini_qdisc_qstats_cpu_drop(miniq);
+ break;
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
- skb->tc_index = TC_H_MIN(cl_res.classid);
+ skb->tc_index = TC_H_MIN(res.classid);
break;
+ }
+#endif /* CONFIG_NET_CLS_ACT */
+ return ret;
+}
+
+static DEFINE_STATIC_KEY_FALSE(tcx_needed_key);
+
+void tcx_inc(void)
+{
+ static_branch_inc(&tcx_needed_key);
+}
+
+void tcx_dec(void)
+{
+ static_branch_dec(&tcx_needed_key);
+}
+
+static __always_inline enum tcx_action_base
+tcx_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
+ const bool needs_mac)
+{
+ const struct bpf_mprog_fp *fp;
+ const struct bpf_prog *prog;
+ int ret = TCX_NEXT;
+
+ if (needs_mac)
+ __skb_push(skb, skb->mac_len);
+ bpf_mprog_foreach_prog(entry, fp, prog) {
+ bpf_compute_data_pointers(skb);
+ ret = bpf_prog_run(prog, skb);
+ if (ret != TCX_NEXT)
+ break;
+ }
+ if (needs_mac)
+ __skb_pull(skb, skb->mac_len);
+ return tcx_action_code(skb, ret);
+}
+
+static __always_inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev, bool *another)
+{
+ struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
+ int sch_ret;
+
+ if (!entry)
+ return skb;
+ if (*pt_prev) {
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
+ *pt_prev = NULL;
+ }
+
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ tcx_set_ingress(skb, true);
+
+ if (static_branch_unlikely(&tcx_needed_key)) {
+ sch_ret = tcx_run(entry, skb, true);
+ if (sch_ret != TC_ACT_UNSPEC)
+ goto ingress_verdict;
+ }
+ sch_ret = tc_run(tcx_entry(entry), skb);
+ingress_verdict:
+ switch (sch_ret) {
+ case TC_ACT_REDIRECT:
+ /* skb_mac_header check was done by BPF, so we can safely
+ * push the L2 header back before redirecting to another
+ * netdev.
+ */
+ __skb_push(skb, skb->mac_len);
+ if (skb_do_redirect(skb) == -EAGAIN) {
+ __skb_pull(skb, skb->mac_len);
+ *another = true;
+ break;
+ }
+ *ret = NET_RX_SUCCESS;
+ return NULL;
case TC_ACT_SHOT:
- mini_qdisc_qstats_cpu_drop(miniq);
- *ret = NET_XMIT_DROP;
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
+ *ret = NET_RX_DROP;
return NULL;
+ /* used by tc_run */
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
- *ret = NET_XMIT_SUCCESS;
consume_skb(skb);
+ fallthrough;
+ case TC_ACT_CONSUMED:
+ *ret = NET_RX_SUCCESS;
return NULL;
+ }
+
+ return skb;
+}
+
+static __always_inline struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+{
+ struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
+ int sch_ret;
+
+ if (!entry)
+ return skb;
+
+ /* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was
+ * already set by the caller.
+ */
+ if (static_branch_unlikely(&tcx_needed_key)) {
+ sch_ret = tcx_run(entry, skb, false);
+ if (sch_ret != TC_ACT_UNSPEC)
+ goto egress_verdict;
+ }
+ sch_ret = tc_run(tcx_entry(entry), skb);
+egress_verdict:
+ switch (sch_ret) {
case TC_ACT_REDIRECT:
/* No need to push/pop skb's mac_header here on egress! */
skb_do_redirect(skb);
*ret = NET_XMIT_SUCCESS;
return NULL;
- default:
- break;
+ case TC_ACT_SHOT:
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ *ret = NET_XMIT_DROP;
+ return NULL;
+ /* used by tc_run */
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ *ret = NET_XMIT_SUCCESS;
+ return NULL;
}
-#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
-
-static struct netdev_queue *
-netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
-{
- int qm = skb_get_queue_mapping(skb);
-
- return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
-}
-
-static bool netdev_xmit_txqueue_skipped(void)
+#else
+static __always_inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev, bool *another)
{
- return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+ return skb;
}
-void netdev_xmit_skip_txqueue(bool skip)
+static __always_inline struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
- __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+ return skb;
}
-EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
-#endif /* CONFIG_NET_EGRESS */
+#endif /* CONFIG_NET_XGRESS */
#ifdef CONFIG_XPS
static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
@@ -4128,9 +4256,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
skb_update_prio(skb);
qdisc_pkt_len_init(skb);
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_at_ingress = 0;
-#endif
+ tcx_set_ingress(skb, false);
#ifdef CONFIG_NET_EGRESS
if (static_branch_unlikely(&egress_needed_key)) {
if (nf_hook_egress_active()) {
@@ -5064,72 +5190,6 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif
-static inline struct sk_buff *
-sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
- struct net_device *orig_dev, bool *another)
-{
-#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
- struct tcf_result cl_res;
-
- /* If there's at least one ingress present somewhere (so
- * we get here via enabled static key), remaining devices
- * that are not configured with an ingress qdisc will bail
- * out here.
- */
- if (!miniq)
- return skb;
-
- if (*pt_prev) {
- *ret = deliver_skb(skb, *pt_prev, orig_dev);
- *pt_prev = NULL;
- }
-
- qdisc_skb_cb(skb)->pkt_len = skb->len;
- tc_skb_cb(skb)->mru = 0;
- tc_skb_cb(skb)->post_ct = false;
- skb->tc_at_ingress = 1;
- mini_qdisc_bstats_cpu_update(miniq, skb);
-
- switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
- case TC_ACT_OK:
- case TC_ACT_RECLASSIFY:
- skb->tc_index = TC_H_MIN(cl_res.classid);
- break;
- case TC_ACT_SHOT:
- mini_qdisc_qstats_cpu_drop(miniq);
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
- *ret = NET_RX_DROP;
- return NULL;
- case TC_ACT_STOLEN:
- case TC_ACT_QUEUED:
- case TC_ACT_TRAP:
- consume_skb(skb);
- *ret = NET_RX_SUCCESS;
- return NULL;
- case TC_ACT_REDIRECT:
- /* skb_mac_header check was done by cls/act_bpf, so
- * we can safely push the L2 header back before
- * redirecting to another netdev
- */
- __skb_push(skb, skb->mac_len);
- if (skb_do_redirect(skb) == -EAGAIN) {
- __skb_pull(skb, skb->mac_len);
- *another = true;
- break;
- }
- *ret = NET_RX_SUCCESS;
- return NULL;
- case TC_ACT_CONSUMED:
- *ret = NET_RX_SUCCESS;
- return NULL;
- default:
- break;
- }
-#endif /* CONFIG_NET_CLS_ACT */
- return skb;
-}
-
/**
* netdev_is_rx_handler_busy - check if receive handler is registered
* @dev: device to check
@@ -6316,12 +6376,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
* softirq mode will happen in the next round of napi_schedule().
* This should not cause hiccups/stalls to the live traffic.
*/
- list_for_each_entry(napi, &dev->napi_list, dev_list) {
- if (threaded)
- set_bit(NAPI_STATE_THREADED, &napi->state);
- else
- clear_bit(NAPI_STATE_THREADED, &napi->state);
- }
+ list_for_each_entry(napi, &dev->napi_list, dev_list)
+ assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
return err;
}
@@ -10617,6 +10673,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
+ dev->xdp_zc_max_segs = 1;
dev->gso_max_segs = GSO_MAX_SEGS;
dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE;
@@ -10838,7 +10895,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
/* Shutdown queueing discipline. */
dev_shutdown(dev);
-
+ dev_tcx_uninstall(dev);
dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);
diff --git a/net/core/filter.c b/net/core/filter.c
index 06ba0e56e369..797e8f039696 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4345,13 +4345,8 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
- if (map_type == BPF_MAP_TYPE_XSKMAP) {
- /* XDP_REDIRECT is not supported AF_XDP yet. */
- if (unlikely(xdp_buff_has_frags(xdp)))
- return -EOPNOTSUPP;
-
+ if (map_type == BPF_MAP_TYPE_XSKMAP)
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
- }
return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
xdp_prog);
@@ -9312,7 +9307,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
__u8 value_reg = si->dst_reg;
__u8 skb_reg = si->src_reg;
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
/* If the tstamp_type is read,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, read skb->tstamp as is if tstamp_type_access is true.
@@ -9346,7 +9341,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
__u8 value_reg = si->src_reg;
__u8 skb_reg = si->dst_reg;
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
/* If the tstamp_type is read,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, write skb->tstamp as is if tstamp_type_access is true.
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index a4270fafdf11..65ef4867fc49 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -25,6 +25,14 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
return -EINVAL;
}
+ if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
+ if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
+ netdev->xdp_zc_max_segs)) {
+ genlmsg_cancel(rsp, hdr);
+ return -EINVAL;
+ }
+ }
+
genlmsg_end(rsp, hdr);
return 0;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index a3e12a61d456..7ca456bfab71 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -492,7 +492,7 @@ static s32 page_pool_inflight(struct page_pool *pool)
* a regular page (that will eventually be returned to the normal
* page-allocator via put_page).
*/
-void page_pool_release_page(struct page_pool *pool, struct page *page)
+static void page_pool_return_page(struct page_pool *pool, struct page *page)
{
dma_addr_t dma;
int count;
@@ -518,13 +518,6 @@ skip_dma_unmap:
*/
count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
trace_page_pool_state_release(pool, page, count);
-}
-EXPORT_SYMBOL(page_pool_release_page);
-
-/* Return a page to the page allocator, cleaning up our state */
-static void page_pool_return_page(struct page_pool *pool, struct page *page)
-{
- page_pool_release_page(pool, page);
put_page(page);
/* An optimization would be to call __free_pages(page, pool->p.order)
@@ -616,9 +609,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* will be invoking put_page.
*/
recycle_stat_inc(pool, released_refcnt);
- /* Do not replace this with page_pool_return_page() */
- page_pool_release_page(pool, page);
- put_page(page);
+ page_pool_return_page(pool, page);
return NULL;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3ad4e030846d..1777a5e1830b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -61,7 +61,7 @@
#include "dev.h"
#define RTNL_MAX_TYPE 50
-#define RTNL_SLAVE_MAX_TYPE 43
+#define RTNL_SLAVE_MAX_TYPE 44
struct rtnl_link {
rtnl_doit_func doit;
@@ -1273,7 +1273,6 @@ static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
struct net_device *dev,
int vfs_num,
- struct nlattr *vfinfo,
u32 ext_filter_mask)
{
struct ifla_vf_rss_query_en vf_rss_query_en;
@@ -1343,7 +1342,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
vf_trust.setting = ivi.trusted;
vf = nla_nest_start_noflag(skb, IFLA_VF_INFO);
if (!vf)
- goto nla_put_vfinfo_failure;
+ return -EMSGSIZE;
if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
nla_put(skb, IFLA_VF_BROADCAST, sizeof(vf_broadcast), &vf_broadcast) ||
nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
@@ -1414,8 +1413,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
nla_put_vf_failure:
nla_nest_cancel(skb, vf);
-nla_put_vfinfo_failure:
- nla_nest_cancel(skb, vfinfo);
return -EMSGSIZE;
}
@@ -1441,8 +1438,10 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
return -EMSGSIZE;
for (i = 0; i < num_vfs; i++) {
- if (rtnl_fill_vfinfo(skb, dev, i, vfinfo, ext_filter_mask))
+ if (rtnl_fill_vfinfo(skb, dev, i, ext_filter_mask)) {
+ nla_nest_cancel(skb, vfinfo);
return -EMSGSIZE;
+ }
}
nla_nest_end(skb, vfinfo);
diff --git a/net/core/sock.c b/net/core/sock.c
index 9370fd50aa2c..ab1e8d1bd5a1 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1244,17 +1244,11 @@ set_sndbuf:
break;
case SO_PASSCRED:
- if (valbool)
- set_bit(SOCK_PASSCRED, &sock->flags);
- else
- clear_bit(SOCK_PASSCRED, &sock->flags);
+ assign_bit(SOCK_PASSCRED, &sock->flags, valbool);
break;
case SO_PASSPIDFD:
- if (valbool)
- set_bit(SOCK_PASSPIDFD, &sock->flags);
- else
- clear_bit(SOCK_PASSPIDFD, &sock->flags);
+ assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool);
break;
case SO_TIMESTAMP_OLD:
@@ -1358,10 +1352,7 @@ set_sndbuf:
break;
case SO_PASSSEC:
- if (valbool)
- set_bit(SOCK_PASSSEC, &sock->flags);
- else
- clear_bit(SOCK_PASSSEC, &sock->flags);
+ assign_bit(SOCK_PASSSEC, &sock->flags, valbool);
break;
case SO_MARK:
if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index fa8079303cb0..8e919cfe6e23 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -474,7 +474,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
.flowi4_oif = inet_iif(skb),
.daddr = iph->saddr,
.saddr = iph->daddr,
- .flowi4_tos = RT_CONN_FLAGS(sk),
+ .flowi4_tos = ip_sock_rt_tos(sk),
+ .flowi4_scope = ip_sock_rt_scope(sk),
.flowi4_proto = sk->sk_protocol,
.fl4_sport = dccp_hdr(skb)->dccph_dport,
.fl4_dport = dccp_hdr(skb)->dccph_sport,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7249ef218178..e03b5331df6d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1056,6 +1056,7 @@ static struct proto dccp_v6_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct dccp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.rsk_prot = &dccp6_request_sock_ops,
.twsk_prot = &dccp6_timewait_sock_ops,
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
index 7e4c2a3b322b..c5d14c48def1 100644
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -13,10 +13,6 @@
struct dccp6_sock {
struct dccp_sock dccp;
- /*
- * ipv6_pinfo has to be the last member of dccp6_sock,
- * see inet6_sk_generic.
- */
struct ipv6_pinfo inet6;
};
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 1f00f874471f..5128b9c7eea8 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -3946,7 +3946,7 @@ static int devlink_param_get(struct devlink *devlink,
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->get || devlink->reload_failed)
+ if (!param->get)
return -EOPNOTSUPP;
return param->get(devlink, param->id, ctx);
}
@@ -3955,7 +3955,7 @@ static int devlink_param_set(struct devlink *devlink,
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->set || devlink->reload_failed)
+ if (!param->set)
return -EOPNOTSUPP;
return param->set(devlink, param->id, ctx);
}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 0ce8fd311c78..c63cbfbe6489 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1568,27 +1568,6 @@ static void dsa_port_phylink_validate(struct phylink_config *config,
phylink_generic_validate(config, supported, state);
}
-static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state)
-{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
- struct dsa_switch *ds = dp->ds;
- int err;
-
- /* Only called for inband modes */
- if (!ds->ops->phylink_mac_link_state) {
- state->link = 0;
- return;
- }
-
- err = ds->ops->phylink_mac_link_state(ds, dp->index, state);
- if (err < 0) {
- dev_err(ds->dev, "p%d: phylink_mac_link_state() failed: %d\n",
- dp->index, err);
- state->link = 0;
- }
-}
-
static struct phylink_pcs *
dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
phy_interface_t interface)
@@ -1646,17 +1625,6 @@ static int dsa_port_phylink_mac_finish(struct phylink_config *config,
return err;
}
-static void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
-{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_an_restart)
- return;
-
- ds->ops->phylink_mac_an_restart(ds, dp->index);
-}
-
static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
unsigned int mode,
phy_interface_t interface)
@@ -1700,11 +1668,9 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
.validate = dsa_port_phylink_validate,
.mac_select_pcs = dsa_port_phylink_mac_select_pcs,
- .mac_pcs_get_state = dsa_port_phylink_mac_pcs_get_state,
.mac_prepare = dsa_port_phylink_mac_prepare,
.mac_config = dsa_port_phylink_mac_config,
.mac_finish = dsa_port_phylink_mac_finish,
- .mac_an_restart = dsa_port_phylink_mac_an_restart,
.mac_link_down = dsa_port_phylink_mac_link_down,
.mac_link_up = dsa_port_phylink_mac_link_up,
};
@@ -1720,13 +1686,6 @@ int dsa_port_phylink_create(struct dsa_port *dp)
if (err)
mode = PHY_INTERFACE_MODE_NA;
- /* Presence of phylink_mac_link_state or phylink_mac_an_restart is
- * an indicator of a legacy phylink driver.
- */
- if (ds->ops->phylink_mac_link_state ||
- ds->ops->phylink_mac_an_restart)
- dp->pl_config.legacy_pre_march2020 = true;
-
if (ds->ops->phylink_get_caps)
ds->ops->phylink_get_caps(ds, dp->index, &dp->pl_config);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 527b1d576460..48db91b33390 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -21,6 +21,7 @@
#include <linux/if_hsr.h>
#include <net/dcbnl.h>
#include <linux/netpoll.h>
+#include <linux/string.h>
#include "dsa.h"
#include "port.h"
@@ -1056,10 +1057,10 @@ static void dsa_slave_get_strings(struct net_device *dev,
if (stringset == ETH_SS_STATS) {
int len = ETH_GSTRING_LEN;
- strncpy(data, "tx_packets", len);
- strncpy(data + len, "tx_bytes", len);
- strncpy(data + 2 * len, "rx_packets", len);
- strncpy(data + 3 * len, "rx_bytes", len);
+ strscpy_pad(data, "tx_packets", len);
+ strscpy_pad(data + len, "tx_bytes", len);
+ strscpy_pad(data + 2 * len, "rx_packets", len);
+ strscpy_pad(data + 3 * len, "rx_bytes", len);
if (ds->ops->get_strings)
ds->ops->get_strings(ds, dp->index, stringset,
data + 4 * len);
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 4406d796cc2f..39dcccf0f174 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -51,8 +51,6 @@ static bool is_unsupported(u32 member_offset)
return false;
}
-extern struct btf *btf_vmlinux;
-
static bool bpf_tcp_ca_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index f95142e56da0..93f14d39fef6 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1152,41 +1152,64 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
return !!(state & NUD_VALID);
}
-static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
+static bool nexthop_is_good_nh(const struct nexthop *nh)
+{
+ struct nh_info *nhi = rcu_dereference(nh->nh_info);
+
+ switch (nhi->family) {
+ case AF_INET:
+ return ipv4_good_nh(&nhi->fib_nh);
+ case AF_INET6:
+ return ipv6_good_nh(&nhi->fib6_nh);
+ }
+
+ return false;
+}
+
+static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
{
- struct nexthop *rc = NULL;
int i;
- for (i = 0; i < nhg->num_nh; ++i) {
+ for (i = 0; i < nhg->num_nh; i++) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- struct nh_info *nhi;
if (hash > atomic_read(&nhge->hthr.upper_bound))
continue;
- nhi = rcu_dereference(nhge->nh->nh_info);
- if (nhi->fdb_nh)
- return nhge->nh;
+ return nhge->nh;
+ }
+
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
+{
+ struct nexthop *rc = NULL;
+ int i;
+
+ if (nhg->fdb_nh)
+ return nexthop_select_path_fdb(nhg, hash);
+
+ for (i = 0; i < nhg->num_nh; ++i) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
/* nexthops always check if it is good and does
* not rely on a sysctl for this behavior
*/
- switch (nhi->family) {
- case AF_INET:
- if (ipv4_good_nh(&nhi->fib_nh))
- return nhge->nh;
- break;
- case AF_INET6:
- if (ipv6_good_nh(&nhi->fib6_nh))
- return nhge->nh;
- break;
- }
+ if (!nexthop_is_good_nh(nhge->nh))
+ continue;
if (!rc)
rc = nhge->nh;
+
+ if (hash > atomic_read(&nhge->hthr.upper_bound))
+ continue;
+
+ return nhge->nh;
}
- return rc;
+ return rc ? : nhg->nh_entries[0].nh;
}
static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8ed52e1e3c99..aca5620cf3ba 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -457,6 +457,7 @@ void tcp_init_sock(struct sock *sk)
WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
+ tcp_scaling_ratio_init(sk);
set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
sk_sockets_allocated_inc(sk);
@@ -1700,7 +1701,7 @@ EXPORT_SYMBOL(tcp_peek_len);
/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
int tcp_set_rcvlowat(struct sock *sk, int val)
{
- int cap;
+ int space, cap;
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
@@ -1715,10 +1716,10 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
return 0;
- val <<= 1;
- if (val > sk->sk_rcvbuf) {
- WRITE_ONCE(sk->sk_rcvbuf, val);
- tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
+ space = tcp_space_from_win(sk, val);
+ if (space > sk->sk_rcvbuf) {
+ WRITE_ONCE(sk->sk_rcvbuf, space);
+ tcp_sk(sk)->window_clamp = val;
}
return 0;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 57c8af1859c1..670c3dab24f2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -237,6 +237,16 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
*/
len = skb_shinfo(skb)->gso_size ? : skb->len;
if (len >= icsk->icsk_ack.rcv_mss) {
+ /* Note: divides are still a bit expensive.
+ * For the moment, only adjust scaling_ratio
+ * when we update icsk_ack.rcv_mss.
+ */
+ if (unlikely(len != icsk->icsk_ack.rcv_mss)) {
+ u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE;
+
+ do_div(val, skb->truesize);
+ tcp_sk(sk)->scaling_ratio = val ? val : 1;
+ }
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
/* Account for possibly-removed options */
@@ -287,7 +297,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.quick = quickacks;
}
-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -295,7 +305,6 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
-EXPORT_SYMBOL(tcp_enter_quickack_mode);
/* Send ACKs quickly, if "quick" count is not exhausted
* and the session is not interactive.
@@ -727,8 +736,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvmem, rcvbuf;
u64 rcvwin, grow;
+ int rcvbuf;
/* minimal window to cope with packet losses, assuming
* steady state. Add some cushion because of small variations.
@@ -740,12 +749,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
do_div(grow, tp->rcvq_space.space);
rcvwin += (grow << 1);
- rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
- rcvmem += 128;
-
- do_div(rcvwin, tp->advmss);
- rcvbuf = min_t(u64, rcvwin * rcvmem,
+ rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
@@ -4308,10 +4312,16 @@ static inline bool tcp_paws_discard(const struct sock *sk,
* (borrowed from freebsd)
*/
-static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
+static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
+ u32 seq, u32 end_seq)
{
- return !before(end_seq, tp->rcv_wup) &&
- !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
+ if (before(end_seq, tp->rcv_wup))
+ return SKB_DROP_REASON_TCP_OLD_SEQUENCE;
+
+ if (after(seq, tp->rcv_nxt + tcp_receive_window(tp)))
+ return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;
+
+ return SKB_NOT_DROPPED_YET;
}
/* When we get a reset we do this. */
@@ -5734,7 +5744,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
}
/* Step 1: check sequence number */
- if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
+ reason = tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ if (reason) {
/* RFC793, page 37: "In all states except SYN-SENT, all reset
* (RST) segments are validated by checking their SEQ-fields."
* And page 69: "If an incoming segment is not acceptable,
@@ -5751,7 +5762,6 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
} else if (tcp_reset_check(sk, skb)) {
goto reset;
}
- SKB_DR_SET(reason, TCP_INVALID_SEQUENCE);
goto discard;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 069642014636..09cffbc82d32 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -57,6 +57,7 @@
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
@@ -2448,6 +2449,8 @@ static void *established_get_first(struct seq_file *seq)
struct hlist_nulls_node *node;
spinlock_t *lock = inet_ehash_lockp(hinfo, st->bucket);
+ cond_resched();
+
/* Lockless fast path for the common case of empty buckets */
if (empty_bucket(hinfo, st))
continue;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 42a96b3547c9..8c3ebd95f5b9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1553,7 +1553,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
spin_unlock(&list->lock);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
+ INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
busylock_release(busy);
return 0;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 94cec2075eee..94d1fdb0393f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -202,6 +202,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
+ .accept_ra_min_rtr_lft = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -262,6 +263,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
+ .accept_ra_min_rtr_lft = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -5602,6 +5604,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
+ array[DEVCONF_ACCEPT_RA_MIN_RTR_LFT] = cnf->accept_ra_min_rtr_lft;
}
static inline size_t inet6_ifla6_size(void)
@@ -6796,6 +6799,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "accept_ra_min_rtr_lft",
+ .data = &ipv6_devconf.accept_ra_min_rtr_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "accept_ra_pinfo",
.data = &ipv6_devconf.accept_ra_pinfo,
.maxlen = sizeof(int),
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 5d593ddc0347..9f9c4b838664 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -102,9 +102,9 @@ bool ipv6_mod_enabled(void)
}
EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
-static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
+static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
- const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
+ const int offset = sk->sk_prot->ipv6_pinfo_offset;
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9b6818453afe..d80d6024cafa 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -38,10 +38,11 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a)
return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
}
-static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
+static void ip6_datagram_flow_key_init(struct flowi6 *fl6,
+ const struct sock *sk)
{
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
int oif = sk->sk_bound_dev_if;
memset(fl6, 0, sizeof(*fl6));
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 202fc3aaa83c..f4bfccae003c 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -612,8 +612,6 @@ looped_back:
kfree(buf);
- skb_dst_drop(skb);
-
ip6_route_input(skb);
if (skb_dst(skb)->error) {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 65fa5014bc85..6d88f5248c1f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -1034,11 +1034,9 @@ drop_no_count:
return 0;
}
-void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
- u8 type,
+void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- int oif)
+ const struct in6_addr *daddr, int oif)
{
memset(fl6, 0, sizeof(*fl6));
fl6->saddr = *saddr;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 714cdc9e2b8e..5ce25bcb9974 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1699,11 +1699,9 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
-static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
- struct net_device *dev,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- int proto, int len)
+static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb,
+ struct net_device *dev, const struct in6_addr *saddr,
+ const struct in6_addr *daddr, int proto, int len)
{
struct ipv6hdr *hdr;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 18634ebd20a4..29ddad1c1a2f 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1280,6 +1280,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts))
return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
+ lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
+
if (!ipv6_accept_ra(in6_dev)) {
ND_PRINTK(2, info,
"RA: %s, did not accept ra for dev: %s\n",
@@ -1287,6 +1289,13 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
goto skip_linkparms;
}
+ if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_rtr_lft) {
+ ND_PRINTK(2, info,
+ "RA: router lifetime (%ds) is too short: %s\n",
+ lifetime, skb->dev->name);
+ goto skip_linkparms;
+ }
+
#ifdef CONFIG_IPV6_NDISC_NODETYPE
/* skip link-specific parameters from interior routers */
if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
@@ -1339,8 +1348,6 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
goto skip_defrtr;
}
- lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
-
#ifdef CONFIG_IPV6_ROUTER_PREF
pref = ra_msg->icmph.icmp6_router_pref;
/* 10b is handled as if it were 00b (medium) */
@@ -1492,6 +1499,13 @@ skip_linkparms:
goto out;
}
+ if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_rtr_lft) {
+ ND_PRINTK(2, info,
+ "RA: router lifetime (%ds) is too short: %s\n",
+ lifetime, skb->dev->name);
+ goto out;
+ }
+
#ifdef CONFIG_IPV6_ROUTE_INFO
if (!in6_dev->cnf.accept_ra_from_local &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index f804c11e2146..2a0e8bc07398 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -215,6 +215,7 @@ struct proto pingv6_prot = {
.get_port = ping_get_port,
.put_port = ping_unhash,
.obj_size = sizeof(struct raw6_sock),
+ .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
};
EXPORT_SYMBOL_GPL(pingv6_prot);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ac1cef094c5f..0fcf1b890807 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1216,6 +1216,7 @@ struct proto rawv6_prot = {
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw6_sock),
+ .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
.useroffset = offsetof(struct raw6_sock, filter),
.usersize = sizeof_field(struct raw6_sock, filter),
.h.raw_hash = &raw_v6_hashinfo,
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index b1c028df686e..a013b92cbb86 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -272,8 +272,6 @@ static int rpl_input(struct sk_buff *skb)
dst = dst_cache_get(&rlwt->cache);
preempt_enable();
- skb_dst_drop(skb);
-
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
@@ -284,6 +282,7 @@ static int rpl_input(struct sk_buff *skb)
preempt_enable();
}
} else {
+ skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4714eb695913..1b4529e833a1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2175,6 +2175,7 @@ struct proto tcpv6_prot = {
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b7c972aa09a7..95c75d8f73d5 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1798,6 +1798,7 @@ struct proto udpv6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = NULL,
.diag_destroy = udp_abort,
};
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 8e010d07917a..267d491e9707 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -67,6 +67,7 @@ struct proto udplitev6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = &udplite_table,
};
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ede3c6a60353..542439b6a59c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1281,7 +1281,6 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1];
natt->encap_dport = n_port->sadb_x_nat_t_port_port;
}
- memset(&natt->encap_oa, 0, sizeof(natt->encap_oa));
}
err = xfrm_init_state(x);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index b1623f9c4f92..2eee95a00c05 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -36,9 +36,6 @@ struct l2tp_ip6_sock {
u32 conn_id;
u32 peer_conn_id;
- /* ipv6_pinfo has to be the last member of l2tp_ip6_sock, see
- * inet6_sk_generic
- */
struct ipv6_pinfo inet6;
};
@@ -730,6 +727,7 @@ static struct proto l2tp_ip6_prot = {
.hash = l2tp_ip6_hash,
.unhash = l2tp_ip6_unhash,
.obj_size = sizeof(struct l2tp_ip6_sock),
+ .ipv6_pinfo_offset = offsetof(struct l2tp_ip6_sock, inet6),
};
static const struct proto_ops l2tp_ip6_ops = {
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3317d1cca156..65ee949a8a44 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -90,6 +90,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
if (err)
return err;
+ msk->scaling_ratio = tcp_sk(ssock->sk)->scaling_ratio;
WRITE_ONCE(msk->first, ssock->sk);
WRITE_ONCE(msk->subflow, ssock);
subflow = mptcp_subflow_ctx(ssock->sk);
@@ -1881,6 +1882,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
+ u8 scaling_ratio = U8_MAX;
u32 time, advmss = 1;
u64 rtt_us, mstamp;
@@ -1911,9 +1913,11 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
rtt_us = max(sf_rtt_us, rtt_us);
advmss = max(sf_advmss, advmss);
+ scaling_ratio = min(tp->scaling_ratio, scaling_ratio);
}
msk->rcvq_space.rtt_us = rtt_us;
+ msk->scaling_ratio = scaling_ratio;
if (time < (rtt_us >> 3) || rtt_us == 0)
return;
@@ -1922,8 +1926,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvmem, rcvbuf;
u64 rcvwin, grow;
+ int rcvbuf;
rcvwin = ((u64)msk->rcvq_space.copied << 1) + 16 * advmss;
@@ -1932,18 +1936,13 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
do_div(grow, msk->rcvq_space.space);
rcvwin += (grow << 1);
- rcvmem = SKB_TRUESIZE(advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(sk, rcvmem) < advmss)
- rcvmem += 128;
-
- do_div(rcvwin, advmss);
- rcvbuf = min_t(u64, rcvwin * rcvmem,
+ rcvbuf = min_t(u64, __tcp_space_from_win(scaling_ratio, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
u32 window_clamp;
- window_clamp = tcp_win_from_space(sk, rcvbuf);
+ window_clamp = __tcp_win_from_space(scaling_ratio, rcvbuf);
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make subflows follow along. If we do not do this, we
@@ -3987,6 +3986,7 @@ int __init mptcp_proto_v6_init(void)
strcpy(mptcp_v6_prot.name, "MPTCPv6");
mptcp_v6_prot.slab = NULL;
mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
+ mptcp_v6_prot.ipv6_pinfo_offset = offsetof(struct mptcp6_sock, np);
err = proto_register(&mptcp_v6_prot, 1);
if (err)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 37fbe22e2433..795f422e8597 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -321,6 +321,7 @@ struct mptcp_sock {
u64 time; /* start time of measurement window */
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
+ u8 scaling_ratio;
u32 subflow_id;
u32 setsockopt_seq;
@@ -351,9 +352,14 @@ static inline int __mptcp_rmem(const struct sock *sk)
return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
}
+static inline int mptcp_win_from_space(const struct sock *sk, int space)
+{
+ return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space);
+}
+
static inline int __mptcp_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
+ return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9ee3b7abbaf6..ad7080fabb2f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1359,7 +1359,7 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
const struct sock *sk = subflow->conn;
*space = __mptcp_space(sk);
- *full_space = tcp_full_space(sk);
+ *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
void __mptcp_error_report(struct sock *sk)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 992393102d5f..9f6f2e643575 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1756,7 +1756,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
cnet = nf_ct_pernet(net);
if (cnet->expect_count) {
spin_lock_bh(&nf_conntrack_expect_lock);
- exp = nf_ct_find_expectation(net, zone, tuple);
+ exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl));
if (exp) {
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &ct->status);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 96948e98ec53..81ca348915c9 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
- const struct nf_conntrack_tuple *tuple)
+ const struct nf_conntrack_tuple *tuple, bool unlink)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct nf_conntrack_expect *i, *exp = NULL;
@@ -211,7 +211,7 @@ nf_ct_find_expectation(struct net *net,
!refcount_inc_not_zero(&exp->master->ct_general.use)))
return NULL;
- if (exp->flags & NF_CT_EXPECT_PERMANENT) {
+ if (exp->flags & NF_CT_EXPECT_PERMANENT || !unlink) {
refcount_inc(&exp->use);
return exp;
} else if (del_timer(&exp->timeout)) {
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 38958e067aa8..e87fd4314c68 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -262,6 +262,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
regs->verdict.code = NF_DROP;
return;
}
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
}
nf_ct_set(skb, ct, IP_CT_NEW);
@@ -368,6 +369,7 @@ static bool nft_ct_tmpl_alloc_pcpu(void)
return false;
}
+ __set_bit(IPS_CONFIRMED_BIT, &tmp->status);
per_cpu(nft_ct_pcpu_template, cpu) = tmp;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 383631873748..96c605e45235 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -677,6 +677,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
struct netlink_sock *nlk;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
+ void (*release)(struct sock *sock, unsigned long *groups);
int err = 0;
sock->state = SS_UNCONNECTED;
@@ -704,6 +705,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
cb_mutex = nl_table[protocol].cb_mutex;
bind = nl_table[protocol].bind;
unbind = nl_table[protocol].unbind;
+ release = nl_table[protocol].release;
netlink_unlock_table();
if (err < 0)
@@ -719,6 +721,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
nlk->module = module;
nlk->netlink_bind = bind;
nlk->netlink_unbind = unbind;
+ nlk->netlink_release = release;
out:
return err;
@@ -763,6 +766,8 @@ static int netlink_release(struct socket *sock)
* OK. Socket is unlinked, any packets that arrive now
* will be purged.
*/
+ if (nlk->netlink_release)
+ nlk->netlink_release(sk, nlk->groups);
/* must not acquire netlink_table_lock in any way again before unbind
* and notifying genetlink is done as otherwise it might deadlock
@@ -1432,6 +1437,8 @@ struct netlink_broadcast_data {
int delivered;
gfp_t allocation;
struct sk_buff *skb, *skb2;
+ int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
+ void *tx_data;
};
static void do_one_broadcast(struct sock *sk,
@@ -1485,6 +1492,13 @@ static void do_one_broadcast(struct sock *sk,
p->delivery_failure = 1;
goto out;
}
+
+ if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+ kfree_skb(p->skb2);
+ p->skb2 = NULL;
+ goto out;
+ }
+
if (sk_filter(sk, p->skb2)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
@@ -1507,8 +1521,12 @@ out:
sock_put(sk);
}
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
- u32 group, gfp_t allocation)
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
+ u32 portid,
+ u32 group, gfp_t allocation,
+ int (*filter)(struct sock *dsk,
+ struct sk_buff *skb, void *data),
+ void *filter_data)
{
struct net *net = sock_net(ssk);
struct netlink_broadcast_data info;
@@ -1527,6 +1545,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
info.allocation = allocation;
info.skb = skb;
info.skb2 = NULL;
+ info.tx_filter = filter;
+ info.tx_data = filter_data;
/* While we sleep in clone, do not allow to change socket list */
@@ -1552,6 +1572,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
}
return -ESRCH;
}
+EXPORT_SYMBOL(netlink_broadcast_filtered);
+
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
+ u32 group, gfp_t allocation)
+{
+ return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
+ NULL, NULL);
+}
EXPORT_SYMBOL(netlink_broadcast);
struct netlink_set_err_data {
@@ -1629,10 +1657,7 @@ static void netlink_update_socket_mc(struct netlink_sock *nlk,
old = test_bit(group - 1, nlk->groups);
subscriptions = nlk->subscriptions - old + new;
- if (new)
- __set_bit(group - 1, nlk->groups);
- else
- __clear_bit(group - 1, nlk->groups);
+ __assign_bit(group - 1, nlk->groups, new);
netlink_update_subscriptions(&nlk->sk, subscriptions);
netlink_update_listeners(&nlk->sk);
}
@@ -2069,6 +2094,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (cfg) {
nl_table[unit].bind = cfg->bind;
nl_table[unit].unbind = cfg->unbind;
+ nl_table[unit].release = cfg->release;
nl_table[unit].flags = cfg->flags;
}
nl_table[unit].registered = 1;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 90a3198a9b7f..fd424cd63f31 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -42,6 +42,8 @@ struct netlink_sock {
void (*netlink_rcv)(struct sk_buff *skb);
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
+ void (*netlink_release)(struct sock *sk,
+ unsigned long *groups);
struct module *module;
struct rhash_head node;
@@ -64,6 +66,8 @@ struct netlink_table {
struct module *module;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
+ void (*release)(struct sock *sk,
+ unsigned long *groups);
int registered;
};
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a157247a1e45..6bd2ce51271f 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -593,8 +593,12 @@ static int genl_validate_ops(const struct genl_family *family)
return -EINVAL;
/* Check sort order */
- if (a->cmd < b->cmd)
+ if (a->cmd < b->cmd) {
continue;
+ } else if (a->cmd > b->cmd) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
if (a->internal_flags != b->internal_flags ||
((a->flags ^ b->flags) & ~(GENL_CMD_CAP_DO |
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 331730fd3580..fa955e892210 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -455,45 +455,6 @@ static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
return 0;
}
-static struct nf_conntrack_expect *
-ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
- u16 proto, const struct sk_buff *skb)
-{
- struct nf_conntrack_tuple tuple;
- struct nf_conntrack_expect *exp;
-
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
- return NULL;
-
- exp = __nf_ct_expect_find(net, zone, &tuple);
- if (exp) {
- struct nf_conntrack_tuple_hash *h;
-
- /* Delete existing conntrack entry, if it clashes with the
- * expectation. This can happen since conntrack ALGs do not
- * check for clashes between (new) expectations and existing
- * conntrack entries. nf_conntrack_in() will check the
- * expectations only if a conntrack entry can not be found,
- * which can lead to OVS finding the expectation (here) in the
- * init direction, but which will not be removed by the
- * nf_conntrack_in() call, if a matching conntrack entry is
- * found instead. In this case all init direction packets
- * would be reported as new related packets, while reply
- * direction packets would be reported as un-related
- * established packets.
- */
- h = nf_conntrack_find_get(net, zone, &tuple);
- if (h) {
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
- nf_ct_delete(ct, 0, 0);
- nf_ct_put(ct);
- }
- }
-
- return exp;
-}
-
/* This replicates logic from nf_conntrack_core.c that is not exported. */
static enum ip_conntrack_info
ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
@@ -852,36 +813,16 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
- struct nf_conntrack_expect *exp;
-
- /* If we pass an expected packet through nf_conntrack_in() the
- * expectation is typically removed, but the packet could still be
- * lost in upcall processing. To prevent this from happening we
- * perform an explicit expectation lookup. Expected connections are
- * always new, and will be passed through conntrack only when they are
- * committed, as it is OK to remove the expectation at that time.
- */
- exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
- if (exp) {
- u8 state;
-
- /* NOTE: New connections are NATted and Helped only when
- * committed, so we are not calling into NAT here.
- */
- state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
- __ovs_ct_update_key(key, state, &info->zone, exp->master);
- } else {
- struct nf_conn *ct;
- int err;
+ struct nf_conn *ct;
+ int err;
- err = __ovs_ct_lookup(net, key, info, skb);
- if (err)
- return err;
+ err = __ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ return err;
- ct = (struct nf_conn *)skb_nfct(skb);
- if (ct)
- nf_ct_deliver_cached_events(ct);
- }
+ ct = (struct nf_conn *)skb_nfct(skb);
+ if (ct)
+ nf_ct_deliver_cached_events(ct);
return 0;
}
@@ -1460,7 +1401,8 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (err)
goto err_free_ct;
- __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+ if (ct_info.commit)
+ __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
return 0;
err_free_ct:
__ovs_ct_free_action(&ct_info);
diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c
index 78beb74146e7..41ece61eb57a 100644
--- a/net/qrtr/af_qrtr.c
+++ b/net/qrtr/af_qrtr.c
@@ -23,6 +23,8 @@
#define QRTR_EPH_PORT_RANGE \
XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET)
+#define QRTR_PORT_CTRL_LEGACY 0xffff
+
/**
* struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
* @version: protocol version
@@ -495,6 +497,9 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
goto err;
}
+ if (cb->dst_port == QRTR_PORT_CTRL_LEGACY)
+ cb->dst_port = QRTR_PORT_CTRL;
+
if (!size || len != ALIGN(size, 4) + hdrlen)
goto err;
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 0f7a729f1a1f..b1db0b519179 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -16,7 +16,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/qrtr.h>
-static RADIX_TREE(nodes, GFP_KERNEL);
+static DEFINE_XARRAY(nodes);
static struct {
struct socket *sock;
@@ -66,14 +66,14 @@ struct qrtr_server {
struct qrtr_node {
unsigned int id;
- struct radix_tree_root servers;
+ struct xarray servers;
};
static struct qrtr_node *node_get(unsigned int node_id)
{
struct qrtr_node *node;
- node = radix_tree_lookup(&nodes, node_id);
+ node = xa_load(&nodes, node_id);
if (node)
return node;
@@ -83,8 +83,9 @@ static struct qrtr_node *node_get(unsigned int node_id)
return NULL;
node->id = node_id;
+ xa_init(&node->servers);
- if (radix_tree_insert(&nodes, node_id, node)) {
+ if (xa_store(&nodes, node_id, node, GFP_KERNEL)) {
kfree(node);
return NULL;
}
@@ -193,40 +194,23 @@ static void lookup_notify(struct sockaddr_qrtr *to, struct qrtr_server *srv,
static int announce_servers(struct sockaddr_qrtr *sq)
{
- struct radix_tree_iter iter;
struct qrtr_server *srv;
struct qrtr_node *node;
- void __rcu **slot;
+ unsigned long index;
int ret;
node = node_get(qrtr_ns.local_node);
if (!node)
return 0;
- rcu_read_lock();
/* Announce the list of servers registered in this node */
- radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
-
+ xa_for_each(&node->servers, index, srv) {
ret = service_announce_new(sq, srv);
if (ret < 0) {
pr_err("failed to announce new service\n");
return ret;
}
-
- rcu_read_lock();
}
-
- rcu_read_unlock();
-
return 0;
}
@@ -256,14 +240,17 @@ static struct qrtr_server *server_add(unsigned int service,
goto err;
/* Delete the old server on the same port */
- old = radix_tree_lookup(&node->servers, port);
+ old = xa_store(&node->servers, port, srv, GFP_KERNEL);
if (old) {
- radix_tree_delete(&node->servers, port);
- kfree(old);
+ if (xa_is_err(old)) {
+ pr_err("failed to add server [0x%x:0x%x] ret:%d\n",
+ srv->service, srv->instance, xa_err(old));
+ goto err;
+ } else {
+ kfree(old);
+ }
}
- radix_tree_insert(&node->servers, port, srv);
-
trace_qrtr_ns_server_add(srv->service, srv->instance,
srv->node, srv->port);
@@ -280,11 +267,11 @@ static int server_del(struct qrtr_node *node, unsigned int port, bool bcast)
struct qrtr_server *srv;
struct list_head *li;
- srv = radix_tree_lookup(&node->servers, port);
+ srv = xa_load(&node->servers, port);
if (!srv)
return -ENOENT;
- radix_tree_delete(&node->servers, port);
+ xa_erase(&node->servers, port);
/* Broadcast the removal of local servers */
if (srv->node == qrtr_ns.local_node && bcast)
@@ -344,13 +331,12 @@ static int ctrl_cmd_hello(struct sockaddr_qrtr *sq)
static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
{
struct qrtr_node *local_node;
- struct radix_tree_iter iter;
struct qrtr_ctrl_pkt pkt;
struct qrtr_server *srv;
struct sockaddr_qrtr sq;
struct msghdr msg = { };
struct qrtr_node *node;
- void __rcu **slot;
+ unsigned long index;
struct kvec iv;
int ret;
@@ -361,22 +347,9 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
if (!node)
return 0;
- rcu_read_lock();
/* Advertise removal of this client to all servers of remote node */
- radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
+ xa_for_each(&node->servers, index, srv)
server_del(node, srv->port, true);
- rcu_read_lock();
- }
- rcu_read_unlock();
/* Advertise the removal of this client to all local servers */
local_node = node_get(qrtr_ns.local_node);
@@ -387,18 +360,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE);
pkt.client.node = cpu_to_le32(from->sq_node);
- rcu_read_lock();
- radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
-
+ xa_for_each(&local_node->servers, index, srv) {
sq.sq_family = AF_QIPCRTR;
sq.sq_node = srv->node;
sq.sq_port = srv->port;
@@ -411,11 +373,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
pr_err("failed to send bye cmd\n");
return ret;
}
- rcu_read_lock();
}
-
- rcu_read_unlock();
-
return 0;
}
@@ -423,7 +381,6 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
unsigned int node_id, unsigned int port)
{
struct qrtr_node *local_node;
- struct radix_tree_iter iter;
struct qrtr_lookup *lookup;
struct qrtr_ctrl_pkt pkt;
struct msghdr msg = { };
@@ -432,7 +389,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
struct qrtr_node *node;
struct list_head *tmp;
struct list_head *li;
- void __rcu **slot;
+ unsigned long index;
struct kvec iv;
int ret;
@@ -477,18 +434,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
pkt.client.node = cpu_to_le32(node_id);
pkt.client.port = cpu_to_le32(port);
- rcu_read_lock();
- radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
-
+ xa_for_each(&local_node->servers, index, srv) {
sq.sq_family = AF_QIPCRTR;
sq.sq_node = srv->node;
sq.sq_port = srv->port;
@@ -501,11 +447,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
pr_err("failed to send del client cmd\n");
return ret;
}
- rcu_read_lock();
}
-
- rcu_read_unlock();
-
return 0;
}
@@ -576,13 +518,12 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from,
static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
unsigned int service, unsigned int instance)
{
- struct radix_tree_iter node_iter;
struct qrtr_server_filter filter;
- struct radix_tree_iter srv_iter;
struct qrtr_lookup *lookup;
+ struct qrtr_server *srv;
struct qrtr_node *node;
- void __rcu **node_slot;
- void __rcu **srv_slot;
+ unsigned long node_idx;
+ unsigned long srv_idx;
/* Accept only local observers */
if (from->sq_node != qrtr_ns.local_node)
@@ -601,40 +542,14 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
filter.service = service;
filter.instance = instance;
- rcu_read_lock();
- radix_tree_for_each_slot(node_slot, &nodes, &node_iter, 0) {
- node = radix_tree_deref_slot(node_slot);
- if (!node)
- continue;
- if (radix_tree_deref_retry(node)) {
- node_slot = radix_tree_iter_retry(&node_iter);
- continue;
- }
- node_slot = radix_tree_iter_resume(node_slot, &node_iter);
-
- radix_tree_for_each_slot(srv_slot, &node->servers,
- &srv_iter, 0) {
- struct qrtr_server *srv;
-
- srv = radix_tree_deref_slot(srv_slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- srv_slot = radix_tree_iter_retry(&srv_iter);
- continue;
- }
-
+ xa_for_each(&nodes, node_idx, node) {
+ xa_for_each(&node->servers, srv_idx, srv) {
if (!server_match(srv, &filter))
continue;
- srv_slot = radix_tree_iter_resume(srv_slot, &srv_iter);
-
- rcu_read_unlock();
lookup_notify(from, srv, true);
- rcu_read_lock();
}
}
- rcu_read_unlock();
/* Empty notification, to indicate end of listing */
lookup_notify(from, NULL, true);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 4b95cb1ac435..470c70deffe2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -347,8 +347,7 @@ config NET_SCH_FQ_PIE
config NET_SCH_INGRESS
tristate "Ingress/classifier-action Qdisc"
depends on NET_CLS_ACT
- select NET_INGRESS
- select NET_EGRESS
+ select NET_XGRESS
help
Say Y here if you want to use classifiers for incoming and/or outgoing
packets. This qdisc doesn't do anything else besides running classifiers,
@@ -679,6 +678,7 @@ config NET_EMATCH_IPT
config NET_CLS_ACT
bool "Actions"
select NET_CLS
+ select NET_XGRESS
help
Say Y here if you want to use traffic control actions. Actions
get attached to classifiers and are invoked after a successful
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index abc71a06d634..7c652d14528b 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1238,7 +1238,8 @@ static int tcf_ct_fill_params(struct net *net,
}
}
- __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+ if (p->ct_action & TCA_CT_ACT_COMMIT)
+ __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
return 0;
err:
nf_ct_put(p->tmpl);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 325c29041c7d..333800a7d4eb 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1810,10 +1810,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
goto failure;
}
- if (hopt->quantum) {
- NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
- goto failure;
- }
}
/* Keeping backward compatible with rate_table based iproute2 tc */
@@ -1910,6 +1906,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
.prio = hopt->prio,
+ .quantum = hopt->quantum,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -1931,6 +1928,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
.prio = hopt->prio,
+ .quantum = hopt->quantum,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -2017,6 +2015,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
.prio = hopt->prio,
+ .quantum = hopt->quantum,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index e43a45499372..a463a63192c3 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -13,6 +13,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tcx.h>
struct ingress_sched_data {
struct tcf_block *block;
@@ -78,6 +79,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
{
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *entry;
+ bool created;
int err;
if (sch->parent != TC_H_INGRESS)
@@ -85,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
net_inc_ingress_queue();
- mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
+ entry = tcx_entry_fetch_or_create(dev, true, &created);
+ if (!entry)
+ return -ENOMEM;
+ tcx_miniq_set_active(entry, true);
+ mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq);
+ if (created)
+ tcx_entry_update(dev, entry, true);
q->block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
q->block_info.chain_head_change = clsact_chain_head_change;
@@ -103,11 +112,22 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *entry = rtnl_dereference(dev->tcx_ingress);
if (sch->parent != TC_H_INGRESS)
return;
tcf_block_put_ext(q->block, sch, &q->block_info);
+
+ if (entry) {
+ tcx_miniq_set_active(entry, false);
+ if (!tcx_entry_is_active(entry)) {
+ tcx_entry_update(dev, NULL, true);
+ tcx_entry_free(entry);
+ }
+ }
+
net_dec_ingress_queue();
}
@@ -223,6 +243,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
{
struct clsact_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *entry;
+ bool created;
int err;
if (sch->parent != TC_H_CLSACT)
@@ -231,7 +253,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
net_inc_ingress_queue();
net_inc_egress_queue();
- mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
+ entry = tcx_entry_fetch_or_create(dev, true, &created);
+ if (!entry)
+ return -ENOMEM;
+ tcx_miniq_set_active(entry, true);
+ mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq);
+ if (created)
+ tcx_entry_update(dev, entry, true);
q->ingress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
q->ingress_block_info.chain_head_change = clsact_chain_head_change;
@@ -244,7 +272,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block);
- mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
+ entry = tcx_entry_fetch_or_create(dev, false, &created);
+ if (!entry)
+ return -ENOMEM;
+ tcx_miniq_set_active(entry, true);
+ mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq);
+ if (created)
+ tcx_entry_update(dev, entry, false);
q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
q->egress_block_info.chain_head_change = clsact_chain_head_change;
@@ -256,12 +290,31 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
static void clsact_destroy(struct Qdisc *sch)
{
struct clsact_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *ingress_entry = rtnl_dereference(dev->tcx_ingress);
+ struct bpf_mprog_entry *egress_entry = rtnl_dereference(dev->tcx_egress);
if (sch->parent != TC_H_CLSACT)
return;
- tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
+ tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
+
+ if (ingress_entry) {
+ tcx_miniq_set_active(ingress_entry, false);
+ if (!tcx_entry_is_active(ingress_entry)) {
+ tcx_entry_update(dev, NULL, true);
+ tcx_entry_free(ingress_entry);
+ }
+ }
+
+ if (egress_entry) {
+ tcx_miniq_set_active(egress_entry, false);
+ if (!tcx_entry_is_active(egress_entry)) {
+ tcx_entry_update(dev, NULL, false);
+ tcx_entry_free(egress_entry);
+ }
+ }
net_dec_ingress_queue();
net_dec_egress_queue();
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 274d07bd774f..33c0895e101c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -435,7 +435,8 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl4->fl4_dport = daddr->v4.sin_port;
fl4->flowi4_proto = IPPROTO_SCTP;
if (asoc) {
- fl4->flowi4_tos = RT_CONN_FLAGS_TOS(asoc->base.sk, tos);
+ fl4->flowi4_tos = RT_TOS(tos);
+ fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk);
fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
fl4->fl4_sport = htons(asoc->base.bind_addr.port);
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9388d98aebc0..6e3d28aa587c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -9732,6 +9732,7 @@ struct proto sctpv6_prot = {
.unhash = sctp_unhash,
.no_autobind = true,
.obj_size = sizeof(struct sctp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct sctp6_sock, inet6),
.useroffset = offsetof(struct sctp6_sock, sctp.subscribe),
.usersize = offsetof(struct sctp6_sock, sctp.initmsg) -
offsetof(struct sctp6_sock, sctp.subscribe) +
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8cc42aea19c7..5b045284849e 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -862,3 +862,28 @@ void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
NULL);
}
EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
+
+int switchdev_bridge_port_replay(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_brport_info brport_info = {
+ .brport = {
+ .dev = dev,
+ .ctx = ctx,
+ .atomic_nb = atomic_nb,
+ .blocking_nb = blocking_nb,
+ },
+ };
+ int err;
+
+ ASSERT_RTNL();
+
+ err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_REPLAY,
+ brport_dev, &brport_info.info,
+ extack);
+ return notifier_to_errno(err);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_replay);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index b769fc258931..352d042b130b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -348,37 +348,34 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk,
size_t len)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- size_t bytes, total = 0, off;
- struct sk_buff *skb, *tmp;
- int err = -EFAULT;
+ struct sk_buff *skb;
+ size_t total = 0;
+ int err;
spin_lock_bh(&vvs->rx_lock);
- skb_queue_walk_safe(&vvs->rx_queue, skb, tmp) {
- off = 0;
+ skb_queue_walk(&vvs->rx_queue, skb) {
+ size_t bytes;
- if (total == len)
- break;
+ bytes = len - total;
+ if (bytes > skb->len)
+ bytes = skb->len;
- while (total < len && off < skb->len) {
- bytes = len - total;
- if (bytes > skb->len - off)
- bytes = skb->len - off;
+ spin_unlock_bh(&vvs->rx_lock);
- /* sk_lock is held by caller so no one else can dequeue.
- * Unlock rx_lock since memcpy_to_msg() may sleep.
- */
- spin_unlock_bh(&vvs->rx_lock);
+ /* sk_lock is held by caller so no one else can dequeue.
+ * Unlock rx_lock since memcpy_to_msg() may sleep.
+ */
+ err = memcpy_to_msg(msg, skb->data, bytes);
+ if (err)
+ goto out;
- err = memcpy_to_msg(msg, skb->data + off, bytes);
- if (err)
- goto out;
+ total += bytes;
- spin_lock_bh(&vvs->rx_lock);
+ spin_lock_bh(&vvs->rx_lock);
- total += bytes;
- off += bytes;
- }
+ if (total == len)
+ break;
}
spin_unlock_bh(&vvs->rx_lock);
@@ -463,6 +460,63 @@ out:
return err;
}
+static ssize_t
+virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk,
+ struct msghdr *msg)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ struct sk_buff *skb;
+ size_t total, len;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ if (!vvs->msg_count) {
+ spin_unlock_bh(&vvs->rx_lock);
+ return 0;
+ }
+
+ total = 0;
+ len = msg_data_left(msg);
+
+ skb_queue_walk(&vvs->rx_queue, skb) {
+ struct virtio_vsock_hdr *hdr;
+
+ if (total < len) {
+ size_t bytes;
+ int err;
+
+ bytes = len - total;
+ if (bytes > skb->len)
+ bytes = skb->len;
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ /* sk_lock is held by caller so no one else can dequeue.
+ * Unlock rx_lock since memcpy_to_msg() may sleep.
+ */
+ err = memcpy_to_msg(msg, skb->data, bytes);
+ if (err)
+ return err;
+
+ spin_lock_bh(&vvs->rx_lock);
+ }
+
+ total += skb->len;
+ hdr = virtio_vsock_hdr(skb);
+
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
+ msg->msg_flags |= MSG_EOR;
+
+ break;
+ }
+ }
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ return total;
+}
+
static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
int flags)
@@ -557,9 +611,9 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
int flags)
{
if (flags & MSG_PEEK)
- return -EOPNOTSUPP;
-
- return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
+ return virtio_transport_seqpacket_do_peek(vsk, msg);
+ else
+ return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 31dca4ecb2c5..4f1e0599146e 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -135,14 +135,14 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
return 0;
}
-static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
+ u32 flags)
{
- struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
u64 addr;
int err;
addr = xp_get_handle(xskb);
- err = xskq_prod_reserve_desc(xs->rx, addr, len);
+ err = xskq_prod_reserve_desc(xs->rx, addr, len, flags);
if (err) {
xs->rx_queue_full++;
return err;
@@ -152,48 +152,138 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return 0;
}
-static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
+static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- void *from_buf, *to_buf;
- u32 metalen;
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+ u32 frags = xdp_buff_has_frags(xdp);
+ struct xdp_buff_xsk *pos, *tmp;
+ struct list_head *xskb_list;
+ u32 contd = 0;
+ int err;
- if (unlikely(xdp_data_meta_unsupported(from))) {
- from_buf = from->data;
- to_buf = to->data;
- metalen = 0;
- } else {
- from_buf = from->data_meta;
- metalen = from->data - from->data_meta;
- to_buf = to->data - metalen;
+ if (frags)
+ contd = XDP_PKT_CONTD;
+
+ err = __xsk_rcv_zc(xs, xskb, len, contd);
+ if (err || likely(!frags))
+ goto out;
+
+ xskb_list = &xskb->pool->xskb_list;
+ list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
+ if (list_is_singular(xskb_list))
+ contd = 0;
+ len = pos->xdp.data_end - pos->xdp.data;
+ err = __xsk_rcv_zc(xs, pos, len, contd);
+ if (err)
+ return err;
+ list_del(&pos->xskb_list_node);
}
- memcpy(to_buf, from_buf, len + metalen);
+out:
+ return err;
}
-static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static void *xsk_copy_xdp_start(struct xdp_buff *from)
{
+ if (unlikely(xdp_data_meta_unsupported(from)))
+ return from->data;
+ else
+ return from->data_meta;
+}
+
+static u32 xsk_copy_xdp(void *to, void **from, u32 to_len,
+ u32 *from_len, skb_frag_t **frag, u32 rem)
+{
+ u32 copied = 0;
+
+ while (1) {
+ u32 copy_len = min_t(u32, *from_len, to_len);
+
+ memcpy(to, *from, copy_len);
+ copied += copy_len;
+ if (rem == copied)
+ return copied;
+
+ if (*from_len == copy_len) {
+ *from = skb_frag_address(*frag);
+ *from_len = skb_frag_size((*frag)++);
+ } else {
+ *from += copy_len;
+ *from_len -= copy_len;
+ }
+ if (to_len == copy_len)
+ return copied;
+
+ to_len -= copy_len;
+ to += copy_len;
+ }
+}
+
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+{
+ u32 frame_size = xsk_pool_get_rx_frame_size(xs->pool);
+ void *copy_from = xsk_copy_xdp_start(xdp), *copy_to;
+ u32 from_len, meta_len, rem, num_desc;
+ struct xdp_buff_xsk *xskb;
struct xdp_buff *xsk_xdp;
- int err;
- u32 len;
+ skb_frag_t *frag;
- len = xdp->data_end - xdp->data;
- if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
- xs->rx_dropped++;
- return -ENOSPC;
+ from_len = xdp->data_end - copy_from;
+ meta_len = xdp->data - copy_from;
+ rem = len + meta_len;
+
+ if (len <= frame_size && !xdp_buff_has_frags(xdp)) {
+ int err;
+
+ xsk_xdp = xsk_buff_alloc(xs->pool);
+ if (!xsk_xdp) {
+ xs->rx_dropped++;
+ return -ENOMEM;
+ }
+ memcpy(xsk_xdp->data - meta_len, copy_from, rem);
+ xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
+ err = __xsk_rcv_zc(xs, xskb, len, 0);
+ if (err) {
+ xsk_buff_free(xsk_xdp);
+ return err;
+ }
+
+ return 0;
}
- xsk_xdp = xsk_buff_alloc(xs->pool);
- if (!xsk_xdp) {
+ num_desc = (len - 1) / frame_size + 1;
+
+ if (!xsk_buff_can_alloc(xs->pool, num_desc)) {
xs->rx_dropped++;
return -ENOMEM;
}
+ if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) {
+ xs->rx_queue_full++;
+ return -ENOBUFS;
+ }
- xsk_copy_xdp(xsk_xdp, xdp, len);
- err = __xsk_rcv_zc(xs, xsk_xdp, len);
- if (err) {
- xsk_buff_free(xsk_xdp);
- return err;
+ if (xdp_buff_has_frags(xdp)) {
+ struct skb_shared_info *sinfo;
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ frag = &sinfo->frags[0];
}
+
+ do {
+ u32 to_len = frame_size + meta_len;
+ u32 copied;
+
+ xsk_xdp = xsk_buff_alloc(xs->pool);
+ copy_to = xsk_xdp->data - meta_len;
+
+ copied = xsk_copy_xdp(copy_to, &copy_from, to_len, &from_len, &frag, rem);
+ rem -= copied;
+
+ xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
+ __xsk_rcv_zc(xs, xskb, copied - meta_len, rem ? XDP_PKT_CONTD : 0);
+ meta_len = 0;
+ } while (rem);
+
return 0;
}
@@ -215,7 +305,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
return false;
}
-static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
if (!xsk_is_bound(xs))
return -ENXIO;
@@ -223,6 +313,11 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
+ if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
+ xs->rx_dropped++;
+ return -ENOSPC;
+ }
+
sk_mark_napi_id_once_xdp(&xs->sk, xdp);
return 0;
}
@@ -236,12 +331,13 @@ static void xsk_flush(struct xdp_sock *xs)
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
+ u32 len = xdp_get_buff_len(xdp);
int err;
spin_lock_bh(&xs->rx_lock);
- err = xsk_rcv_check(xs, xdp);
+ err = xsk_rcv_check(xs, xdp, len);
if (!err) {
- err = __xsk_rcv(xs, xdp);
+ err = __xsk_rcv(xs, xdp, len);
xsk_flush(xs);
}
spin_unlock_bh(&xs->rx_lock);
@@ -250,19 +346,19 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
+ u32 len = xdp_get_buff_len(xdp);
int err;
- u32 len;
- err = xsk_rcv_check(xs, xdp);
+ err = xsk_rcv_check(xs, xdp, len);
if (err)
return err;
if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
len = xdp->data_end - xdp->data;
- return __xsk_rcv_zc(xs, xdp, len);
+ return xsk_rcv_zc(xs, xdp, len);
}
- err = __xsk_rcv(xs, xdp);
+ err = __xsk_rcv(xs, xdp, len);
if (!err)
xdp_return_buff(xdp);
return err;
@@ -321,7 +417,8 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
rcu_read_lock();
list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
- xs->tx->queue_empty_descs++;
+ if (xskq_has_descs(xs->tx))
+ xskq_cons_release(xs->tx);
continue;
}
@@ -408,37 +505,91 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
-static void xsk_destruct_skb(struct sk_buff *skb)
+static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ ret = xskq_prod_reserve_addr(xs->pool->cq, addr);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+
+ return ret;
+}
+
+static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n)
{
- u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
- struct xdp_sock *xs = xdp_sk(skb->sk);
unsigned long flags;
spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_submit_addr(xs->pool->cq, addr);
+ xskq_prod_submit_n(xs->pool->cq, n);
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+}
+
+static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ xskq_prod_cancel_n(xs->pool->cq, n);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+}
+
+static u32 xsk_get_num_desc(struct sk_buff *skb)
+{
+ return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
+}
+
+static void xsk_destruct_skb(struct sk_buff *skb)
+{
+ xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
sock_wfree(skb);
}
+static void xsk_set_destructor_arg(struct sk_buff *skb)
+{
+ long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;
+
+ skb_shinfo(skb)->destructor_arg = (void *)num;
+}
+
+static void xsk_consume_skb(struct sk_buff *skb)
+{
+ struct xdp_sock *xs = xdp_sk(skb->sk);
+
+ skb->destructor = sock_wfree;
+ xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb));
+ /* Free skb without triggering the perf drop trace */
+ consume_skb(skb);
+ xs->skb = NULL;
+}
+
+static void xsk_drop_skb(struct sk_buff *skb)
+{
+ xdp_sk(skb->sk)->tx->invalid_descs += xsk_get_num_desc(skb);
+ xsk_consume_skb(skb);
+}
+
static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
struct xdp_desc *desc)
{
struct xsk_buff_pool *pool = xs->pool;
u32 hr, len, ts, offset, copy, copied;
- struct sk_buff *skb;
+ struct sk_buff *skb = xs->skb;
struct page *page;
void *buffer;
int err, i;
u64 addr;
- hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
+ if (!skb) {
+ hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
- skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
- if (unlikely(!skb))
- return ERR_PTR(err);
+ skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
+ if (unlikely(!skb))
+ return ERR_PTR(err);
- skb_reserve(skb, hr);
+ skb_reserve(skb, hr);
+ }
addr = desc->addr;
len = desc->len;
@@ -448,7 +599,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
offset = offset_in_page(buffer);
addr = buffer - pool->addrs;
- for (copied = 0, i = 0; copied < len; i++) {
+ for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
+ if (unlikely(i >= MAX_SKB_FRAGS))
+ return ERR_PTR(-EFAULT);
+
page = pool->umem->pgs[addr >> PAGE_SHIFT];
get_page(page);
@@ -473,43 +627,77 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
struct xdp_desc *desc)
{
struct net_device *dev = xs->dev;
- struct sk_buff *skb;
+ struct sk_buff *skb = xs->skb;
+ int err;
if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
skb = xsk_build_skb_zerocopy(xs, desc);
- if (IS_ERR(skb))
- return skb;
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ goto free_err;
+ }
} else {
u32 hr, tr, len;
void *buffer;
- int err;
- hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
- tr = dev->needed_tailroom;
+ buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
len = desc->len;
- skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
- if (unlikely(!skb))
- return ERR_PTR(err);
+ if (!skb) {
+ hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
+ tr = dev->needed_tailroom;
+ skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
+ if (unlikely(!skb))
+ goto free_err;
- skb_reserve(skb, hr);
- skb_put(skb, len);
+ skb_reserve(skb, hr);
+ skb_put(skb, len);
- buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
- err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err)) {
- kfree_skb(skb);
- return ERR_PTR(err);
+ err = skb_store_bits(skb, 0, buffer, len);
+ if (unlikely(err))
+ goto free_err;
+ } else {
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ struct page *page;
+ u8 *vaddr;
+
+ if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
+ err = -EFAULT;
+ goto free_err;
+ }
+
+ page = alloc_page(xs->sk.sk_allocation);
+ if (unlikely(!page)) {
+ err = -EAGAIN;
+ goto free_err;
+ }
+
+ vaddr = kmap_local_page(page);
+ memcpy(vaddr, buffer, len);
+ kunmap_local(vaddr);
+
+ skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
}
}
skb->dev = dev;
skb->priority = xs->sk.sk_priority;
skb->mark = xs->sk.sk_mark;
- skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
skb->destructor = xsk_destruct_skb;
+ xsk_set_destructor_arg(skb);
return skb;
+
+free_err:
+ if (err == -EAGAIN) {
+ xsk_cq_cancel_locked(xs, 1);
+ } else {
+ xsk_set_destructor_arg(skb);
+ xsk_drop_skb(skb);
+ xskq_cons_release(xs->tx);
+ }
+
+ return ERR_PTR(err);
}
static int __xsk_generic_xmit(struct sock *sk)
@@ -519,7 +707,6 @@ static int __xsk_generic_xmit(struct sock *sk)
bool sent_frame = false;
struct xdp_desc desc;
struct sk_buff *skb;
- unsigned long flags;
int err = 0;
mutex_lock(&xs->mutex);
@@ -544,47 +731,51 @@ static int __xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- if (xskq_prod_reserve(xs->pool->cq)) {
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ if (xsk_cq_reserve_addr_locked(xs, desc.addr))
goto out;
- }
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
skb = xsk_build_skb(xs, &desc);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_cancel(xs->pool->cq);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
- goto out;
+ if (err == -EAGAIN)
+ goto out;
+ err = 0;
+ continue;
+ }
+
+ xskq_cons_release(xs->tx);
+
+ if (xp_mb_desc(&desc)) {
+ xs->skb = skb;
+ continue;
}
err = __dev_direct_xmit(skb, xs->queue_id);
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */
- skb->destructor = sock_wfree;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_cancel(xs->pool->cq);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
- /* Free skb without triggering the perf drop trace */
- consume_skb(skb);
+ xskq_cons_cancel_n(xs->tx, xsk_get_num_desc(skb));
+ xsk_consume_skb(skb);
err = -EAGAIN;
goto out;
}
- xskq_cons_release(xs->tx);
/* Ignore NET_XMIT_CN as packet might have been sent */
if (err == NET_XMIT_DROP) {
/* SKB completed but not sent */
err = -EBUSY;
+ xs->skb = NULL;
goto out;
}
sent_frame = true;
+ xs->skb = NULL;
}
- xs->tx->queue_empty_descs++;
+ if (xskq_has_descs(xs->tx)) {
+ if (xs->skb)
+ xsk_drop_skb(xs->skb);
+ xskq_cons_release(xs->tx);
+ }
out:
if (sent_frame)
@@ -834,6 +1025,9 @@ static int xsk_release(struct socket *sock)
net = sock_net(sk);
+ if (xs->skb)
+ xsk_drop_skb(xs->skb);
+
mutex_lock(&net->xdp.lock);
sk_del_node_init_rcu(sk);
mutex_unlock(&net->xdp.lock);
@@ -897,7 +1091,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
flags = sxdp->sxdp_flags;
if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
- XDP_USE_NEED_WAKEUP))
+ XDP_USE_NEED_WAKEUP | XDP_USE_SG))
return -EINVAL;
bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
@@ -929,7 +1123,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
struct socket *sock;
if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
- (flags & XDP_USE_NEED_WAKEUP)) {
+ (flags & XDP_USE_NEED_WAKEUP) || (flags & XDP_USE_SG)) {
/* Cannot specify flags for shared sockets. */
err = -EINVAL;
goto out_unlock;
@@ -1028,6 +1222,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->dev = dev;
xs->zc = xs->umem->zc;
+ xs->sg = !!(flags & XDP_USE_SG);
xs->queue_id = qid;
xp_add_xsk(xs->pool, xs);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 26f6d304451e..b3f7b310811e 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -86,6 +86,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
pool->umem = umem;
pool->addrs = umem->addrs;
INIT_LIST_HEAD(&pool->free_list);
+ INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
spin_lock_init(&pool->xsk_tx_list_lock);
spin_lock_init(&pool->cq_lock);
@@ -99,6 +100,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
xskb->pool = pool;
xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
INIT_LIST_HEAD(&xskb->free_list_node);
+ INIT_LIST_HEAD(&xskb->xskb_list_node);
if (pool->unaligned)
pool->free_heads[i] = xskb;
else
@@ -187,6 +189,11 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
goto err_unreg_pool;
}
+ if (netdev->xdp_zc_max_segs == 1 && (flags & XDP_USE_SG)) {
+ err = -EOPNOTSUPP;
+ goto err_unreg_pool;
+ }
+
bpf.command = XDP_SETUP_XSK_POOL;
bpf.xsk.pool = pool;
bpf.xsk.queue_id = queue_id;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 6d40a77fccbe..13354a1e4280 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -48,6 +48,11 @@ struct xsk_queue {
size_t ring_vmalloc_size;
};
+struct parsed_desc {
+ u32 mb;
+ u32 valid;
+};
+
/* The structure of the shared state of the rings are a simple
* circular buffer, as outlined in
* Documentation/core-api/circular-buffers.rst. For the Rx and
@@ -130,18 +135,26 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
return false;
}
+static inline bool xp_unused_options_set(u32 options)
+{
+ return options & ~XDP_PKT_CONTD;
+}
+
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
u64 offset = desc->addr & (pool->chunk_size - 1);
+ if (!desc->len)
+ return false;
+
if (offset + desc->len > pool->chunk_size)
return false;
if (desc->addr >= pool->addrs_cnt)
return false;
- if (desc->options)
+ if (xp_unused_options_set(desc->options))
return false;
return true;
}
@@ -151,6 +164,9 @@ static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
{
u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ if (!desc->len)
+ return false;
+
if (desc->len > pool->chunk_size)
return false;
@@ -158,7 +174,7 @@ static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
return false;
- if (desc->options)
+ if (xp_unused_options_set(desc->options))
return false;
return true;
}
@@ -170,6 +186,11 @@ static inline bool xp_validate_desc(struct xsk_buff_pool *pool,
xp_aligned_validate_desc(pool, desc);
}
+static inline bool xskq_has_descs(struct xsk_queue *q)
+{
+ return q->cached_cons != q->cached_prod;
+}
+
static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
struct xdp_desc *d,
struct xsk_buff_pool *pool)
@@ -185,17 +206,15 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q,
struct xdp_desc *desc,
struct xsk_buff_pool *pool)
{
- while (q->cached_cons != q->cached_prod) {
+ if (q->cached_cons != q->cached_prod) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx = q->cached_cons & q->ring_mask;
*desc = ring->desc[idx];
- if (xskq_cons_is_valid_desc(q, desc, pool))
- return true;
-
- q->cached_cons++;
+ return xskq_cons_is_valid_desc(q, desc, pool);
}
+ q->queue_empty_descs++;
return false;
}
@@ -204,30 +223,52 @@ static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
q->cached_cons += cnt;
}
-static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
- u32 max)
+static inline void parse_desc(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ struct xdp_desc *desc, struct parsed_desc *parsed)
+{
+ parsed->valid = xskq_cons_is_valid_desc(q, desc, pool);
+ parsed->mb = xp_mb_desc(desc);
+}
+
+static inline
+u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ u32 max)
{
u32 cached_cons = q->cached_cons, nb_entries = 0;
struct xdp_desc *descs = pool->tx_descs;
+ u32 total_descs = 0, nr_frags = 0;
+ /* track first entry, if stumble upon *any* invalid descriptor, rewind
+ * current packet that consists of frags and stop the processing
+ */
while (cached_cons != q->cached_prod && nb_entries < max) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx = cached_cons & q->ring_mask;
+ struct parsed_desc parsed;
descs[nb_entries] = ring->desc[idx];
- if (unlikely(!xskq_cons_is_valid_desc(q, &descs[nb_entries], pool))) {
- /* Skip the entry */
- cached_cons++;
- continue;
+ cached_cons++;
+ parse_desc(q, pool, &descs[nb_entries], &parsed);
+ if (unlikely(!parsed.valid))
+ break;
+
+ if (likely(!parsed.mb)) {
+ total_descs += (nr_frags + 1);
+ nr_frags = 0;
+ } else {
+ nr_frags++;
+ if (nr_frags == pool->netdev->xdp_zc_max_segs) {
+ nr_frags = 0;
+ break;
+ }
}
-
nb_entries++;
- cached_cons++;
}
+ cached_cons -= nr_frags;
/* Release valid plus any invalid entries */
xskq_cons_release_n(q, cached_cons - q->cached_cons);
- return nb_entries;
+ return total_descs;
}
/* Functions for consumers */
@@ -292,6 +333,11 @@ static inline void xskq_cons_release(struct xsk_queue *q)
q->cached_cons++;
}
+static inline void xskq_cons_cancel_n(struct xsk_queue *q, u32 cnt)
+{
+ q->cached_cons -= cnt;
+}
+
static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
{
/* No barriers needed since data is not accessed */
@@ -319,9 +365,9 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
return xskq_prod_nb_free(q, 1) ? false : true;
}
-static inline void xskq_prod_cancel(struct xsk_queue *q)
+static inline void xskq_prod_cancel_n(struct xsk_queue *q, u32 cnt)
{
- q->cached_prod--;
+ q->cached_prod -= cnt;
}
static inline int xskq_prod_reserve(struct xsk_queue *q)
@@ -360,7 +406,7 @@ static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_de
}
static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
- u64 addr, u32 len)
+ u64 addr, u32 len, u32 flags)
{
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx;
@@ -372,6 +418,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
idx = q->cached_prod++ & q->ring_mask;
ring->desc[idx].addr = addr;
ring->desc[idx].len = len;
+ ring->desc[idx].options = flags;
return 0;
}
@@ -386,16 +433,6 @@ static inline void xskq_prod_submit(struct xsk_queue *q)
__xskq_prod_submit(q, q->cached_prod);
}
-static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr)
-{
- struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- u32 idx = q->ring->producer;
-
- ring->desc[idx++ & q->ring_mask] = addr;
-
- __xskq_prod_submit(q, idx);
-}
-
static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
{
__xskq_prod_submit(q, q->ring->producer + nb_entries);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 533697e2488f..3784534c9185 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -247,12 +247,6 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return -EINVAL;
}
- /* We don't yet support UDP encapsulation and TFC padding. */
- if (x->encap || x->tfcpad) {
- NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded");
- return -EINVAL;
- }
-
if (xuo->flags &
~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET)) {
NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request");
@@ -260,6 +254,13 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
}
is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET;
+
+ /* We don't yet support UDP encapsulation and TFC padding. */
+ if ((!is_packet_offload && x->encap) || x->tfcpad) {
+ NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded");
+ return -EINVAL;
+ }
+
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 615f24ebc49c..595b98d825ce 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -248,7 +248,7 @@ BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
- $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
+ $(CLANG) --target=bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
$(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \
/bin/rm -f ./llvm_btf_verify.o)
@@ -370,7 +370,7 @@ endif
clean-files += vmlinux.h
# Get Clang's default includes on this system, as opposed to those seen by
-# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
@@ -392,7 +392,7 @@ $(obj)/xdp_router_ipv4.bpf.o: $(obj)/xdp_sample.bpf.o
$(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/xdp_sample_shared.h
@echo " CLANG-BPF " $@
- $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(SRCARCH) \
+ $(Q)$(CLANG) -g -O2 --target=bpf -D__TARGET_ARCH_$(SRCARCH) \
-Wno-compare-distinct-pointer-types -I$(srctree)/include \
-I$(srctree)/samples/bpf -I$(srctree)/tools/include \
-I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \
diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst
index 57f93edd1957..f16fc48e55a5 100644
--- a/samples/bpf/README.rst
+++ b/samples/bpf/README.rst
@@ -8,11 +8,14 @@ Build dependencies
==================
Compiling requires having installed:
- * clang >= version 3.4.0
- * llvm >= version 3.7.1
+ * clang
+ * llvm
+ * pahole
-Note that LLVM's tool 'llc' must support target 'bpf', list version
-and supported targets with command: ``llc --version``
+Consult :ref:`Documentation/process/changes.rst <changes>` for the minimum
+version numbers required and how to update them. Note that LLVM's tool
+'llc' must support target 'bpf', list version and supported targets with
+command: ``llc --version``
Clean and configuration
-----------------------
@@ -24,7 +27,8 @@ after some changes (on demand)::
make -C samples/bpf clean
make clean
-Configure kernel, defconfig for instance::
+Configure kernel, defconfig for instance
+(see "tools/testing/selftests/bpf/config" for a reference config)::
make defconfig
diff --git a/samples/bpf/gnu/stubs.h b/samples/bpf/gnu/stubs.h
index 719225b16626..1c638d9dce1a 100644
--- a/samples/bpf/gnu/stubs.h
+++ b/samples/bpf/gnu/stubs.h
@@ -1 +1 @@
-/* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */
+/* dummy .h to trick /usr/include/features.h to work with 'clang --target=bpf' */
diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c
index e7121dd1ee37..090fecfe641a 100644
--- a/samples/bpf/syscall_tp_kern.c
+++ b/samples/bpf/syscall_tp_kern.c
@@ -44,12 +44,14 @@ static __always_inline void count(void *map)
bpf_map_update_elem(map, &key, &init_val, BPF_NOEXIST);
}
+#if !defined(__aarch64__)
SEC("tracepoint/syscalls/sys_enter_open")
int trace_enter_open(struct syscalls_enter_open_args *ctx)
{
count(&enter_open_map);
return 0;
}
+#endif
SEC("tracepoint/syscalls/sys_enter_openat")
int trace_enter_open_at(struct syscalls_enter_open_args *ctx)
@@ -65,12 +67,14 @@ int trace_enter_open_at2(struct syscalls_enter_open_args *ctx)
return 0;
}
+#if !defined(__aarch64__)
SEC("tracepoint/syscalls/sys_exit_open")
int trace_enter_exit(struct syscalls_exit_open_args *ctx)
{
count(&exit_open_map);
return 0;
}
+#endif
SEC("tracepoint/syscalls/sys_exit_openat")
int trace_enter_exit_at(struct syscalls_exit_open_args *ctx)
diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh
index 0bf2d0f6bf4b..148e2df6cdce 100755
--- a/samples/bpf/test_lwt_bpf.sh
+++ b/samples/bpf/test_lwt_bpf.sh
@@ -376,7 +376,7 @@ DST_MAC=$(lookup_mac $VETH1 $NS1)
SRC_MAC=$(lookup_mac $VETH0)
DST_IFINDEX=$(cat /sys/class/net/$VETH0/ifindex)
-CLANG_OPTS="-O2 -target bpf -I ../include/"
+CLANG_OPTS="-O2 --target=bpf -I ../include/"
CLANG_OPTS+=" -DSRC_MAC=$SRC_MAC -DDST_MAC=$DST_MAC -DDST_IFINDEX=$DST_IFINDEX"
clang $CLANG_OPTS -c $PROG_SRC -o $BPF_PROG
diff --git a/samples/hid/Makefile b/samples/hid/Makefile
index 026288280a03..9f7fe29dd749 100644
--- a/samples/hid/Makefile
+++ b/samples/hid/Makefile
@@ -86,7 +86,7 @@ BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
- $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
+ $(CLANG) --target=bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
$(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \
/bin/rm -f ./llvm_btf_verify.o)
@@ -181,7 +181,7 @@ endif
clean-files += vmlinux.h
# Get Clang's default includes on this system, as opposed to those seen by
-# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
@@ -198,7 +198,7 @@ EXTRA_BPF_HEADERS_SRC := $(addprefix $(src)/,$(EXTRA_BPF_HEADERS))
$(obj)/%.bpf.o: $(src)/%.bpf.c $(EXTRA_BPF_HEADERS_SRC) $(obj)/vmlinux.h
@echo " CLANG-BPF " $@
- $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(SRCARCH) \
+ $(Q)$(CLANG) -g -O2 --target=bpf -D__TARGET_ARCH_$(SRCARCH) \
-Wno-compare-distinct-pointer-types -I$(srctree)/include \
-I$(srctree)/samples/bpf -I$(srctree)/tools/include \
-I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \
diff --git a/security/security.c b/security/security.c
index b720424ca37d..2dfc7b9f6ed9 100644
--- a/security/security.c
+++ b/security/security.c
@@ -4396,7 +4396,7 @@ void security_sk_clone(const struct sock *sk, struct sock *newsk)
}
EXPORT_SYMBOL(security_sk_clone);
-void security_sk_classify_flow(struct sock *sk, struct flowi_common *flic)
+void security_sk_classify_flow(const struct sock *sk, struct flowi_common *flic)
{
call_void_hook(sk_getsecid, sk, &flic->flowic_secid);
}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d06e350fedee..2bdc48dd8670 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5167,12 +5167,12 @@ static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk)
selinux_netlbl_sk_security_reset(newsksec);
}
-static void selinux_sk_getsecid(struct sock *sk, u32 *secid)
+static void selinux_sk_getsecid(const struct sock *sk, u32 *secid)
{
if (!sk)
*secid = SECINITSID_ANY_SOCKET;
else {
- struct sk_security_struct *sksec = sk->sk_security;
+ const struct sk_security_struct *sksec = sk->sk_security;
*secid = sksec->sid;
}
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 68454ef28f58..5006e724d1bc 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -260,9 +260,9 @@ EXAMPLES
This is example BPF application with two BPF programs and a mix of BPF maps
and global variables. Source code is split across two source code files.
-**$ clang -target bpf -g example1.bpf.c -o example1.bpf.o**
+**$ clang --target=bpf -g example1.bpf.c -o example1.bpf.o**
-**$ clang -target bpf -g example2.bpf.c -o example2.bpf.o**
+**$ clang --target=bpf -g example2.bpf.c -o example2.bpf.o**
**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index f4e0a516335a..5e2abd3de5ab 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -4,7 +4,7 @@
bpftool-net
================
-------------------------------------------------------------------------------
-tool for inspection of netdev/tc related bpf prog attachments
+tool for inspection of networking related bpf prog attachments
-------------------------------------------------------------------------------
:Manual section: 8
@@ -37,10 +37,13 @@ DESCRIPTION
**bpftool net { show | list }** [ **dev** *NAME* ]
List bpf program attachments in the kernel networking subsystem.
- Currently, only device driver xdp attachments and tc filter
- classification/action attachments are implemented, i.e., for
- program types **BPF_PROG_TYPE_SCHED_CLS**,
- **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**.
+ Currently, device driver xdp attachments, tcx and old-style tc
+ classifier/action attachments, flow_dissector as well as netfilter
+ attachments are implemented, i.e., for
+ program types **BPF_PROG_TYPE_XDP**, **BPF_PROG_TYPE_SCHED_CLS**,
+ **BPF_PROG_TYPE_SCHED_ACT**, **BPF_PROG_TYPE_FLOW_DISSECTOR**,
+ **BPF_PROG_TYPE_NETFILTER**.
+
For programs attached to a particular cgroup, e.g.,
**BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**,
**BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
@@ -49,12 +52,13 @@ DESCRIPTION
bpf programs, users should consult other tools, e.g., iproute2.
The current output will start with all xdp program attachments, followed by
- all tc class/qdisc bpf program attachments. Both xdp programs and
- tc programs are ordered based on ifindex number. If multiple bpf
- programs attached to the same networking device through **tc filter**,
- the order will be first all bpf programs attached to tc classes, then
- all bpf programs attached to non clsact qdiscs, and finally all
- bpf programs attached to root and clsact qdisc.
+ all tcx, then tc class/qdisc bpf program attachments, then flow_dissector
+ and finally netfilter programs. Both xdp programs and tcx/tc programs are
+ ordered based on ifindex number. If multiple bpf programs attached
+ to the same networking device through **tc**, the order will be first
+ all bpf programs attached to tcx, then tc classes, then all bpf programs
+ attached to non clsact qdiscs, and finally all bpf programs attached
+ to root and clsact qdisc.
**bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ]
Attach bpf program *PROG* to network interface *NAME* with
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 681fbcc5ed50..e9154ace80ff 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -216,7 +216,7 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP)
-I$(srctree)/tools/include/uapi/ \
-I$(LIBBPF_BOOTSTRAP_INCLUDE) \
-g -O2 -Wall -fno-stack-protector \
- -target bpf -c $< -o $@
+ --target=bpf -c $< -o $@
$(Q)$(LLVM_STRIP) -g $@
$(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP)
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 294de231db99..1b7f69714604 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -835,7 +835,7 @@ static void dotlabel_puts(const char *s)
case '|':
case ' ':
putchar('\\');
- /* fallthrough */
+ fallthrough;
default:
putchar(*s);
}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 0675d6a46413..edda4fc2c4d0 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -757,7 +757,7 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type,
case BPF_FUNC_probe_write_user:
if (!full_mode)
continue;
- /* fallthrough */
+ fallthrough;
default:
probe_res |= probe_helper_for_progtype(prog_type, supported_type,
define_prefix, id, prog_type_str,
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 2d786072ed0d..65a168df63bc 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -5,6 +5,7 @@
#include <linux/err.h>
#include <linux/netfilter.h>
#include <linux/netfilter_arp.h>
+#include <linux/perf_event.h>
#include <net/if.h>
#include <stdio.h>
#include <unistd.h>
@@ -14,8 +15,78 @@
#include "json_writer.h"
#include "main.h"
+#include "xlated_dumper.h"
+
+#define PERF_HW_CACHE_LEN 128
static struct hashmap *link_table;
+static struct dump_data dd;
+
+static const char *perf_type_name[PERF_TYPE_MAX] = {
+ [PERF_TYPE_HARDWARE] = "hardware",
+ [PERF_TYPE_SOFTWARE] = "software",
+ [PERF_TYPE_TRACEPOINT] = "tracepoint",
+ [PERF_TYPE_HW_CACHE] = "hw-cache",
+ [PERF_TYPE_RAW] = "raw",
+ [PERF_TYPE_BREAKPOINT] = "breakpoint",
+};
+
+const char *event_symbols_hw[PERF_COUNT_HW_MAX] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = "cpu-cycles",
+ [PERF_COUNT_HW_INSTRUCTIONS] = "instructions",
+ [PERF_COUNT_HW_CACHE_REFERENCES] = "cache-references",
+ [PERF_COUNT_HW_CACHE_MISSES] = "cache-misses",
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "branch-instructions",
+ [PERF_COUNT_HW_BRANCH_MISSES] = "branch-misses",
+ [PERF_COUNT_HW_BUS_CYCLES] = "bus-cycles",
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "stalled-cycles-frontend",
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "stalled-cycles-backend",
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = "ref-cycles",
+};
+
+const char *event_symbols_sw[PERF_COUNT_SW_MAX] = {
+ [PERF_COUNT_SW_CPU_CLOCK] = "cpu-clock",
+ [PERF_COUNT_SW_TASK_CLOCK] = "task-clock",
+ [PERF_COUNT_SW_PAGE_FAULTS] = "page-faults",
+ [PERF_COUNT_SW_CONTEXT_SWITCHES] = "context-switches",
+ [PERF_COUNT_SW_CPU_MIGRATIONS] = "cpu-migrations",
+ [PERF_COUNT_SW_PAGE_FAULTS_MIN] = "minor-faults",
+ [PERF_COUNT_SW_PAGE_FAULTS_MAJ] = "major-faults",
+ [PERF_COUNT_SW_ALIGNMENT_FAULTS] = "alignment-faults",
+ [PERF_COUNT_SW_EMULATION_FAULTS] = "emulation-faults",
+ [PERF_COUNT_SW_DUMMY] = "dummy",
+ [PERF_COUNT_SW_BPF_OUTPUT] = "bpf-output",
+ [PERF_COUNT_SW_CGROUP_SWITCHES] = "cgroup-switches",
+};
+
+const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] = {
+ [PERF_COUNT_HW_CACHE_L1D] = "L1-dcache",
+ [PERF_COUNT_HW_CACHE_L1I] = "L1-icache",
+ [PERF_COUNT_HW_CACHE_LL] = "LLC",
+ [PERF_COUNT_HW_CACHE_DTLB] = "dTLB",
+ [PERF_COUNT_HW_CACHE_ITLB] = "iTLB",
+ [PERF_COUNT_HW_CACHE_BPU] = "branch",
+ [PERF_COUNT_HW_CACHE_NODE] = "node",
+};
+
+const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] = {
+ [PERF_COUNT_HW_CACHE_OP_READ] = "load",
+ [PERF_COUNT_HW_CACHE_OP_WRITE] = "store",
+ [PERF_COUNT_HW_CACHE_OP_PREFETCH] = "prefetch",
+};
+
+const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [PERF_COUNT_HW_CACHE_RESULT_ACCESS] = "refs",
+ [PERF_COUNT_HW_CACHE_RESULT_MISS] = "misses",
+};
+
+#define perf_event_name(array, id) ({ \
+ const char *event_str = NULL; \
+ \
+ if ((id) >= 0 && (id) < ARRAY_SIZE(array)) \
+ event_str = array[id]; \
+ event_str; \
+})
static int link_parse_fd(int *argc, char ***argv)
{
@@ -166,6 +237,154 @@ static int get_prog_info(int prog_id, struct bpf_prog_info *info)
return err;
}
+static int cmp_u64(const void *A, const void *B)
+{
+ const __u64 *a = A, *b = B;
+
+ return *a - *b;
+}
+
+static void
+show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
+{
+ __u32 i, j = 0;
+ __u64 *addrs;
+
+ jsonw_bool_field(json_wtr, "retprobe",
+ info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN);
+ jsonw_uint_field(json_wtr, "func_cnt", info->kprobe_multi.count);
+ jsonw_name(json_wtr, "funcs");
+ jsonw_start_array(json_wtr);
+ addrs = u64_to_ptr(info->kprobe_multi.addrs);
+ qsort(addrs, info->kprobe_multi.count, sizeof(addrs[0]), cmp_u64);
+
+ /* Load it once for all. */
+ if (!dd.sym_count)
+ kernel_syms_load(&dd);
+ for (i = 0; i < dd.sym_count; i++) {
+ if (dd.sym_mapping[i].address != addrs[j])
+ continue;
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "addr", dd.sym_mapping[i].address);
+ jsonw_string_field(json_wtr, "func", dd.sym_mapping[i].name);
+ /* Print null if it is vmlinux */
+ if (dd.sym_mapping[i].module[0] == '\0') {
+ jsonw_name(json_wtr, "module");
+ jsonw_null(json_wtr);
+ } else {
+ jsonw_string_field(json_wtr, "module", dd.sym_mapping[i].module);
+ }
+ jsonw_end_object(json_wtr);
+ if (j++ == info->kprobe_multi.count)
+ break;
+ }
+ jsonw_end_array(json_wtr);
+}
+
+static void
+show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr)
+{
+ jsonw_bool_field(wtr, "retprobe", info->perf_event.type == BPF_PERF_EVENT_KRETPROBE);
+ jsonw_uint_field(wtr, "addr", info->perf_event.kprobe.addr);
+ jsonw_string_field(wtr, "func",
+ u64_to_ptr(info->perf_event.kprobe.func_name));
+ jsonw_uint_field(wtr, "offset", info->perf_event.kprobe.offset);
+}
+
+static void
+show_perf_event_uprobe_json(struct bpf_link_info *info, json_writer_t *wtr)
+{
+ jsonw_bool_field(wtr, "retprobe", info->perf_event.type == BPF_PERF_EVENT_URETPROBE);
+ jsonw_string_field(wtr, "file",
+ u64_to_ptr(info->perf_event.uprobe.file_name));
+ jsonw_uint_field(wtr, "offset", info->perf_event.uprobe.offset);
+}
+
+static void
+show_perf_event_tracepoint_json(struct bpf_link_info *info, json_writer_t *wtr)
+{
+ jsonw_string_field(wtr, "tracepoint",
+ u64_to_ptr(info->perf_event.tracepoint.tp_name));
+}
+
+static char *perf_config_hw_cache_str(__u64 config)
+{
+ const char *hw_cache, *result, *op;
+ char *str = malloc(PERF_HW_CACHE_LEN);
+
+ if (!str) {
+ p_err("mem alloc failed");
+ return NULL;
+ }
+
+ hw_cache = perf_event_name(evsel__hw_cache, config & 0xff);
+ if (hw_cache)
+ snprintf(str, PERF_HW_CACHE_LEN, "%s-", hw_cache);
+ else
+ snprintf(str, PERF_HW_CACHE_LEN, "%lld-", config & 0xff);
+
+ op = perf_event_name(evsel__hw_cache_op, (config >> 8) & 0xff);
+ if (op)
+ snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str),
+ "%s-", op);
+ else
+ snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str),
+ "%lld-", (config >> 8) & 0xff);
+
+ result = perf_event_name(evsel__hw_cache_result, config >> 16);
+ if (result)
+ snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str),
+ "%s", result);
+ else
+ snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str),
+ "%lld", config >> 16);
+ return str;
+}
+
+static const char *perf_config_str(__u32 type, __u64 config)
+{
+ const char *perf_config;
+
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ perf_config = perf_event_name(event_symbols_hw, config);
+ break;
+ case PERF_TYPE_SOFTWARE:
+ perf_config = perf_event_name(event_symbols_sw, config);
+ break;
+ case PERF_TYPE_HW_CACHE:
+ perf_config = perf_config_hw_cache_str(config);
+ break;
+ default:
+ perf_config = NULL;
+ break;
+ }
+ return perf_config;
+}
+
+static void
+show_perf_event_event_json(struct bpf_link_info *info, json_writer_t *wtr)
+{
+ __u64 config = info->perf_event.event.config;
+ __u32 type = info->perf_event.event.type;
+ const char *perf_type, *perf_config;
+
+ perf_type = perf_event_name(perf_type_name, type);
+ if (perf_type)
+ jsonw_string_field(wtr, "event_type", perf_type);
+ else
+ jsonw_uint_field(wtr, "event_type", type);
+
+ perf_config = perf_config_str(type, config);
+ if (perf_config)
+ jsonw_string_field(wtr, "event_config", perf_config);
+ else
+ jsonw_uint_field(wtr, "event_config", config);
+
+ if (type == PERF_TYPE_HW_CACHE && perf_config)
+ free((void *)perf_config);
+}
+
static int show_link_close_json(int fd, struct bpf_link_info *info)
{
struct bpf_prog_info prog_info;
@@ -218,6 +437,29 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
jsonw_uint_field(json_wtr, "map_id",
info->struct_ops.map_id);
break;
+ case BPF_LINK_TYPE_KPROBE_MULTI:
+ show_kprobe_multi_json(info, json_wtr);
+ break;
+ case BPF_LINK_TYPE_PERF_EVENT:
+ switch (info->perf_event.type) {
+ case BPF_PERF_EVENT_EVENT:
+ show_perf_event_event_json(info, json_wtr);
+ break;
+ case BPF_PERF_EVENT_TRACEPOINT:
+ show_perf_event_tracepoint_json(info, json_wtr);
+ break;
+ case BPF_PERF_EVENT_KPROBE:
+ case BPF_PERF_EVENT_KRETPROBE:
+ show_perf_event_kprobe_json(info, json_wtr);
+ break;
+ case BPF_PERF_EVENT_UPROBE:
+ case BPF_PERF_EVENT_URETPROBE:
+ show_perf_event_uprobe_json(info, json_wtr);
+ break;
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -351,6 +593,113 @@ void netfilter_dump_plain(const struct bpf_link_info *info)
printf(" flags 0x%x", info->netfilter.flags);
}
+static void show_kprobe_multi_plain(struct bpf_link_info *info)
+{
+ __u32 i, j = 0;
+ __u64 *addrs;
+
+ if (!info->kprobe_multi.count)
+ return;
+
+ if (info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN)
+ printf("\n\tkretprobe.multi ");
+ else
+ printf("\n\tkprobe.multi ");
+ printf("func_cnt %u ", info->kprobe_multi.count);
+ addrs = (__u64 *)u64_to_ptr(info->kprobe_multi.addrs);
+ qsort(addrs, info->kprobe_multi.count, sizeof(__u64), cmp_u64);
+
+ /* Load it once for all. */
+ if (!dd.sym_count)
+ kernel_syms_load(&dd);
+ if (!dd.sym_count)
+ return;
+
+ printf("\n\t%-16s %s", "addr", "func [module]");
+ for (i = 0; i < dd.sym_count; i++) {
+ if (dd.sym_mapping[i].address != addrs[j])
+ continue;
+ printf("\n\t%016lx %s",
+ dd.sym_mapping[i].address, dd.sym_mapping[i].name);
+ if (dd.sym_mapping[i].module[0] != '\0')
+ printf(" [%s] ", dd.sym_mapping[i].module);
+ else
+ printf(" ");
+
+ if (j++ == info->kprobe_multi.count)
+ break;
+ }
+}
+
+static void show_perf_event_kprobe_plain(struct bpf_link_info *info)
+{
+ const char *buf;
+
+ buf = u64_to_ptr(info->perf_event.kprobe.func_name);
+ if (buf[0] == '\0' && !info->perf_event.kprobe.addr)
+ return;
+
+ if (info->perf_event.type == BPF_PERF_EVENT_KRETPROBE)
+ printf("\n\tkretprobe ");
+ else
+ printf("\n\tkprobe ");
+ if (info->perf_event.kprobe.addr)
+ printf("%llx ", info->perf_event.kprobe.addr);
+ printf("%s", buf);
+ if (info->perf_event.kprobe.offset)
+ printf("+%#x", info->perf_event.kprobe.offset);
+ printf(" ");
+}
+
+static void show_perf_event_uprobe_plain(struct bpf_link_info *info)
+{
+ const char *buf;
+
+ buf = u64_to_ptr(info->perf_event.uprobe.file_name);
+ if (buf[0] == '\0')
+ return;
+
+ if (info->perf_event.type == BPF_PERF_EVENT_URETPROBE)
+ printf("\n\turetprobe ");
+ else
+ printf("\n\tuprobe ");
+ printf("%s+%#x ", buf, info->perf_event.uprobe.offset);
+}
+
+static void show_perf_event_tracepoint_plain(struct bpf_link_info *info)
+{
+ const char *buf;
+
+ buf = u64_to_ptr(info->perf_event.tracepoint.tp_name);
+ if (buf[0] == '\0')
+ return;
+
+ printf("\n\ttracepoint %s ", buf);
+}
+
+static void show_perf_event_event_plain(struct bpf_link_info *info)
+{
+ __u64 config = info->perf_event.event.config;
+ __u32 type = info->perf_event.event.type;
+ const char *perf_type, *perf_config;
+
+ printf("\n\tevent ");
+ perf_type = perf_event_name(perf_type_name, type);
+ if (perf_type)
+ printf("%s:", perf_type);
+ else
+ printf("%u :", type);
+
+ perf_config = perf_config_str(type, config);
+ if (perf_config)
+ printf("%s ", perf_config);
+ else
+ printf("%llu ", config);
+
+ if (type == PERF_TYPE_HW_CACHE && perf_config)
+ free((void *)perf_config);
+}
+
static int show_link_close_plain(int fd, struct bpf_link_info *info)
{
struct bpf_prog_info prog_info;
@@ -396,6 +745,29 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
case BPF_LINK_TYPE_NETFILTER:
netfilter_dump_plain(info);
break;
+ case BPF_LINK_TYPE_KPROBE_MULTI:
+ show_kprobe_multi_plain(info);
+ break;
+ case BPF_LINK_TYPE_PERF_EVENT:
+ switch (info->perf_event.type) {
+ case BPF_PERF_EVENT_EVENT:
+ show_perf_event_event_plain(info);
+ break;
+ case BPF_PERF_EVENT_TRACEPOINT:
+ show_perf_event_tracepoint_plain(info);
+ break;
+ case BPF_PERF_EVENT_KPROBE:
+ case BPF_PERF_EVENT_KRETPROBE:
+ show_perf_event_kprobe_plain(info);
+ break;
+ case BPF_PERF_EVENT_UPROBE:
+ case BPF_PERF_EVENT_URETPROBE:
+ show_perf_event_uprobe_plain(info);
+ break;
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -417,10 +789,13 @@ static int do_show_link(int fd)
{
struct bpf_link_info info;
__u32 len = sizeof(info);
- char buf[256];
+ __u64 *addrs = NULL;
+ char buf[PATH_MAX];
+ int count;
int err;
memset(&info, 0, sizeof(info));
+ buf[0] = '\0';
again:
err = bpf_link_get_info_by_fd(fd, &info, &len);
if (err) {
@@ -431,22 +806,67 @@ again:
}
if (info.type == BPF_LINK_TYPE_RAW_TRACEPOINT &&
!info.raw_tracepoint.tp_name) {
- info.raw_tracepoint.tp_name = (unsigned long)&buf;
+ info.raw_tracepoint.tp_name = ptr_to_u64(&buf);
info.raw_tracepoint.tp_name_len = sizeof(buf);
goto again;
}
if (info.type == BPF_LINK_TYPE_ITER &&
!info.iter.target_name) {
- info.iter.target_name = (unsigned long)&buf;
+ info.iter.target_name = ptr_to_u64(&buf);
info.iter.target_name_len = sizeof(buf);
goto again;
}
+ if (info.type == BPF_LINK_TYPE_KPROBE_MULTI &&
+ !info.kprobe_multi.addrs) {
+ count = info.kprobe_multi.count;
+ if (count) {
+ addrs = calloc(count, sizeof(__u64));
+ if (!addrs) {
+ p_err("mem alloc failed");
+ close(fd);
+ return -ENOMEM;
+ }
+ info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ goto again;
+ }
+ }
+ if (info.type == BPF_LINK_TYPE_PERF_EVENT) {
+ switch (info.perf_event.type) {
+ case BPF_PERF_EVENT_TRACEPOINT:
+ if (!info.perf_event.tracepoint.tp_name) {
+ info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf);
+ info.perf_event.tracepoint.name_len = sizeof(buf);
+ goto again;
+ }
+ break;
+ case BPF_PERF_EVENT_KPROBE:
+ case BPF_PERF_EVENT_KRETPROBE:
+ if (!info.perf_event.kprobe.func_name) {
+ info.perf_event.kprobe.func_name = ptr_to_u64(&buf);
+ info.perf_event.kprobe.name_len = sizeof(buf);
+ goto again;
+ }
+ break;
+ case BPF_PERF_EVENT_UPROBE:
+ case BPF_PERF_EVENT_URETPROBE:
+ if (!info.perf_event.uprobe.file_name) {
+ info.perf_event.uprobe.file_name = ptr_to_u64(&buf);
+ info.perf_event.uprobe.name_len = sizeof(buf);
+ goto again;
+ }
+ break;
+ default:
+ break;
+ }
+ }
if (json_output)
show_link_close_json(fd, &info);
else
show_link_close_plain(fd, &info);
+ if (addrs)
+ free(addrs);
close(fd);
return 0;
}
@@ -471,7 +891,8 @@ static int do_show(int argc, char **argv)
fd = link_parse_fd(&argc, &argv);
if (fd < 0)
return fd;
- return do_show_link(fd);
+ do_show_link(fd);
+ goto out;
}
if (argc)
@@ -510,6 +931,9 @@ static int do_show(int argc, char **argv)
if (show_pinned)
delete_pinned_obj_table(link_table);
+out:
+ if (dd.sym_count)
+ kernel_syms_destroy(&dd);
return errno == ENOENT ? 0 : -1;
}
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index 26a49965bf71..66a8ce8ae012 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -76,6 +76,11 @@ static const char * const attach_type_strings[] = {
[NET_ATTACH_TYPE_XDP_OFFLOAD] = "xdpoffload",
};
+static const char * const attach_loc_strings[] = {
+ [BPF_TCX_INGRESS] = "tcx/ingress",
+ [BPF_TCX_EGRESS] = "tcx/egress",
+};
+
const size_t net_attach_type_size = ARRAY_SIZE(attach_type_strings);
static enum net_attach_type parse_attach_type(const char *str)
@@ -422,8 +427,89 @@ static int dump_filter_nlmsg(void *cookie, void *msg, struct nlattr **tb)
filter_info->devname, filter_info->ifindex);
}
-static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
- struct ip_devname_ifindex *dev)
+static int __show_dev_tc_bpf_name(__u32 id, char *name, size_t len)
+{
+ struct bpf_prog_info info = {};
+ __u32 ilen = sizeof(info);
+ int fd, ret;
+
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0)
+ return fd;
+ ret = bpf_obj_get_info_by_fd(fd, &info, &ilen);
+ if (ret < 0)
+ goto out;
+ ret = -ENOENT;
+ if (info.name[0]) {
+ get_prog_full_name(&info, fd, name, len);
+ ret = 0;
+ }
+out:
+ close(fd);
+ return ret;
+}
+
+static void __show_dev_tc_bpf(const struct ip_devname_ifindex *dev,
+ const enum bpf_attach_type loc)
+{
+ __u32 prog_flags[64] = {}, link_flags[64] = {}, i, j;
+ __u32 prog_ids[64] = {}, link_ids[64] = {};
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ char prog_name[MAX_PROG_FULL_NAME];
+ int ret;
+
+ optq.prog_ids = prog_ids;
+ optq.prog_attach_flags = prog_flags;
+ optq.link_ids = link_ids;
+ optq.link_attach_flags = link_flags;
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ ret = bpf_prog_query_opts(dev->ifindex, loc, &optq);
+ if (ret)
+ return;
+ for (i = 0; i < optq.count; i++) {
+ NET_START_OBJECT;
+ NET_DUMP_STR("devname", "%s", dev->devname);
+ NET_DUMP_UINT("ifindex", "(%u)", dev->ifindex);
+ NET_DUMP_STR("kind", " %s", attach_loc_strings[loc]);
+ ret = __show_dev_tc_bpf_name(prog_ids[i], prog_name,
+ sizeof(prog_name));
+ if (!ret)
+ NET_DUMP_STR("name", " %s", prog_name);
+ NET_DUMP_UINT("prog_id", " prog_id %u ", prog_ids[i]);
+ if (prog_flags[i] || json_output) {
+ NET_START_ARRAY("prog_flags", "%s ");
+ for (j = 0; prog_flags[i] && j < 32; j++) {
+ if (!(prog_flags[i] & (1 << j)))
+ continue;
+ NET_DUMP_UINT_ONLY(1 << j);
+ }
+ NET_END_ARRAY("");
+ }
+ if (link_ids[i] || json_output) {
+ NET_DUMP_UINT("link_id", "link_id %u ", link_ids[i]);
+ if (link_flags[i] || json_output) {
+ NET_START_ARRAY("link_flags", "%s ");
+ for (j = 0; link_flags[i] && j < 32; j++) {
+ if (!(link_flags[i] & (1 << j)))
+ continue;
+ NET_DUMP_UINT_ONLY(1 << j);
+ }
+ NET_END_ARRAY("");
+ }
+ }
+ NET_END_OBJECT_FINAL;
+ }
+}
+
+static void show_dev_tc_bpf(struct ip_devname_ifindex *dev)
+{
+ __show_dev_tc_bpf(dev, BPF_TCX_INGRESS);
+ __show_dev_tc_bpf(dev, BPF_TCX_EGRESS);
+}
+
+static int show_dev_tc_bpf_classic(int sock, unsigned int nl_pid,
+ struct ip_devname_ifindex *dev)
{
struct bpf_filter_t filter_info;
struct bpf_tcinfo_t tcinfo;
@@ -790,8 +876,9 @@ static int do_show(int argc, char **argv)
if (!ret) {
NET_START_ARRAY("tc", "%s:\n");
for (i = 0; i < dev_array.used_len; i++) {
- ret = show_dev_tc_bpf(sock, nl_pid,
- &dev_array.devices[i]);
+ show_dev_tc_bpf(&dev_array.devices[i]);
+ ret = show_dev_tc_bpf_classic(sock, nl_pid,
+ &dev_array.devices[i]);
if (ret)
break;
}
@@ -839,7 +926,8 @@ static int do_help(int argc, char **argv)
" ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
" " HELP_SPEC_OPTIONS " }\n"
"\n"
- "Note: Only xdp and tc attachments are supported now.\n"
+ "Note: Only xdp, tcx, tc, flow_dissector and netfilter attachments\n"
+ " are currently supported.\n"
" For progs attached to cgroups, use \"bpftool cgroup\"\n"
" to dump program attachments. For program types\n"
" sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n"
diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h
index 774af6c62ef5..96318106fb49 100644
--- a/tools/bpf/bpftool/netlink_dumper.h
+++ b/tools/bpf/bpftool/netlink_dumper.h
@@ -76,6 +76,14 @@
fprintf(stdout, fmt_str, val); \
}
+#define NET_DUMP_UINT_ONLY(str) \
+{ \
+ if (json_output) \
+ jsonw_uint(json_wtr, str); \
+ else \
+ fprintf(stdout, "%u ", str); \
+}
+
#define NET_DUMP_STR(name, fmt_str, str) \
{ \
if (json_output) \
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
index eb05ea53afb1..26004f0c5a6a 100644
--- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -15,6 +15,19 @@ enum bpf_obj_type {
BPF_OBJ_BTF,
};
+struct bpf_perf_link___local {
+ struct bpf_link link;
+ struct file *perf_file;
+} __attribute__((preserve_access_index));
+
+struct perf_event___local {
+ u64 bpf_cookie;
+} __attribute__((preserve_access_index));
+
+enum bpf_link_type___local {
+ BPF_LINK_TYPE_PERF_EVENT___local = 7,
+};
+
extern const void bpf_link_fops __ksym;
extern const void bpf_map_fops __ksym;
extern const void bpf_prog_fops __ksym;
@@ -41,10 +54,10 @@ static __always_inline __u32 get_obj_id(void *ent, enum bpf_obj_type type)
/* could be used only with BPF_LINK_TYPE_PERF_EVENT links */
static __u64 get_bpf_cookie(struct bpf_link *link)
{
- struct bpf_perf_link *perf_link;
- struct perf_event *event;
+ struct bpf_perf_link___local *perf_link;
+ struct perf_event___local *event;
- perf_link = container_of(link, struct bpf_perf_link, link);
+ perf_link = container_of(link, struct bpf_perf_link___local, link);
event = BPF_CORE_READ(perf_link, perf_file, private_data);
return BPF_CORE_READ(event, bpf_cookie);
}
@@ -84,10 +97,13 @@ int iter(struct bpf_iter__task_file *ctx)
e.pid = task->tgid;
e.id = get_obj_id(file->private_data, obj_type);
- if (obj_type == BPF_OBJ_LINK) {
+ if (obj_type == BPF_OBJ_LINK &&
+ bpf_core_enum_value_exists(enum bpf_link_type___local,
+ BPF_LINK_TYPE_PERF_EVENT___local)) {
struct bpf_link *link = (struct bpf_link *) file->private_data;
- if (BPF_CORE_READ(link, type) == BPF_LINK_TYPE_PERF_EVENT) {
+ if (link->type == bpf_core_enum_value(enum bpf_link_type___local,
+ BPF_LINK_TYPE_PERF_EVENT___local)) {
e.has_bpf_cookie = true;
e.bpf_cookie = get_bpf_cookie(link);
}
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
index ce5b65e07ab1..2f80edc682f1 100644
--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -4,6 +4,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+struct bpf_perf_event_value___local {
+ __u64 counter;
+ __u64 enabled;
+ __u64 running;
+} __attribute__((preserve_access_index));
+
/* map of perf event fds, num_cpu * num_metric entries */
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
@@ -15,14 +21,14 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(value_size, sizeof(struct bpf_perf_event_value___local));
} fentry_readings SEC(".maps");
/* accumulated readings */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(value_size, sizeof(struct bpf_perf_event_value___local));
} accum_readings SEC(".maps");
/* sample counts, one per cpu */
@@ -39,7 +45,7 @@ const volatile __u32 num_metric = 1;
SEC("fentry/XXX")
int BPF_PROG(fentry_XXX)
{
- struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS];
+ struct bpf_perf_event_value___local *ptrs[MAX_NUM_MATRICS];
u32 key = bpf_get_smp_processor_id();
u32 i;
@@ -53,10 +59,10 @@ int BPF_PROG(fentry_XXX)
}
for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
- struct bpf_perf_event_value reading;
+ struct bpf_perf_event_value___local reading;
int err;
- err = bpf_perf_event_read_value(&events, key, &reading,
+ err = bpf_perf_event_read_value(&events, key, (void *)&reading,
sizeof(reading));
if (err)
return 0;
@@ -68,14 +74,14 @@ int BPF_PROG(fentry_XXX)
}
static inline void
-fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
+fexit_update_maps(u32 id, struct bpf_perf_event_value___local *after)
{
- struct bpf_perf_event_value *before, diff;
+ struct bpf_perf_event_value___local *before, diff;
before = bpf_map_lookup_elem(&fentry_readings, &id);
/* only account samples with a valid fentry_reading */
if (before && before->counter) {
- struct bpf_perf_event_value *accum;
+ struct bpf_perf_event_value___local *accum;
diff.counter = after->counter - before->counter;
diff.enabled = after->enabled - before->enabled;
@@ -93,7 +99,7 @@ fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
SEC("fexit/XXX")
int BPF_PROG(fexit_XXX)
{
- struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
+ struct bpf_perf_event_value___local readings[MAX_NUM_MATRICS];
u32 cpu = bpf_get_smp_processor_id();
u32 i, zero = 0;
int err;
@@ -102,7 +108,8 @@ int BPF_PROG(fexit_XXX)
/* read all events before updating the maps, to reduce error */
for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
err = bpf_perf_event_read_value(&events, cpu + i * num_cpu,
- readings + i, sizeof(*readings));
+ (void *)(readings + i),
+ sizeof(*readings));
if (err)
return 0;
}
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index da608e10c843..567f56dfd9f1 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -46,7 +46,11 @@ out:
}
dd->sym_mapping = tmp;
sym = &dd->sym_mapping[dd->sym_count];
- if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
+
+ /* module is optional */
+ sym->module[0] = '\0';
+ /* trim the square brackets around the module name */
+ if (sscanf(buff, "%p %*c %s [%[^]]s", &address, sym->name, sym->module) < 2)
continue;
sym->address = (unsigned long)address;
if (!strcmp(sym->name, "__bpf_call_base")) {
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index 9a946377b0e6..db3ba0671501 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -5,12 +5,14 @@
#define __BPF_TOOL_XLATED_DUMPER_H
#define SYM_MAX_NAME 256
+#define MODULE_MAX_NAME 64
struct bpf_prog_linfo;
struct kernel_sym {
unsigned long address;
char name[SYM_MAX_NAME];
+ char module[MODULE_MAX_NAME];
};
struct dump_data {
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index 47acf6936516..d8288936c912 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -62,7 +62,7 @@ $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL)
$(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@
$(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
- $(QUIET_GEN)$(CLANG) -g -O2 -target bpf $(INCLUDES) \
+ $(QUIET_GEN)$(CLANG) -g -O2 --target=bpf $(INCLUDES) \
-c $(filter %.c,$^) -o $@ && \
$(LLVM_STRIP) -g $@
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 2cd6dbbee088..f0c5de018a95 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -372,7 +372,7 @@ $(OUTPUT)test-libzstd.bin:
$(BUILD) -lzstd
$(OUTPUT)test-clang-bpf-co-re.bin:
- $(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \
+ $(CLANG) -S -g --target=bpf -o - $(patsubst %.bin,%.c,$(@F)) | \
grep BTF_KIND_VAR
$(OUTPUT)test-file-handle.bin:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 60a9d59beeab..739c15906a65 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1036,6 +1036,8 @@ enum bpf_attach_type {
BPF_LSM_CGROUP,
BPF_STRUCT_OPS,
BPF_NETFILTER,
+ BPF_TCX_INGRESS,
+ BPF_TCX_EGRESS,
__MAX_BPF_ATTACH_TYPE
};
@@ -1053,10 +1055,20 @@ enum bpf_link_type {
BPF_LINK_TYPE_KPROBE_MULTI = 8,
BPF_LINK_TYPE_STRUCT_OPS = 9,
BPF_LINK_TYPE_NETFILTER = 10,
-
+ BPF_LINK_TYPE_TCX = 11,
MAX_BPF_LINK_TYPE,
};
+enum bpf_perf_event_type {
+ BPF_PERF_EVENT_UNSPEC = 0,
+ BPF_PERF_EVENT_UPROBE = 1,
+ BPF_PERF_EVENT_URETPROBE = 2,
+ BPF_PERF_EVENT_KPROBE = 3,
+ BPF_PERF_EVENT_KRETPROBE = 4,
+ BPF_PERF_EVENT_TRACEPOINT = 5,
+ BPF_PERF_EVENT_EVENT = 6,
+};
+
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
*
* NONE(default): No further bpf programs allowed in the subtree.
@@ -1103,7 +1115,12 @@ enum bpf_link_type {
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
#define BPF_F_ALLOW_MULTI (1U << 1)
+/* Generic attachment flags. */
#define BPF_F_REPLACE (1U << 2)
+#define BPF_F_BEFORE (1U << 3)
+#define BPF_F_AFTER (1U << 4)
+#define BPF_F_ID (1U << 5)
+#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will perform strict alignment checking as if the kernel
@@ -1434,14 +1451,19 @@ union bpf_attr {
};
struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
- __u32 target_fd; /* container object to attach to */
- __u32 attach_bpf_fd; /* eBPF program to attach */
+ union {
+ __u32 target_fd; /* target object to attach to or ... */
+ __u32 target_ifindex; /* target ifindex */
+ };
+ __u32 attach_bpf_fd;
__u32 attach_type;
__u32 attach_flags;
- __u32 replace_bpf_fd; /* previously attached eBPF
- * program to replace if
- * BPF_F_REPLACE is used
- */
+ __u32 replace_bpf_fd;
+ union {
+ __u32 relative_fd;
+ __u32 relative_id;
+ };
+ __u64 expected_revision;
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
@@ -1487,16 +1509,26 @@ union bpf_attr {
} info;
struct { /* anonymous struct used by BPF_PROG_QUERY command */
- __u32 target_fd; /* container object to query */
+ union {
+ __u32 target_fd; /* target object to query or ... */
+ __u32 target_ifindex; /* target ifindex */
+ };
__u32 attach_type;
__u32 query_flags;
__u32 attach_flags;
__aligned_u64 prog_ids;
- __u32 prog_cnt;
+ union {
+ __u32 prog_cnt;
+ __u32 count;
+ };
+ __u32 :32;
/* output: per-program attach_flags.
* not allowed to be set during effective query.
*/
__aligned_u64 prog_attach_flags;
+ __aligned_u64 link_ids;
+ __aligned_u64 link_attach_flags;
+ __u64 revision;
} query;
struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
@@ -1539,13 +1571,13 @@ union bpf_attr {
__u32 map_fd; /* struct_ops to attach */
};
union {
- __u32 target_fd; /* object to attach to */
- __u32 target_ifindex; /* target ifindex */
+ __u32 target_fd; /* target object to attach to or ... */
+ __u32 target_ifindex; /* target ifindex */
};
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
union {
- __u32 target_btf_id; /* btf_id of target to attach to */
+ __u32 target_btf_id; /* btf_id of target to attach to */
struct {
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
@@ -1579,6 +1611,13 @@ union bpf_attr {
__s32 priority;
__u32 flags;
} netfilter;
+ struct {
+ union {
+ __u32 relative_fd;
+ __u32 relative_id;
+ };
+ __u64 expected_revision;
+ } tcx;
};
} link_create;
@@ -6187,6 +6226,19 @@ struct bpf_sock_tuple {
};
};
+/* (Simplified) user return codes for tcx prog type.
+ * A valid tcx program must return one of these defined values. All other
+ * return codes are reserved for future use. Must remain compatible with
+ * their TC_ACT_* counter-parts. For compatibility in behavior, unknown
+ * return codes are mapped to TCX_NEXT.
+ */
+enum tcx_action_base {
+ TCX_NEXT = -1,
+ TCX_PASS = 0,
+ TCX_DROP = 2,
+ TCX_REDIRECT = 7,
+};
+
struct bpf_xdp_sock {
__u32 queue_id;
};
@@ -6439,6 +6491,40 @@ struct bpf_link_info {
__s32 priority;
__u32 flags;
} netfilter;
+ struct {
+ __aligned_u64 addrs;
+ __u32 count; /* in/out: kprobe_multi function count */
+ __u32 flags;
+ } kprobe_multi;
+ struct {
+ __u32 type; /* enum bpf_perf_event_type */
+ __u32 :32;
+ union {
+ struct {
+ __aligned_u64 file_name; /* in/out */
+ __u32 name_len;
+ __u32 offset; /* offset from file_name */
+ } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
+ struct {
+ __aligned_u64 func_name; /* in/out */
+ __u32 name_len;
+ __u32 offset; /* offset from func_name */
+ __u64 addr;
+ } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
+ struct {
+ __aligned_u64 tp_name; /* in/out */
+ __u32 name_len;
+ } tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */
+ struct {
+ __u64 config;
+ __u32 type;
+ } event; /* BPF_PERF_EVENT_EVENT */
+ };
+ } perf_event;
+ struct {
+ __u32 ifindex;
+ __u32 attach_type;
+ } tcx;
};
} __attribute__((aligned(8)));
@@ -7012,6 +7098,7 @@ struct bpf_list_head {
struct bpf_list_node {
__u64 :64;
__u64 :64;
+ __u64 :64;
} __attribute__((aligned(8)));
struct bpf_rb_root {
@@ -7023,6 +7110,7 @@ struct bpf_rb_node {
__u64 :64;
__u64 :64;
__u64 :64;
+ __u64 :64;
} __attribute__((aligned(8)));
struct bpf_refcount {
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index a78a8096f4ce..73a47da885dc 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -25,6 +25,12 @@
* application.
*/
#define XDP_USE_NEED_WAKEUP (1 << 3)
+/* By setting this option, userspace application indicates that it can
+ * handle multiple descriptors per packet thus enabling xsk core to split
+ * multi-buffer XDP frames into multiple Rx descriptors. Without this set
+ * such frames will be dropped by xsk.
+ */
+#define XDP_USE_SG (1 << 4)
/* Flags for xsk_umem_config flags */
#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
@@ -106,6 +112,9 @@ struct xdp_desc {
__u32 options;
};
+/* Flag indicating packet constitutes of multiple buffers*/
+#define XDP_PKT_CONTD (1 << 0)
+
/* UMEM descriptor is __u64 */
#endif /* _LINUX_IF_XDP_H */
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 639524b59930..bf71698a1e82 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -41,6 +41,7 @@ enum {
NETDEV_A_DEV_IFINDEX = 1,
NETDEV_A_DEV_PAD,
NETDEV_A_DEV_XDP_FEATURES,
+ NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
__NETDEV_A_DEV_MAX,
NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1)
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index ed86b37d8024..c9b6b311a441 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -629,55 +629,89 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
}
-int bpf_prog_attach_opts(int prog_fd, int target_fd,
- enum bpf_attach_type type,
- const struct bpf_prog_attach_opts *opts)
+int bpf_prog_attach_opts(int prog_fd, int target, enum bpf_attach_type type,
+ const struct bpf_prog_attach_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
+ const size_t attr_sz = offsetofend(union bpf_attr, expected_revision);
+ __u32 relative_id, flags;
+ int ret, relative_fd;
union bpf_attr attr;
- int ret;
if (!OPTS_VALID(opts, bpf_prog_attach_opts))
return libbpf_err(-EINVAL);
+ relative_id = OPTS_GET(opts, relative_id, 0);
+ relative_fd = OPTS_GET(opts, relative_fd, 0);
+ flags = OPTS_GET(opts, flags, 0);
+
+ /* validate we don't have unexpected combinations of non-zero fields */
+ if (relative_fd && relative_id)
+ return libbpf_err(-EINVAL);
+
memset(&attr, 0, attr_sz);
- attr.target_fd = target_fd;
- attr.attach_bpf_fd = prog_fd;
- attr.attach_type = type;
- attr.attach_flags = OPTS_GET(opts, flags, 0);
- attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
+ attr.target_fd = target;
+ attr.attach_bpf_fd = prog_fd;
+ attr.attach_type = type;
+ attr.replace_bpf_fd = OPTS_GET(opts, replace_fd, 0);
+ attr.expected_revision = OPTS_GET(opts, expected_revision, 0);
+
+ if (relative_id) {
+ attr.attach_flags = flags | BPF_F_ID;
+ attr.relative_id = relative_id;
+ } else {
+ attr.attach_flags = flags;
+ attr.relative_fd = relative_fd;
+ }
ret = sys_bpf(BPF_PROG_ATTACH, &attr, attr_sz);
return libbpf_err_errno(ret);
}
-int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+int bpf_prog_detach_opts(int prog_fd, int target, enum bpf_attach_type type,
+ const struct bpf_prog_detach_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
+ const size_t attr_sz = offsetofend(union bpf_attr, expected_revision);
+ __u32 relative_id, flags;
+ int ret, relative_fd;
union bpf_attr attr;
- int ret;
+
+ if (!OPTS_VALID(opts, bpf_prog_detach_opts))
+ return libbpf_err(-EINVAL);
+
+ relative_id = OPTS_GET(opts, relative_id, 0);
+ relative_fd = OPTS_GET(opts, relative_fd, 0);
+ flags = OPTS_GET(opts, flags, 0);
+
+ /* validate we don't have unexpected combinations of non-zero fields */
+ if (relative_fd && relative_id)
+ return libbpf_err(-EINVAL);
memset(&attr, 0, attr_sz);
- attr.target_fd = target_fd;
- attr.attach_type = type;
+ attr.target_fd = target;
+ attr.attach_bpf_fd = prog_fd;
+ attr.attach_type = type;
+ attr.expected_revision = OPTS_GET(opts, expected_revision, 0);
+
+ if (relative_id) {
+ attr.attach_flags = flags | BPF_F_ID;
+ attr.relative_id = relative_id;
+ } else {
+ attr.attach_flags = flags;
+ attr.relative_fd = relative_fd;
+ }
ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz);
return libbpf_err_errno(ret);
}
-int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
- const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
- union bpf_attr attr;
- int ret;
-
- memset(&attr, 0, attr_sz);
- attr.target_fd = target_fd;
- attr.attach_bpf_fd = prog_fd;
- attr.attach_type = type;
+ return bpf_prog_detach_opts(0, target_fd, type, NULL);
+}
- ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz);
- return libbpf_err_errno(ret);
+int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+ return bpf_prog_detach_opts(prog_fd, target_fd, type, NULL);
}
int bpf_link_create(int prog_fd, int target_fd,
@@ -685,9 +719,9 @@ int bpf_link_create(int prog_fd, int target_fd,
const struct bpf_link_create_opts *opts)
{
const size_t attr_sz = offsetofend(union bpf_attr, link_create);
- __u32 target_btf_id, iter_info_len;
+ __u32 target_btf_id, iter_info_len, relative_id;
+ int fd, err, relative_fd;
union bpf_attr attr;
- int fd, err;
if (!OPTS_VALID(opts, bpf_link_create_opts))
return libbpf_err(-EINVAL);
@@ -741,6 +775,30 @@ int bpf_link_create(int prog_fd, int target_fd,
if (!OPTS_ZEROED(opts, tracing))
return libbpf_err(-EINVAL);
break;
+ case BPF_NETFILTER:
+ attr.link_create.netfilter.pf = OPTS_GET(opts, netfilter.pf, 0);
+ attr.link_create.netfilter.hooknum = OPTS_GET(opts, netfilter.hooknum, 0);
+ attr.link_create.netfilter.priority = OPTS_GET(opts, netfilter.priority, 0);
+ attr.link_create.netfilter.flags = OPTS_GET(opts, netfilter.flags, 0);
+ if (!OPTS_ZEROED(opts, netfilter))
+ return libbpf_err(-EINVAL);
+ break;
+ case BPF_TCX_INGRESS:
+ case BPF_TCX_EGRESS:
+ relative_fd = OPTS_GET(opts, tcx.relative_fd, 0);
+ relative_id = OPTS_GET(opts, tcx.relative_id, 0);
+ if (relative_fd && relative_id)
+ return libbpf_err(-EINVAL);
+ if (relative_id) {
+ attr.link_create.tcx.relative_id = relative_id;
+ attr.link_create.flags |= BPF_F_ID;
+ } else {
+ attr.link_create.tcx.relative_fd = relative_fd;
+ }
+ attr.link_create.tcx.expected_revision = OPTS_GET(opts, tcx.expected_revision, 0);
+ if (!OPTS_ZEROED(opts, tcx))
+ return libbpf_err(-EINVAL);
+ break;
default:
if (!OPTS_ZEROED(opts, flags))
return libbpf_err(-EINVAL);
@@ -833,8 +891,7 @@ int bpf_iter_create(int link_fd)
return libbpf_err_errno(fd);
}
-int bpf_prog_query_opts(int target_fd,
- enum bpf_attach_type type,
+int bpf_prog_query_opts(int target, enum bpf_attach_type type,
struct bpf_prog_query_opts *opts)
{
const size_t attr_sz = offsetofend(union bpf_attr, query);
@@ -845,18 +902,20 @@ int bpf_prog_query_opts(int target_fd,
return libbpf_err(-EINVAL);
memset(&attr, 0, attr_sz);
-
- attr.query.target_fd = target_fd;
- attr.query.attach_type = type;
- attr.query.query_flags = OPTS_GET(opts, query_flags, 0);
- attr.query.prog_cnt = OPTS_GET(opts, prog_cnt, 0);
- attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL));
- attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL));
+ attr.query.target_fd = target;
+ attr.query.attach_type = type;
+ attr.query.query_flags = OPTS_GET(opts, query_flags, 0);
+ attr.query.count = OPTS_GET(opts, count, 0);
+ attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL));
+ attr.query.link_ids = ptr_to_u64(OPTS_GET(opts, link_ids, NULL));
+ attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL));
+ attr.query.link_attach_flags = ptr_to_u64(OPTS_GET(opts, link_attach_flags, NULL));
ret = sys_bpf(BPF_PROG_QUERY, &attr, attr_sz);
OPTS_SET(opts, attach_flags, attr.query.attach_flags);
- OPTS_SET(opts, prog_cnt, attr.query.prog_cnt);
+ OPTS_SET(opts, revision, attr.query.revision);
+ OPTS_SET(opts, count, attr.query.count);
return libbpf_err_errno(ret);
}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 9aa0ee473754..044a74ffc38a 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -312,22 +312,68 @@ LIBBPF_API int bpf_obj_get(const char *pathname);
LIBBPF_API int bpf_obj_get_opts(const char *pathname,
const struct bpf_obj_get_opts *opts);
-struct bpf_prog_attach_opts {
- size_t sz; /* size of this struct for forward/backward compatibility */
- unsigned int flags;
- int replace_prog_fd;
-};
-#define bpf_prog_attach_opts__last_field replace_prog_fd
-
LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
enum bpf_attach_type type, unsigned int flags);
-LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
- enum bpf_attach_type type,
- const struct bpf_prog_attach_opts *opts);
LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type);
+struct bpf_prog_attach_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+ union {
+ int replace_prog_fd;
+ int replace_fd;
+ };
+ int relative_fd;
+ __u32 relative_id;
+ __u64 expected_revision;
+ size_t :0;
+};
+#define bpf_prog_attach_opts__last_field expected_revision
+
+struct bpf_prog_detach_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+ int relative_fd;
+ __u32 relative_id;
+ __u64 expected_revision;
+ size_t :0;
+};
+#define bpf_prog_detach_opts__last_field expected_revision
+
+/**
+ * @brief **bpf_prog_attach_opts()** attaches the BPF program corresponding to
+ * *prog_fd* to a *target* which can represent a file descriptor or netdevice
+ * ifindex.
+ *
+ * @param prog_fd BPF program file descriptor
+ * @param target attach location file descriptor or ifindex
+ * @param type attach type for the BPF program
+ * @param opts options for configuring the attachment
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int target,
+ enum bpf_attach_type type,
+ const struct bpf_prog_attach_opts *opts);
+
+/**
+ * @brief **bpf_prog_detach_opts()** detaches the BPF program corresponding to
+ * *prog_fd* from a *target* which can represent a file descriptor or netdevice
+ * ifindex.
+ *
+ * @param prog_fd BPF program file descriptor
+ * @param target detach location file descriptor or ifindex
+ * @param type detach type for the BPF program
+ * @param opts options for configuring the detachment
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_prog_detach_opts(int prog_fd, int target,
+ enum bpf_attach_type type,
+ const struct bpf_prog_detach_opts *opts);
+
union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */
struct bpf_link_create_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
@@ -349,6 +395,17 @@ struct bpf_link_create_opts {
struct {
__u64 cookie;
} tracing;
+ struct {
+ __u32 pf;
+ __u32 hooknum;
+ __s32 priority;
+ __u32 flags;
+ } netfilter;
+ struct {
+ __u32 relative_fd;
+ __u32 relative_id;
+ __u64 expected_revision;
+ } tcx;
};
size_t :0;
};
@@ -489,13 +546,31 @@ struct bpf_prog_query_opts {
__u32 query_flags;
__u32 attach_flags; /* output argument */
__u32 *prog_ids;
- __u32 prog_cnt; /* input+output argument */
+ union {
+ /* input+output argument */
+ __u32 prog_cnt;
+ __u32 count;
+ };
__u32 *prog_attach_flags;
+ __u32 *link_ids;
+ __u32 *link_attach_flags;
+ __u64 revision;
+ size_t :0;
};
-#define bpf_prog_query_opts__last_field prog_attach_flags
+#define bpf_prog_query_opts__last_field revision
-LIBBPF_API int bpf_prog_query_opts(int target_fd,
- enum bpf_attach_type type,
+/**
+ * @brief **bpf_prog_query_opts()** queries the BPF programs and BPF links
+ * which are attached to *target* which can represent a file descriptor or
+ * netdevice ifindex.
+ *
+ * @param target query location file descriptor or ifindex
+ * @param type attach type for the BPF program
+ * @param opts options for configuring the query
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_prog_query_opts(int target, enum bpf_attach_type type,
struct bpf_prog_query_opts *opts);
LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
__u32 query_flags, __u32 *attach_flags,
diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
index 0a5bf1937a7c..c12f8320e668 100644
--- a/tools/lib/bpf/hashmap.h
+++ b/tools/lib/bpf/hashmap.h
@@ -80,16 +80,6 @@ struct hashmap {
size_t sz;
};
-#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \
- .hash_fn = (hash_fn), \
- .equal_fn = (equal_fn), \
- .ctx = (ctx), \
- .buckets = NULL, \
- .cap = 0, \
- .cap_bits = 0, \
- .sz = 0, \
-}
-
void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
hashmap_equal_fn equal_fn, void *ctx);
struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 214f828ece6b..17883f5a44b9 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -118,6 +118,8 @@ static const char * const attach_type_name[] = {
[BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
[BPF_STRUCT_OPS] = "struct_ops",
[BPF_NETFILTER] = "netfilter",
+ [BPF_TCX_INGRESS] = "tcx_ingress",
+ [BPF_TCX_EGRESS] = "tcx_egress",
};
static const char * const link_type_name[] = {
@@ -132,6 +134,7 @@ static const char * const link_type_name[] = {
[BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
[BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
[BPF_LINK_TYPE_NETFILTER] = "netfilter",
+ [BPF_LINK_TYPE_TCX] = "tcx",
};
static const char * const map_type_name[] = {
@@ -5471,6 +5474,10 @@ static int load_module_btfs(struct bpf_object *obj)
err = bpf_btf_get_next_id(id, &id);
if (err && errno == ENOENT)
return 0;
+ if (err && errno == EPERM) {
+ pr_debug("skipping module BTFs loading, missing privileges\n");
+ return 0;
+ }
if (err) {
err = -errno;
pr_warn("failed to iterate BTF objects: %d\n", err);
@@ -6157,7 +6164,11 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra
if (main_prog == subprog)
return 0;
relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
- if (!relos)
+ /* if new count is zero, reallocarray can return a valid NULL result;
+ * in this case the previous pointer will be freed, so we *have to*
+ * reassign old pointer to the new value (even if it's NULL)
+ */
+ if (!relos && new_cnt)
return -ENOMEM;
if (subprog->nr_reloc)
memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
@@ -8528,7 +8539,8 @@ int bpf_program__set_insns(struct bpf_program *prog,
return -EBUSY;
insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
- if (!insns) {
+ /* NULL is a valid return from reallocarray if the new count is zero */
+ if (!insns && new_insn_cnt) {
pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
return -ENOMEM;
}
@@ -8558,13 +8570,31 @@ enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
return prog->type;
}
+static size_t custom_sec_def_cnt;
+static struct bpf_sec_def *custom_sec_defs;
+static struct bpf_sec_def custom_fallback_def;
+static bool has_custom_fallback_def;
+static int last_custom_sec_def_handler_id;
+
int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
{
if (prog->obj->loaded)
return libbpf_err(-EBUSY);
+ /* if type is not changed, do nothing */
+ if (prog->type == type)
+ return 0;
+
prog->type = type;
- prog->sec_def = NULL;
+
+ /* If a program type was changed, we need to reset associated SEC()
+ * handler, as it will be invalid now. The only exception is a generic
+ * fallback handler, which by definition is program type-agnostic and
+ * is a catch-all custom handler, optionally set by the application,
+ * so should be able to handle any type of BPF program.
+ */
+ if (prog->sec_def != &custom_fallback_def)
+ prog->sec_def = NULL;
return 0;
}
@@ -8669,9 +8699,13 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt),
- SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE),
- SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE),
- SEC_DEF("action", SCHED_ACT, 0, SEC_NONE),
+ SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
+ SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */
+ SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
+ SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
+ SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
+ SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
+ SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp),
SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp),
SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
@@ -8740,13 +8774,6 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE),
};
-static size_t custom_sec_def_cnt;
-static struct bpf_sec_def *custom_sec_defs;
-static struct bpf_sec_def custom_fallback_def;
-static bool has_custom_fallback_def;
-
-static int last_custom_sec_def_handler_id;
-
int libbpf_register_prog_handler(const char *sec,
enum bpf_prog_type prog_type,
enum bpf_attach_type exp_attach_type,
@@ -8826,7 +8853,11 @@ int libbpf_unregister_prog_handler(int handler_id)
/* try to shrink the array, but it's ok if we couldn't */
sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
- if (sec_defs)
+ /* if new count is zero, reallocarray can return a valid NULL result;
+ * in this case the previous pointer will be freed, so we *have to*
+ * reassign old pointer to the new value (even if it's NULL)
+ */
+ if (sec_defs || custom_sec_def_cnt == 0)
custom_sec_defs = sec_defs;
return 0;
@@ -10224,6 +10255,18 @@ static const char *tracefs_uprobe_events(void)
return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
}
+static const char *tracefs_available_filter_functions(void)
+{
+ return use_debugfs() ? DEBUGFS"/available_filter_functions"
+ : TRACEFS"/available_filter_functions";
+}
+
+static const char *tracefs_available_filter_functions_addrs(void)
+{
+ return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs"
+ : TRACEFS"/available_filter_functions_addrs";
+}
+
static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
const char *kfunc_name, size_t offset)
{
@@ -10539,25 +10582,158 @@ struct kprobe_multi_resolve {
size_t cnt;
};
-static int
-resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
- const char *sym_name, void *ctx)
+struct avail_kallsyms_data {
+ char **syms;
+ size_t cnt;
+ struct kprobe_multi_resolve *res;
+};
+
+static int avail_func_cmp(const void *a, const void *b)
{
- struct kprobe_multi_resolve *res = ctx;
+ return strcmp(*(const char **)a, *(const char **)b);
+}
+
+static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
+ const char *sym_name, void *ctx)
+{
+ struct avail_kallsyms_data *data = ctx;
+ struct kprobe_multi_resolve *res = data->res;
int err;
- if (!glob_match(sym_name, res->pattern))
+ if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
return 0;
- err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long),
- res->cnt + 1);
+ err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
if (err)
return err;
- res->addrs[res->cnt++] = (unsigned long) sym_addr;
+ res->addrs[res->cnt++] = (unsigned long)sym_addr;
return 0;
}
+static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res)
+{
+ const char *available_functions_file = tracefs_available_filter_functions();
+ struct avail_kallsyms_data data;
+ char sym_name[500];
+ FILE *f;
+ int err = 0, ret, i;
+ char **syms = NULL;
+ size_t cap = 0, cnt = 0;
+
+ f = fopen(available_functions_file, "re");
+ if (!f) {
+ err = -errno;
+ pr_warn("failed to open %s: %d\n", available_functions_file, err);
+ return err;
+ }
+
+ while (true) {
+ char *name;
+
+ ret = fscanf(f, "%499s%*[^\n]\n", sym_name);
+ if (ret == EOF && feof(f))
+ break;
+
+ if (ret != 1) {
+ pr_warn("failed to parse available_filter_functions entry: %d\n", ret);
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (!glob_match(sym_name, res->pattern))
+ continue;
+
+ err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1);
+ if (err)
+ goto cleanup;
+
+ name = strdup(sym_name);
+ if (!name) {
+ err = -errno;
+ goto cleanup;
+ }
+
+ syms[cnt++] = name;
+ }
+
+ /* no entries found, bail out */
+ if (cnt == 0) {
+ err = -ENOENT;
+ goto cleanup;
+ }
+
+ /* sort available functions */
+ qsort(syms, cnt, sizeof(*syms), avail_func_cmp);
+
+ data.syms = syms;
+ data.res = res;
+ data.cnt = cnt;
+ libbpf_kallsyms_parse(avail_kallsyms_cb, &data);
+
+ if (res->cnt == 0)
+ err = -ENOENT;
+
+cleanup:
+ for (i = 0; i < cnt; i++)
+ free((char *)syms[i]);
+ free(syms);
+
+ fclose(f);
+ return err;
+}
+
+static bool has_available_filter_functions_addrs(void)
+{
+ return access(tracefs_available_filter_functions_addrs(), R_OK) != -1;
+}
+
+static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res)
+{
+ const char *available_path = tracefs_available_filter_functions_addrs();
+ char sym_name[500];
+ FILE *f;
+ int ret, err = 0;
+ unsigned long long sym_addr;
+
+ f = fopen(available_path, "re");
+ if (!f) {
+ err = -errno;
+ pr_warn("failed to open %s: %d\n", available_path, err);
+ return err;
+ }
+
+ while (true) {
+ ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name);
+ if (ret == EOF && feof(f))
+ break;
+
+ if (ret != 2) {
+ pr_warn("failed to parse available_filter_functions_addrs entry: %d\n",
+ ret);
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (!glob_match(sym_name, res->pattern))
+ continue;
+
+ err = libbpf_ensure_mem((void **)&res->addrs, &res->cap,
+ sizeof(*res->addrs), res->cnt + 1);
+ if (err)
+ goto cleanup;
+
+ res->addrs[res->cnt++] = (unsigned long)sym_addr;
+ }
+
+ if (res->cnt == 0)
+ err = -ENOENT;
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
struct bpf_link *
bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
const char *pattern,
@@ -10594,13 +10770,12 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
return libbpf_err_ptr(-EINVAL);
if (pattern) {
- err = libbpf_kallsyms_parse(resolve_kprobe_multi_cb, &res);
+ if (has_available_filter_functions_addrs())
+ err = libbpf_available_kprobes_parse(&res);
+ else
+ err = libbpf_available_kallsyms_parse(&res);
if (err)
goto error;
- if (!res.cnt) {
- err = -ENOENT;
- goto error;
- }
addrs = res.addrs;
cnt = res.cnt;
}
@@ -11680,11 +11855,10 @@ static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_li
}
static struct bpf_link *
-bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id,
- const char *target_name)
+bpf_program_attach_fd(const struct bpf_program *prog,
+ int target_fd, const char *target_name,
+ const struct bpf_link_create_opts *opts)
{
- DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
- .target_btf_id = btf_id);
enum bpf_attach_type attach_type;
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
@@ -11702,7 +11876,7 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id
link->detach = &bpf_link__detach_fd;
attach_type = bpf_program__expected_attach_type(prog);
- link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
+ link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
if (link_fd < 0) {
link_fd = -errno;
free(link);
@@ -11718,19 +11892,54 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id
struct bpf_link *
bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
{
- return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
+ return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
}
struct bpf_link *
bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
{
- return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
+ return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
}
struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
{
/* target_fd/target_ifindex use the same field in LINK_CREATE */
- return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
+ return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
+}
+
+struct bpf_link *
+bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
+ const struct bpf_tcx_opts *opts)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
+ __u32 relative_id;
+ int relative_fd;
+
+ if (!OPTS_VALID(opts, bpf_tcx_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ relative_id = OPTS_GET(opts, relative_id, 0);
+ relative_fd = OPTS_GET(opts, relative_fd, 0);
+
+ /* validate we don't have unexpected combinations of non-zero fields */
+ if (!ifindex) {
+ pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
+ prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+ if (relative_fd && relative_id) {
+ pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
+ prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+
+ link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
+ link_create_opts.tcx.relative_fd = relative_fd;
+ link_create_opts.tcx.relative_id = relative_id;
+ link_create_opts.flags = OPTS_GET(opts, flags, 0);
+
+ /* target_fd/target_ifindex use the same field in LINK_CREATE */
+ return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
}
struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
@@ -11752,11 +11961,16 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
}
if (target_fd) {
+ LIBBPF_OPTS(bpf_link_create_opts, target_opts);
+
btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
if (btf_id < 0)
return libbpf_err_ptr(btf_id);
- return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
+ target_opts.target_btf_id = btf_id;
+
+ return bpf_program_attach_fd(prog, target_fd, "freplace",
+ &target_opts);
} else {
/* no target, so use raw_tracepoint_open for compatibility
* with old kernels
@@ -11811,6 +12025,48 @@ static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_l
return libbpf_get_error(*link);
}
+struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
+ const struct bpf_netfilter_opts *opts)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, lopts);
+ struct bpf_link *link;
+ int prog_fd, link_fd;
+
+ if (!OPTS_VALID(opts, bpf_netfilter_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link)
+ return libbpf_err_ptr(-ENOMEM);
+
+ link->detach = &bpf_link__detach_fd;
+
+ lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
+ lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
+ lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
+ lopts.netfilter.flags = OPTS_GET(opts, flags, 0);
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
+ if (link_fd < 0) {
+ char errmsg[STRERR_BUFSIZE];
+
+ link_fd = -errno;
+ free(link);
+ pr_warn("prog '%s': failed to attach to netfilter: %s\n",
+ prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+ return libbpf_err_ptr(link_fd);
+ }
+ link->fd = link_fd;
+
+ return link;
+}
+
struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
{
struct bpf_link *link = NULL;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 754da73c643b..55b97b208754 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -718,6 +718,36 @@ LIBBPF_API struct bpf_link *
bpf_program__attach_freplace(const struct bpf_program *prog,
int target_fd, const char *attach_func_name);
+struct bpf_netfilter_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+
+ __u32 pf;
+ __u32 hooknum;
+ __s32 priority;
+ __u32 flags;
+};
+#define bpf_netfilter_opts__last_field flags
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_netfilter(const struct bpf_program *prog,
+ const struct bpf_netfilter_opts *opts);
+
+struct bpf_tcx_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ __u32 flags;
+ __u32 relative_fd;
+ __u32 relative_id;
+ __u64 expected_revision;
+ size_t :0;
+};
+#define bpf_tcx_opts__last_field expected_revision
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
+ const struct bpf_tcx_opts *opts);
+
struct bpf_map;
LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map);
@@ -1090,9 +1120,10 @@ struct bpf_xdp_query_opts {
__u32 skb_prog_id; /* output */
__u8 attach_mode; /* output */
__u64 feature_flags; /* output */
+ __u32 xdp_zc_max_segs; /* output */
size_t :0;
};
-#define bpf_xdp_query_opts__last_field feature_flags
+#define bpf_xdp_query_opts__last_field xdp_zc_max_segs
LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags,
const struct bpf_xdp_attach_opts *opts);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 7521a2fb7626..9c7538dd5835 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -395,4 +395,7 @@ LIBBPF_1.2.0 {
LIBBPF_1.3.0 {
global:
bpf_obj_pin_opts;
+ bpf_prog_detach_opts;
+ bpf_program__attach_netfilter;
+ bpf_program__attach_tcx;
} LIBBPF_1.2.0;
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
index 9a7937f339df..b7060f254486 100644
--- a/tools/lib/bpf/libbpf_common.h
+++ b/tools/lib/bpf/libbpf_common.h
@@ -70,4 +70,20 @@
}; \
})
+/* Helper macro to clear and optionally reinitialize libbpf options struct
+ *
+ * Small helper macro to reset all fields and to reinitialize the common
+ * structure size member. Values provided by users in struct initializer-
+ * syntax as varargs can be provided as well to reinitialize options struct
+ * specific members.
+ */
+#define LIBBPF_OPTS_RESET(NAME, ...) \
+ do { \
+ memset(&NAME, 0, sizeof(NAME)); \
+ NAME = (typeof(NAME)) { \
+ .sz = sizeof(NAME), \
+ __VA_ARGS__ \
+ }; \
+ } while (0)
+
#endif /* __LIBBPF_LIBBPF_COMMON_H */
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 84dd5fa14905..090bcf6e3b3d 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -45,6 +45,7 @@ struct xdp_id_md {
struct xdp_features_md {
int ifindex;
+ __u32 xdp_zc_max_segs;
__u64 flags;
};
@@ -421,6 +422,9 @@ static int parse_xdp_features(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
return NL_CONT;
md->flags = libbpf_nla_getattr_u64(tb[NETDEV_A_DEV_XDP_FEATURES]);
+ if (tb[NETDEV_A_DEV_XDP_ZC_MAX_SEGS])
+ md->xdp_zc_max_segs =
+ libbpf_nla_getattr_u32(tb[NETDEV_A_DEV_XDP_ZC_MAX_SEGS]);
return NL_DONE;
}
@@ -493,6 +497,7 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
return libbpf_err(err);
opts->feature_flags = md.flags;
+ opts->xdp_zc_max_segs = md.xdp_zc_max_segs;
skip_feature_flags:
return 0;
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index f1a141555f08..37455d00b239 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -852,8 +852,11 @@ static int bpf_link_usdt_detach(struct bpf_link *link)
* system is so exhausted on memory, it's the least of user's
* concerns, probably.
* So just do our best here to return those IDs to usdt_manager.
+ * Another edge case when we can legitimately get NULL is when
+ * new_cnt is zero, which can happen in some edge cases, so we
+ * need to be careful about that.
*/
- if (new_free_ids) {
+ if (new_free_ids || new_cnt == 0) {
memcpy(new_free_ids + man->free_spec_cnt, usdt_link->spec_ids,
usdt_link->spec_cnt * sizeof(*usdt_link->spec_ids));
man->free_spec_ids = new_free_ids;
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 666b56f22a41..5c60a7cea732 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -8,6 +8,7 @@ TARGETS += cachestat
TARGETS += capabilities
TARGETS += cgroup
TARGETS += clone3
+TARGETS += connector
TARGETS += core
TARGETS += cpufreq
TARGETS += cpu-hotplug
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 08adc805878b..3b61e8b35d62 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -10,3 +10,5 @@ kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: a
kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0
kprobe_multi_test/skel_api # libbpf: failed to load BPF skeleton 'kprobe_multi': -3
module_attach # prog 'kprobe_multi': failed to auto-attach: -95
+fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524
+fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 538df8fb8c42..882be03b179f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -12,7 +12,11 @@ BPFDIR := $(LIBDIR)/bpf
TOOLSINCDIR := $(TOOLSDIR)/include
BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
APIDIR := $(TOOLSINCDIR)/uapi
+ifneq ($(O),)
+GENDIR := $(O)/include/generated
+else
GENDIR := $(abspath ../../../../include/generated)
+endif
GENHDR := $(GENDIR)/autoconf.h
HOSTPKG_CONFIG := pkg-config
@@ -331,7 +335,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \
OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ)
# Get Clang's default includes on this system, as opposed to those seen by
-# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
@@ -372,12 +376,12 @@ $(OUTPUT)/cgroup_getset_retval_hooks.o: cgroup_getset_retval_hooks.h
# $3 - CFLAGS
define CLANG_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v3 -o $2
+ $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v3 -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v2 -o $2
+ $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v2 -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
@@ -644,11 +648,13 @@ $(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h
$(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.skel.h
$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
+$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(TESTING_HELPERS) \
$(TRACE_HELPERS) \
+ $(CGROUP_HELPERS) \
$(OUTPUT)/bench_count.o \
$(OUTPUT)/bench_rename.o \
$(OUTPUT)/bench_trigger.o \
@@ -661,6 +667,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \
$(OUTPUT)/bench_bpf_hashmap_lookup.o \
$(OUTPUT)/bench_local_storage_create.o \
+ $(OUTPUT)/bench_htab_mem.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 41fe5a82b88b..73ce11b0547d 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -279,6 +279,7 @@ extern struct argp bench_local_storage_rcu_tasks_trace_argp;
extern struct argp bench_strncmp_argp;
extern struct argp bench_hashmap_lookup_argp;
extern struct argp bench_local_storage_create_argp;
+extern struct argp bench_htab_mem_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -290,6 +291,7 @@ static const struct argp_child bench_parsers[] = {
"local_storage RCU Tasks Trace slowdown benchmark", 0 },
{ &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
{ &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 },
+ { &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
{},
};
@@ -520,6 +522,7 @@ extern const struct bench bench_local_storage_cache_hashmap_control;
extern const struct bench bench_local_storage_tasks_trace;
extern const struct bench bench_bpf_hashmap_lookup;
extern const struct bench bench_local_storage_create;
+extern const struct bench bench_htab_mem;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -561,6 +564,7 @@ static const struct bench *benchs[] = {
&bench_local_storage_tasks_trace,
&bench_bpf_hashmap_lookup,
&bench_local_storage_create,
+ &bench_htab_mem,
};
static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
new file mode 100644
index 000000000000..9146d3f414d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <argp.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <fcntl.h>
+
+#include "bench.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+#include "htab_mem_bench.skel.h"
+
+struct htab_mem_use_case {
+ const char *name;
+ const char **progs;
+ /* Do synchronization between addition thread and deletion thread */
+ bool need_sync;
+};
+
+static struct htab_mem_ctx {
+ const struct htab_mem_use_case *uc;
+ struct htab_mem_bench *skel;
+ pthread_barrier_t *notify;
+ int fd;
+} ctx;
+
+const char *ow_progs[] = {"overwrite", NULL};
+const char *batch_progs[] = {"batch_add_batch_del", NULL};
+const char *add_del_progs[] = {"add_only", "del_only", NULL};
+const static struct htab_mem_use_case use_cases[] = {
+ { .name = "overwrite", .progs = ow_progs },
+ { .name = "batch_add_batch_del", .progs = batch_progs },
+ { .name = "add_del_on_diff_cpu", .progs = add_del_progs, .need_sync = true },
+};
+
+static struct htab_mem_args {
+ u32 value_size;
+ const char *use_case;
+ bool preallocated;
+} args = {
+ .value_size = 8,
+ .use_case = "overwrite",
+ .preallocated = false,
+};
+
+enum {
+ ARG_VALUE_SIZE = 10000,
+ ARG_USE_CASE = 10001,
+ ARG_PREALLOCATED = 10002,
+};
+
+static const struct argp_option opts[] = {
+ { "value-size", ARG_VALUE_SIZE, "VALUE_SIZE", 0,
+ "Set the value size of hash map (default 8)" },
+ { "use-case", ARG_USE_CASE, "USE_CASE", 0,
+ "Set the use case of hash map: overwrite|batch_add_batch_del|add_del_on_diff_cpu" },
+ { "preallocated", ARG_PREALLOCATED, NULL, 0, "use preallocated hash map" },
+ {},
+};
+
+static error_t htab_mem_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_VALUE_SIZE:
+ args.value_size = strtoul(arg, NULL, 10);
+ if (args.value_size > 4096) {
+ fprintf(stderr, "too big value size %u\n", args.value_size);
+ argp_usage(state);
+ }
+ break;
+ case ARG_USE_CASE:
+ args.use_case = strdup(arg);
+ if (!args.use_case) {
+ fprintf(stderr, "no mem for use-case\n");
+ argp_usage(state);
+ }
+ break;
+ case ARG_PREALLOCATED:
+ args.preallocated = true;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_htab_mem_argp = {
+ .options = opts,
+ .parser = htab_mem_parse_arg,
+};
+
+static void htab_mem_validate(void)
+{
+ if (!strcmp(use_cases[2].name, args.use_case) && env.producer_cnt % 2) {
+ fprintf(stderr, "%s needs an even number of producers\n", args.use_case);
+ exit(1);
+ }
+}
+
+static int htab_mem_bench_init_barriers(void)
+{
+ pthread_barrier_t *barriers;
+ unsigned int i, nr;
+
+ if (!ctx.uc->need_sync)
+ return 0;
+
+ nr = (env.producer_cnt + 1) / 2;
+ barriers = calloc(nr, sizeof(*barriers));
+ if (!barriers)
+ return -1;
+
+ /* Used for synchronization between two threads */
+ for (i = 0; i < nr; i++)
+ pthread_barrier_init(&barriers[i], NULL, 2);
+
+ ctx.notify = barriers;
+ return 0;
+}
+
+static void htab_mem_bench_exit_barriers(void)
+{
+ unsigned int i, nr;
+
+ if (!ctx.notify)
+ return;
+
+ nr = (env.producer_cnt + 1) / 2;
+ for (i = 0; i < nr; i++)
+ pthread_barrier_destroy(&ctx.notify[i]);
+ free(ctx.notify);
+}
+
+static const struct htab_mem_use_case *htab_mem_find_use_case_or_exit(const char *name)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(use_cases); i++) {
+ if (!strcmp(name, use_cases[i].name))
+ return &use_cases[i];
+ }
+
+ fprintf(stderr, "no such use-case: %s\n", name);
+ fprintf(stderr, "available use case:");
+ for (i = 0; i < ARRAY_SIZE(use_cases); i++)
+ fprintf(stderr, " %s", use_cases[i].name);
+ fprintf(stderr, "\n");
+ exit(1);
+}
+
+static void htab_mem_setup(void)
+{
+ struct bpf_map *map;
+ const char **names;
+ int err;
+
+ setup_libbpf();
+
+ ctx.uc = htab_mem_find_use_case_or_exit(args.use_case);
+ err = htab_mem_bench_init_barriers();
+ if (err) {
+ fprintf(stderr, "failed to init barrier\n");
+ exit(1);
+ }
+
+ ctx.fd = cgroup_setup_and_join("/htab_mem");
+ if (ctx.fd < 0)
+ goto cleanup;
+
+ ctx.skel = htab_mem_bench__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ goto cleanup;
+ }
+
+ map = ctx.skel->maps.htab;
+ bpf_map__set_value_size(map, args.value_size);
+ /* Ensure that different CPUs can operate on different subset */
+ bpf_map__set_max_entries(map, MAX(8192, 64 * env.nr_cpus));
+ if (args.preallocated)
+ bpf_map__set_map_flags(map, bpf_map__map_flags(map) & ~BPF_F_NO_PREALLOC);
+
+ names = ctx.uc->progs;
+ while (*names) {
+ struct bpf_program *prog;
+
+ prog = bpf_object__find_program_by_name(ctx.skel->obj, *names);
+ if (!prog) {
+ fprintf(stderr, "no such program %s\n", *names);
+ goto cleanup;
+ }
+ bpf_program__set_autoload(prog, true);
+ names++;
+ }
+ ctx.skel->bss->nr_thread = env.producer_cnt;
+
+ err = htab_mem_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ goto cleanup;
+ }
+ err = htab_mem_bench__attach(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to attach skeleton\n");
+ goto cleanup;
+ }
+ return;
+
+cleanup:
+ htab_mem_bench__destroy(ctx.skel);
+ htab_mem_bench_exit_barriers();
+ if (ctx.fd >= 0) {
+ close(ctx.fd);
+ cleanup_cgroup_environment();
+ }
+ exit(1);
+}
+
+static void htab_mem_add_fn(pthread_barrier_t *notify)
+{
+ while (true) {
+ /* Do addition */
+ (void)syscall(__NR_getpgid, 0);
+ /* Notify deletion thread to do deletion */
+ pthread_barrier_wait(notify);
+ /* Wait for deletion to complete */
+ pthread_barrier_wait(notify);
+ }
+}
+
+static void htab_mem_delete_fn(pthread_barrier_t *notify)
+{
+ while (true) {
+ /* Wait for addition to complete */
+ pthread_barrier_wait(notify);
+ /* Do deletion */
+ (void)syscall(__NR_getppid);
+ /* Notify addition thread to do addition */
+ pthread_barrier_wait(notify);
+ }
+}
+
+static void *htab_mem_producer(void *arg)
+{
+ pthread_barrier_t *notify;
+ int seq;
+
+ if (!ctx.uc->need_sync) {
+ while (true)
+ (void)syscall(__NR_getpgid, 0);
+ return NULL;
+ }
+
+ seq = (long)arg;
+ notify = &ctx.notify[seq / 2];
+ if (seq & 1)
+ htab_mem_delete_fn(notify);
+ else
+ htab_mem_add_fn(notify);
+ return NULL;
+}
+
+static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
+{
+ char buf[32];
+ ssize_t got;
+ int fd;
+
+ fd = openat(ctx.fd, name, O_RDONLY);
+ if (fd < 0) {
+ /* cgroup v1 ? */
+ fprintf(stderr, "no %s\n", name);
+ *value = 0;
+ return;
+ }
+
+ got = read(fd, buf, sizeof(buf) - 1);
+ if (got <= 0) {
+ *value = 0;
+ return;
+ }
+ buf[got] = 0;
+
+ *value = strtoull(buf, NULL, 0);
+
+ close(fd);
+}
+
+static void htab_mem_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->op_cnt, 0) / env.producer_cnt;
+ htab_mem_read_mem_cgrp_file("memory.current", &res->gp_ct);
+}
+
+static void htab_mem_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double loop, mem;
+
+ loop = res->hits / 1000.0 / (delta_ns / 1000000000.0);
+ mem = res->gp_ct / 1048576.0;
+ printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+ printf("per-prod-op %7.2lfk/s, memory usage %7.2lfMiB\n", loop, mem);
+}
+
+static void htab_mem_report_final(struct bench_res res[], int res_cnt)
+{
+ double mem_mean = 0.0, mem_stddev = 0.0;
+ double loop_mean = 0.0, loop_stddev = 0.0;
+ unsigned long peak_mem;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ loop_mean += res[i].hits / 1000.0 / (0.0 + res_cnt);
+ mem_mean += res[i].gp_ct / 1048576.0 / (0.0 + res_cnt);
+ }
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ loop_stddev += (loop_mean - res[i].hits / 1000.0) *
+ (loop_mean - res[i].hits / 1000.0) /
+ (res_cnt - 1.0);
+ mem_stddev += (mem_mean - res[i].gp_ct / 1048576.0) *
+ (mem_mean - res[i].gp_ct / 1048576.0) /
+ (res_cnt - 1.0);
+ }
+ loop_stddev = sqrt(loop_stddev);
+ mem_stddev = sqrt(mem_stddev);
+ }
+
+ htab_mem_read_mem_cgrp_file("memory.peak", &peak_mem);
+ printf("Summary: per-prod-op %7.2lf \u00B1 %7.2lfk/s, memory usage %7.2lf \u00B1 %7.2lfMiB,"
+ " peak memory usage %7.2lfMiB\n",
+ loop_mean, loop_stddev, mem_mean, mem_stddev, peak_mem / 1048576.0);
+
+ cleanup_cgroup_environment();
+}
+
+const struct bench bench_htab_mem = {
+ .name = "htab-mem",
+ .argp = &bench_htab_mem_argp,
+ .validate = htab_mem_validate,
+ .setup = htab_mem_setup,
+ .producer_thread = htab_mem_producer,
+ .measure = htab_mem_measure,
+ .report_progress = htab_mem_report_progress,
+ .report_final = htab_mem_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index 3ca14ad36607..e1ee979e6acc 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -399,7 +399,7 @@ static void perfbuf_libbpf_setup(void)
ctx->skel = perfbuf_setup_skeleton();
memset(&attr, 0, sizeof(attr));
- attr.config = PERF_COUNT_SW_BPF_OUTPUT,
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT;
attr.type = PERF_TYPE_SOFTWARE;
attr.sample_type = PERF_SAMPLE_RAW;
/* notify only every Nth sample */
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh b/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh
new file mode 100755
index 000000000000..9ff5832463a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+htab_mem()
+{
+ echo -n "per-prod-op: "
+ echo -n "$*" | sed -E "s/.* per-prod-op\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+k\/s).*/\1/"
+ echo -n -e ", avg mem: "
+ echo -n "$*" | sed -E "s/.* memory usage\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+MiB).*/\1/"
+ echo -n ", peak mem: "
+ echo "$*" | sed -E "s/.* peak memory usage\s+([0-9]+\.[0-9]+MiB).*/\1/"
+}
+
+summarize_htab_mem()
+{
+ local bench="$1"
+ local summary=$(echo $2 | tail -n1)
+
+ printf "%-20s %s\n" "$bench" "$(htab_mem $summary)"
+}
+
+htab_mem_bench()
+{
+ local name
+
+ for name in overwrite batch_add_batch_del add_del_on_diff_cpu
+ do
+ summarize_htab_mem "$name" "$($RUN_BENCH htab-mem --use-case $name -p8 "$@")"
+ done
+}
+
+header "preallocated"
+htab_mem_bench "--preallocated"
+
+header "normal bpf ma"
+htab_mem_bench
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index aaf6ef1201c7..a6f991b56345 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -34,6 +34,11 @@ struct bpf_testmod_struct_arg_3 {
int b[];
};
+struct bpf_testmod_struct_arg_4 {
+ u64 a;
+ int b;
+};
+
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in bpf_testmod.ko BTF");
@@ -75,6 +80,24 @@ bpf_testmod_test_struct_arg_6(struct bpf_testmod_struct_arg_3 *a) {
return bpf_testmod_test_struct_arg_result;
}
+noinline int
+bpf_testmod_test_struct_arg_7(u64 a, void *b, short c, int d, void *e,
+ struct bpf_testmod_struct_arg_4 f)
+{
+ bpf_testmod_test_struct_arg_result = a + (long)b + c + d +
+ (long)e + f.a + f.b;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_8(u64 a, void *b, short c, int d, void *e,
+ struct bpf_testmod_struct_arg_4 f, int g)
+{
+ bpf_testmod_test_struct_arg_result = a + (long)b + c + d +
+ (long)e + f.a + f.b + g;
+ return bpf_testmod_test_struct_arg_result;
+}
+
__bpf_kfunc void
bpf_testmod_test_mod_kfunc(int i)
{
@@ -191,6 +214,20 @@ noinline int bpf_testmod_fentry_test3(char a, int b, u64 c)
return a + b + c;
}
+noinline int bpf_testmod_fentry_test7(u64 a, void *b, short c, int d,
+ void *e, char f, int g)
+{
+ return a + (long)b + c + d + (long)e + f + g;
+}
+
+noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d,
+ void *e, char f, int g,
+ unsigned int h, long i, __u64 j,
+ unsigned long k)
+{
+ return a + (long)b + c + d + (long)e + f + g + h + i + j + k;
+}
+
int bpf_testmod_fentry_ok;
noinline ssize_t
@@ -206,6 +243,7 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
struct bpf_testmod_struct_arg_1 struct_arg1 = {10};
struct bpf_testmod_struct_arg_2 struct_arg2 = {2, 3};
struct bpf_testmod_struct_arg_3 *struct_arg3;
+ struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22};
int i = 1;
while (bpf_testmod_return_ptr(i))
@@ -216,6 +254,11 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
(void)bpf_testmod_test_struct_arg_3(1, 4, struct_arg2);
(void)bpf_testmod_test_struct_arg_4(struct_arg1, 1, 2, 3, struct_arg2);
(void)bpf_testmod_test_struct_arg_5();
+ (void)bpf_testmod_test_struct_arg_7(16, (void *)17, 18, 19,
+ (void *)20, struct_arg4);
+ (void)bpf_testmod_test_struct_arg_8(16, (void *)17, 18, 19,
+ (void *)20, struct_arg4, 23);
+
struct_arg3 = kmalloc((sizeof(struct bpf_testmod_struct_arg_3) +
sizeof(int)), GFP_KERNEL);
@@ -243,7 +286,11 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
if (bpf_testmod_fentry_test1(1) != 2 ||
bpf_testmod_fentry_test2(2, 3) != 5 ||
- bpf_testmod_fentry_test3(4, 5, 6) != 15)
+ bpf_testmod_fentry_test3(4, 5, 6) != 15 ||
+ bpf_testmod_fentry_test7(16, (void *)17, 18, 19, (void *)20,
+ 21, 22) != 133 ||
+ bpf_testmod_fentry_test11(16, (void *)17, 18, 19, (void *)20,
+ 21, 22, 23, 24, 25, 26) != 231)
goto out;
bpf_testmod_fentry_ok = 1;
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 9e95b37a7dff..2caee8423ee0 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -278,6 +278,18 @@ int join_cgroup(const char *relative_path)
}
/**
+ * join_root_cgroup() - Join the root cgroup
+ *
+ * This function joins the root cgroup.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_root_cgroup(void)
+{
+ return join_cgroup_from_top(CGROUP_MOUNT_PATH);
+}
+
+/**
* join_parent_cgroup() - Join a cgroup in the parent process workdir
* @relative_path: The cgroup path, relative to parent process workdir, to join
*
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index f099a166c94d..5c2cb9c8b546 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -22,6 +22,7 @@ void remove_cgroup(const char *relative_path);
unsigned long long get_cgroup_id(const char *relative_path);
int join_cgroup(const char *relative_path);
+int join_root_cgroup(void);
int join_parent_cgroup(const char *relative_path);
int setup_cgroup_environment(void);
diff --git a/tools/testing/selftests/bpf/cgroup_tcp_skb.h b/tools/testing/selftests/bpf/cgroup_tcp_skb.h
new file mode 100644
index 000000000000..7f6b24f102fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_tcp_skb.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+/* Define states of a socket to tracking messages sending to and from the
+ * socket.
+ *
+ * These states are based on rfc9293 with some modifications to support
+ * tracking of messages sent out from a socket. For example, when a SYN is
+ * received, a new socket is transiting to the SYN_RECV state defined in
+ * rfc9293. But, we put it in SYN_RECV_SENDING_SYN_ACK state and when
+ * SYN-ACK is sent out, it moves to SYN_RECV state. With this modification,
+ * we can track the message sent out from a socket.
+ */
+
+#ifndef __CGROUP_TCP_SKB_H__
+#define __CGROUP_TCP_SKB_H__
+
+enum {
+ INIT,
+ CLOSED,
+ SYN_SENT,
+ SYN_RECV_SENDING_SYN_ACK,
+ SYN_RECV,
+ ESTABLISHED,
+ FIN_WAIT1,
+ FIN_WAIT2,
+ CLOSE_WAIT_SENDING_ACK,
+ CLOSE_WAIT,
+ CLOSING,
+ LAST_ACK,
+ TIME_WAIT_SENDING_ACK,
+ TIME_WAIT,
+};
+
+#endif /* __CGROUP_TCP_SKB_H__ */
diff --git a/tools/testing/selftests/bpf/gnu/stubs.h b/tools/testing/selftests/bpf/gnu/stubs.h
index 719225b16626..1c638d9dce1a 100644
--- a/tools/testing/selftests/bpf/gnu/stubs.h
+++ b/tools/testing/selftests/bpf/gnu/stubs.h
@@ -1 +1 @@
-/* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */
+/* dummy .h to trick /usr/include/features.h to work with 'clang --target=bpf' */
diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
new file mode 100644
index 000000000000..1a9eeefda9a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
@@ -0,0 +1,447 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <bpf_util.h>
+#include <test_maps.h>
+
+#include "map_percpu_stats.skel.h"
+
+#define MAX_ENTRIES 16384
+#define MAX_ENTRIES_HASH_OF_MAPS 64
+#define N_THREADS 8
+#define MAX_MAP_KEY_SIZE 4
+
+static void map_info(int map_fd, struct bpf_map_info *info)
+{
+ __u32 len = sizeof(*info);
+ int ret;
+
+ memset(info, 0, sizeof(*info));
+
+ ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
+ CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
+}
+
+static const char *map_type_to_s(__u32 type)
+{
+ switch (type) {
+ case BPF_MAP_TYPE_HASH:
+ return "HASH";
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ return "PERCPU_HASH";
+ case BPF_MAP_TYPE_LRU_HASH:
+ return "LRU_HASH";
+ case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+ return "LRU_PERCPU_HASH";
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ return "BPF_MAP_TYPE_HASH_OF_MAPS";
+ default:
+ return "<define-me>";
+ }
+}
+
+static __u32 map_count_elements(__u32 type, int map_fd)
+{
+ __u32 key = -1;
+ int n = 0;
+
+ while (!bpf_map_get_next_key(map_fd, &key, &key))
+ n++;
+ return n;
+}
+
+#define BATCH true
+
+static void delete_and_lookup_batch(int map_fd, void *keys, __u32 count)
+{
+ static __u8 values[(8 << 10) * MAX_ENTRIES];
+ void *in_batch = NULL, *out_batch;
+ __u32 save_count = count;
+ int ret;
+
+ ret = bpf_map_lookup_and_delete_batch(map_fd,
+ &in_batch, &out_batch,
+ keys, values, &count,
+ NULL);
+
+ /*
+ * Despite what uapi header says, lookup_and_delete_batch will return
+ * -ENOENT in case we successfully have deleted all elements, so check
+ * this separately
+ */
+ CHECK(ret < 0 && (errno != ENOENT || !count), "bpf_map_lookup_and_delete_batch",
+ "error: %s\n", strerror(errno));
+
+ CHECK(count != save_count,
+ "bpf_map_lookup_and_delete_batch",
+ "deleted not all elements: removed=%u expected=%u\n",
+ count, save_count);
+}
+
+static void delete_all_elements(__u32 type, int map_fd, bool batch)
+{
+ static __u8 val[8 << 10]; /* enough for 1024 CPUs */
+ __u32 key = -1;
+ void *keys;
+ __u32 i, n;
+ int ret;
+
+ keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
+ CHECK(!keys, "calloc", "error: %s\n", strerror(errno));
+
+ for (n = 0; !bpf_map_get_next_key(map_fd, &key, &key); n++)
+ memcpy(keys + n*MAX_MAP_KEY_SIZE, &key, MAX_MAP_KEY_SIZE);
+
+ if (batch) {
+ /* Can't mix delete_batch and delete_and_lookup_batch because
+ * they have different semantics in relation to the keys
+ * argument. However, delete_batch utilize map_delete_elem,
+ * so we actually test it in non-batch scenario */
+ delete_and_lookup_batch(map_fd, keys, n);
+ } else {
+ /* Intentionally mix delete and lookup_and_delete so we can test both */
+ for (i = 0; i < n; i++) {
+ void *keyp = keys + i*MAX_MAP_KEY_SIZE;
+
+ if (i % 2 || type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ ret = bpf_map_delete_elem(map_fd, keyp);
+ CHECK(ret < 0, "bpf_map_delete_elem",
+ "error: key %u: %s\n", i, strerror(errno));
+ } else {
+ ret = bpf_map_lookup_and_delete_elem(map_fd, keyp, val);
+ CHECK(ret < 0, "bpf_map_lookup_and_delete_elem",
+ "error: key %u: %s\n", i, strerror(errno));
+ }
+ }
+ }
+
+ free(keys);
+}
+
+static bool is_lru(__u32 map_type)
+{
+ return map_type == BPF_MAP_TYPE_LRU_HASH ||
+ map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
+struct upsert_opts {
+ __u32 map_type;
+ int map_fd;
+ __u32 n;
+};
+
+static int create_small_hash(void)
+{
+ int map_fd;
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "small", 4, 4, 4, NULL);
+ CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
+ strerror(errno), "small");
+
+ return map_fd;
+}
+
+static void *patch_map_thread(void *arg)
+{
+ struct upsert_opts *opts = arg;
+ int val;
+ int ret;
+ int i;
+
+ for (i = 0; i < opts->n; i++) {
+ if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+ val = create_small_hash();
+ else
+ val = rand();
+ ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0);
+ CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));
+
+ if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+ close(val);
+ }
+ return NULL;
+}
+
+static void upsert_elements(struct upsert_opts *opts)
+{
+ pthread_t threads[N_THREADS];
+ int ret;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(threads); i++) {
+ ret = pthread_create(&i[threads], NULL, patch_map_thread, opts);
+ CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
+ }
+
+ for (i = 0; i < ARRAY_SIZE(threads); i++) {
+ ret = pthread_join(i[threads], NULL);
+ CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
+ }
+}
+
+static __u32 read_cur_elements(int iter_fd)
+{
+ char buf[64];
+ ssize_t n;
+ __u32 ret;
+
+ n = read(iter_fd, buf, sizeof(buf)-1);
+ CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
+ buf[n] = '\0';
+
+ errno = 0;
+ ret = (__u32)strtol(buf, NULL, 10);
+ CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));
+
+ return ret;
+}
+
+static __u32 get_cur_elements(int map_id)
+{
+ struct map_percpu_stats *skel;
+ struct bpf_link *link;
+ __u32 n_elements;
+ int iter_fd;
+ int ret;
+
+ skel = map_percpu_stats__open();
+ CHECK(skel == NULL, "map_percpu_stats__open", "error: %s", strerror(errno));
+
+ skel->bss->target_id = map_id;
+
+ ret = map_percpu_stats__load(skel);
+ CHECK(ret != 0, "map_percpu_stats__load", "error: %s", strerror(errno));
+
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
+ CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));
+
+ n_elements = read_cur_elements(iter_fd);
+
+ close(iter_fd);
+ bpf_link__destroy(link);
+ map_percpu_stats__destroy(skel);
+
+ return n_elements;
+}
+
+static void check_expected_number_elements(__u32 n_inserted, int map_fd,
+ struct bpf_map_info *info)
+{
+ __u32 n_real;
+ __u32 n_iter;
+
+ /* Count the current number of elements in the map by iterating through
+ * all the map keys via bpf_get_next_key
+ */
+ n_real = map_count_elements(info->type, map_fd);
+
+ /* The "real" number of elements should be the same as the inserted
+ * number of elements in all cases except LRU maps, where some elements
+ * may have been evicted
+ */
+ if (n_inserted == 0 || !is_lru(info->type))
+ CHECK(n_inserted != n_real, "map_count_elements",
+ "n_real(%u) != n_inserted(%u)\n", n_real, n_inserted);
+
+ /* Count the current number of elements in the map using an iterator */
+ n_iter = get_cur_elements(info->id);
+
+ /* Both counts should be the same, as all updates are over */
+ CHECK(n_iter != n_real, "get_cur_elements",
+ "n_iter=%u, expected %u (map_type=%s,map_flags=%08x)\n",
+ n_iter, n_real, map_type_to_s(info->type), info->map_flags);
+}
+
+static void __test(int map_fd)
+{
+ struct upsert_opts opts = {
+ .map_fd = map_fd,
+ };
+ struct bpf_map_info info;
+
+ map_info(map_fd, &info);
+ opts.map_type = info.type;
+ opts.n = info.max_entries;
+
+ /* Reduce the number of elements we are updating such that we don't
+ * bump into -E2BIG from non-preallocated hash maps, but still will
+ * have some evictions for LRU maps */
+ if (opts.map_type != BPF_MAP_TYPE_HASH_OF_MAPS)
+ opts.n -= 512;
+ else
+ opts.n /= 2;
+
+ /*
+ * Upsert keys [0, n) under some competition: with random values from
+ * N_THREADS threads. Check values, then delete all elements and check
+ * values again.
+ */
+ upsert_elements(&opts);
+ check_expected_number_elements(opts.n, map_fd, &info);
+ delete_all_elements(info.type, map_fd, !BATCH);
+ check_expected_number_elements(0, map_fd, &info);
+
+ /* Now do the same, but using batch delete operations */
+ upsert_elements(&opts);
+ check_expected_number_elements(opts.n, map_fd, &info);
+ delete_all_elements(info.type, map_fd, BATCH);
+ check_expected_number_elements(0, map_fd, &info);
+
+ close(map_fd);
+}
+
+static int map_create_opts(__u32 type, const char *name,
+ struct bpf_map_create_opts *map_opts,
+ __u32 key_size, __u32 val_size)
+{
+ int max_entries;
+ int map_fd;
+
+ if (type == BPF_MAP_TYPE_HASH_OF_MAPS)
+ max_entries = MAX_ENTRIES_HASH_OF_MAPS;
+ else
+ max_entries = MAX_ENTRIES;
+
+ map_fd = bpf_map_create(type, name, key_size, val_size, max_entries, map_opts);
+ CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
+ strerror(errno), name);
+
+ return map_fd;
+}
+
+static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
+{
+ return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
+}
+
+static int create_hash(void)
+{
+ struct bpf_map_create_opts map_opts = {
+ .sz = sizeof(map_opts),
+ .map_flags = BPF_F_NO_PREALLOC,
+ };
+
+ return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
+}
+
+static int create_percpu_hash(void)
+{
+ struct bpf_map_create_opts map_opts = {
+ .sz = sizeof(map_opts),
+ .map_flags = BPF_F_NO_PREALLOC,
+ };
+
+ return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
+}
+
+static int create_hash_prealloc(void)
+{
+ return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
+}
+
+static int create_percpu_hash_prealloc(void)
+{
+ return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
+}
+
+static int create_lru_hash(__u32 type, __u32 map_flags)
+{
+ struct bpf_map_create_opts map_opts = {
+ .sz = sizeof(map_opts),
+ .map_flags = map_flags,
+ };
+
+ return map_create(type, "lru_hash", &map_opts);
+}
+
+static int create_hash_of_maps(void)
+{
+ struct bpf_map_create_opts map_opts = {
+ .sz = sizeof(map_opts),
+ .map_flags = BPF_F_NO_PREALLOC,
+ .inner_map_fd = create_small_hash(),
+ };
+ int ret;
+
+ ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
+ &map_opts, sizeof(int), sizeof(int));
+ close(map_opts.inner_map_fd);
+ return ret;
+}
+
+static void map_percpu_stats_hash(void)
+{
+ __test(create_hash());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_hash(void)
+{
+ __test(create_percpu_hash());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_hash_prealloc(void)
+{
+ __test(create_hash_prealloc());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_hash_prealloc(void)
+{
+ __test(create_percpu_hash_prealloc());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_lru_hash(void)
+{
+ __test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, 0));
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_lru_hash_no_common(void)
+{
+ __test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU));
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_lru_hash(void)
+{
+ __test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0));
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_lru_hash_no_common(void)
+{
+ __test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU));
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_hash_of_maps(void)
+{
+ __test(create_hash_of_maps());
+ printf("test_%s:PASS\n", __func__);
+}
+
+void test_map_percpu_stats(void)
+{
+ map_percpu_stats_hash();
+ map_percpu_stats_percpu_hash();
+ map_percpu_stats_hash_prealloc();
+ map_percpu_stats_percpu_hash_prealloc();
+ map_percpu_stats_lru_hash();
+ map_percpu_stats_lru_hash_no_common();
+ map_percpu_stats_percpu_lru_hash();
+ map_percpu_stats_percpu_lru_hash_no_common();
+ map_percpu_stats_hash_of_maps();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index c8ba4009e4ab..b30ff6b3b81a 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -123,12 +123,13 @@ static void test_bpf_nf_ct(int mode)
ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting");
ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting");
end:
- if (srv_client_fd != -1)
- close(srv_client_fd);
if (client_fd != -1)
close(client_fd);
+ if (srv_client_fd != -1)
+ close(srv_client_fd);
if (srv_fd != -1)
close(srv_fd);
+
snprintf(cmd, sizeof(cmd), iptables, "-D");
system(cmd);
test_bpf_nf__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c b/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c
new file mode 100644
index 000000000000..95bab61a1e57
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include <test_progs.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+#include <sched.h>
+#include <unistd.h>
+#include "cgroup_helpers.h"
+#include "testing_helpers.h"
+#include "cgroup_tcp_skb.skel.h"
+#include "cgroup_tcp_skb.h"
+
+#define CGROUP_TCP_SKB_PATH "/test_cgroup_tcp_skb"
+
+static int install_filters(int cgroup_fd,
+ struct bpf_link **egress_link,
+ struct bpf_link **ingress_link,
+ struct bpf_program *egress_prog,
+ struct bpf_program *ingress_prog,
+ struct cgroup_tcp_skb *skel)
+{
+ /* Prepare filters */
+ skel->bss->g_sock_state = 0;
+ skel->bss->g_unexpected = 0;
+ *egress_link =
+ bpf_program__attach_cgroup(egress_prog,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(egress_link, "egress_link"))
+ return -1;
+ *ingress_link =
+ bpf_program__attach_cgroup(ingress_prog,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(ingress_link, "ingress_link"))
+ return -1;
+
+ return 0;
+}
+
+static void uninstall_filters(struct bpf_link **egress_link,
+ struct bpf_link **ingress_link)
+{
+ bpf_link__destroy(*egress_link);
+ *egress_link = NULL;
+ bpf_link__destroy(*ingress_link);
+ *ingress_link = NULL;
+}
+
+static int create_client_sock_v6(void)
+{
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("socket");
+ return -1;
+ }
+
+ return fd;
+}
+
+static int create_server_sock_v6(void)
+{
+ struct sockaddr_in6 addr = {
+ .sin6_family = AF_INET6,
+ .sin6_port = htons(0),
+ .sin6_addr = IN6ADDR_LOOPBACK_INIT,
+ };
+ int fd, err;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("socket");
+ return -1;
+ }
+
+ err = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0) {
+ perror("bind");
+ return -1;
+ }
+
+ err = listen(fd, 1);
+ if (err < 0) {
+ perror("listen");
+ return -1;
+ }
+
+ return fd;
+}
+
+static int get_sock_port_v6(int fd)
+{
+ struct sockaddr_in6 addr;
+ socklen_t len;
+ int err;
+
+ len = sizeof(addr);
+ err = getsockname(fd, (struct sockaddr *)&addr, &len);
+ if (err < 0) {
+ perror("getsockname");
+ return -1;
+ }
+
+ return ntohs(addr.sin6_port);
+}
+
+static int connect_client_server_v6(int client_fd, int listen_fd)
+{
+ struct sockaddr_in6 addr = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_LOOPBACK_INIT,
+ };
+ int err;
+
+ addr.sin6_port = htons(get_sock_port_v6(listen_fd));
+ if (addr.sin6_port < 0)
+ return -1;
+
+ err = connect(client_fd, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0) {
+ perror("connect");
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Connect to the server in a cgroup from the outside of the cgroup. */
+static int talk_to_cgroup(int *client_fd, int *listen_fd, int *service_fd,
+ struct cgroup_tcp_skb *skel)
+{
+ int err, cp;
+ char buf[5];
+
+ /* Create client & server socket */
+ err = join_root_cgroup();
+ if (!ASSERT_OK(err, "join_root_cgroup"))
+ return -1;
+ *client_fd = create_client_sock_v6();
+ if (!ASSERT_GE(*client_fd, 0, "client_fd"))
+ return -1;
+ err = join_cgroup(CGROUP_TCP_SKB_PATH);
+ if (!ASSERT_OK(err, "join_cgroup"))
+ return -1;
+ *listen_fd = create_server_sock_v6();
+ if (!ASSERT_GE(*listen_fd, 0, "listen_fd"))
+ return -1;
+ skel->bss->g_sock_port = get_sock_port_v6(*listen_fd);
+
+ /* Connect client to server */
+ err = connect_client_server_v6(*client_fd, *listen_fd);
+ if (!ASSERT_OK(err, "connect_client_server_v6"))
+ return -1;
+ *service_fd = accept(*listen_fd, NULL, NULL);
+ if (!ASSERT_GE(*service_fd, 0, "service_fd"))
+ return -1;
+ err = join_root_cgroup();
+ if (!ASSERT_OK(err, "join_root_cgroup"))
+ return -1;
+ cp = write(*client_fd, "hello", 5);
+ if (!ASSERT_EQ(cp, 5, "write"))
+ return -1;
+ cp = read(*service_fd, buf, 5);
+ if (!ASSERT_EQ(cp, 5, "read"))
+ return -1;
+
+ return 0;
+}
+
+/* Connect to the server out of a cgroup from inside the cgroup. */
+static int talk_to_outside(int *client_fd, int *listen_fd, int *service_fd,
+ struct cgroup_tcp_skb *skel)
+
+{
+ int err, cp;
+ char buf[5];
+
+ /* Create client & server socket */
+ err = join_root_cgroup();
+ if (!ASSERT_OK(err, "join_root_cgroup"))
+ return -1;
+ *listen_fd = create_server_sock_v6();
+ if (!ASSERT_GE(*listen_fd, 0, "listen_fd"))
+ return -1;
+ err = join_cgroup(CGROUP_TCP_SKB_PATH);
+ if (!ASSERT_OK(err, "join_cgroup"))
+ return -1;
+ *client_fd = create_client_sock_v6();
+ if (!ASSERT_GE(*client_fd, 0, "client_fd"))
+ return -1;
+ err = join_root_cgroup();
+ if (!ASSERT_OK(err, "join_root_cgroup"))
+ return -1;
+ skel->bss->g_sock_port = get_sock_port_v6(*listen_fd);
+
+ /* Connect client to server */
+ err = connect_client_server_v6(*client_fd, *listen_fd);
+ if (!ASSERT_OK(err, "connect_client_server_v6"))
+ return -1;
+ *service_fd = accept(*listen_fd, NULL, NULL);
+ if (!ASSERT_GE(*service_fd, 0, "service_fd"))
+ return -1;
+ cp = write(*client_fd, "hello", 5);
+ if (!ASSERT_EQ(cp, 5, "write"))
+ return -1;
+ cp = read(*service_fd, buf, 5);
+ if (!ASSERT_EQ(cp, 5, "read"))
+ return -1;
+
+ return 0;
+}
+
+static int close_connection(int *closing_fd, int *peer_fd, int *listen_fd,
+ struct cgroup_tcp_skb *skel)
+{
+ __u32 saved_packet_count = 0;
+ int err;
+ int i;
+
+ /* Wait for ACKs to be sent */
+ saved_packet_count = skel->bss->g_packet_count;
+ usleep(100000); /* 0.1s */
+ for (i = 0;
+ skel->bss->g_packet_count != saved_packet_count && i < 10;
+ i++) {
+ saved_packet_count = skel->bss->g_packet_count;
+ usleep(100000); /* 0.1s */
+ }
+ if (!ASSERT_EQ(skel->bss->g_packet_count, saved_packet_count,
+ "packet_count"))
+ return -1;
+
+ skel->bss->g_packet_count = 0;
+ saved_packet_count = 0;
+
+ /* Half shutdown to make sure the closing socket having a chance to
+ * receive a FIN from the peer.
+ */
+ err = shutdown(*closing_fd, SHUT_WR);
+ if (!ASSERT_OK(err, "shutdown closing_fd"))
+ return -1;
+
+ /* Wait for FIN and the ACK of the FIN to be observed */
+ for (i = 0;
+ skel->bss->g_packet_count < saved_packet_count + 2 && i < 10;
+ i++)
+ usleep(100000); /* 0.1s */
+ if (!ASSERT_GE(skel->bss->g_packet_count, saved_packet_count + 2,
+ "packet_count"))
+ return -1;
+
+ saved_packet_count = skel->bss->g_packet_count;
+
+ /* Fully shutdown the connection */
+ err = close(*peer_fd);
+ if (!ASSERT_OK(err, "close peer_fd"))
+ return -1;
+ *peer_fd = -1;
+
+ /* Wait for FIN and the ACK of the FIN to be observed */
+ for (i = 0;
+ skel->bss->g_packet_count < saved_packet_count + 2 && i < 10;
+ i++)
+ usleep(100000); /* 0.1s */
+ if (!ASSERT_GE(skel->bss->g_packet_count, saved_packet_count + 2,
+ "packet_count"))
+ return -1;
+
+ err = close(*closing_fd);
+ if (!ASSERT_OK(err, "close closing_fd"))
+ return -1;
+ *closing_fd = -1;
+
+ close(*listen_fd);
+ *listen_fd = -1;
+
+ return 0;
+}
+
+/* This test case includes four scenarios:
+ * 1. Connect to the server from outside the cgroup and close the connection
+ * from outside the cgroup.
+ * 2. Connect to the server from outside the cgroup and close the connection
+ * from inside the cgroup.
+ * 3. Connect to the server from inside the cgroup and close the connection
+ * from outside the cgroup.
+ * 4. Connect to the server from inside the cgroup and close the connection
+ * from inside the cgroup.
+ *
+ * The test case is to verify that cgroup_skb/{egress,ingress} filters
+ * receive expected packets including SYN, SYN/ACK, ACK, FIN, and FIN/ACK.
+ */
+void test_cgroup_tcp_skb(void)
+{
+ struct bpf_link *ingress_link = NULL;
+ struct bpf_link *egress_link = NULL;
+ int client_fd = -1, listen_fd = -1;
+ struct cgroup_tcp_skb *skel;
+ int service_fd = -1;
+ int cgroup_fd = -1;
+ int err;
+
+ skel = cgroup_tcp_skb__open_and_load();
+ if (!ASSERT_OK(!skel, "skel_open_load"))
+ return;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ goto cleanup;
+
+ cgroup_fd = create_and_get_cgroup(CGROUP_TCP_SKB_PATH);
+ if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
+ goto cleanup;
+
+ /* Scenario 1 */
+ err = install_filters(cgroup_fd, &egress_link, &ingress_link,
+ skel->progs.server_egress,
+ skel->progs.server_ingress,
+ skel);
+ if (!ASSERT_OK(err, "install_filters"))
+ goto cleanup;
+
+ err = talk_to_cgroup(&client_fd, &listen_fd, &service_fd, skel);
+ if (!ASSERT_OK(err, "talk_to_cgroup"))
+ goto cleanup;
+
+ err = close_connection(&client_fd, &service_fd, &listen_fd, skel);
+ if (!ASSERT_OK(err, "close_connection"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected");
+ ASSERT_EQ(skel->bss->g_sock_state, CLOSED, "g_sock_state");
+
+ uninstall_filters(&egress_link, &ingress_link);
+
+ /* Scenario 2 */
+ err = install_filters(cgroup_fd, &egress_link, &ingress_link,
+ skel->progs.server_egress_srv,
+ skel->progs.server_ingress_srv,
+ skel);
+
+ err = talk_to_cgroup(&client_fd, &listen_fd, &service_fd, skel);
+ if (!ASSERT_OK(err, "talk_to_cgroup"))
+ goto cleanup;
+
+ err = close_connection(&service_fd, &client_fd, &listen_fd, skel);
+ if (!ASSERT_OK(err, "close_connection"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected");
+ ASSERT_EQ(skel->bss->g_sock_state, TIME_WAIT, "g_sock_state");
+
+ uninstall_filters(&egress_link, &ingress_link);
+
+ /* Scenario 3 */
+ err = install_filters(cgroup_fd, &egress_link, &ingress_link,
+ skel->progs.client_egress_srv,
+ skel->progs.client_ingress_srv,
+ skel);
+
+ err = talk_to_outside(&client_fd, &listen_fd, &service_fd, skel);
+ if (!ASSERT_OK(err, "talk_to_outside"))
+ goto cleanup;
+
+ err = close_connection(&service_fd, &client_fd, &listen_fd, skel);
+ if (!ASSERT_OK(err, "close_connection"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected");
+ ASSERT_EQ(skel->bss->g_sock_state, CLOSED, "g_sock_state");
+
+ uninstall_filters(&egress_link, &ingress_link);
+
+ /* Scenario 4 */
+ err = install_filters(cgroup_fd, &egress_link, &ingress_link,
+ skel->progs.client_egress,
+ skel->progs.client_ingress,
+ skel);
+
+ err = talk_to_outside(&client_fd, &listen_fd, &service_fd, skel);
+ if (!ASSERT_OK(err, "talk_to_outside"))
+ goto cleanup;
+
+ err = close_connection(&client_fd, &service_fd, &listen_fd, skel);
+ if (!ASSERT_OK(err, "close_connection"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected");
+ ASSERT_EQ(skel->bss->g_sock_state, TIME_WAIT, "g_sock_state");
+
+ uninstall_filters(&egress_link, &ingress_link);
+
+cleanup:
+ close(client_fd);
+ close(listen_fd);
+ close(service_fd);
+ close(cgroup_fd);
+ bpf_link__destroy(egress_link);
+ bpf_link__destroy(ingress_link);
+ cleanup_cgroup_environment();
+ cgroup_tcp_skb__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index c0d1d61d5f66..aee1bc77a17f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -2,8 +2,9 @@
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
#include "fentry_test.lskel.h"
+#include "fentry_many_args.skel.h"
-static int fentry_test(struct fentry_test_lskel *fentry_skel)
+static int fentry_test_common(struct fentry_test_lskel *fentry_skel)
{
int err, prog_fd, i;
int link_fd;
@@ -37,7 +38,7 @@ static int fentry_test(struct fentry_test_lskel *fentry_skel)
return 0;
}
-void test_fentry_test(void)
+static void fentry_test(void)
{
struct fentry_test_lskel *fentry_skel = NULL;
int err;
@@ -46,13 +47,47 @@ void test_fentry_test(void)
if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
goto cleanup;
- err = fentry_test(fentry_skel);
+ err = fentry_test_common(fentry_skel);
if (!ASSERT_OK(err, "fentry_first_attach"))
goto cleanup;
- err = fentry_test(fentry_skel);
+ err = fentry_test_common(fentry_skel);
ASSERT_OK(err, "fentry_second_attach");
cleanup:
fentry_test_lskel__destroy(fentry_skel);
}
+
+static void fentry_many_args(void)
+{
+ struct fentry_many_args *fentry_skel = NULL;
+ int err;
+
+ fentry_skel = fentry_many_args__open_and_load();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_many_args_skel_load"))
+ goto cleanup;
+
+ err = fentry_many_args__attach(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_many_args_attach"))
+ goto cleanup;
+
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
+
+ ASSERT_EQ(fentry_skel->bss->test1_result, 1,
+ "fentry_many_args_result1");
+ ASSERT_EQ(fentry_skel->bss->test2_result, 1,
+ "fentry_many_args_result2");
+ ASSERT_EQ(fentry_skel->bss->test3_result, 1,
+ "fentry_many_args_result3");
+
+cleanup:
+ fentry_many_args__destroy(fentry_skel);
+}
+
+void test_fentry_test(void)
+{
+ if (test__start_subtest("fentry"))
+ fentry_test();
+ if (test__start_subtest("fentry_many_args"))
+ fentry_many_args();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 101b7343036b..1c13007e37dd 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -2,8 +2,9 @@
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
#include "fexit_test.lskel.h"
+#include "fexit_many_args.skel.h"
-static int fexit_test(struct fexit_test_lskel *fexit_skel)
+static int fexit_test_common(struct fexit_test_lskel *fexit_skel)
{
int err, prog_fd, i;
int link_fd;
@@ -37,7 +38,7 @@ static int fexit_test(struct fexit_test_lskel *fexit_skel)
return 0;
}
-void test_fexit_test(void)
+static void fexit_test(void)
{
struct fexit_test_lskel *fexit_skel = NULL;
int err;
@@ -46,13 +47,47 @@ void test_fexit_test(void)
if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
goto cleanup;
- err = fexit_test(fexit_skel);
+ err = fexit_test_common(fexit_skel);
if (!ASSERT_OK(err, "fexit_first_attach"))
goto cleanup;
- err = fexit_test(fexit_skel);
+ err = fexit_test_common(fexit_skel);
ASSERT_OK(err, "fexit_second_attach");
cleanup:
fexit_test_lskel__destroy(fexit_skel);
}
+
+static void fexit_many_args(void)
+{
+ struct fexit_many_args *fexit_skel = NULL;
+ int err;
+
+ fexit_skel = fexit_many_args__open_and_load();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_many_args_skel_load"))
+ goto cleanup;
+
+ err = fexit_many_args__attach(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_many_args_attach"))
+ goto cleanup;
+
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
+
+ ASSERT_EQ(fexit_skel->bss->test1_result, 1,
+ "fexit_many_args_result1");
+ ASSERT_EQ(fexit_skel->bss->test2_result, 1,
+ "fexit_many_args_result2");
+ ASSERT_EQ(fexit_skel->bss->test3_result, 1,
+ "fexit_many_args_result3");
+
+cleanup:
+ fexit_many_args__destroy(fexit_skel);
+}
+
+void test_fexit_test(void)
+{
+ if (test__start_subtest("fexit"))
+ fexit_test();
+ if (test__start_subtest("fexit_many_args"))
+ fexit_many_args();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
index 28cf63963cb7..64a9c95d4acf 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
@@ -30,7 +30,9 @@ void test_get_func_args_test(void)
prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
- ASSERT_EQ(topts.retval, 1234, "test_run");
+
+ ASSERT_EQ(topts.retval >> 16, 1, "test_run");
+ ASSERT_EQ(topts.retval & 0xffff, 1234 + 29, "test_run");
ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
diff --git a/tools/testing/selftests/bpf/prog_tests/global_map_resize.c b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c
index fd41425d2e5c..56b5baef35c8 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_map_resize.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c
@@ -22,7 +22,7 @@ static void global_map_resize_bss_subtest(void)
struct test_global_map_resize *skel;
struct bpf_map *map;
const __u32 desired_sz = sizeof(skel->bss->sum) + sysconf(_SC_PAGE_SIZE) * 2;
- size_t array_len, actual_sz;
+ size_t array_len, actual_sz, new_sz;
skel = test_global_map_resize__open();
if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
@@ -42,6 +42,10 @@ static void global_map_resize_bss_subtest(void)
if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize"))
goto teardown;
+ new_sz = sizeof(skel->data_percpu_arr->percpu_arr[0]) * libbpf_num_possible_cpus();
+ err = bpf_map__set_value_size(skel->maps.data_percpu_arr, new_sz);
+ ASSERT_OK(err, "percpu_arr_resize");
+
/* set the expected number of elements based on the resized array */
array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->bss->array[0]);
if (!ASSERT_GT(array_len, 1, "array_len"))
@@ -84,11 +88,11 @@ teardown:
static void global_map_resize_data_subtest(void)
{
- int err;
struct test_global_map_resize *skel;
struct bpf_map *map;
const __u32 desired_sz = sysconf(_SC_PAGE_SIZE) * 2;
- size_t array_len, actual_sz;
+ size_t array_len, actual_sz, new_sz;
+ int err;
skel = test_global_map_resize__open();
if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
@@ -108,6 +112,10 @@ static void global_map_resize_data_subtest(void)
if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize"))
goto teardown;
+ new_sz = sizeof(skel->data_percpu_arr->percpu_arr[0]) * libbpf_num_possible_cpus();
+ err = bpf_map__set_value_size(skel->maps.data_percpu_arr, new_sz);
+ ASSERT_OK(err, "percpu_arr_resize");
+
/* set the expected number of elements based on the resized array */
array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->data_custom->my_array[0]);
if (!ASSERT_GT(array_len, 1, "array_len"))
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index f63309fd0e28..18cf7b17463d 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -23,7 +23,7 @@ static struct {
"bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \
{ #test "_missing_lock_pop_back", \
"bpf_spin_lock at off=" #off " must be held for bpf_list_head" },
- TEST(kptr, 32)
+ TEST(kptr, 40)
TEST(global, 16)
TEST(map, 0)
TEST(inner_map, 0)
@@ -31,7 +31,7 @@ static struct {
#define TEST(test, op) \
{ #test "_kptr_incorrect_lock_" #op, \
"held lock and object are not in the same allocation\n" \
- "bpf_spin_lock at off=32 must be held for bpf_list_head" }, \
+ "bpf_spin_lock at off=40 must be held for bpf_list_head" }, \
{ #test "_global_incorrect_lock_" #op, \
"held lock and object are not in the same allocation\n" \
"bpf_spin_lock at off=16 must be held for bpf_list_head" }, \
@@ -84,23 +84,23 @@ static struct {
{ "double_push_back", "arg#1 expected pointer to allocated object" },
{ "no_node_value_type", "bpf_list_node not found at offset=0" },
{ "incorrect_value_type",
- "operation on bpf_list_head expects arg#1 bpf_list_node at offset=40 in struct foo, "
+ "operation on bpf_list_head expects arg#1 bpf_list_node at offset=48 in struct foo, "
"but arg is at offset=0 in struct bar" },
{ "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
- { "incorrect_node_off1", "bpf_list_node not found at offset=41" },
- { "incorrect_node_off2", "arg#1 offset=0, but expected bpf_list_node at offset=40 in struct foo" },
+ { "incorrect_node_off1", "bpf_list_node not found at offset=49" },
+ { "incorrect_node_off2", "arg#1 offset=0, but expected bpf_list_node at offset=48 in struct foo" },
{ "no_head_type", "bpf_list_head not found at offset=0" },
{ "incorrect_head_var_off1", "R1 doesn't have constant offset" },
{ "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
- { "incorrect_head_off1", "bpf_list_head not found at offset=17" },
+ { "incorrect_head_off1", "bpf_list_head not found at offset=25" },
{ "incorrect_head_off2", "bpf_list_head not found at offset=1" },
{ "pop_front_off",
- "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) "
- "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n"
+ "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) "
+ "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) refs=2,4\n"
"16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" },
{ "pop_back_off",
- "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) "
- "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n"
+ "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) "
+ "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) refs=2,4\n"
"16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" },
};
@@ -257,7 +257,7 @@ static struct btf *init_btf(void)
hid = btf__add_struct(btf, "bpf_list_head", 16);
if (!ASSERT_EQ(hid, LIST_HEAD, "btf__add_struct bpf_list_head"))
goto end;
- nid = btf__add_struct(btf, "bpf_list_node", 16);
+ nid = btf__add_struct(btf, "bpf_list_node", 24);
if (!ASSERT_EQ(nid, LIST_NODE, "btf__add_struct bpf_list_node"))
goto end;
return btf;
@@ -276,7 +276,7 @@ static void list_and_rb_node_same_struct(bool refcount_field)
if (!ASSERT_OK_PTR(btf, "init_btf"))
return;
- bpf_rb_node_btf_id = btf__add_struct(btf, "bpf_rb_node", 24);
+ bpf_rb_node_btf_id = btf__add_struct(btf, "bpf_rb_node", 32);
if (!ASSERT_GT(bpf_rb_node_btf_id, 0, "btf__add_struct bpf_rb_node"))
return;
@@ -286,17 +286,17 @@ static void list_and_rb_node_same_struct(bool refcount_field)
return;
}
- id = btf__add_struct(btf, "bar", refcount_field ? 44 : 40);
+ id = btf__add_struct(btf, "bar", refcount_field ? 60 : 56);
if (!ASSERT_GT(id, 0, "btf__add_struct bar"))
return;
err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
if (!ASSERT_OK(err, "btf__add_field bar::a"))
return;
- err = btf__add_field(btf, "c", bpf_rb_node_btf_id, 128, 0);
+ err = btf__add_field(btf, "c", bpf_rb_node_btf_id, 192, 0);
if (!ASSERT_OK(err, "btf__add_field bar::c"))
return;
if (refcount_field) {
- err = btf__add_field(btf, "ref", bpf_refcount_btf_id, 320, 0);
+ err = btf__add_field(btf, "ref", bpf_refcount_btf_id, 448, 0);
if (!ASSERT_OK(err, "btf__add_field bar::ref"))
return;
}
@@ -527,7 +527,7 @@ static void test_btf(void)
btf = init_btf();
if (!ASSERT_OK_PTR(btf, "init_btf"))
break;
- id = btf__add_struct(btf, "foo", 36);
+ id = btf__add_struct(btf, "foo", 44);
if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
break;
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
@@ -536,7 +536,7 @@ static void test_btf(void)
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
if (!ASSERT_OK(err, "btf__add_field foo::b"))
break;
- err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
if (!ASSERT_OK(err, "btf__add_field foo::c"))
break;
id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0);
@@ -553,7 +553,7 @@ static void test_btf(void)
btf = init_btf();
if (!ASSERT_OK_PTR(btf, "init_btf"))
break;
- id = btf__add_struct(btf, "foo", 36);
+ id = btf__add_struct(btf, "foo", 44);
if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
break;
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
@@ -562,13 +562,13 @@ static void test_btf(void)
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
if (!ASSERT_OK(err, "btf__add_field foo::b"))
break;
- err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
if (!ASSERT_OK(err, "btf__add_field foo::c"))
break;
id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
break;
- id = btf__add_struct(btf, "bar", 36);
+ id = btf__add_struct(btf, "bar", 44);
if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
break;
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
@@ -577,7 +577,7 @@ static void test_btf(void)
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
if (!ASSERT_OK(err, "btf__add_field bar::b"))
break;
- err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
if (!ASSERT_OK(err, "btf__add_field bar::c"))
break;
id = btf__add_decl_tag(btf, "contains:foo:b", 7, 0);
@@ -594,19 +594,19 @@ static void test_btf(void)
btf = init_btf();
if (!ASSERT_OK_PTR(btf, "init_btf"))
break;
- id = btf__add_struct(btf, "foo", 20);
+ id = btf__add_struct(btf, "foo", 28);
if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
break;
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
if (!ASSERT_OK(err, "btf__add_field foo::a"))
break;
- err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ err = btf__add_field(btf, "b", SPIN_LOCK, 192, 0);
if (!ASSERT_OK(err, "btf__add_field foo::b"))
break;
id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0);
if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a"))
break;
- id = btf__add_struct(btf, "bar", 16);
+ id = btf__add_struct(btf, "bar", 24);
if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
break;
err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
@@ -623,19 +623,19 @@ static void test_btf(void)
btf = init_btf();
if (!ASSERT_OK_PTR(btf, "init_btf"))
break;
- id = btf__add_struct(btf, "foo", 20);
+ id = btf__add_struct(btf, "foo", 28);
if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
break;
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
if (!ASSERT_OK(err, "btf__add_field foo::a"))
break;
- err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ err = btf__add_field(btf, "b", SPIN_LOCK, 192, 0);
if (!ASSERT_OK(err, "btf__add_field foo::b"))
break;
id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
break;
- id = btf__add_struct(btf, "bar", 36);
+ id = btf__add_struct(btf, "bar", 44);
if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
break;
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
@@ -644,13 +644,13 @@ static void test_btf(void)
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
if (!ASSERT_OK(err, "btf__add_field bar::b"))
break;
- err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
if (!ASSERT_OK(err, "btf__add_field bar::c"))
break;
id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0);
if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a"))
break;
- id = btf__add_struct(btf, "baz", 16);
+ id = btf__add_struct(btf, "baz", 24);
if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
break;
err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
@@ -667,7 +667,7 @@ static void test_btf(void)
btf = init_btf();
if (!ASSERT_OK_PTR(btf, "init_btf"))
break;
- id = btf__add_struct(btf, "foo", 36);
+ id = btf__add_struct(btf, "foo", 44);
if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
break;
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
@@ -676,13 +676,13 @@ static void test_btf(void)
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
if (!ASSERT_OK(err, "btf__add_field foo::b"))
break;
- err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
if (!ASSERT_OK(err, "btf__add_field foo::c"))
break;
id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
break;
- id = btf__add_struct(btf, "bar", 36);
+ id = btf__add_struct(btf, "bar", 44);
if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
break;
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
@@ -691,13 +691,13 @@ static void test_btf(void)
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
if (!ASSERT_OK(err, "btf__add_field bar:b"))
break;
- err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
if (!ASSERT_OK(err, "btf__add_field bar:c"))
break;
id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0);
if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a"))
break;
- id = btf__add_struct(btf, "baz", 16);
+ id = btf__add_struct(btf, "baz", 24);
if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
break;
err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
@@ -726,7 +726,7 @@ static void test_btf(void)
id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
break;
- id = btf__add_struct(btf, "bar", 36);
+ id = btf__add_struct(btf, "bar", 44);
if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
break;
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
@@ -735,13 +735,13 @@ static void test_btf(void)
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
if (!ASSERT_OK(err, "btf__add_field bar::b"))
break;
- err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
if (!ASSERT_OK(err, "btf__add_field bar::c"))
break;
id = btf__add_decl_tag(btf, "contains:baz:b", 7, 0);
if (!ASSERT_EQ(id, 8, "btf__add_decl_tag"))
break;
- id = btf__add_struct(btf, "baz", 36);
+ id = btf__add_struct(btf, "baz", 44);
if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
break;
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
@@ -750,13 +750,13 @@ static void test_btf(void)
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
if (!ASSERT_OK(err, "btf__add_field bar::b"))
break;
- err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
if (!ASSERT_OK(err, "btf__add_field bar::c"))
break;
id = btf__add_decl_tag(btf, "contains:bam:a", 9, 0);
if (!ASSERT_EQ(id, 10, "btf__add_decl_tag contains:bam:a"))
break;
- id = btf__add_struct(btf, "bam", 16);
+ id = btf__add_struct(btf, "bam", 24);
if (!ASSERT_EQ(id, 11, "btf__add_struct bam"))
break;
err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c
index 5d9955af6247..a70c99c2f8c8 100644
--- a/tools/testing/selftests/bpf/prog_tests/modify_return.c
+++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c
@@ -41,6 +41,10 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret)
ASSERT_EQ(skel->bss->fexit_result, 1, "modify_return fexit_result");
ASSERT_EQ(skel->bss->fmod_ret_result, 1, "modify_return fmod_ret_result");
+ ASSERT_EQ(skel->bss->fentry_result2, 1, "modify_return fentry_result2");
+ ASSERT_EQ(skel->bss->fexit_result2, 1, "modify_return fexit_result2");
+ ASSERT_EQ(skel->bss->fmod_ret_result2, 1, "modify_return fmod_ret_result2");
+
cleanup:
modify_return__destroy(skel);
}
@@ -49,9 +53,9 @@ cleanup:
void serial_test_modify_return(void)
{
run_test(0 /* input_retval */,
- 1 /* want_side_effect */,
- 4 /* want_ret */);
+ 2 /* want_side_effect */,
+ 33 /* want_ret */);
run_test(-EINVAL /* input_retval */,
0 /* want_side_effect */,
- -EINVAL /* want_ret */);
+ -EINVAL * 2 /* want_ret */);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c b/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c
new file mode 100644
index 000000000000..4297a2a4cb11
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <netinet/in.h>
+#include <linux/netfilter.h>
+
+#include "test_progs.h"
+#include "test_netfilter_link_attach.skel.h"
+
+struct nf_link_test {
+ __u32 pf;
+ __u32 hooknum;
+ __s32 priority;
+ __u32 flags;
+
+ bool expect_success;
+ const char * const name;
+};
+
+static const struct nf_link_test nf_hook_link_tests[] = {
+ { .name = "allzero", },
+ { .pf = NFPROTO_NUMPROTO, .name = "invalid-pf", },
+ { .pf = NFPROTO_IPV4, .hooknum = 42, .name = "invalid-hooknum", },
+ { .pf = NFPROTO_IPV4, .priority = INT_MIN, .name = "invalid-priority-min", },
+ { .pf = NFPROTO_IPV4, .priority = INT_MAX, .name = "invalid-priority-max", },
+ { .pf = NFPROTO_IPV4, .flags = UINT_MAX, .name = "invalid-flags", },
+
+ { .pf = NFPROTO_INET, .priority = 1, .name = "invalid-inet-not-supported", },
+
+ { .pf = NFPROTO_IPV4, .priority = -10000, .expect_success = true, .name = "attach ipv4", },
+ { .pf = NFPROTO_IPV6, .priority = 10001, .expect_success = true, .name = "attach ipv6", },
+};
+
+void test_netfilter_link_attach(void)
+{
+ struct test_netfilter_link_attach *skel;
+ struct bpf_program *prog;
+ LIBBPF_OPTS(bpf_netfilter_opts, opts);
+ int i;
+
+ skel = test_netfilter_link_attach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_netfilter_link_attach__open_and_load"))
+ goto out;
+
+ prog = skel->progs.nf_link_attach_test;
+ if (!ASSERT_OK_PTR(prog, "attach program"))
+ goto out;
+
+ for (i = 0; i < ARRAY_SIZE(nf_hook_link_tests); i++) {
+ struct bpf_link *link;
+
+ if (!test__start_subtest(nf_hook_link_tests[i].name))
+ continue;
+
+#define X(opts, m, i) opts.m = nf_hook_link_tests[(i)].m
+ X(opts, pf, i);
+ X(opts, hooknum, i);
+ X(opts, priority, i);
+ X(opts, flags, i);
+#undef X
+ link = bpf_program__attach_netfilter(prog, &opts);
+ if (nf_hook_link_tests[i].expect_success) {
+ struct bpf_link *link2;
+
+ if (!ASSERT_OK_PTR(link, "program attach successful"))
+ continue;
+
+ link2 = bpf_program__attach_netfilter(prog, &opts);
+ ASSERT_ERR_PTR(link2, "attach program with same pf/hook/priority");
+
+ if (!ASSERT_OK(bpf_link__destroy(link), "link destroy"))
+ break;
+
+ link2 = bpf_program__attach_netfilter(prog, &opts);
+ if (!ASSERT_OK_PTR(link2, "program reattach successful"))
+ continue;
+ if (!ASSERT_OK(bpf_link__destroy(link2), "link destroy"))
+ break;
+ } else {
+ ASSERT_ERR_PTR(link, "program load failure");
+ }
+ }
+
+out:
+ test_netfilter_link_attach__destroy(skel);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c b/tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c
new file mode 100644
index 000000000000..8d077d150c56
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include <string.h>
+#include <linux/bpf.h>
+#include <test_progs.h>
+#include "test_ptr_untrusted.skel.h"
+
+#define TP_NAME "sched_switch"
+
+void serial_test_ptr_untrusted(void)
+{
+ struct test_ptr_untrusted *skel;
+ int err;
+
+ skel = test_ptr_untrusted__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ /* First, attach lsm prog */
+ skel->links.lsm_run = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(skel->links.lsm_run, "lsm_attach"))
+ goto cleanup;
+
+ /* Second, attach raw_tp prog. The lsm prog will be triggered. */
+ skel->links.raw_tp_run = bpf_program__attach_raw_tracepoint(skel->progs.raw_tp_run,
+ TP_NAME);
+ if (!ASSERT_OK_PTR(skel->links.raw_tp_run, "raw_tp_attach"))
+ goto cleanup;
+
+ err = strncmp(skel->bss->tp_name, TP_NAME, strlen(TP_NAME));
+ ASSERT_EQ(err, 0, "cmp_tp_name");
+
+cleanup:
+ test_ptr_untrusted__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
index 595cbf92bff5..7423983472c7 100644
--- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
@@ -14,3 +14,7 @@ void test_refcounted_kptr(void)
void test_refcounted_kptr_fail(void)
{
}
+
+void test_refcounted_kptr_wrong_owner(void)
+{
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
new file mode 100644
index 000000000000..6c93215be8a3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Isovalent */
+#ifndef TC_HELPERS
+#define TC_HELPERS
+#include <test_progs.h>
+
+static inline __u32 id_from_prog_fd(int fd)
+{
+ struct bpf_prog_info prog_info = {};
+ __u32 prog_info_len = sizeof(prog_info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len);
+ if (!ASSERT_OK(err, "id_from_prog_fd"))
+ return 0;
+
+ ASSERT_NEQ(prog_info.id, 0, "prog_info.id");
+ return prog_info.id;
+}
+
+static inline __u32 id_from_link_fd(int fd)
+{
+ struct bpf_link_info link_info = {};
+ __u32 link_info_len = sizeof(link_info);
+ int err;
+
+ err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
+ if (!ASSERT_OK(err, "id_from_link_fd"))
+ return 0;
+
+ ASSERT_NEQ(link_info.id, 0, "link_info.id");
+ return link_info.id;
+}
+
+static inline __u32 ifindex_from_link_fd(int fd)
+{
+ struct bpf_link_info link_info = {};
+ __u32 link_info_len = sizeof(link_info);
+ int err;
+
+ err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
+ if (!ASSERT_OK(err, "id_from_link_fd"))
+ return 0;
+
+ return link_info.tcx.ifindex;
+}
+
+static inline void __assert_mprog_count(int target, int expected, bool miniq, int ifindex)
+{
+ __u32 count = 0, attach_flags = 0;
+ int err;
+
+ err = bpf_prog_query(ifindex, target, 0, &attach_flags,
+ NULL, &count);
+ ASSERT_EQ(count, expected, "count");
+ if (!expected && !miniq)
+ ASSERT_EQ(err, -ENOENT, "prog_query");
+ else
+ ASSERT_EQ(err, 0, "prog_query");
+}
+
+static inline void assert_mprog_count(int target, int expected)
+{
+ __assert_mprog_count(target, expected, false, loopback);
+}
+
+static inline void assert_mprog_count_ifindex(int ifindex, int target, int expected)
+{
+ __assert_mprog_count(target, expected, false, ifindex);
+}
+
+#endif /* TC_HELPERS */
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c
new file mode 100644
index 000000000000..81eea5f10742
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c
@@ -0,0 +1,1583 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <uapi/linux/if_link.h>
+#include <net/if.h>
+#include <test_progs.h>
+
+#define loopback 1
+#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+#include "test_tc_link.skel.h"
+#include "tc_helpers.h"
+
+void serial_test_tc_links_basic(void)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[2], link_ids[2];
+ __u32 pid1, pid2, lid1, lid2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 0);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, BPF_TCX_INGRESS, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+ ASSERT_NEQ(lid1, lid2, "link_ids_1_2");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, BPF_TCX_EGRESS, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count(BPF_TCX_INGRESS, 0);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+}
+
+static void test_tc_links_before_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[5], link_ids[5];
+ __u32 pid1, pid2, pid3, pid4;
+ __u32 lid1, lid2, lid3, lid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ skel->bss->seen_tc1 = false;
+ skel->bss->seen_tc2 = false;
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3));
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK,
+ .relative_id = lid1,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid4, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid4, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid3, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], lid3, "link_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], pid2, "prog_ids[3]");
+ ASSERT_EQ(optq.link_ids[3], lid2, "link_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_before(void)
+{
+ test_tc_links_before_target(BPF_TCX_INGRESS);
+ test_tc_links_before_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_after_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[5], link_ids[5];
+ __u32 pid1, pid2, pid3, pid4;
+ __u32 lid1, lid2, lid3, lid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ skel->bss->seen_tc1 = false;
+ skel->bss->seen_tc2 = false;
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3));
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(skel->links.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid3, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid3, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid2, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], lid2, "link_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], pid4, "prog_ids[3]");
+ ASSERT_EQ(optq.link_ids[3], lid4, "link_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_after(void)
+{
+ test_tc_links_after_target(BPF_TCX_INGRESS);
+ test_tc_links_after_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_revision_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[3], link_ids[3];
+ __u32 pid1, pid2, lid1, lid2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ optl.expected_revision = 1;
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ optl.expected_revision = 1;
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 1);
+
+ optl.expected_revision = 2;
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "prog_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_revision(void)
+{
+ test_tc_links_revision_target(BPF_TCX_INGRESS);
+ test_tc_links_revision_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_chain_classic(int target, bool chain_tc_old)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback);
+ bool hook_created = false, tc_attached = false;
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ if (chain_tc_old) {
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(&tc_hook);
+ if (err == 0)
+ hook_created = true;
+ err = err == -EEXIST ? 0 : err;
+ if (!ASSERT_OK(err, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = bpf_program__fd(skel->progs.tc3);
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup;
+ tc_attached = true;
+ }
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count(target, 2);
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+
+ skel->bss->seen_tc1 = false;
+ skel->bss->seen_tc2 = false;
+ skel->bss->seen_tc3 = false;
+
+ err = bpf_link__detach(skel->links.tc2);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+cleanup:
+ if (tc_attached) {
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ err = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(err, "bpf_tc_detach");
+ }
+ if (hook_created) {
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+ }
+ assert_mprog_count(target, 1);
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_chain_classic(void)
+{
+ test_tc_chain_classic(BPF_TCX_INGRESS, false);
+ test_tc_chain_classic(BPF_TCX_EGRESS, false);
+ test_tc_chain_classic(BPF_TCX_INGRESS, true);
+ test_tc_chain_classic(BPF_TCX_EGRESS, true);
+}
+
+static void test_tc_links_replace_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3, lid1, lid2;
+ __u32 prog_ids[4], link_ids[4];
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ optl.expected_revision = 1;
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_id = pid1,
+ .expected_revision = 2,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+
+ skel->bss->seen_tc1 = false;
+ skel->bss->seen_tc2 = false;
+ skel->bss->seen_tc3 = false;
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_REPLACE,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ .expected_revision = 3,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_REPLACE | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(skel->links.tc2),
+ .expected_revision = 3,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_REPLACE | BPF_F_LINK | BPF_F_AFTER,
+ .relative_id = lid2,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 2);
+
+ err = bpf_link__update_program(skel->links.tc2, skel->progs.tc3);
+ if (!ASSERT_OK(err, "link_update"))
+ goto cleanup;
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 4, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid3, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+
+ skel->bss->seen_tc1 = false;
+ skel->bss->seen_tc2 = false;
+ skel->bss->seen_tc3 = false;
+
+ err = bpf_link__detach(skel->links.tc2);
+ if (!ASSERT_OK(err, "link_detach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+
+ skel->bss->seen_tc1 = false;
+ skel->bss->seen_tc2 = false;
+ skel->bss->seen_tc3 = false;
+
+ err = bpf_link__update_program(skel->links.tc1, skel->progs.tc1);
+ if (!ASSERT_OK(err, "link_update_self"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_replace(void)
+{
+ test_tc_links_replace_target(BPF_TCX_INGRESS);
+ test_tc_links_replace_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_invalid_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, lid1;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ optl.flags = BPF_F_BEFORE | BPF_F_AFTER;
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_ID,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER | BPF_F_ID,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_ID,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_LINK,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_LINK,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_ID,
+ .relative_id = pid2,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_ID,
+ .relative_id = 42,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, 0, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK | BPF_F_ID,
+ .relative_id = ~0,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK | BPF_F_ID,
+ .relative_id = lid1,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_ID,
+ .relative_id = pid1,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK | BPF_F_ID,
+ .relative_id = lid1,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count(target, 2);
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_invalid(void)
+{
+ test_tc_links_invalid_target(BPF_TCX_INGRESS);
+ test_tc_links_invalid_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_prepend_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[5], link_ids[5];
+ __u32 pid1, pid2, pid3, pid4;
+ __u32 lid1, lid2, lid3, lid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ skel->bss->seen_tc1 = false;
+ skel->bss->seen_tc2 = false;
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3));
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid4, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid4, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid3, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid3, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid2, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], lid2, "link_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], pid1, "prog_ids[3]");
+ ASSERT_EQ(optq.link_ids[3], lid1, "link_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_prepend(void)
+{
+ test_tc_links_prepend_target(BPF_TCX_INGRESS);
+ test_tc_links_prepend_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_append_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[5], link_ids[5];
+ __u32 pid1, pid2, pid3, pid4;
+ __u32 lid1, lid2, lid3, lid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ skel->bss->seen_tc1 = false;
+ skel->bss->seen_tc2 = false;
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3));
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid3, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], lid3, "link_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], pid4, "prog_ids[3]");
+ ASSERT_EQ(optq.link_ids[3], lid4, "link_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_append(void)
+{
+ test_tc_links_append_target(BPF_TCX_INGRESS);
+ test_tc_links_append_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_dev_cleanup_target(int target)
+{
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 pid1, pid2, pid3, pid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth");
+ ifindex = if_nametoindex("tcx_opts1");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 3);
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 4);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc1)), 0, "tc1_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc2)), 0, "tc2_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc3)), 0, "tc3_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc4)), 0, "tc4_ifindex");
+
+ test_tc_link__destroy(skel);
+ return;
+cleanup:
+ test_tc_link__destroy(skel);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+}
+
+void serial_test_tc_links_dev_cleanup(void)
+{
+ test_tc_links_dev_cleanup_target(BPF_TCX_INGRESS);
+ test_tc_links_dev_cleanup_target(BPF_TCX_EGRESS);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
new file mode 100644
index 000000000000..7914100f9b46
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -0,0 +1,2239 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <uapi/linux/if_link.h>
+#include <net/if.h>
+#include <test_progs.h>
+
+#define loopback 1
+#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+#include "test_tc_link.skel.h"
+#include "tc_helpers.h"
+
+void serial_test_tc_opts_basic(void)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, id1, id2;
+ struct test_tc_link *skel;
+ __u32 prog_ids[2];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 0);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ err = bpf_prog_attach_opts(fd1, loopback, BPF_TCX_INGRESS, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, BPF_TCX_INGRESS, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_in;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ err = bpf_prog_attach_opts(fd2, loopback, BPF_TCX_EGRESS, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_in;
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, BPF_TCX_EGRESS, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_eg;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+
+cleanup_eg:
+ err = bpf_prog_detach_opts(fd2, loopback, BPF_TCX_EGRESS, &optd);
+ ASSERT_OK(err, "prog_detach_eg");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+cleanup_in:
+ err = bpf_prog_detach_opts(fd1, loopback, BPF_TCX_INGRESS, &optd);
+ ASSERT_OK(err, "prog_detach_in");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 0);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+static void test_tc_opts_before_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target3;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 4, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_id = id1,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target3;
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id4, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id2, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+
+cleanup_target4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup_target3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_before(void)
+{
+ test_tc_opts_before_target(BPF_TCX_INGRESS);
+ test_tc_opts_before_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_after_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target3;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 4, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target3;
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+
+cleanup_target4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target3;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 6, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+cleanup_target3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 7, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 8, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+cleanup_target:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_after(void)
+{
+ test_tc_opts_after_target(BPF_TCX_INGRESS);
+ test_tc_opts_after_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_revision_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, id1, id2;
+ struct test_tc_link *skel;
+ __u32 prog_ids[3];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, -ESTALE, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 2,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+
+ LIBBPF_OPTS_RESET(optd,
+ .expected_revision = 2,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_EQ(err, -ESTALE, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup_target2:
+ LIBBPF_OPTS_RESET(optd,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ LIBBPF_OPTS_RESET(optd);
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_revision(void)
+{
+ test_tc_opts_revision_target(BPF_TCX_INGRESS);
+ test_tc_opts_revision_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_chain_classic(int target, bool chain_tc_old)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback);
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ bool hook_created = false, tc_attached = false;
+ __u32 fd1, fd2, fd3, id1, id2, id3;
+ struct test_tc_link *skel;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ if (chain_tc_old) {
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(&tc_hook);
+ if (err == 0)
+ hook_created = true;
+ err = err == -EEXIST ? 0 : err;
+ if (!ASSERT_OK(err, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = fd3;
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup;
+ tc_attached = true;
+ }
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_detach;
+
+ assert_mprog_count(target, 2);
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+
+ skel->bss->seen_tc1 = false;
+ skel->bss->seen_tc2 = false;
+ skel->bss->seen_tc3 = false;
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup_detach;
+
+ assert_mprog_count(target, 1);
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+
+cleanup_detach:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup;
+
+ __assert_mprog_count(target, 0, chain_tc_old, loopback);
+cleanup:
+ if (tc_attached) {
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ err = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(err, "bpf_tc_detach");
+ }
+ if (hook_created) {
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+ }
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_opts_chain_classic(void)
+{
+ test_tc_chain_classic(BPF_TCX_INGRESS, false);
+ test_tc_chain_classic(BPF_TCX_EGRESS, false);
+ test_tc_chain_classic(BPF_TCX_INGRESS, true);
+ test_tc_chain_classic(BPF_TCX_EGRESS, true);
+}
+
+static void test_tc_opts_replace_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, id1, id2, id3, detach_fd;
+ __u32 prog_ids[4], prog_flags[4];
+ struct test_tc_link *skel;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_id = id1,
+ .expected_revision = 2,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ detach_fd = fd2;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_attach_flags = prog_flags;
+ optq.prog_ids = prog_ids;
+
+ memset(prog_flags, 0, sizeof(prog_flags));
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ ASSERT_EQ(optq.prog_attach_flags[0], 0, "prog_flags[0]");
+ ASSERT_EQ(optq.prog_attach_flags[1], 0, "prog_flags[1]");
+ ASSERT_EQ(optq.prog_attach_flags[2], 0, "prog_flags[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+
+ skel->bss->seen_tc1 = false;
+ skel->bss->seen_tc2 = false;
+ skel->bss->seen_tc3 = false;
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ detach_fd = fd3;
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 4, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id3, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+
+ skel->bss->seen_tc1 = false;
+ skel->bss->seen_tc2 = false;
+ skel->bss->seen_tc3 = false;
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE | BPF_F_BEFORE,
+ .replace_prog_fd = fd3,
+ .relative_fd = fd1,
+ .expected_revision = 4,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ detach_fd = fd2;
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = fd2,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE | BPF_F_AFTER,
+ .replace_prog_fd = fd2,
+ .relative_fd = fd1,
+ .expected_revision = 5,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ ASSERT_EQ(err, -ERANGE, "prog_attach");
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_AFTER | BPF_F_REPLACE,
+ .replace_prog_fd = fd2,
+ .relative_fd = fd1,
+ .expected_revision = 5,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ ASSERT_EQ(err, -ERANGE, "prog_attach");
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_id = id1,
+ .expected_revision = 5,
+ );
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ LIBBPF_OPTS_RESET(optd);
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_replace(void)
+{
+ test_tc_opts_replace_target(BPF_TCX_INGRESS);
+ test_tc_opts_replace_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_invalid_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ __u32 fd1, fd2, id1, id2;
+ struct test_tc_link *skel;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ERANGE, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ID,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach_x1");
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_invalid(void)
+{
+ test_tc_opts_invalid_target(BPF_TCX_INGRESS);
+ test_tc_opts_invalid_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_prepend_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target3;
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id4, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id1, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+
+cleanup_target4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup_target3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_prepend(void)
+{
+ test_tc_opts_prepend_target(BPF_TCX_INGRESS);
+ test_tc_opts_prepend_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_append_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target3;
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+
+cleanup_target4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup_target3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_append(void)
+{
+ test_tc_opts_append_target(BPF_TCX_INGRESS);
+ test_tc_opts_append_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_dev_cleanup_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth");
+ ifindex = if_nametoindex("tcx_opts1");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ err = bpf_prog_attach_opts(fd1, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ err = bpf_prog_attach_opts(fd2, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ err = bpf_prog_attach_opts(fd3, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count_ifindex(ifindex, target, 3);
+
+ err = bpf_prog_attach_opts(fd4, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count_ifindex(ifindex, target, 4);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+ return;
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+}
+
+void serial_test_tc_opts_dev_cleanup(void)
+{
+ test_tc_opts_dev_cleanup_target(BPF_TCX_INGRESS);
+ test_tc_opts_dev_cleanup_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_mixed_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3, pid4, lid2, lid4;
+ __u32 prog_flags[4], link_flags[4];
+ __u32 prog_ids[4], link_ids[4];
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, detach_fd;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc1),
+ loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ detach_fd = bpf_program__fd(skel->progs.tc1);
+
+ assert_mprog_count(target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup1;
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc2),
+ loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc1),
+ loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc3),
+ loopback, target, &opta);
+ ASSERT_EQ(err, -EBUSY, "prog_attach");
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc3),
+ loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ detach_fd = bpf_program__fd(skel->progs.tc3);
+
+ assert_mprog_count(target, 2);
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup1;
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc4),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc2),
+ loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+
+ optq.prog_ids = prog_ids;
+ optq.prog_attach_flags = prog_flags;
+ optq.link_ids = link_ids;
+ optq.link_attach_flags = link_flags;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(prog_flags, 0, sizeof(prog_flags));
+ memset(link_ids, 0, sizeof(link_ids));
+ memset(link_flags, 0, sizeof(link_flags));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup1;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid3, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_attach_flags[0], 0, "prog_flags[0]");
+ ASSERT_EQ(optq.link_ids[0], 0, "link_ids[0]");
+ ASSERT_EQ(optq.link_attach_flags[0], 0, "link_flags[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_attach_flags[1], 0, "prog_flags[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.link_attach_flags[1], 0, "link_flags[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid4, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_attach_flags[2], 0, "prog_flags[2]");
+ ASSERT_EQ(optq.link_ids[2], lid4, "link_ids[2]");
+ ASSERT_EQ(optq.link_attach_flags[2], 0, "link_flags[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_attach_flags[3], 0, "prog_flags[3]");
+ ASSERT_EQ(optq.link_ids[3], 0, "link_ids[3]");
+ ASSERT_EQ(optq.link_attach_flags[3], 0, "link_flags[3]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+cleanup1:
+ err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_opts_mixed(void)
+{
+ test_tc_opts_mixed_target(BPF_TCX_INGRESS);
+ test_tc_opts_mixed_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_demixed_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ __u32 pid1, pid2;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc1),
+ loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup1;
+ skel->links.tc2 = link;
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_EQ(err, -EBUSY, "prog_detach");
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 1);
+ goto cleanup;
+
+cleanup1:
+ err = bpf_prog_detach_opts(bpf_program__fd(skel->progs.tc1),
+ loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_opts_demixed(void)
+{
+ test_tc_opts_demixed_target(BPF_TCX_INGRESS);
+ test_tc_opts_demixed_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_detach_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(target, 2);
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(target, 3);
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(target, 4);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 3);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 6, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id4, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 7, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ LIBBPF_OPTS_RESET(optd);
+
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ goto cleanup;
+
+cleanup4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_detach(void)
+{
+ test_tc_opts_detach_target(BPF_TCX_INGRESS);
+ test_tc_opts_detach_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_detach_before_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(target, 2);
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(target, 3);
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(target, 4);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 3);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 6, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id4, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd4,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_EQ(err, -ERANGE, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd3,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 7, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id3, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id4, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd4,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 8, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id4, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 0);
+ goto cleanup;
+
+cleanup4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_detach_before(void)
+{
+ test_tc_opts_detach_before_target(BPF_TCX_INGRESS);
+ test_tc_opts_detach_before_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_detach_after_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(target, 2);
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(target, 3);
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(target, 4);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 3);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 6, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id4, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd4,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_EQ(err, -ERANGE, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd3,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_EQ(err, -ERANGE, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_EQ(err, -ERANGE, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 7, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id4, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 8, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 0);
+ goto cleanup;
+
+cleanup4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_detach_after(void)
+{
+ test_tc_opts_detach_after_target(BPF_TCX_INGRESS);
+ test_tc_opts_detach_after_target(BPF_TCX_EGRESS);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index 13bcaeb028b8..56685fc03c7e 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -347,7 +347,7 @@ static void syncookie_estab(void)
exp_active_estab_in.max_delack_ms = 22;
exp_passive_hdr_stg.syncookie = true;
- exp_active_hdr_stg.resend_syn = true,
+ exp_active_hdr_stg.resend_syn = true;
prepare_out();
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
index 1c75a32186d6..fe0fb0c9849a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
@@ -55,6 +55,25 @@ static void test_fentry(void)
ASSERT_EQ(skel->bss->t6, 1, "t6 ret");
+ ASSERT_EQ(skel->bss->t7_a, 16, "t7:a");
+ ASSERT_EQ(skel->bss->t7_b, 17, "t7:b");
+ ASSERT_EQ(skel->bss->t7_c, 18, "t7:c");
+ ASSERT_EQ(skel->bss->t7_d, 19, "t7:d");
+ ASSERT_EQ(skel->bss->t7_e, 20, "t7:e");
+ ASSERT_EQ(skel->bss->t7_f_a, 21, "t7:f.a");
+ ASSERT_EQ(skel->bss->t7_f_b, 22, "t7:f.b");
+ ASSERT_EQ(skel->bss->t7_ret, 133, "t7 ret");
+
+ ASSERT_EQ(skel->bss->t8_a, 16, "t8:a");
+ ASSERT_EQ(skel->bss->t8_b, 17, "t8:b");
+ ASSERT_EQ(skel->bss->t8_c, 18, "t8:c");
+ ASSERT_EQ(skel->bss->t8_d, 19, "t8:d");
+ ASSERT_EQ(skel->bss->t8_e, 20, "t8:e");
+ ASSERT_EQ(skel->bss->t8_f_a, 21, "t8:f.a");
+ ASSERT_EQ(skel->bss->t8_f_b, 22, "t8:f.b");
+ ASSERT_EQ(skel->bss->t8_g, 23, "t8:g");
+ ASSERT_EQ(skel->bss->t8_ret, 156, "t8 ret");
+
tracing_struct__detach(skel);
destroy_skel:
tracing_struct__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index e91d0d1769f1..6cd7349d4a2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -88,8 +88,8 @@ void serial_test_trampoline_count(void)
if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
goto cleanup;
- ASSERT_EQ(opts.retval & 0xffff, 4, "bpf_modify_return_test.result");
- ASSERT_EQ(opts.retval >> 16, 1, "bpf_modify_return_test.side_effect");
+ ASSERT_EQ(opts.retval & 0xffff, 33, "bpf_modify_return_test.result");
+ ASSERT_EQ(opts.retval >> 16, 2, "bpf_modify_return_test.side_effect");
cleanup:
for (; i >= 0; i--) {
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 070a13833c3f..c375e59ff28d 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -58,6 +58,7 @@
#include "verifier_stack_ptr.skel.h"
#include "verifier_subprog_precision.skel.h"
#include "verifier_subreg.skel.h"
+#include "verifier_typedef.skel.h"
#include "verifier_uninit.skel.h"
#include "verifier_unpriv.skel.h"
#include "verifier_unpriv_perf.skel.h"
@@ -159,6 +160,7 @@ void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); }
void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); }
void test_verifier_subreg(void) { RUN(verifier_subreg); }
+void test_verifier_typedef(void) { RUN(verifier_typedef); }
void test_verifier_uninit(void) { RUN(verifier_uninit); }
void test_verifier_unpriv(void) { RUN(verifier_unpriv); }
void test_verifier_unpriv_perf(void) { RUN(verifier_unpriv_perf); }
diff --git a/tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c b/tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c
new file mode 100644
index 000000000000..1e2e73f3b749
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "cgroup_tcp_skb.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u16 g_sock_port = 0;
+__u32 g_sock_state = 0;
+int g_unexpected = 0;
+__u32 g_packet_count = 0;
+
+int needed_tcp_pkt(struct __sk_buff *skb, struct tcphdr *tcph)
+{
+ struct ipv6hdr ip6h;
+
+ if (skb->protocol != bpf_htons(ETH_P_IPV6))
+ return 0;
+ if (bpf_skb_load_bytes(skb, 0, &ip6h, sizeof(ip6h)))
+ return 0;
+
+ if (ip6h.nexthdr != IPPROTO_TCP)
+ return 0;
+
+ if (bpf_skb_load_bytes(skb, sizeof(ip6h), tcph, sizeof(*tcph)))
+ return 0;
+
+ if (tcph->source != bpf_htons(g_sock_port) &&
+ tcph->dest != bpf_htons(g_sock_port))
+ return 0;
+
+ return 1;
+}
+
+/* Run accept() on a socket in the cgroup to receive a new connection. */
+static int egress_accept(struct tcphdr *tcph)
+{
+ if (g_sock_state == SYN_RECV_SENDING_SYN_ACK) {
+ if (tcph->fin || !tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = SYN_RECV;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int ingress_accept(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case INIT:
+ if (!tcph->syn || tcph->fin || tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = SYN_RECV_SENDING_SYN_ACK;
+ break;
+ case SYN_RECV:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = ESTABLISHED;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Run connect() on a socket in the cgroup to start a new connection. */
+static int egress_connect(struct tcphdr *tcph)
+{
+ if (g_sock_state == INIT) {
+ if (!tcph->syn || tcph->fin || tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = SYN_SENT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int ingress_connect(struct tcphdr *tcph)
+{
+ if (g_sock_state == SYN_SENT) {
+ if (tcph->fin || !tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = ESTABLISHED;
+ return 1;
+ }
+
+ return 0;
+}
+
+/* The connection is closed by the peer outside the cgroup. */
+static int egress_close_remote(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case ESTABLISHED:
+ break;
+ case CLOSE_WAIT_SENDING_ACK:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = CLOSE_WAIT;
+ break;
+ case CLOSE_WAIT:
+ if (!tcph->fin)
+ g_unexpected++;
+ else
+ g_sock_state = LAST_ACK;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+static int ingress_close_remote(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case ESTABLISHED:
+ if (tcph->fin)
+ g_sock_state = CLOSE_WAIT_SENDING_ACK;
+ break;
+ case LAST_ACK:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = CLOSED;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+/* The connection is closed by the endpoint inside the cgroup. */
+static int egress_close_local(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case ESTABLISHED:
+ if (tcph->fin)
+ g_sock_state = FIN_WAIT1;
+ break;
+ case TIME_WAIT_SENDING_ACK:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = TIME_WAIT;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+static int ingress_close_local(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case ESTABLISHED:
+ break;
+ case FIN_WAIT1:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = FIN_WAIT2;
+ break;
+ case FIN_WAIT2:
+ if (!tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = TIME_WAIT_SENDING_ACK;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Check the types of outgoing packets of a server socket to make sure they
+ * are consistent with the state of the server socket.
+ *
+ * The connection is closed by the client side.
+ */
+SEC("cgroup_skb/egress")
+int server_egress(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Egress of the server socket. */
+ if (egress_accept(&tcph) || egress_close_remote(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of incoming packets of a server socket to make sure they
+ * are consistent with the state of the server socket.
+ *
+ * The connection is closed by the client side.
+ */
+SEC("cgroup_skb/ingress")
+int server_ingress(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Ingress of the server socket. */
+ if (ingress_accept(&tcph) || ingress_close_remote(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of outgoing packets of a server socket to make sure they
+ * are consistent with the state of the server socket.
+ *
+ * The connection is closed by the server side.
+ */
+SEC("cgroup_skb/egress")
+int server_egress_srv(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Egress of the server socket. */
+ if (egress_accept(&tcph) || egress_close_local(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of incoming packets of a server socket to make sure they
+ * are consistent with the state of the server socket.
+ *
+ * The connection is closed by the server side.
+ */
+SEC("cgroup_skb/ingress")
+int server_ingress_srv(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Ingress of the server socket. */
+ if (ingress_accept(&tcph) || ingress_close_local(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of outgoing packets of a client socket to make sure they
+ * are consistent with the state of the client socket.
+ *
+ * The connection is closed by the server side.
+ */
+SEC("cgroup_skb/egress")
+int client_egress_srv(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Egress of the server socket. */
+ if (egress_connect(&tcph) || egress_close_remote(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of incoming packets of a client socket to make sure they
+ * are consistent with the state of the client socket.
+ *
+ * The connection is closed by the server side.
+ */
+SEC("cgroup_skb/ingress")
+int client_ingress_srv(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Ingress of the server socket. */
+ if (ingress_connect(&tcph) || ingress_close_remote(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of outgoing packets of a client socket to make sure they
+ * are consistent with the state of the client socket.
+ *
+ * The connection is closed by the client side.
+ */
+SEC("cgroup_skb/egress")
+int client_egress(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Egress of the server socket. */
+ if (egress_connect(&tcph) || egress_close_local(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of incoming packets of a client socket to make sure they
+ * are consistent with the state of the client socket.
+ *
+ * The connection is closed by the client side.
+ */
+SEC("cgroup_skb/ingress")
+int client_ingress(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Ingress of the server socket. */
+ if (ingress_connect(&tcph) || ingress_close_local(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/fentry_many_args.c b/tools/testing/selftests/bpf/progs/fentry_many_args.c
new file mode 100644
index 000000000000..b61bb92fee2c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fentry_many_args.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Tencent */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_testmod_fentry_test7")
+int BPF_PROG(test1, __u64 a, void *b, short c, int d, void *e, char f,
+ int g)
+{
+ test1_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fentry/bpf_testmod_fentry_test11")
+int BPF_PROG(test2, __u64 a, void *b, short c, int d, void *e, char f,
+ int g, unsigned int h, long i, __u64 j, unsigned long k)
+{
+ test2_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("fentry/bpf_testmod_fentry_test11")
+int BPF_PROG(test3, __u64 a, __u64 b, __u64 c, __u64 d, __u64 e, __u64 f,
+ __u64 g, __u64 h, __u64 i, __u64 j, __u64 k)
+{
+ test3_result = a == 16 && b == 17 && c == 18 && d == 19 &&
+ e == 20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_many_args.c b/tools/testing/selftests/bpf/progs/fexit_many_args.c
new file mode 100644
index 000000000000..53b335c2dafb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_many_args.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Tencent */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_result = 0;
+SEC("fexit/bpf_testmod_fentry_test7")
+int BPF_PROG(test1, __u64 a, void *b, short c, int d, void *e, char f,
+ int g, int ret)
+{
+ test1_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22 && ret == 133;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_testmod_fentry_test11")
+int BPF_PROG(test2, __u64 a, void *b, short c, int d, void *e, char f,
+ int g, unsigned int h, long i, __u64 j, unsigned long k,
+ int ret)
+{
+ test2_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26 && ret == 231;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("fexit/bpf_testmod_fentry_test11")
+int BPF_PROG(test3, __u64 a, __u64 b, __u64 c, __u64 d, __u64 e, __u64 f,
+ __u64 g, __u64 h, __u64 i, __u64 j, __u64 k, __u64 ret)
+{
+ test3_result = a == 16 && b == 17 && c == 18 && d == 19 &&
+ e == 20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26 && ret == 231;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/htab_mem_bench.c b/tools/testing/selftests/bpf/progs/htab_mem_bench.c
new file mode 100644
index 000000000000..b1b721b14d67
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/htab_mem_bench.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define OP_BATCH 64
+
+struct update_ctx {
+ unsigned int from;
+ unsigned int step;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, 4);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} htab SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+unsigned char zeroed_value[4096];
+unsigned int nr_thread = 0;
+long op_cnt = 0;
+
+static int write_htab(unsigned int i, struct update_ctx *ctx, unsigned int flags)
+{
+ bpf_map_update_elem(&htab, &ctx->from, zeroed_value, flags);
+ ctx->from += ctx->step;
+
+ return 0;
+}
+
+static int overwrite_htab(unsigned int i, struct update_ctx *ctx)
+{
+ return write_htab(i, ctx, 0);
+}
+
+static int newwrite_htab(unsigned int i, struct update_ctx *ctx)
+{
+ return write_htab(i, ctx, BPF_NOEXIST);
+}
+
+static int del_htab(unsigned int i, struct update_ctx *ctx)
+{
+ bpf_map_delete_elem(&htab, &ctx->from);
+ ctx->from += ctx->step;
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_getpgid")
+int overwrite(void *ctx)
+{
+ struct update_ctx update;
+
+ update.from = bpf_get_smp_processor_id();
+ update.step = nr_thread;
+ bpf_loop(OP_BATCH, overwrite_htab, &update, 0);
+ __sync_fetch_and_add(&op_cnt, 1);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_getpgid")
+int batch_add_batch_del(void *ctx)
+{
+ struct update_ctx update;
+
+ update.from = bpf_get_smp_processor_id();
+ update.step = nr_thread;
+ bpf_loop(OP_BATCH, overwrite_htab, &update, 0);
+
+ update.from = bpf_get_smp_processor_id();
+ bpf_loop(OP_BATCH, del_htab, &update, 0);
+
+ __sync_fetch_and_add(&op_cnt, 2);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_getpgid")
+int add_only(void *ctx)
+{
+ struct update_ctx update;
+
+ update.from = bpf_get_smp_processor_id() / 2;
+ update.step = nr_thread / 2;
+ bpf_loop(OP_BATCH, newwrite_htab, &update, 0);
+ __sync_fetch_and_add(&op_cnt, 1);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_getppid")
+int del_only(void *ctx)
+{
+ struct update_ctx update;
+
+ update.from = bpf_get_smp_processor_id() / 2;
+ update.step = nr_thread / 2;
+ bpf_loop(OP_BATCH, del_htab, &update, 0);
+ __sync_fetch_and_add(&op_cnt, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c
index 57440a554304..84d1777a9e6c 100644
--- a/tools/testing/selftests/bpf/progs/linked_list.c
+++ b/tools/testing/selftests/bpf/progs/linked_list.c
@@ -96,7 +96,7 @@ static __always_inline
int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map)
{
struct bpf_list_node *n;
- struct foo *f[8], *pf;
+ struct foo *f[200], *pf;
int i;
/* Loop following this check adds nodes 2-at-a-time in order to
diff --git a/tools/testing/selftests/bpf/progs/map_percpu_stats.c b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
new file mode 100644
index 000000000000..10b2325c1720
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u32 target_id;
+
+__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct bpf_map *map = ctx->map;
+
+ if (map && map->id == target_id)
+ BPF_SEQ_PRINTF(seq, "%lld", bpf_map_sum_elem_count(map));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index db388f593d0a..3325da17ec81 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -103,6 +103,8 @@ struct {
__type(value, __u32);
} m_hash SEC(".maps");
+__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
+
static inline int check_hash(void)
{
struct bpf_htab *hash = (struct bpf_htab *)&m_hash;
@@ -115,6 +117,8 @@ static inline int check_hash(void)
VERIFY(hash->elem_size == 64);
VERIFY(hash->count.counter == 0);
+ VERIFY(bpf_map_sum_elem_count(map) == 0);
+
for (i = 0; i < HALF_ENTRIES; ++i) {
const __u32 key = i;
const __u32 val = 1;
@@ -123,6 +127,7 @@ static inline int check_hash(void)
return 0;
}
VERIFY(hash->count.counter == HALF_ENTRIES);
+ VERIFY(bpf_map_sum_elem_count(map) == HALF_ENTRIES);
return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c
index 8b7466a15c6b..3376d4849f58 100644
--- a/tools/testing/selftests/bpf/progs/modify_return.c
+++ b/tools/testing/selftests/bpf/progs/modify_return.c
@@ -47,3 +47,43 @@ int BPF_PROG(fexit_test, int a, __u64 b, int ret)
return 0;
}
+
+static int sequence2;
+
+__u64 fentry_result2 = 0;
+SEC("fentry/bpf_modify_return_test2")
+int BPF_PROG(fentry_test2, int a, int *b, short c, int d, void *e, char f,
+ int g)
+{
+ sequence2++;
+ fentry_result2 = (sequence2 == 1);
+ return 0;
+}
+
+__u64 fmod_ret_result2 = 0;
+SEC("fmod_ret/bpf_modify_return_test2")
+int BPF_PROG(fmod_ret_test2, int a, int *b, short c, int d, void *e, char f,
+ int g, int ret)
+{
+ sequence2++;
+ /* This is the first fmod_ret program, the ret passed should be 0 */
+ fmod_ret_result2 = (sequence2 == 2 && ret == 0);
+ return input_retval;
+}
+
+__u64 fexit_result2 = 0;
+SEC("fexit/bpf_modify_return_test2")
+int BPF_PROG(fexit_test2, int a, int *b, short c, int d, void *e, char f,
+ int g, int ret)
+{
+ sequence2++;
+ /* If the input_reval is non-zero a successful modification should have
+ * occurred.
+ */
+ if (input_retval)
+ fexit_result2 = (sequence2 == 3 && ret == input_retval);
+ else
+ fexit_result2 = (sequence2 == 3 && ret == 29);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
index 0d1aa6bbace4..ea39497f11ed 100644
--- a/tools/testing/selftests/bpf/progs/nested_trust_failure.c
+++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
@@ -10,6 +10,13 @@
char _license[] SEC("license") = "GPL";
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, u64);
+} sk_storage_map SEC(".maps");
+
/* Prototype for all of the program trace events below:
*
* TRACE_EVENT(task_newtask,
@@ -31,3 +38,12 @@ int BPF_PROG(test_invalid_nested_offset, struct task_struct *task, u64 clone_fla
bpf_cpumask_first_zero(&task->cpus_mask);
return 0;
}
+
+/* Although R2 is of type sk_buff but sock_common is expected, we will hit untrusted ptr first. */
+SEC("tp_btf/tcp_probe")
+__failure __msg("R2 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+int BPF_PROG(test_invalid_skb_field, struct sock *sk, struct sk_buff *skb)
+{
+ bpf_sk_storage_get(&sk_storage_map, skb->next, 0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_success.c b/tools/testing/selftests/bpf/progs/nested_trust_success.c
index 886ade4aa99d..833840bffd3b 100644
--- a/tools/testing/selftests/bpf/progs/nested_trust_success.c
+++ b/tools/testing/selftests/bpf/progs/nested_trust_success.c
@@ -10,6 +10,13 @@
char _license[] SEC("license") = "GPL";
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, u64);
+} sk_storage_map SEC(".maps");
+
SEC("tp_btf/task_newtask")
__success
int BPF_PROG(test_read_cpumask, struct task_struct *task, u64 clone_flags)
@@ -17,3 +24,11 @@ int BPF_PROG(test_read_cpumask, struct task_struct *task, u64 clone_flags)
bpf_cpumask_test_cpu(0, task->cpus_ptr);
return 0;
}
+
+SEC("tp_btf/tcp_probe")
+__success
+int BPF_PROG(test_skb_field, struct sock *sk, struct sk_buff *skb)
+{
+ bpf_sk_storage_get(&sk_storage_map, skb->sk, 0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
index a3da610b1e6b..c55652fdc63a 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -24,7 +24,7 @@ struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, int);
__type(value, struct map_value);
- __uint(max_entries, 1);
+ __uint(max_entries, 2);
} stashed_nodes SEC(".maps");
struct node_acquire {
@@ -42,6 +42,9 @@ private(A) struct bpf_list_head head __contains(node_data, l);
private(B) struct bpf_spin_lock alock;
private(B) struct bpf_rb_root aroot __contains(node_acquire, node);
+private(C) struct bpf_spin_lock block;
+private(C) struct bpf_rb_root broot __contains(node_data, r);
+
static bool less(struct bpf_rb_node *node_a, const struct bpf_rb_node *node_b)
{
struct node_data *a;
@@ -405,4 +408,93 @@ long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
return 0;
}
+static long __stash_map_empty_xchg(struct node_data *n, int idx)
+{
+ struct map_value *mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+
+ if (!mapval) {
+ bpf_obj_drop(n);
+ return 1;
+ }
+ n = bpf_kptr_xchg(&mapval->node, n);
+ if (n) {
+ bpf_obj_drop(n);
+ return 2;
+ }
+ return 0;
+}
+
+SEC("tc")
+long rbtree_wrong_owner_remove_fail_a1(void *ctx)
+{
+ struct node_data *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+ m = bpf_refcount_acquire(n);
+
+ if (__stash_map_empty_xchg(n, 0)) {
+ bpf_obj_drop(m);
+ return 2;
+ }
+
+ if (__stash_map_empty_xchg(m, 1))
+ return 3;
+
+ return 0;
+}
+
+SEC("tc")
+long rbtree_wrong_owner_remove_fail_b(void *ctx)
+{
+ struct map_value *mapval;
+ struct node_data *n;
+ int idx = 0;
+
+ mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ n = bpf_kptr_xchg(&mapval->node, NULL);
+ if (!n)
+ return 2;
+
+ bpf_spin_lock(&block);
+
+ bpf_rbtree_add(&broot, &n->r, less);
+
+ bpf_spin_unlock(&block);
+ return 0;
+}
+
+SEC("tc")
+long rbtree_wrong_owner_remove_fail_a2(void *ctx)
+{
+ struct map_value *mapval;
+ struct bpf_rb_node *res;
+ struct node_data *m;
+ int idx = 1;
+
+ mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ m = bpf_kptr_xchg(&mapval->node, NULL);
+ if (!m)
+ return 2;
+ bpf_spin_lock(&lock);
+
+ /* make m non-owning ref */
+ bpf_list_push_back(&head, &m->l);
+ res = bpf_rbtree_remove(&root, &m->r);
+
+ bpf_spin_unlock(&lock);
+ if (res) {
+ bpf_obj_drop(container_of(res, struct node_data, r));
+ return 3;
+ }
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
index 2588f2384246..1fbb73d3e5d5 100644
--- a/tools/testing/selftests/bpf/progs/test_global_map_resize.c
+++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
@@ -29,13 +29,16 @@ int my_int SEC(".data.non_array");
int my_array_first[1] SEC(".data.array_not_last");
int my_int_last SEC(".data.array_not_last");
+int percpu_arr[1] SEC(".data.percpu_arr");
+
SEC("tp/syscalls/sys_enter_getpid")
int bss_array_sum(void *ctx)
{
if (pid != (bpf_get_current_pid_tgid() >> 32))
return 0;
- sum = 0;
+ /* this will be zero, we just rely on verifier not rejecting this */
+ sum = percpu_arr[bpf_get_smp_processor_id()];
for (size_t i = 0; i < bss_array_len; ++i)
sum += array[i];
@@ -49,7 +52,8 @@ int data_array_sum(void *ctx)
if (pid != (bpf_get_current_pid_tgid() >> 32))
return 0;
- sum = 0;
+ /* this will be zero, we just rely on verifier not rejecting this */
+ sum = percpu_arr[bpf_get_smp_processor_id()];
for (size_t i = 0; i < data_array_len; ++i)
sum += my_array[i];
diff --git a/tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c b/tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c
new file mode 100644
index 000000000000..03a475160abe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#define NF_ACCEPT 1
+
+SEC("netfilter")
+int nf_link_attach_test(struct bpf_nf_ctx *ctx)
+{
+ return NF_ACCEPT;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
new file mode 100644
index 000000000000..4bdd65b5aa2d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+char tp_name[128];
+
+SEC("lsm/bpf")
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ switch (cmd) {
+ case BPF_RAW_TRACEPOINT_OPEN:
+ bpf_probe_read_user_str(tp_name, sizeof(tp_name) - 1,
+ (void *)attr->raw_tracepoint.name);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+SEC("raw_tracepoint")
+int BPF_PROG(raw_tp_run)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
new file mode 100644
index 000000000000..ed1fd0e9cee9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+bool seen_tc1;
+bool seen_tc2;
+bool seen_tc3;
+bool seen_tc4;
+
+SEC("tc/ingress")
+int tc1(struct __sk_buff *skb)
+{
+ seen_tc1 = true;
+ return TCX_NEXT;
+}
+
+SEC("tc/egress")
+int tc2(struct __sk_buff *skb)
+{
+ seen_tc2 = true;
+ return TCX_NEXT;
+}
+
+SEC("tc/egress")
+int tc3(struct __sk_buff *skb)
+{
+ seen_tc3 = true;
+ return TCX_NEXT;
+}
+
+SEC("tc/egress")
+int tc4(struct __sk_buff *skb)
+{
+ seen_tc4 = true;
+ return TCX_NEXT;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c
index c435a3a8328a..515daef3c84b 100644
--- a/tools/testing/selftests/bpf/progs/tracing_struct.c
+++ b/tools/testing/selftests/bpf/progs/tracing_struct.c
@@ -18,6 +18,11 @@ struct bpf_testmod_struct_arg_3 {
int b[];
};
+struct bpf_testmod_struct_arg_4 {
+ u64 a;
+ int b;
+};
+
long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs;
__u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3;
long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret;
@@ -25,6 +30,9 @@ long t3_a, t3_b, t3_c_a, t3_c_b, t3_ret;
long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret;
long t5_ret;
int t6;
+long t7_a, t7_b, t7_c, t7_d, t7_e, t7_f_a, t7_f_b, t7_ret;
+long t8_a, t8_b, t8_c, t8_d, t8_e, t8_f_a, t8_f_b, t8_g, t8_ret;
+
SEC("fentry/bpf_testmod_test_struct_arg_1")
int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c)
@@ -130,4 +138,50 @@ int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a)
return 0;
}
+SEC("fentry/bpf_testmod_test_struct_arg_7")
+int BPF_PROG2(test_struct_arg_12, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f)
+{
+ t7_a = a;
+ t7_b = (long)b;
+ t7_c = c;
+ t7_d = d;
+ t7_e = (long)e;
+ t7_f_a = f.a;
+ t7_f_b = f.b;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_7")
+int BPF_PROG2(test_struct_arg_13, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, ret)
+{
+ t7_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_8")
+int BPF_PROG2(test_struct_arg_14, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, g)
+{
+ t8_a = a;
+ t8_b = (long)b;
+ t8_c = c;
+ t8_d = d;
+ t8_e = (long)e;
+ t8_f_a = f.a;
+ t8_f_b = f.b;
+ t8_g = g;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_8")
+int BPF_PROG2(test_struct_arg_15, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, g,
+ int, ret)
+{
+ t8_ret = ret;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_typedef.c b/tools/testing/selftests/bpf/progs/verifier_typedef.c
new file mode 100644
index 000000000000..08481cfaac4b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_typedef.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("fentry/bpf_fentry_test_sinfo")
+__description("typedef: resolve")
+__success __retval(0)
+__naked void resolve_typedef(void)
+{
+ asm volatile (" \
+ r1 = *(u64 *)(r1 +0); \
+ r2 = *(u64 *)(r1 +%[frags_offs]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(frags_offs,
+ offsetof(struct skb_shared_info, frags))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
index a630c95c7471..24369f242853 100644
--- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -15,12 +15,12 @@ struct {
static unsigned int idx;
int count = 0;
-SEC("xdp") int xsk_def_prog(struct xdp_md *xdp)
+SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp)
{
return bpf_redirect_map(&xsk, 0, XDP_DROP);
}
-SEC("xdp") int xsk_xdp_drop(struct xdp_md *xdp)
+SEC("xdp.frags") int xsk_xdp_drop(struct xdp_md *xdp)
{
/* Drop every other packet */
if (idx++ % 2)
@@ -29,7 +29,7 @@ SEC("xdp") int xsk_xdp_drop(struct xdp_md *xdp)
return bpf_redirect_map(&xsk, 0, XDP_DROP);
}
-SEC("xdp") int xsk_xdp_populate_metadata(struct xdp_md *xdp)
+SEC("xdp.frags") int xsk_xdp_populate_metadata(struct xdp_md *xdp)
{
void *data, *data_meta;
struct xdp_info *meta;
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index c2ad50f26b63..2aa5a3445056 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -171,7 +171,10 @@ exec_xskxceiver
if [ -z $ETH ]; then
cleanup_exit ${VETH0} ${VETH1}
+else
+ cleanup_iface ${ETH} ${MTU}
fi
+
TEST_NAME="XSK_SELFTESTS_${VETH0}_BUSY_POLL"
busy_poll=1
@@ -184,6 +187,8 @@ exec_xskxceiver
if [ -z $ETH ]; then
cleanup_exit ${VETH0} ${VETH1}
+else
+ cleanup_iface ${ETH} ${MTU}
fi
failures=0
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 9b070cdf44ac..f83d9f65c65b 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -18,7 +18,7 @@
#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
-#define MAX_SYMS 300000
+#define MAX_SYMS 400000
static struct ksym syms[MAX_SYMS];
static int sym_cnt;
@@ -46,6 +46,9 @@ int load_kallsyms_refresh(void)
break;
if (!addr)
continue;
+ if (i >= MAX_SYMS)
+ return -EFBIG;
+
syms[i].addr = (long) addr;
syms[i].name = strdup(func);
i++;
diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
index b39665f33524..319337bdcfc8 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
@@ -242,4 +242,5 @@
.result = REJECT,
.errstr = "R0 invalid mem access",
.errstr_unpriv = "R10 partial copy of pointer",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c
index 83cecfbd6739..0b394a7f7a2d 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_skb.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c
@@ -1169,6 +1169,7 @@
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_SKB,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"pkt_end < pkt taken check",
@@ -1190,4 +1191,5 @@
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_SKB,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index 1a27a6210554..43776f6f92f4 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -290,6 +290,7 @@
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jgt32: BPF_K",
@@ -360,6 +361,7 @@
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jle32: BPF_K",
@@ -430,6 +432,7 @@
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jlt32: BPF_K",
@@ -500,6 +503,7 @@
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jsge32: BPF_K",
@@ -570,6 +574,7 @@
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jsgt32: BPF_K",
@@ -640,6 +645,7 @@
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jsle32: BPF_K",
@@ -710,6 +716,7 @@
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jslt32: BPF_K",
@@ -780,6 +787,7 @@
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jgt32: range bound deduction, reg op imm",
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
index a0cfc06d75bc..d25c3e9605f1 100644
--- a/tools/testing/selftests/bpf/verifier/map_kptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -68,6 +68,7 @@
.fixup_map_kptr = { 1 },
.result = REJECT,
.errstr = "kptr access cannot have variable offset",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"map_kptr: bpf_kptr_xchg non-const var_off",
@@ -121,6 +122,7 @@
.fixup_map_kptr = { 1 },
.result = REJECT,
.errstr = "kptr access misaligned expected=0 off=7",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"map_kptr: reject var_off != 0",
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 99272bb890da..0d84dd1f38b6 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -216,7 +216,7 @@
},
.fixup_map_ringbuf = { 1 },
.prog_type = BPF_PROG_TYPE_XDP,
- .flags = BPF_F_TEST_STATE_FREQ,
+ .flags = BPF_F_TEST_STATE_FREQ | F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
.errstr = "invalid access to memory, mem_size=1 off=42 size=8",
.result = REJECT,
},
diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index 687d83e707f8..d9fb2b730a2c 100644
--- a/tools/testing/selftests/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -18,17 +18,19 @@
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/if_ether.h>
+#include <linux/if_link.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
-#include <linux/if_link.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
@@ -81,6 +83,12 @@ struct xsk_socket {
int fd;
};
+struct nl_mtu_req {
+ struct nlmsghdr nh;
+ struct ifinfomsg msg;
+ char buf[512];
+};
+
int xsk_umem__fd(const struct xsk_umem *umem)
{
return umem ? umem->fd : -EINVAL;
@@ -286,6 +294,132 @@ bool xsk_is_in_mode(u32 ifindex, int mode)
return false;
}
+/* Lifted from netlink.c in tools/lib/bpf */
+static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
+{
+ int len;
+
+ do {
+ len = recvmsg(sock, mhdr, flags);
+ } while (len < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (len < 0)
+ return -errno;
+ return len;
+}
+
+/* Lifted from netlink.c in tools/lib/bpf */
+static int alloc_iov(struct iovec *iov, int len)
+{
+ void *nbuf;
+
+ nbuf = realloc(iov->iov_base, len);
+ if (!nbuf)
+ return -ENOMEM;
+
+ iov->iov_base = nbuf;
+ iov->iov_len = len;
+ return 0;
+}
+
+/* Original version lifted from netlink.c in tools/lib/bpf */
+static int netlink_recv(int sock)
+{
+ struct iovec iov = {};
+ struct msghdr mhdr = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ bool multipart = true;
+ struct nlmsgerr *err;
+ struct nlmsghdr *nh;
+ int len, ret;
+
+ ret = alloc_iov(&iov, 4096);
+ if (ret)
+ goto done;
+
+ while (multipart) {
+ multipart = false;
+ len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
+ if (len < 0) {
+ ret = len;
+ goto done;
+ }
+
+ if (len > iov.iov_len) {
+ ret = alloc_iov(&iov, len);
+ if (ret)
+ goto done;
+ }
+
+ len = netlink_recvmsg(sock, &mhdr, 0);
+ if (len < 0) {
+ ret = len;
+ goto done;
+ }
+
+ if (len == 0)
+ break;
+
+ for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_flags & NLM_F_MULTI)
+ multipart = true;
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ ret = err->error;
+ goto done;
+ case NLMSG_DONE:
+ ret = 0;
+ goto done;
+ default:
+ break;
+ }
+ }
+ }
+ ret = 0;
+done:
+ free(iov.iov_base);
+ return ret;
+}
+
+int xsk_set_mtu(int ifindex, int mtu)
+{
+ struct nl_mtu_req req;
+ struct rtattr *rta;
+ int fd, ret;
+
+ fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+ if (fd < 0)
+ return fd;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.msg.ifi_family = AF_UNSPEC;
+ req.msg.ifi_index = ifindex;
+ rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
+ rta->rta_type = IFLA_MTU;
+ rta->rta_len = RTA_LENGTH(sizeof(unsigned int));
+ req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + RTA_LENGTH(sizeof(mtu));
+ memcpy(RTA_DATA(rta), &mtu, sizeof(mtu));
+
+ ret = send(fd, &req, req.nh.nlmsg_len, 0);
+ if (ret < 0) {
+ close(fd);
+ return errno;
+ }
+
+ ret = netlink_recv(fd);
+ close(fd);
+ return ret;
+}
+
int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
{
int prog_fd;
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 8da8d557768b..d93200fdaa8d 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -239,6 +239,8 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
int xsk_umem__delete(struct xsk_umem *umem);
void xsk_socket__delete(struct xsk_socket *xsk);
+int xsk_set_mtu(int ifindex, int mtu);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index ae697a10a056..29175682c44d 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -53,6 +53,13 @@ test_exit()
exit 1
}
+cleanup_iface()
+{
+ ip link set $1 mtu $2
+ ip link set $1 xdp off
+ ip link set $1 xdpgeneric off
+}
+
clear_configs()
{
[ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 218d7f694e5c..3ff436706640 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -49,8 +49,11 @@
* h. tests for invalid and corner case Tx descriptors so that the correct ones
* are discarded and let through, respectively.
* i. 2K frame size tests
- *
- * Total tests: 12
+ * j. If multi-buffer is supported, send 9k packets divided into 3 frames
+ * k. If multi-buffer and huge pages are supported, send 9k packets in a single frame
+ * using unaligned mode
+ * l. If multi-buffer is supported, try various nasty combinations of descriptors to
+ * check if they pass the validation or not
*
* Flow:
* -----
@@ -73,10 +76,10 @@
#include <fcntl.h>
#include <errno.h>
#include <getopt.h>
-#include <asm/barrier.h>
#include <linux/if_link.h>
#include <linux/if_ether.h>
#include <linux/mman.h>
+#include <linux/netdev.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <locale.h>
@@ -91,7 +94,6 @@
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
-#include <time.h>
#include <unistd.h>
#include "xsk_xdp_progs.skel.h"
@@ -253,6 +255,8 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i
cfg.bind_flags = ifobject->bind_flags;
if (shared)
cfg.bind_flags |= XDP_SHARED_UMEM;
+ if (ifobject->pkt_stream && ifobject->mtu > MAX_ETH_PKT_SIZE)
+ cfg.bind_flags |= XDP_USE_SG;
txr = ifobject->tx_on ? &xsk->tx : NULL;
rxr = ifobject->rx_on ? &xsk->rx : NULL;
@@ -415,6 +419,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
test->total_steps = 1;
test->nb_sockets = 1;
test->fail = false;
+ test->mtu = MAX_ETH_PKT_SIZE;
test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
@@ -468,6 +473,26 @@ static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *x
test->xskmap_tx = xskmap_tx;
}
+static int test_spec_set_mtu(struct test_spec *test, int mtu)
+{
+ int err;
+
+ if (test->ifobj_rx->mtu != mtu) {
+ err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu);
+ if (err)
+ return err;
+ test->ifobj_rx->mtu = mtu;
+ }
+ if (test->ifobj_tx->mtu != mtu) {
+ err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu);
+ if (err)
+ return err;
+ test->ifobj_tx->mtu = mtu;
+ }
+
+ return 0;
+}
+
static void pkt_stream_reset(struct pkt_stream *pkt_stream)
{
if (pkt_stream)
@@ -533,23 +558,49 @@ static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
return pkt_stream;
}
+static bool pkt_continues(u32 options)
+{
+ return options & XDP_PKT_CONTD;
+}
+
static u32 ceil_u32(u32 a, u32 b)
{
return (a + b - 1) / b;
}
-static u32 pkt_nb_frags(u32 frame_size, struct pkt *pkt)
+static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt)
{
- if (!pkt || !pkt->valid)
+ u32 nb_frags = 1, next_frag;
+
+ if (!pkt)
return 1;
- return ceil_u32(pkt->len, frame_size);
+
+ if (!pkt_stream->verbatim) {
+ if (!pkt->valid || !pkt->len)
+ return 1;
+ return ceil_u32(pkt->len, frame_size);
+ }
+
+ /* Search for the end of the packet in verbatim mode */
+ if (!pkt_continues(pkt->options))
+ return nb_frags;
+
+ next_frag = pkt_stream->current_pkt_nb;
+ pkt++;
+ while (next_frag++ < pkt_stream->nb_pkts) {
+ nb_frags++;
+ if (!pkt_continues(pkt->options) || !pkt->valid)
+ break;
+ pkt++;
+ }
+ return nb_frags;
}
static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len)
{
pkt->offset = offset;
pkt->len = len;
- if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom)
+ if (len > MAX_ETH_JUMBO_SIZE)
pkt->valid = false;
else
pkt->valid = true;
@@ -637,6 +688,11 @@ static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
return pkt->offset + umem_alloc_buffer(umem);
}
+static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
+{
+ pkt_stream->current_pkt_nb--;
+}
+
static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb,
u32 bytes_written)
{
@@ -657,34 +713,59 @@ static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_n
write_payload(data, pkt_nb, bytes_written, len);
}
-static void __pkt_stream_generate_custom(struct ifobject *ifobj,
- struct pkt *pkts, u32 nb_pkts)
+static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames,
+ u32 nb_frames, bool verbatim)
{
+ u32 i, len = 0, pkt_nb = 0, payload = 0;
struct pkt_stream *pkt_stream;
- u32 i;
- pkt_stream = __pkt_stream_alloc(nb_pkts);
+ pkt_stream = __pkt_stream_alloc(nb_frames);
if (!pkt_stream)
exit_with_error(ENOMEM);
- for (i = 0; i < nb_pkts; i++) {
- struct pkt *pkt = &pkt_stream->pkts[i];
+ for (i = 0; i < nb_frames; i++) {
+ struct pkt *pkt = &pkt_stream->pkts[pkt_nb];
+ struct pkt *frame = &frames[i];
- pkt->offset = pkts[i].offset;
- pkt->len = pkts[i].len;
- pkt->pkt_nb = i;
- pkt->valid = pkts[i].valid;
- if (pkt->len > pkt_stream->max_pkt_len)
+ pkt->offset = frame->offset;
+ if (verbatim) {
+ *pkt = *frame;
+ pkt->pkt_nb = payload;
+ if (!frame->valid || !pkt_continues(frame->options))
+ payload++;
+ } else {
+ if (frame->valid)
+ len += frame->len;
+ if (frame->valid && pkt_continues(frame->options))
+ continue;
+
+ pkt->pkt_nb = pkt_nb;
+ pkt->len = len;
+ pkt->valid = frame->valid;
+ pkt->options = 0;
+
+ len = 0;
+ }
+
+ if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
pkt_stream->max_pkt_len = pkt->len;
+ pkt_nb++;
}
- ifobj->pkt_stream = pkt_stream;
+ pkt_stream->nb_pkts = pkt_nb;
+ pkt_stream->verbatim = verbatim;
+ return pkt_stream;
}
static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
{
- __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts);
- __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts);
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
+ test->ifobj_tx->pkt_stream = pkt_stream;
+
+ pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
+ test->ifobj_rx->pkt_stream = pkt_stream;
}
static void pkt_print_data(u32 *data, u32 cnt)
@@ -765,43 +846,76 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
return true;
}
-static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
+static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
+ u32 bytes_processed)
{
- void *data = xsk_umem__get_data(buffer, addr);
- u32 seqnum, pkt_data;
+ u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum;
+ void *data = xsk_umem__get_data(umem->buffer, addr);
- if (!pkt) {
- ksft_print_msg("[%s] too many packets received\n", __func__);
- goto error;
+ addr -= umem->base_addr;
+
+ if (addr >= umem->num_frames * umem->frame_size ||
+ addr + len > umem->num_frames * umem->frame_size) {
+ ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len);
+ return false;
+ }
+ if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
+ ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len);
+ return false;
}
- if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) {
- /* Do not try to verify packets that are smaller than minimum size. */
- return true;
+ pkt_data = data;
+ if (!bytes_processed) {
+ pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data);
+ len -= PKT_HDR_SIZE;
+ } else {
+ bytes_processed -= PKT_HDR_SIZE;
}
- if (pkt->len != len) {
- ksft_print_msg("[%s] expected length [%d], got length [%d]\n",
- __func__, pkt->len, len);
+ expected_seqnum = bytes_processed / sizeof(*pkt_data);
+ seqnum = ntohl(*pkt_data) & 0xffff;
+ pkt_nb = ntohl(*pkt_data) >> 16;
+
+ if (expected_pkt_nb != pkt_nb) {
+ ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n",
+ __func__, expected_pkt_nb, pkt_nb);
+ goto error;
+ }
+ if (expected_seqnum != seqnum) {
+ ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n",
+ __func__, expected_seqnum, seqnum);
goto error;
}
- pkt_data = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
- seqnum = pkt_data >> 16;
-
- if (pkt->pkt_nb != seqnum) {
- ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n",
- __func__, pkt->pkt_nb, seqnum);
+ words_to_end = len / sizeof(*pkt_data) - 1;
+ pkt_data += words_to_end;
+ seqnum = ntohl(*pkt_data) & 0xffff;
+ expected_seqnum += words_to_end;
+ if (expected_seqnum != seqnum) {
+ ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n",
+ __func__, expected_seqnum, seqnum);
goto error;
}
return true;
error:
- pkt_dump(data, len, true);
+ pkt_dump(data, len, !bytes_processed);
return false;
}
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
+{
+ if (pkt->len != len) {
+ ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n",
+ __func__, pkt->len, len);
+ pkt_dump(xsk_umem__get_data(buffer, addr), len, true);
+ return false;
+ }
+
+ return true;
+}
+
static void kick_tx(struct xsk_socket_info *xsk)
{
int ret;
@@ -854,8 +968,8 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
{
struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream;
- u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0;
struct xsk_socket_info *xsk = test->ifobj_rx->xsk;
+ u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
struct ifobject *ifobj = test->ifobj_rx;
struct xsk_umem_info *umem = xsk->umem;
struct pkt *pkt;
@@ -868,6 +982,9 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
while (pkt) {
+ u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
+ u64 first_addr;
+
ret = gettimeofday(&tv_now, NULL);
if (ret)
exit_with_error(errno);
@@ -888,7 +1005,6 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
return TEST_FAILURE;
-
}
if (!(fds->revents & POLLIN))
@@ -913,27 +1029,59 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
}
}
- for (i = 0; i < rcvd; i++) {
+ while (frags_processed < rcvd) {
const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
u64 addr = desc->addr, orig;
orig = xsk_umem__extract_addr(addr);
addr = xsk_umem__add_offset_to_addr(addr);
- if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) ||
+ if (!pkt) {
+ ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
+ __func__, addr, desc->len);
+ return TEST_FAILURE;
+ }
+
+ if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
!is_offset_correct(umem, pkt, addr) ||
(ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr)))
return TEST_FAILURE;
+ if (!nb_frags++)
+ first_addr = addr;
+ frags_processed++;
+ pkt_len += desc->len;
if (ifobj->use_fill_ring)
*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+
+ if (pkt_continues(desc->options))
+ continue;
+
+ /* The complete packet has been received */
+ if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr))
+ return TEST_FAILURE;
+
pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
+ nb_frags = 0;
+ pkt_len = 0;
+ }
+
+ if (nb_frags) {
+ /* In the middle of a packet. Start over from beginning of packet. */
+ idx_rx -= nb_frags;
+ xsk_ring_cons__cancel(&xsk->rx, nb_frags);
+ if (ifobj->use_fill_ring) {
+ idx_fq -= nb_frags;
+ xsk_ring_prod__cancel(&umem->fq, nb_frags);
+ }
+ frags_processed -= nb_frags;
}
if (ifobj->use_fill_ring)
- xsk_ring_prod__submit(&umem->fq, rcvd);
+ xsk_ring_prod__submit(&umem->fq, frags_processed);
if (ifobj->release_rx)
- xsk_ring_cons__release(&xsk->rx, rcvd);
+ xsk_ring_cons__release(&xsk->rx, frags_processed);
pthread_mutex_lock(&pacing_mutex);
pkts_in_flight -= pkts_sent;
@@ -946,13 +1094,14 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout)
{
+ u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
+ struct pkt_stream *pkt_stream = ifobject->pkt_stream;
struct xsk_socket_info *xsk = ifobject->xsk;
struct xsk_umem_info *umem = ifobject->umem;
- u32 i, idx = 0, valid_pkts = 0, buffer_len;
bool use_poll = ifobject->use_poll;
int ret;
- buffer_len = pkt_get_buffer_len(umem, ifobject->pkt_stream->max_pkt_len);
+ buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
/* pkts_in_flight might be negative if many invalid packets are sent */
if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) {
kick_tx(xsk);
@@ -983,17 +1132,49 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo
}
for (i = 0; i < BATCH_SIZE; i++) {
- struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
- struct pkt *pkt = pkt_stream_get_next_tx_pkt(ifobject->pkt_stream);
+ struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+ u32 nb_frags_left, nb_frags, bytes_written = 0;
if (!pkt)
break;
- tx_desc->addr = pkt_get_addr(pkt, umem);
- tx_desc->len = pkt->len;
- if (pkt->valid) {
+ nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
+ if (nb_frags > BATCH_SIZE - i) {
+ pkt_stream_cancel(pkt_stream);
+ xsk_ring_prod__cancel(&xsk->tx, BATCH_SIZE - i);
+ break;
+ }
+ nb_frags_left = nb_frags;
+
+ while (nb_frags_left--) {
+ struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+
+ tx_desc->addr = pkt_get_addr(pkt, ifobject->umem);
+ if (pkt_stream->verbatim) {
+ tx_desc->len = pkt->len;
+ tx_desc->options = pkt->options;
+ } else if (nb_frags_left) {
+ tx_desc->len = umem->frame_size;
+ tx_desc->options = XDP_PKT_CONTD;
+ } else {
+ tx_desc->len = pkt->len - bytes_written;
+ tx_desc->options = 0;
+ }
+ if (pkt->valid)
+ pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
+ bytes_written);
+ bytes_written += tx_desc->len;
+
+ if (nb_frags_left) {
+ i++;
+ if (pkt_stream->verbatim)
+ pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+ }
+ }
+
+ if (pkt && pkt->valid) {
valid_pkts++;
- pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, 0);
+ valid_frags += nb_frags;
}
}
@@ -1002,7 +1183,7 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo
pthread_mutex_unlock(&pacing_mutex);
xsk_ring_prod__submit(&xsk->tx, i);
- xsk->outstanding_tx += valid_pkts;
+ xsk->outstanding_tx += valid_frags;
if (use_poll) {
ret = poll(fds, 1, POLL_TMOUT);
@@ -1222,7 +1403,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream
u64 addr;
u32 i;
- for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt); i++) {
+ for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) {
if (!pkt) {
if (!fill_up)
break;
@@ -1415,6 +1596,25 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i
struct ifobject *ifobj2)
{
pthread_t t0, t1;
+ int err;
+
+ if (test->mtu > MAX_ETH_PKT_SIZE) {
+ if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp ||
+ (ifobj2 && !ifobj2->multi_buff_zc_supp))) {
+ ksft_test_result_skip("Multi buffer for zero-copy not supported.\n");
+ return TEST_SKIP;
+ }
+ if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp ||
+ (ifobj2 && !ifobj2->multi_buff_supp))) {
+ ksft_test_result_skip("Multi buffer not supported.\n");
+ return TEST_SKIP;
+ }
+ }
+ err = test_spec_set_mtu(test, test->mtu);
+ if (err) {
+ ksft_print_msg("Error, could not set mtu.\n");
+ exit_with_error(err);
+ }
if (ifobj2) {
if (pthread_barrier_init(&barr, NULL, 2))
@@ -1616,6 +1816,16 @@ static int testapp_unaligned(struct test_spec *test)
return testapp_validate_traffic(test);
}
+static int testapp_unaligned_mb(struct test_spec *test)
+{
+ test_spec_set_name(test, "UNALIGNED_MODE_9K");
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
+ return testapp_validate_traffic(test);
+}
+
static int testapp_single_pkt(struct test_spec *test)
{
struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
@@ -1624,6 +1834,55 @@ static int testapp_single_pkt(struct test_spec *test)
return testapp_validate_traffic(test);
}
+static int testapp_multi_buffer(struct test_spec *test)
+{
+ test_spec_set_name(test, "RUN_TO_COMPLETION_9K_PACKETS");
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_invalid_desc_mb(struct test_spec *test)
+{
+ struct xsk_umem_info *umem = test->ifobj_tx->umem;
+ u64 umem_size = umem->num_frames * umem->frame_size;
+ struct pkt pkts[] = {
+ /* Valid packet for synch to start with */
+ {0, MIN_PKT_SIZE, 0, true, 0},
+ /* Zero frame len is not legal */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, 0, 0, false, 0},
+ /* Invalid address in the second frame */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ /* Invalid len in the middle */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ /* Invalid options in the middle */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
+ /* Transmit 2 frags, receive 3 */
+ {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
+ {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
+ /* Middle frame crosses chunk boundary with small length */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
+ /* Valid packet for synch so that something is received */
+ {0, MIN_PKT_SIZE, 0, true, 0}};
+
+ if (umem->unaligned_mode) {
+ /* Crossing a chunk boundary allowed */
+ pkts[12].valid = true;
+ pkts[13].valid = true;
+ }
+
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
+ return testapp_validate_traffic(test);
+}
+
static int testapp_invalid_desc(struct test_spec *test)
{
struct xsk_umem_info *umem = test->ifobj_tx->umem;
@@ -1690,7 +1949,6 @@ static int testapp_xdp_metadata_count(struct test_spec *test)
int count = 0;
int key = 0;
- test_spec_set_name(test, "XDP_METADATA_COUNT");
test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
skel_tx->progs.xsk_xdp_populate_metadata,
skel_rx->maps.xsk, skel_tx->maps.xsk);
@@ -1724,6 +1982,48 @@ static int testapp_poll_rxq_tmout(struct test_spec *test)
return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
}
+static int testapp_too_many_frags(struct test_spec *test)
+{
+ struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {};
+ u32 max_frags, i;
+
+ test_spec_set_name(test, "TOO_MANY_FRAGS");
+ if (test->mode == TEST_MODE_ZC)
+ max_frags = test->ifobj_tx->xdp_zc_max_segs;
+ else
+ max_frags = XSK_DESC__MAX_SKB_FRAGS;
+
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+
+ /* Valid packet for synch */
+ pkts[0].len = MIN_PKT_SIZE;
+ pkts[0].valid = true;
+
+ /* One valid packet with the max amount of frags */
+ for (i = 1; i < max_frags + 1; i++) {
+ pkts[i].len = MIN_PKT_SIZE;
+ pkts[i].options = XDP_PKT_CONTD;
+ pkts[i].valid = true;
+ }
+ pkts[max_frags].options = 0;
+
+ /* An invalid packet with the max amount of frags but signals packet
+ * continues on the last frag
+ */
+ for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
+ pkts[i].len = MIN_PKT_SIZE;
+ pkts[i].options = XDP_PKT_CONTD;
+ pkts[i].valid = false;
+ }
+
+ /* Valid packet for synch */
+ pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
+ pkts[2 * max_frags + 1].valid = true;
+
+ pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
+ return testapp_validate_traffic(test);
+}
+
static int xsk_load_xdp_programs(struct ifobject *ifobj)
{
ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
@@ -1757,6 +2057,7 @@ static bool hugepages_present(void)
static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
thread_func_t func_ptr)
{
+ LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
int err;
memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
@@ -1772,6 +2073,22 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
if (hugepages_present())
ifobj->unaligned_supp = true;
+
+ err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (err) {
+ ksft_print_msg("Error querrying XDP capabilities\n");
+ exit_with_error(-err);
+ }
+ if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
+ ifobj->multi_buff_supp = true;
+ if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
+ if (query_opts.xdp_zc_max_segs > 1) {
+ ifobj->multi_buff_zc_supp = true;
+ ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
+ } else {
+ ifobj->xdp_zc_max_segs = 0;
+ }
+ }
}
static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
@@ -1804,6 +2121,9 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
test_spec_set_name(test, "RUN_TO_COMPLETION");
ret = testapp_validate_traffic(test);
break;
+ case TEST_TYPE_RUN_TO_COMPLETION_MB:
+ ret = testapp_multi_buffer(test);
+ break;
case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT:
test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT");
ret = testapp_single_pkt(test);
@@ -1866,9 +2186,22 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
ret = testapp_invalid_desc(test);
break;
}
+ case TEST_TYPE_ALIGNED_INV_DESC_MB:
+ test_spec_set_name(test, "ALIGNED_INV_DESC_MULTI_BUFF");
+ ret = testapp_invalid_desc_mb(test);
+ break;
+ case TEST_TYPE_UNALIGNED_INV_DESC_MB:
+ test_spec_set_name(test, "UNALIGNED_INV_DESC_MULTI_BUFF");
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ ret = testapp_invalid_desc_mb(test);
+ break;
case TEST_TYPE_UNALIGNED:
ret = testapp_unaligned(test);
break;
+ case TEST_TYPE_UNALIGNED_MB:
+ ret = testapp_unaligned_mb(test);
+ break;
case TEST_TYPE_HEADROOM:
ret = testapp_headroom(test);
break;
@@ -1876,8 +2209,17 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
ret = testapp_xdp_drop(test);
break;
case TEST_TYPE_XDP_METADATA_COUNT:
+ test_spec_set_name(test, "XDP_METADATA_COUNT");
+ ret = testapp_xdp_metadata_count(test);
+ break;
+ case TEST_TYPE_XDP_METADATA_COUNT_MB:
+ test_spec_set_name(test, "XDP_METADATA_COUNT_MULTI_BUFF");
+ test->mtu = MAX_ETH_JUMBO_SIZE;
ret = testapp_xdp_metadata_count(test);
break;
+ case TEST_TYPE_TOO_MANY_FRAGS:
+ ret = testapp_too_many_frags(test);
+ break;
default:
break;
}
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index aaf27e067640..233b66cef64a 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -38,6 +38,8 @@
#define MAX_TEARDOWN_ITER 10
#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
#define MIN_PKT_SIZE 64
+#define MAX_ETH_PKT_SIZE 1518
+#define MAX_ETH_JUMBO_SIZE 9000
#define USLEEP_MAX 10000
#define SOCK_RECONF_CTR 10
#define BATCH_SIZE 64
@@ -47,7 +49,11 @@
#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
#define RX_FULL_RXQSIZE 32
#define UMEM_HEADROOM_TEST_SIZE 128
-#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
+#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
+#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
+#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
+#define XSK_DESC__INVALID_OPTION (0xffff)
+#define XSK_DESC__MAX_SKB_FRAGS 18
#define HUGEPAGE_SIZE (2 * 1024 * 1024)
#define PKT_DUMP_NB_TO_PRINT 16
@@ -83,6 +89,12 @@ enum test_type {
TEST_TYPE_BPF_RES,
TEST_TYPE_XDP_DROP_HALF,
TEST_TYPE_XDP_METADATA_COUNT,
+ TEST_TYPE_XDP_METADATA_COUNT_MB,
+ TEST_TYPE_RUN_TO_COMPLETION_MB,
+ TEST_TYPE_UNALIGNED_MB,
+ TEST_TYPE_ALIGNED_INV_DESC_MB,
+ TEST_TYPE_UNALIGNED_INV_DESC_MB,
+ TEST_TYPE_TOO_MANY_FRAGS,
TEST_TYPE_MAX
};
@@ -115,6 +127,7 @@ struct pkt {
u32 len;
u32 pkt_nb;
bool valid;
+ u16 options;
};
struct pkt_stream {
@@ -122,6 +135,7 @@ struct pkt_stream {
u32 current_pkt_nb;
struct pkt *pkts;
u32 max_pkt_len;
+ bool verbatim;
};
struct ifobject;
@@ -141,7 +155,9 @@ struct ifobject {
struct bpf_program *xdp_prog;
enum test_mode mode;
int ifindex;
+ int mtu;
u32 bind_flags;
+ u32 xdp_zc_max_segs;
bool tx_on;
bool rx_on;
bool use_poll;
@@ -151,6 +167,8 @@ struct ifobject {
bool shared_umem;
bool use_metadata;
bool unaligned_supp;
+ bool multi_buff_supp;
+ bool multi_buff_zc_supp;
u8 dst_mac[ETH_ALEN];
u8 src_mac[ETH_ALEN];
};
@@ -164,6 +182,7 @@ struct test_spec {
struct bpf_program *xdp_prog_tx;
struct bpf_map *xskmap_rx;
struct bpf_map *xskmap_tx;
+ int mtu;
u16 total_steps;
u16 current_step;
u16 nb_sockets;
diff --git a/tools/testing/selftests/connector/Makefile b/tools/testing/selftests/connector/Makefile
new file mode 100644
index 000000000000..21c9f3a973a0
--- /dev/null
+++ b/tools/testing/selftests/connector/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wall
+
+TEST_GEN_PROGS = proc_filter
+
+include ../lib.mk
diff --git a/tools/testing/selftests/connector/proc_filter.c b/tools/testing/selftests/connector/proc_filter.c
new file mode 100644
index 000000000000..4fe8c6763fd8
--- /dev/null
+++ b/tools/testing/selftests/connector/proc_filter.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <linux/connector.h>
+#include <linux/cn_proc.h>
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <strings.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+#define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
+ sizeof(struct proc_input))
+#define NL_MESSAGE_SIZE_NF (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
+ sizeof(int))
+
+#define MAX_EVENTS 1
+
+volatile static int interrupted;
+static int nl_sock, ret_errno, tcount;
+static struct epoll_event evn;
+
+static int filter;
+
+#ifdef ENABLE_PRINTS
+#define Printf printf
+#else
+#define Printf ksft_print_msg
+#endif
+
+int send_message(void *pinp)
+{
+ char buff[NL_MESSAGE_SIZE];
+ struct nlmsghdr *hdr;
+ struct cn_msg *msg;
+
+ hdr = (struct nlmsghdr *)buff;
+ if (filter)
+ hdr->nlmsg_len = NL_MESSAGE_SIZE;
+ else
+ hdr->nlmsg_len = NL_MESSAGE_SIZE_NF;
+ hdr->nlmsg_type = NLMSG_DONE;
+ hdr->nlmsg_flags = 0;
+ hdr->nlmsg_seq = 0;
+ hdr->nlmsg_pid = getpid();
+
+ msg = (struct cn_msg *)NLMSG_DATA(hdr);
+ msg->id.idx = CN_IDX_PROC;
+ msg->id.val = CN_VAL_PROC;
+ msg->seq = 0;
+ msg->ack = 0;
+ msg->flags = 0;
+
+ if (filter) {
+ msg->len = sizeof(struct proc_input);
+ ((struct proc_input *)msg->data)->mcast_op =
+ ((struct proc_input *)pinp)->mcast_op;
+ ((struct proc_input *)msg->data)->event_type =
+ ((struct proc_input *)pinp)->event_type;
+ } else {
+ msg->len = sizeof(int);
+ *(int *)msg->data = *(enum proc_cn_mcast_op *)pinp;
+ }
+
+ if (send(nl_sock, hdr, hdr->nlmsg_len, 0) == -1) {
+ ret_errno = errno;
+ perror("send failed");
+ return -3;
+ }
+ return 0;
+}
+
+int register_proc_netlink(int *efd, void *input)
+{
+ struct sockaddr_nl sa_nl;
+ int err = 0, epoll_fd;
+
+ nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+
+ if (nl_sock == -1) {
+ ret_errno = errno;
+ perror("socket failed");
+ return -1;
+ }
+
+ bzero(&sa_nl, sizeof(sa_nl));
+ sa_nl.nl_family = AF_NETLINK;
+ sa_nl.nl_groups = CN_IDX_PROC;
+ sa_nl.nl_pid = getpid();
+
+ if (bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1) {
+ ret_errno = errno;
+ perror("bind failed");
+ return -2;
+ }
+
+ epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (epoll_fd < 0) {
+ ret_errno = errno;
+ perror("epoll_create1 failed");
+ return -2;
+ }
+
+ err = send_message(input);
+
+ if (err < 0)
+ return err;
+
+ evn.events = EPOLLIN;
+ evn.data.fd = nl_sock;
+ if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, nl_sock, &evn) < 0) {
+ ret_errno = errno;
+ perror("epoll_ctl failed");
+ return -3;
+ }
+ *efd = epoll_fd;
+ return 0;
+}
+
+static void sigint(int sig)
+{
+ interrupted = 1;
+}
+
+int handle_packet(char *buff, int fd, struct proc_event *event)
+{
+ struct nlmsghdr *hdr;
+
+ hdr = (struct nlmsghdr *)buff;
+
+ if (hdr->nlmsg_type == NLMSG_ERROR) {
+ perror("NLMSG_ERROR error\n");
+ return -3;
+ } else if (hdr->nlmsg_type == NLMSG_DONE) {
+ event = (struct proc_event *)
+ ((struct cn_msg *)NLMSG_DATA(hdr))->data;
+ tcount++;
+ switch (event->what) {
+ case PROC_EVENT_EXIT:
+ Printf("Exit process %d (tgid %d) with code %d, signal %d\n",
+ event->event_data.exit.process_pid,
+ event->event_data.exit.process_tgid,
+ event->event_data.exit.exit_code,
+ event->event_data.exit.exit_signal);
+ break;
+ case PROC_EVENT_FORK:
+ Printf("Fork process %d (tgid %d), parent %d (tgid %d)\n",
+ event->event_data.fork.child_pid,
+ event->event_data.fork.child_tgid,
+ event->event_data.fork.parent_pid,
+ event->event_data.fork.parent_tgid);
+ break;
+ case PROC_EVENT_EXEC:
+ Printf("Exec process %d (tgid %d)\n",
+ event->event_data.exec.process_pid,
+ event->event_data.exec.process_tgid);
+ break;
+ case PROC_EVENT_UID:
+ Printf("UID process %d (tgid %d) uid %d euid %d\n",
+ event->event_data.id.process_pid,
+ event->event_data.id.process_tgid,
+ event->event_data.id.r.ruid,
+ event->event_data.id.e.euid);
+ break;
+ case PROC_EVENT_GID:
+ Printf("GID process %d (tgid %d) gid %d egid %d\n",
+ event->event_data.id.process_pid,
+ event->event_data.id.process_tgid,
+ event->event_data.id.r.rgid,
+ event->event_data.id.e.egid);
+ break;
+ case PROC_EVENT_SID:
+ Printf("SID process %d (tgid %d)\n",
+ event->event_data.sid.process_pid,
+ event->event_data.sid.process_tgid);
+ break;
+ case PROC_EVENT_PTRACE:
+ Printf("Ptrace process %d (tgid %d), Tracer %d (tgid %d)\n",
+ event->event_data.ptrace.process_pid,
+ event->event_data.ptrace.process_tgid,
+ event->event_data.ptrace.tracer_pid,
+ event->event_data.ptrace.tracer_tgid);
+ break;
+ case PROC_EVENT_COMM:
+ Printf("Comm process %d (tgid %d) comm %s\n",
+ event->event_data.comm.process_pid,
+ event->event_data.comm.process_tgid,
+ event->event_data.comm.comm);
+ break;
+ case PROC_EVENT_COREDUMP:
+ Printf("Coredump process %d (tgid %d) parent %d, (tgid %d)\n",
+ event->event_data.coredump.process_pid,
+ event->event_data.coredump.process_tgid,
+ event->event_data.coredump.parent_pid,
+ event->event_data.coredump.parent_tgid);
+ break;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+int handle_events(int epoll_fd, struct proc_event *pev)
+{
+ char buff[CONNECTOR_MAX_MSG_SIZE];
+ struct epoll_event ev[MAX_EVENTS];
+ int i, event_count = 0, err = 0;
+
+ event_count = epoll_wait(epoll_fd, ev, MAX_EVENTS, -1);
+ if (event_count < 0) {
+ ret_errno = errno;
+ if (ret_errno != EINTR)
+ perror("epoll_wait failed");
+ return -3;
+ }
+ for (i = 0; i < event_count; i++) {
+ if (!(ev[i].events & EPOLLIN))
+ continue;
+ if (recv(ev[i].data.fd, buff, sizeof(buff), 0) == -1) {
+ ret_errno = errno;
+ perror("recv failed");
+ return -3;
+ }
+ err = handle_packet(buff, ev[i].data.fd, pev);
+ if (err < 0)
+ return err;
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int epoll_fd, err;
+ struct proc_event proc_ev;
+ struct proc_input input;
+
+ signal(SIGINT, sigint);
+
+ if (argc > 2) {
+ printf("Expected 0(assume no-filter) or 1 argument(-f)\n");
+ exit(1);
+ }
+
+ if (argc == 2) {
+ if (strcmp(argv[1], "-f") == 0) {
+ filter = 1;
+ } else {
+ printf("Valid option : -f (for filter feature)\n");
+ exit(1);
+ }
+ }
+
+ if (filter) {
+ input.event_type = PROC_EVENT_NONZERO_EXIT;
+ input.mcast_op = PROC_CN_MCAST_LISTEN;
+ err = register_proc_netlink(&epoll_fd, (void*)&input);
+ } else {
+ enum proc_cn_mcast_op op = PROC_CN_MCAST_LISTEN;
+ err = register_proc_netlink(&epoll_fd, (void*)&op);
+ }
+
+ if (err < 0) {
+ if (err == -2)
+ close(nl_sock);
+ if (err == -3) {
+ close(nl_sock);
+ close(epoll_fd);
+ }
+ exit(1);
+ }
+
+ while (!interrupted) {
+ err = handle_events(epoll_fd, &proc_ev);
+ if (err < 0) {
+ if (ret_errno == EINTR)
+ continue;
+ if (err == -2)
+ close(nl_sock);
+ if (err == -3) {
+ close(nl_sock);
+ close(epoll_fd);
+ }
+ exit(1);
+ }
+ }
+
+ if (filter) {
+ input.mcast_op = PROC_CN_MCAST_IGNORE;
+ send_message((void*)&input);
+ } else {
+ enum proc_cn_mcast_op op = PROC_CN_MCAST_IGNORE;
+ send_message((void*)&op);
+ }
+
+ close(epoll_fd);
+ close(nl_sock);
+
+ printf("Done total count: %d\n", tcount);
+ exit(0);
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh
new file mode 100755
index 000000000000..b1f0781f6b25
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that filters that match on the same port range, but with different
+# combination of IPv4/IPv6 and TCP/UDP all use the same port range register by
+# observing port range registers' occupancy via devlink-resource.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ port_range_occ_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+switch_create()
+{
+ simple_if_init $swp1
+ tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+port_range_occ_get()
+{
+ devlink_resource_occ_get port_range_registers
+}
+
+port_range_occ_test()
+{
+ RET=0
+
+ local occ=$(port_range_occ_get)
+
+ # Two port range registers are used, for source and destination port
+ # ranges.
+ tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+ flower skip_sw ip_proto udp src_port 1-100 dst_port 1-100 \
+ action pass
+ (( occ + 2 == $(port_range_occ_get) ))
+ check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))"
+
+ tc filter add dev $swp1 ingress pref 1 handle 102 proto ip \
+ flower skip_sw ip_proto tcp src_port 1-100 dst_port 1-100 \
+ action pass
+ tc filter add dev $swp1 ingress pref 2 handle 103 proto ipv6 \
+ flower skip_sw ip_proto udp src_port 1-100 dst_port 1-100 \
+ action pass
+ tc filter add dev $swp1 ingress pref 2 handle 104 proto ipv6 \
+ flower skip_sw ip_proto tcp src_port 1-100 dst_port 1-100 \
+ action pass
+ (( occ + 2 == $(port_range_occ_get) ))
+ check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))"
+
+ tc filter del dev $swp1 ingress pref 2 handle 104 flower
+ tc filter del dev $swp1 ingress pref 2 handle 103 flower
+ tc filter del dev $swp1 ingress pref 1 handle 102 flower
+ (( occ + 2 == $(port_range_occ_get) ))
+ check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))"
+
+ tc filter del dev $swp1 ingress pref 1 handle 101 flower
+ (( occ == $(port_range_occ_get) ))
+ check_err $? "Got occupancy $(port_range_occ_get), expected $occ"
+
+ log_test "port range occupancy"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh
new file mode 100644
index 000000000000..2a70840ff14b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-2.0
+
+PORT_RANGE_NUM_NETIFS=2
+
+port_range_h1_create()
+{
+ simple_if_init $h1
+}
+
+port_range_h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+port_range_switch_create()
+{
+ simple_if_init $swp1
+ tc qdisc add dev $swp1 clsact
+}
+
+port_range_switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1
+}
+
+port_range_rules_create()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+ local batch_file="$(mktemp)"
+
+ for ((i = 0; i < count; ++i)); do
+ cat >> $batch_file <<-EOF
+ filter add dev $swp1 ingress \
+ prot ipv4 \
+ pref 1000 \
+ flower skip_sw \
+ ip_proto udp dst_port 1-$((100 + i)) \
+ action pass
+ EOF
+ done
+
+ tc -b $batch_file
+ check_err_fail $should_fail $? "Rule insertion"
+
+ rm -f $batch_file
+}
+
+__port_range_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ port_range_rules_create $count $should_fail
+
+ offload_count=$(tc -j filter show dev $swp1 ingress |
+ jq "[.[] | select(.options.in_hw == true)] | length")
+ ((offload_count == count))
+ check_err_fail $should_fail $? "port range offload count"
+}
+
+port_range_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ if ! tc_offload_check $PORT_RANGE_NUM_NETIFS; then
+ check_err 1 "Could not test offloaded functionality"
+ return
+ fi
+
+ __port_range_test $count $should_fail
+}
+
+port_range_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ port_range_h1_create
+ port_range_switch_create
+}
+
+port_range_cleanup()
+{
+ pre_cleanup
+
+ port_range_switch_destroy
+ port_range_h1_destroy
+
+ vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index 5e89657857c7..893a693ad805 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -16,7 +16,6 @@ ALL_TESTS="
bridge_deletion_test
bridge_vlan_flags_test
vlan_1_test
- lag_bridge_upper_test
duplicate_vlans_test
vlan_rif_refcount_test
subport_rif_refcount_test
@@ -211,33 +210,6 @@ vlan_1_test()
ip link del dev $swp1.1
}
-lag_bridge_upper_test()
-{
- # Test that ports cannot be enslaved to LAG devices that have uppers
- # and that failure is handled gracefully. See commit b3529af6bb0d
- # ("spectrum: Reference count VLAN entries") for more details
- RET=0
-
- ip link add name bond1 type bond mode 802.3ad
-
- ip link add name br0 type bridge vlan_filtering 1
- ip link set dev bond1 master br0
-
- ip link set dev $swp1 down
- ip link set dev $swp1 master bond1 &> /dev/null
- check_fail $? "managed to enslave port to lag when should not"
-
- # This might generate a trace, if we did not handle the failure
- # correctly
- ip -6 address add 2001:db8:1::1/64 dev $swp1
- ip -6 address del 2001:db8:1::1/64 dev $swp1
-
- log_test "lag with bridge upper"
-
- ip link del dev br0
- ip link del dev bond1
-}
-
duplicate_vlans_test()
{
# Test that on a given port a VLAN is only used once. Either as VLAN
@@ -510,9 +482,6 @@ vlan_interface_uppers_test()
ip link set dev $swp1 master br0
ip link add link br0 name br0.10 type vlan id 10
- ip link add link br0.10 name macvlan0 \
- type macvlan mode private &> /dev/null
- check_fail $? "managed to create a macvlan when should not"
ip -6 address add 2001:db8:1::1/64 dev br0.10
ip link add link br0.10 name macvlan0 type macvlan mode private
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh
new file mode 120000
index 000000000000..bd670d9dc4e5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh
@@ -0,0 +1 @@
+../spectrum/port_range_scale.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index 688338bbeb97..a88d8a8c85f2 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -33,6 +33,7 @@ ALL_TESTS="
port
rif_mac_profile
rif_counter
+ port_range
"
for current_test in ${TESTS:-$ALL_TESTS}; do
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh
new file mode 100644
index 000000000000..d0847e8ea270
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../port_range_scale.sh
+
+port_range_get_target()
+{
+ local should_fail=$1; shift
+ local target
+
+ target=$(devlink_resource_size_get port_range_registers)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 95d9f710a630..f981c957f097 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -30,6 +30,7 @@ ALL_TESTS="
port
rif_mac_profile
rif_counter
+ port_range
"
for current_test in ${TESTS:-$ALL_TESTS}; do
diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile
index 01c0491d64da..2e986cbf1a46 100644
--- a/tools/testing/selftests/hid/Makefile
+++ b/tools/testing/selftests/hid/Makefile
@@ -167,7 +167,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \
OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ)
# Get Clang's default includes on this system, as opposed to those seen by
-# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
@@ -196,12 +196,12 @@ CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
# $3 - CFLAGS
define CLANG_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v3 -o $2
+ $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v3 -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v2 -o $2
+ $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v2 -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 7f3ab2a93ed6..04341e1b38f0 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -3,6 +3,8 @@
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES)
+# Additional include paths needed by kselftest.h
+CFLAGS += -I../
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh
@@ -85,6 +87,7 @@ TEST_GEN_FILES += bind_wildcard
TEST_PROGS += test_vxlan_mdb.sh
TEST_PROGS += test_bridge_neigh_suppress.sh
TEST_PROGS += test_vxlan_nolocalbypass.sh
+TEST_PROGS += test_bridge_backup_port.sh
TEST_FILES := settings
@@ -113,7 +116,7 @@ $(MAKE_DIRS):
mkdir -p $@
# Get Clang's default includes on this system, as opposed to those seen by
-# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
@@ -131,7 +134,7 @@ endif
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
$(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS)
- $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
+ $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
$(APIDIR)/linux/bpf.h \
diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/csum.c
index 82a1c1839da6..90eb06fefa59 100644
--- a/tools/testing/selftests/net/csum.c
+++ b/tools/testing/selftests/net/csum.c
@@ -91,6 +91,8 @@
#include <sys/types.h>
#include <unistd.h>
+#include "kselftest.h"
+
static bool cfg_bad_csum;
static int cfg_family = PF_INET6;
static int cfg_num_pkt = 4;
@@ -450,7 +452,7 @@ static void send_packet(int fd, const char *buf, int len)
iov[2].iov_len = len;
msg.msg_iov = iov;
- msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]);
+ msg.msg_iovlen = ARRAY_SIZE(iov);
msg.msg_name = &addr;
msg.msg_namelen = sizeof(addr);
@@ -505,7 +507,7 @@ static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport)
struct sock_fprog prog = {};
prog.filter = filter;
- prog.len = sizeof(filter) / sizeof(struct sock_filter);
+ prog.len = ARRAY_SIZE(filter);
if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
error(1, errno, "setsockopt filter");
}
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 0f5e88c8f4ff..54ec2b7b7b8c 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -29,6 +29,7 @@ IPV4_TESTS="
ipv4_large_res_grp
ipv4_compat_mode
ipv4_fdb_grp_fcnal
+ ipv4_mpath_select
ipv4_torture
ipv4_res_torture
"
@@ -42,6 +43,7 @@ IPV6_TESTS="
ipv6_large_res_grp
ipv6_compat_mode
ipv6_fdb_grp_fcnal
+ ipv6_mpath_select
ipv6_torture
ipv6_res_torture
"
@@ -370,6 +372,27 @@ check_large_res_grp()
log_test $? 0 "Dump large (x$buckets) nexthop buckets"
}
+get_route_dev()
+{
+ local pfx="$1"
+ local out
+
+ if out=$($IP -j route get "$pfx" | jq -re ".[0].dev"); then
+ echo "$out"
+ fi
+}
+
+check_route_dev()
+{
+ local pfx="$1"
+ local expected="$2"
+ local out
+
+ out=$(get_route_dev "$pfx")
+
+ check_output "$out" "$expected"
+}
+
start_ip_monitor()
{
local mtype=$1
@@ -575,6 +598,112 @@ ipv4_fdb_grp_fcnal()
$IP link del dev vx10
}
+ipv4_mpath_select()
+{
+ local rc dev match h addr
+
+ echo
+ echo "IPv4 multipath selection"
+ echo "------------------------"
+ if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test; need jq tool"
+ return $ksft_skip
+ fi
+
+ # Use status of existing neighbor entry when determining nexthop for
+ # multipath routes.
+ local -A gws
+ gws=([veth1]=172.16.1.2 [veth3]=172.16.2.2)
+ local -A other_dev
+ other_dev=([veth1]=veth3 [veth3]=veth1)
+
+ run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1"
+ run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3"
+ run_cmd "$IP nexthop add id 1001 group 1/2"
+ run_cmd "$IP ro add 172.16.101.0/24 nhid 1001"
+ rc=0
+ for dev in veth1 veth3; do
+ match=0
+ for h in {1..254}; do
+ addr="172.16.101.$h"
+ if [ "$(get_route_dev "$addr")" = "$dev" ]; then
+ match=1
+ break
+ fi
+ done
+ if (( match == 0 )); then
+ echo "SKIP: Did not find a route using device $dev"
+ return $ksft_skip
+ fi
+ run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
+ if ! check_route_dev "$addr" "${other_dev[$dev]}"; then
+ rc=1
+ break
+ fi
+ run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
+ done
+ log_test $rc 0 "Use valid neighbor during multipath selection"
+
+ run_cmd "$IP neigh add 172.16.1.2 dev veth1 nud incomplete"
+ run_cmd "$IP neigh add 172.16.2.2 dev veth3 nud incomplete"
+ run_cmd "$IP route get 172.16.101.1"
+ # if we did not crash, success
+ log_test $rc 0 "Multipath selection with no valid neighbor"
+}
+
+ipv6_mpath_select()
+{
+ local rc dev match h addr
+
+ echo
+ echo "IPv6 multipath selection"
+ echo "------------------------"
+ if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test; need jq tool"
+ return $ksft_skip
+ fi
+
+ # Use status of existing neighbor entry when determining nexthop for
+ # multipath routes.
+ local -A gws
+ gws=([veth1]=2001:db8:91::2 [veth3]=2001:db8:92::2)
+ local -A other_dev
+ other_dev=([veth1]=veth3 [veth3]=veth1)
+
+ run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1"
+ run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3"
+ run_cmd "$IP nexthop add id 1001 group 1/2"
+ run_cmd "$IP ro add 2001:db8:101::/64 nhid 1001"
+ rc=0
+ for dev in veth1 veth3; do
+ match=0
+ for h in {1..65535}; do
+ addr=$(printf "2001:db8:101::%x" $h)
+ if [ "$(get_route_dev "$addr")" = "$dev" ]; then
+ match=1
+ break
+ fi
+ done
+ if (( match == 0 )); then
+ echo "SKIP: Did not find a route using device $dev"
+ return $ksft_skip
+ fi
+ run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
+ if ! check_route_dev "$addr" "${other_dev[$dev]}"; then
+ rc=1
+ break
+ fi
+ run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
+ done
+ log_test $rc 0 "Use valid neighbor during multipath selection"
+
+ run_cmd "$IP neigh add 2001:db8:91::2 dev veth1 nud incomplete"
+ run_cmd "$IP neigh add 2001:db8:92::2 dev veth3 nud incomplete"
+ run_cmd "$IP route get 2001:db8:101::1"
+ # if we did not crash, success
+ log_test $rc 0 "Multipath selection with no valid neighbor"
+}
+
################################################################################
# basic operations (add, delete, replace) on nexthops and nexthop groups
#
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 770efbe24f0d..2d8bb72762a4 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -65,6 +65,8 @@ TEST_PROGS = bridge_igmp.sh \
q_in_vni.sh \
router_bridge.sh \
router_bridge_vlan.sh \
+ router_bridge_pvid_vlan_upper.sh \
+ router_bridge_vlan_upper_pvid.sh \
router_broadcast.sh \
router_mpath_nh_res.sh \
router_mpath_nh.sh \
@@ -85,6 +87,7 @@ TEST_PROGS = bridge_igmp.sh \
tc_flower.sh \
tc_flower_l2_miss.sh \
tc_flower_cfm.sh \
+ tc_flower_port_range.sh \
tc_mpls_l2vpn.sh \
tc_police.sh \
tc_shblocks.sh \
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 9ddb68dd6a08..71f7c0c49677 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1215,6 +1215,15 @@ ping_test()
log_test "ping$3"
}
+ping_test_fails()
+{
+ RET=0
+
+ ping_do $1 $2
+ check_fail $?
+ log_test "ping fails$3"
+}
+
ping6_do()
{
local if_name=$1
@@ -1237,6 +1246,15 @@ ping6_test()
log_test "ping6$3"
}
+ping6_test_fails()
+{
+ RET=0
+
+ ping6_do $1 $2
+ check_fail $?
+ log_test "ping6 fails$3"
+}
+
learning_test()
{
local bridge=$1
diff --git a/tools/testing/selftests/net/forwarding/router_bridge.sh b/tools/testing/selftests/net/forwarding/router_bridge.sh
index 8ce0aed54ece..4f33db04699d 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge.sh
@@ -1,9 +1,33 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +------------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1 | | + $h2 |
+# | | 192.0.2.1/28 | | | 192.0.2.130/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 |
+# | | | | | |
+# +----|-------------------+ +--|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|-----------------------------+ + $swp2 |
+# | | + $swp1 BR1 (802.1q) | 192.0.2.129/28 |
+# | | 192.0.2.2/28 | 2001:db8:2::1/64 |
+# | | 2001:db8:1::1/64 | |
+# | | | |
+# | +--------------------------------+ |
+# +---------------------------------------------------------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
+ config_remove_pvid
+ ping_ipv4_fails
+ ping_ipv6_fails
+ config_add_pvid
+ ping_ipv4
+ ping_ipv6
"
NUM_NETIFS=4
source lib.sh
@@ -62,6 +86,22 @@ router_destroy()
ip link del dev br1
}
+config_remove_pvid()
+{
+ log_info "Remove PVID from the bridge"
+
+ bridge vlan add dev br1 vid 1 self
+ sleep 2
+}
+
+config_add_pvid()
+{
+ log_info "Add PVID to the bridge"
+
+ bridge vlan add dev br1 vid 1 self pvid untagged
+ sleep 2
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -104,6 +144,16 @@ ping_ipv6()
ping6_test $h1 2001:db8:2::2
}
+ping_ipv4_fails()
+{
+ ping_test_fails $h1 192.0.2.130
+}
+
+ping_ipv6_fails()
+{
+ ping6_test_fails $h1 2001:db8:2::2
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh b/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh
new file mode 100755
index 000000000000..76e4941fef73
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh
@@ -0,0 +1,155 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +----------------------------+
+# | H1 (vrf) |
+# | + $h1.10 | +----------------------+
+# | | 192.0.2.1/28 | | H2 (vrf) |
+# | | 2001:db8:1::1/64 | | + $h2 |
+# | | | | | 192.0.2.130/28 |
+# | + $h1 | | | 2001:db8:2::2/64 |
+# +---|------------------------+ +--|-------------------+
+# | |
+# +---|--------------------------------------------------|-------------------+
+# | | router (main VRF) | |
+# | +-|----------------------------------+ + $swp2 |
+# | | + $swp1 BR1 (802.1q, pvid=10) | 192.0.2.129/28 |
+# | | 192.0.2.2/28 | 2001:db8:2::1/64 |
+# | | 2001:db8:1::2/64 | |
+# | +------------------------------------+ |
+# +--------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ shuffle_pvid
+ ping_ipv4
+ ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1
+ ip -4 route del 192.0.2.128/28 vrf v$h1
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+ ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2
+ ip -4 route del 192.0.2.0/28 vrf v$h2
+ simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+ ip link add name br1 address $(mac_get $swp1) \
+ type bridge vlan_filtering 1 vlan_default_pvid 0
+ ip link set dev br1 up
+ __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 up
+ __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+
+ bridge vlan add dev br1 vid 10 pvid untagged self
+ bridge vlan add dev $swp1 vid 10
+}
+
+router_destroy()
+{
+ bridge vlan del dev $swp1 vid 10
+ bridge vlan del dev br1 vid 10 self
+
+ __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+ ip link set dev $swp2 down
+
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+shuffle_pvid()
+{
+ log_info "Add and remove VLAN upper for PVID VLAN"
+
+ # Adding and removing a VLAN upper for the PVID VLAN shouldn't change
+ # anything. The address is arbitrary, just to make sure it will be an L3
+ # netdevice.
+ vlan_create br1 10 "" 192.0.2.33/28
+ sleep 1
+ vlan_destroy br1 10
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
index de2b2d5480dd..b76a4a707a5b 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
@@ -1,25 +1,28 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# +------------------------+ +----------------------+
-# | H1 (vrf) | | H2 (vrf) |
-# | + $h1.555 | | + $h2 |
-# | | 192.0.2.1/28 | | | 192.0.2.130/28 |
-# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 |
-# | | | | | |
-# | + $h1 | | | |
-# +----|-------------------+ +--|-------------------+
+# +------------------------------------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.555 + $h1.777 | | + $h2 |
+# | | 192.0.2.1/28 | 192.0.2.17/28 | | | 192.0.2.130/28 |
+# | | 2001:db8:1::1/64 | 2001:db8:3::1/64 | | | 192.0.2.146/28 |
+# | | .-----------------' | | | 2001:db8:2::2/64 |
+# | |/ | | | 2001:db8:4::2/64 |
+# | + $h1 | | | |
+# +----|-------------------------------------------+ +--|-------------------+
# | |
# +----|--------------------------------------------------|-------------------+
# | SW | | |
# | +--|-------------------------------+ + $swp2 |
# | | + $swp1 | 192.0.2.129/28 |
-# | | vid 555 | 2001:db8:2::1/64 |
-# | | | |
-# | | + BR1 (802.1q) | |
+# | | vid 555 777 | 192.0.2.145/28 |
+# | | | 2001:db8:2::1/64 |
+# | | + BR1 (802.1q) | 2001:db8:4::1/64 |
# | | vid 555 pvid untagged | |
# | | 192.0.2.2/28 | |
+# | | 192.0.2.18/28 | |
# | | 2001:db8:1::2/64 | |
+# | | 2001:db8:3::2/64 | |
# | +----------------------------------+ |
# +---------------------------------------------------------------------------+
@@ -27,6 +30,14 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
vlan
+ config_777
+ ping_ipv4_fails
+ ping_ipv6_fails
+ ping_ipv4_777
+ ping_ipv6_777
+ config_555
+ ping_ipv4
+ ping_ipv6
"
NUM_NETIFS=4
source lib.sh
@@ -34,31 +45,47 @@ source lib.sh
h1_create()
{
simple_if_init $h1
+
vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64
ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+
+ vlan_create $h1 777 v$h1 192.0.2.17/28 2001:db8:3::1/64
+ ip -4 route add 192.0.2.144/28 vrf v$h1 nexthop via 192.0.2.18
+ ip -6 route add 2001:db8:4::/64 vrf v$h1 nexthop via 2001:db8:3::2
}
h1_destroy()
{
+ ip -6 route del 2001:db8:4::/64 vrf v$h1
+ ip -4 route del 192.0.2.144/28 vrf v$h1
+ vlan_destroy $h1 777
+
ip -6 route del 2001:db8:2::/64 vrf v$h1
ip -4 route del 192.0.2.128/28 vrf v$h1
vlan_destroy $h1 555
+
simple_if_fini $h1
}
h2_create()
{
- simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+ simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 \
+ 192.0.2.146/28 2001:db8:4::2/64
ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+ ip -4 route add 192.0.2.16/28 vrf v$h2 nexthop via 192.0.2.145
ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+ ip -6 route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:4::1
}
h2_destroy()
{
+ ip -6 route del 2001:db8:3::/64 vrf v$h2
ip -6 route del 2001:db8:1::/64 vrf v$h2
+ ip -4 route del 192.0.2.16/28 vrf v$h2
ip -4 route del 192.0.2.0/28 vrf v$h2
- simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+ simple_if_fini $h2 192.0.2.146/28 2001:db8:4::2/64 \
+ 192.0.2.130/28 2001:db8:2::2/64
}
router_create()
@@ -71,18 +98,23 @@ router_create()
bridge vlan add dev br1 vid 555 self pvid untagged
bridge vlan add dev $swp1 vid 555
+ bridge vlan add dev $swp1 vid 777
__addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+ __addr_add_del br1 add 192.0.2.18/28 2001:db8:3::2/64
ip link set dev $swp2 up
__addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+ __addr_add_del $swp2 add 192.0.2.145/28 2001:db8:4::1/64
}
router_destroy()
{
+ __addr_add_del $swp2 del 192.0.2.145/28 2001:db8:4::1/64
__addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
ip link set dev $swp2 down
+ __addr_add_del br1 del 192.0.2.18/28 2001:db8:3::2/64
__addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
ip link set dev $swp1 down
ip link set dev $swp1 nomaster
@@ -108,6 +140,24 @@ setup_prepare()
forwarding_enable
}
+config_555()
+{
+ log_info "Configure VLAN 555 as PVID"
+
+ bridge vlan add dev br1 vid 555 self pvid untagged
+ bridge vlan del dev br1 vid 777 self
+ sleep 2
+}
+
+config_777()
+{
+ log_info "Configure VLAN 777 as PVID"
+
+ bridge vlan add dev br1 vid 777 self pvid untagged
+ bridge vlan del dev br1 vid 555 self
+ sleep 2
+}
+
cleanup()
{
pre_cleanup
@@ -136,12 +186,32 @@ vlan()
ping_ipv4()
{
- ping_test $h1 192.0.2.130
+ ping_test $h1.555 192.0.2.130
}
ping_ipv6()
{
- ping6_test $h1 2001:db8:2::2
+ ping6_test $h1.555 2001:db8:2::2
+}
+
+ping_ipv4_fails()
+{
+ ping_test_fails $h1.555 192.0.2.130 ": via 555"
+}
+
+ping_ipv6_fails()
+{
+ ping6_test_fails $h1.555 2001:db8:2::2 ": via 555"
+}
+
+ping_ipv4_777()
+{
+ ping_test $h1.777 192.0.2.146 ": via 777"
+}
+
+ping_ipv6_777()
+{
+ ping6_test $h1.777 2001:db8:4::2 ": via 777"
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh
new file mode 100755
index 000000000000..138558452402
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +----------------------------+
+# | H1 (vrf) |
+# | + $h1.10 | +----------------------+
+# | | 192.0.2.1/28 | | H2 (vrf) |
+# | | 2001:db8:1::1/64 | | + $h2 |
+# | | | | | 192.0.2.130/28 |
+# | + $h1 | | | 2001:db8:2::2/64 |
+# +---|------------------------+ +--|-------------------+
+# | |
+# +---|--------------------------------------------------|-------------------+
+# | | router (main VRF) | |
+# | +-|--------------------------+ + $swp2 |
+# | | + $swp1 BR1 (802.1q) | 192.0.2.129/28 |
+# | +-----+----------------------+ 2001:db8:2::1/64 |
+# | | |
+# | + br1.10 |
+# | 192.0.2.2/28 |
+# | 2001:db8:1::2/64 |
+# +--------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ pvid_set_unset
+ ping_ipv4
+ ping_ipv6
+ pvid_set_move
+ ping_ipv4
+ ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1
+ ip -4 route del 192.0.2.128/28 vrf v$h1
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+ ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2
+ ip -4 route del 192.0.2.0/28 vrf v$h2
+ simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+ ip link add name br1 address $(mac_get $swp1) \
+ type bridge vlan_filtering 1 vlan_default_pvid 0
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 up
+ __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+
+ bridge vlan add dev br1 vid 10 self
+ bridge vlan add dev $swp1 vid 10
+ vlan_create br1 10 "" 192.0.2.2/28 2001:db8:1::2/64
+}
+
+router_destroy()
+{
+ vlan_destroy br1 10
+ bridge vlan del dev $swp1 vid 10
+ bridge vlan del dev br1 vid 10 self
+
+ __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+ ip link set dev $swp2 down
+
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+pvid_set_unset()
+{
+ log_info "Set and unset PVID on VLAN 10"
+
+ bridge vlan add dev br1 vid 10 pvid self
+ sleep 1
+ bridge vlan add dev br1 vid 10 self
+}
+
+pvid_set_move()
+{
+ log_info "Set PVID on VLAN 10, then move it to VLAN 20"
+
+ bridge vlan add dev br1 vid 10 pvid self
+ sleep 1
+ bridge vlan add dev br1 vid 20 pvid self
+}
+
+shuffle_vlan()
+{
+ log_info ""
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
new file mode 100755
index 000000000000..3885a2a91f7d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
@@ -0,0 +1,228 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# | | 2001:db8:1::1/64 | | 2001:db8:1::2/64 | |
+# +----|------------------+ +------------------|---+
+# | |
+# +----|-------------------------------------------------------------------|---+
+# | SW | | |
+# | +-|-------------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +-----------------------------------------------------------------------+ |
+# +----------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_port_range_ipv4_udp
+ test_port_range_ipv4_tcp
+ test_port_range_ipv6_udp
+ test_port_range_ipv6_tcp
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+ ip link set dev br1 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev br1 down
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+__test_port_range()
+{
+ local proto=$1; shift
+ local ip_proto=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local mode=$1; shift
+ local name=$1; shift
+ local dmac=$(mac_get $h2)
+ local smac=$(mac_get $h1)
+ local sport_min=100
+ local sport_max=200
+ local sport_mid=$((sport_min + (sport_max - sport_min) / 2))
+ local dport_min=300
+ local dport_max=400
+ local dport_mid=$((dport_min + (dport_max - dport_min) / 2))
+
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \
+ flower src_ip $sip dst_ip $dip ip_proto $ip_proto \
+ src_port $sport_min-$sport_max \
+ dst_port $dport_min-$dport_max \
+ action pass
+ tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+ flower src_ip $sip dst_ip $dip ip_proto $ip_proto \
+ src_port $sport_min-$sport_max \
+ dst_port $dport_min-$dport_max \
+ action drop
+
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_min,dp=$dport_min"
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Ingress filter not hit with minimum ports"
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Egress filter not hit with minimum ports"
+
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_mid,dp=$dport_mid"
+ tc_check_packets "dev $swp1 ingress" 101 2
+ check_err $? "Ingress filter not hit with middle ports"
+ tc_check_packets "dev $swp2 egress" 101 2
+ check_err $? "Egress filter not hit with middle ports"
+
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_max,dp=$dport_max"
+ tc_check_packets "dev $swp1 ingress" 101 3
+ check_err $? "Ingress filter not hit with maximum ports"
+ tc_check_packets "dev $swp2 egress" 101 3
+ check_err $? "Egress filter not hit with maximum ports"
+
+ # Send traffic when both ports are out of range and when only one port
+ # is out of range.
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_min - 1)),dp=$dport_min"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_max + 1)),dp=$dport_min"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_min,dp=$((dport_min - 1))"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_min,dp=$((dport_max + 1))"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_max + 1)),dp=$((dport_max + 1))"
+ tc_check_packets "dev $swp1 ingress" 101 3
+ check_err $? "Ingress filter was hit when should not"
+ tc_check_packets "dev $swp2 egress" 101 3
+ check_err $? "Egress filter was hit when should not"
+
+ tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+ tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower
+
+ log_test "Port range matching - $name"
+}
+
+test_port_range_ipv4_udp()
+{
+ local proto=ipv4
+ local ip_proto=udp
+ local sip=192.0.2.1
+ local dip=192.0.2.2
+ local mode="-4"
+ local name="IPv4 UDP"
+
+ __test_port_range $proto $ip_proto $sip $dip $mode "$name"
+}
+
+test_port_range_ipv4_tcp()
+{
+ local proto=ipv4
+ local ip_proto=tcp
+ local sip=192.0.2.1
+ local dip=192.0.2.2
+ local mode="-4"
+ local name="IPv4 TCP"
+
+ __test_port_range $proto $ip_proto $sip $dip $mode "$name"
+}
+
+test_port_range_ipv6_udp()
+{
+ local proto=ipv6
+ local ip_proto=udp
+ local sip=2001:db8:1::1
+ local dip=2001:db8:1::2
+ local mode="-6"
+ local name="IPv6 UDP"
+
+ __test_port_range $proto $ip_proto $sip $dip $mode "$name"
+}
+
+test_port_range_ipv6_tcp()
+{
+ local proto=ipv6
+ local ip_proto=tcp
+ local sip=2001:db8:1::1
+ local dip=2001:db8:1::2
+ local mode="-6"
+ local name="IPv6 TCP"
+
+ __test_port_range $proto $ip_proto $sip $dip $mode "$name"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/hwtstamp_config.c b/tools/testing/selftests/net/hwtstamp_config.c
index e1fdee841021..170728c96c46 100644
--- a/tools/testing/selftests/net/hwtstamp_config.c
+++ b/tools/testing/selftests/net/hwtstamp_config.c
@@ -16,6 +16,8 @@
#include <linux/net_tstamp.h>
#include <linux/sockios.h>
+#include "kselftest.h"
+
static int
lookup_value(const char **names, int size, const char *name)
{
@@ -50,7 +52,7 @@ static const char *tx_types[] = {
TX_TYPE(ONESTEP_SYNC)
#undef TX_TYPE
};
-#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0])))
+#define N_TX_TYPES ((int)(ARRAY_SIZE(tx_types)))
static const char *rx_filters[] = {
#define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name
@@ -71,7 +73,7 @@ static const char *rx_filters[] = {
RX_FILTER(PTP_V2_DELAY_REQ),
#undef RX_FILTER
};
-#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0])))
+#define N_RX_FILTERS ((int)(ARRAY_SIZE(rx_filters)))
static void usage(void)
{
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index fa9e09ad97d9..85a8ee9395b3 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -65,12 +65,15 @@ __chk_nr()
if [ $nr != $expected ]; then
if [ $nr = "$skip" ] && ! mptcp_lib_expect_all_features; then
echo "[ skip ] Feature probably not supported"
+ mptcp_lib_result_skip "${msg}"
else
echo "[ fail ] expected $expected found $nr"
+ mptcp_lib_result_fail "${msg}"
ret=$test_cnt
fi
else
echo "[ ok ]"
+ mptcp_lib_result_pass "${msg}"
fi
test_cnt=$((test_cnt+1))
}
@@ -111,12 +114,15 @@ wait_msk_nr()
printf "%-50s" "$msg"
if [ $i -ge $timeout ]; then
echo "[ fail ] timeout while expecting $expected max $max last $nr"
+ mptcp_lib_result_fail "${msg} # timeout"
ret=$test_cnt
elif [ $nr != $expected ]; then
echo "[ fail ] expected $expected found $nr"
+ mptcp_lib_result_fail "${msg} # unexpected result"
ret=$test_cnt
else
echo "[ ok ]"
+ mptcp_lib_result_pass "${msg}"
fi
test_cnt=$((test_cnt+1))
}
@@ -276,4 +282,5 @@ flush_pids
chk_msk_inuse 0 "....chk 0 msk in use after flush"
+mptcp_lib_result_print_all_tap
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index bbae40882bfa..b1fc8afd072d 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -7,6 +7,7 @@ time_start=$(date +%s)
optstring="S:R:d:e:l:r:h4cm:f:tC"
ret=0
+final_ret=0
sin=""
sout=""
cin_disconnect=""
@@ -128,6 +129,7 @@ ns3="ns3-$rndh"
ns4="ns4-$rndh"
TEST_COUNT=0
+TEST_GROUP=""
cleanup()
{
@@ -285,6 +287,7 @@ check_mptcp_disabled()
# net.mptcp.enabled should be enabled by default
if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then
echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]"
+ mptcp_lib_result_fail "net.mptcp.enabled sysctl is not 1 by default"
ret=1
return 1
fi
@@ -297,11 +300,13 @@ check_mptcp_disabled()
if [ ${err} -eq 0 ]; then
echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]"
+ mptcp_lib_result_fail "New MPTCP socket cannot be blocked via sysctl"
ret=1
return 1
fi
echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]"
+ mptcp_lib_result_pass "New MPTCP socket can be blocked via sysctl"
return 0
}
@@ -317,14 +322,16 @@ do_ping()
local connector_ns="$2"
local connect_addr="$3"
local ping_args="-q -c 1"
+ local rc=0
if is_v6 "${connect_addr}"; then
$ipv6 || return 0
ping_args="${ping_args} -6"
fi
- ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null
- if [ $? -ne 0 ] ; then
+ ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null || rc=1
+
+ if [ $rc -ne 0 ] ; then
echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
ret=1
@@ -403,7 +410,9 @@ do_transfer()
local addr_port
addr_port=$(printf "%s:%d" ${connect_addr} ${port})
- printf "%.3s %-5s -> %.3s (%-20s) %-5s\t" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto}
+ local result_msg
+ result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
+ printf "%s\t" "${result_msg}"
if $capture; then
local capuser
@@ -478,6 +487,7 @@ do_transfer()
local duration
duration=$((stop-start))
+ result_msg+=" # time=${duration}ms"
printf "(duration %05sms) " "${duration}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
echo "[ FAIL ] client exit code $retc, server $rets" 1>&2
@@ -490,6 +500,7 @@ do_transfer()
echo
cat "$capout"
+ mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
return 1
fi
@@ -549,6 +560,9 @@ do_transfer()
if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
printf "[ OK ]"
+ mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
+ else
+ mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
fi
if [ $cookies -eq 2 ];then
@@ -691,6 +705,8 @@ run_test_transparent()
local lret=0
local r6flag=""
+ TEST_GROUP="${msg}"
+
# skip if we don't want v6
if ! $ipv6 && is_v6 "${connect_addr}"; then
return 0
@@ -702,6 +718,7 @@ run_test_transparent()
# checking for a specific kernel version.
if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then
echo "INFO: ${msg} not supported by the kernel: SKIP"
+ mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
@@ -719,6 +736,7 @@ EOF
if [ $? -ne 0 ]; then
echo "SKIP: $msg, could not load nft ruleset"
mptcp_lib_fail_if_expected_feature "nft rules"
+ mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
@@ -735,6 +753,7 @@ EOF
ip netns exec "$listener_ns" nft flush ruleset
echo "SKIP: $msg, ip $r6flag rule failed"
mptcp_lib_fail_if_expected_feature "ip rule"
+ mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
@@ -744,6 +763,7 @@ EOF
ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
echo "SKIP: $msg, ip route add local $local_addr failed"
mptcp_lib_fail_if_expected_feature "ip route"
+ mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
@@ -773,6 +793,7 @@ run_tests_peekmode()
{
local peekmode="$1"
+ TEST_GROUP="peek mode: ${peekmode}"
echo "INFO: with peek mode: ${peekmode}"
run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}"
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
@@ -780,8 +801,11 @@ run_tests_peekmode()
run_tests_mptfo()
{
+ TEST_GROUP="MPTFO"
+
if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then
echo "INFO: TFO not supported by the kernel: SKIP"
+ mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
@@ -805,8 +829,11 @@ run_tests_disconnect()
local old_cin=$cin
local old_sin=$sin
+ TEST_GROUP="full disconnect"
+
if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then
echo "INFO: Full disconnect not supported: SKIP"
+ mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
@@ -837,14 +864,26 @@ display_time()
echo "Time: ${time_run} seconds"
}
-stop_if_error()
+log_if_error()
{
local msg="$1"
if [ ${ret} -ne 0 ]; then
echo "FAIL: ${msg}" 1>&2
+
+ final_ret=${ret}
+ ret=0
+
+ return ${final_ret}
+ fi
+}
+
+stop_if_error()
+{
+ if ! log_if_error "${@}"; then
display_time
- exit ${ret}
+ mptcp_lib_result_print_all_tap
+ exit ${final_ret}
fi
}
@@ -874,6 +913,8 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns4" $sender dead:beef:3::1
done
+mptcp_lib_result_code "${ret}" "ping tests"
+
stop_if_error "Could not even run ping tests"
[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
@@ -903,12 +944,15 @@ echo "on ns3eth4"
tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
+TEST_GROUP="loopback v4"
run_tests_lo "$ns1" "$ns1" 10.0.1.1 1
stop_if_error "Could not even run loopback test"
+TEST_GROUP="loopback v6"
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1
stop_if_error "Could not even run loopback v6 test"
+TEST_GROUP="multihosts"
for sender in $ns1 $ns2 $ns3 $ns4;do
# ns1<->ns2 is not subject to reordering/tc delays. Use it to test
# mptcp syncookie support.
@@ -934,23 +978,25 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
run_tests "$ns4" $sender 10.0.3.1
run_tests "$ns4" $sender dead:beef:3::1
- stop_if_error "Tests with $sender as a sender have failed"
+ log_if_error "Tests with $sender as a sender have failed"
done
run_tests_peekmode "saveWithPeek"
run_tests_peekmode "saveAfterPeek"
-stop_if_error "Tests with peek mode have failed"
+log_if_error "Tests with peek mode have failed"
# MPTFO (MultiPath TCP Fatopen tests)
run_tests_mptfo
-stop_if_error "Tests with MPTFO have failed"
+log_if_error "Tests with MPTFO have failed"
# connect to ns4 ip address, ns2 should intercept/proxy
run_test_transparent 10.0.3.1 "tproxy ipv4"
run_test_transparent dead:beef:3::1 "tproxy ipv6"
-stop_if_error "Tests with tproxy have failed"
+log_if_error "Tests with tproxy have failed"
run_tests_disconnect
+log_if_error "Tests of the full disconnection have failed"
display_time
-exit $ret
+mptcp_lib_result_print_all_tap
+exit ${final_ret}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 3c2096ac97ef..1640749750d4 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -39,7 +39,9 @@ evts_ns1=""
evts_ns2=""
evts_ns1_pid=0
evts_ns2_pid=0
-stats_dumped=0
+last_test_failed=0
+last_test_skipped=0
+last_test_ignored=1
declare -A all_tests
declare -a only_tests_ids
@@ -49,11 +51,15 @@ TEST_COUNT=0
TEST_NAME=""
nr_blank=40
-export FAILING_LINKS=""
-export test_linkfail=0
-export addr_nr_ns1=0
-export addr_nr_ns2=0
-export sflags=""
+# These var are used only in some tests, make sure they are not already set
+unset FAILING_LINKS
+unset test_linkfail
+unset addr_nr_ns1
+unset addr_nr_ns2
+unset sflags
+unset fastclose
+unset fullmesh
+unset speed
# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
# (ip6 && (ip6[74] & 0xf0) == 0x30)'"
@@ -97,10 +103,8 @@ init_partial()
fi
done
- stats_dumped=0
check_invert=0
validate_checksum=$checksum
- FAILING_LINKS=""
# ns1 ns2
# ns1eth1 ns2eth1
@@ -211,6 +215,8 @@ mark_as_skipped()
print_title "[ skip ] ${msg}"
printf "\n"
+
+ last_test_skipped=1
}
# $@: condition
@@ -243,14 +249,32 @@ skip_test()
return 0
}
+append_prev_results()
+{
+ if [ ${last_test_failed} -eq 1 ]; then
+ mptcp_lib_result_fail "${TEST_NAME}"
+ elif [ ${last_test_skipped} -eq 1 ]; then
+ mptcp_lib_result_skip "${TEST_NAME}"
+ elif [ ${last_test_ignored} -ne 1 ]; then
+ mptcp_lib_result_pass "${TEST_NAME}"
+ fi
+
+ last_test_failed=0
+ last_test_skipped=0
+ last_test_ignored=0
+}
+
# $1: test name
reset()
{
+ append_prev_results
+
TEST_NAME="${1}"
TEST_COUNT=$((TEST_COUNT+1))
if skip_test; then
+ last_test_ignored=1
return 1
fi
@@ -437,10 +461,13 @@ reset_with_tcp_filter()
fail_test()
{
ret=1
- failed_tests[${TEST_COUNT}]="${TEST_NAME}"
- [ "${stats_dumped}" = 0 ] && dump_stats
- stats_dumped=1
+ # just in case a test is marked twice as failed
+ if [ ${last_test_failed} -eq 0 ]; then
+ failed_tests[${TEST_COUNT}]="${TEST_NAME}"
+ dump_stats
+ last_test_failed=1
+ fi
}
get_failed_tests_ids()
@@ -826,6 +853,17 @@ pm_nl_set_endpoint()
local connector_ns="$2"
local connect_addr="$3"
+ local addr_nr_ns1=${addr_nr_ns1:-0}
+ local addr_nr_ns2=${addr_nr_ns2:-0}
+ local sflags=${sflags:-""}
+ local fullmesh=${fullmesh:-""}
+
+ local flags="subflow"
+ if [ -n "${fullmesh}" ]; then
+ flags="${flags},fullmesh"
+ addr_nr_ns2=${fullmesh}
+ fi
+
# let the mptcp subflow be established in background before
# do endpoint manipulation
if [ $addr_nr_ns1 != "0" ] || [ $addr_nr_ns2 != "0" ]; then
@@ -973,10 +1011,12 @@ do_transfer()
local cl_proto="$3"
local srv_proto="$4"
local connect_addr="$5"
- local speed="$6"
local port=$((10000 + TEST_COUNT - 1))
local cappid
+ local FAILING_LINKS=${FAILING_LINKS:-""}
+ local fastclose=${fastclose:-""}
+ local speed=${speed:-"fast"}
:> "$cout"
:> "$sout"
@@ -1009,15 +1049,14 @@ do_transfer()
extra_args="-j"
elif [ $speed = "slow" ]; then
extra_args="-r 50"
- elif [[ $speed = "speed_"* ]]; then
- extra_args="-r ${speed:6}"
+ elif [ $speed -gt 0 ]; then
+ extra_args="-r ${speed}"
fi
- local flags="subflow"
local extra_cl_args=""
local extra_srv_args=""
local trunc_size=""
- if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then
+ if [ -n "${fastclose}" ]; then
if [ ${test_linkfail} -le 1 ]; then
echo "fastclose tests need test_linkfail argument"
fail_test
@@ -1026,7 +1065,7 @@ do_transfer()
# disconnect
trunc_size=${test_linkfail}
- local side=${addr_nr_ns2:10}
+ local side=${fastclose}
if [ ${side} = "client" ]; then
extra_cl_args="-f ${test_linkfail}"
@@ -1039,10 +1078,6 @@ do_transfer()
fail_test
return 1
fi
- addr_nr_ns2=0
- elif [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
- flags="${flags},fullmesh"
- addr_nr_ns2=${addr_nr_ns2:9}
fi
extra_srv_args="$extra_args $extra_srv_args"
@@ -1153,9 +1188,9 @@ run_tests()
local listener_ns="$1"
local connector_ns="$2"
local connect_addr="$3"
- local speed="${4:-fast}"
local size
+ local test_linkfail=${test_linkfail:-0}
# The values above 2 are reused to make test files
# with the given sizes (KB)
@@ -1197,7 +1232,7 @@ run_tests()
make_file "$sinfail" "server" $size
fi
- do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${speed}
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
}
dump_stats()
@@ -1986,7 +2021,8 @@ subflows_error_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
fi
@@ -1997,7 +2033,8 @@ subflows_error_tests()
pm_nl_set_limits $ns2 0 2
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
fi
@@ -2008,7 +2045,8 @@ subflows_error_tests()
pm_nl_set_limits $ns2 0 2
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
fi
@@ -2020,7 +2058,8 @@ subflows_error_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 slow &
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1 &
# mpj subflow will be in TW after the reset
wait_attempt_fail $ns2
@@ -2119,7 +2158,8 @@ signal_address_tests()
# the peer could possibly miss some addr notification, allow retransmission
ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
- run_tests $ns1 $ns2 10.0.1.1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
# It is not directly linked to the commit introducing this
# symbol but for the parent one which is linked anyway.
@@ -2229,7 +2269,8 @@ add_addr_timeout_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- run_tests $ns1 $ns2 10.0.1.1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_tx_nr 4 4
chk_add_nr 4 0
@@ -2240,7 +2281,8 @@ add_addr_timeout_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
- run_tests $ns1 $ns2 dead:beef:1::1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 1 1 1
chk_add_nr 4 0
fi
@@ -2251,7 +2293,8 @@ add_addr_timeout_tests()
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_set_limits $ns2 2 2
- run_tests $ns1 $ns2 10.0.1.1 speed_10
+ speed=10 \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_add_nr 8 0
fi
@@ -2262,7 +2305,8 @@ add_addr_timeout_tests()
pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_set_limits $ns2 2 2
- run_tests $ns1 $ns2 10.0.1.1 speed_10
+ speed=10 \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 8 0
fi
@@ -2275,8 +2319,8 @@ remove_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- addr_nr_ns2=-1 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns2=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_rm_tx_nr 1
chk_rm_nr 1 1
@@ -2288,8 +2332,8 @@ remove_tests()
pm_nl_set_limits $ns2 0 2
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- addr_nr_ns2=-2 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns2=-2 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_rm_nr 2 2
fi
@@ -2299,8 +2343,8 @@ remove_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_set_limits $ns2 1 1
- addr_nr_ns1=-1 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns1=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_rm_nr 1 1 invert
@@ -2312,8 +2356,8 @@ remove_tests()
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_set_limits $ns2 1 2
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- addr_nr_ns1=-1 addr_nr_ns2=-1 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_add_nr 1 1
chk_rm_nr 1 1
@@ -2326,8 +2370,8 @@ remove_tests()
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
- addr_nr_ns1=-1 addr_nr_ns2=-2 \
- run_tests $ns1 $ns2 10.0.1.1 speed_10
+ addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
chk_add_nr 1 1
chk_rm_nr 2 2
@@ -2340,8 +2384,8 @@ remove_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
pm_nl_set_limits $ns2 3 3
- addr_nr_ns1=-3 \
- run_tests $ns1 $ns2 10.0.1.1 speed_10
+ addr_nr_ns1=-3 speed=10 \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
chk_add_nr 3 3
chk_rm_nr 3 3 invert
@@ -2354,8 +2398,8 @@ remove_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
pm_nl_set_limits $ns2 3 3
- addr_nr_ns1=-3 \
- run_tests $ns1 $ns2 10.0.1.1 speed_10
+ addr_nr_ns1=-3 speed=10 \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
@@ -2368,8 +2412,8 @@ remove_tests()
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
- addr_nr_ns1=-8 addr_nr_ns2=-8 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
chk_add_nr 1 1
chk_rm_nr 1 3 invert simult
@@ -2382,8 +2426,8 @@ remove_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
- addr_nr_ns1=-8 addr_nr_ns2=-8 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
if mptcp_lib_kversion_ge 5.18; then
@@ -2401,8 +2445,8 @@ remove_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
pm_nl_set_limits $ns2 3 3
- addr_nr_ns1=-8 addr_nr_ns2=-8 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
chk_add_nr 3 3
chk_rm_nr 3 3 invert simult
@@ -2415,8 +2459,8 @@ remove_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
pm_nl_set_limits $ns2 3 3
- addr_nr_ns1=-8 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns1=-8 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
@@ -2427,8 +2471,8 @@ remove_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- addr_nr_ns2=-9 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns2=-9 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_rm_nr 1 1
fi
@@ -2438,8 +2482,8 @@ remove_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_set_limits $ns2 1 1
- addr_nr_ns1=-9 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns1=-9 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_rm_nr 1 1 invert
@@ -2452,8 +2496,8 @@ add_tests()
if reset "add single subflow"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
- addr_nr_ns2=1 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns2=1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
fi
@@ -2461,8 +2505,8 @@ add_tests()
if reset "add signal address"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
- addr_nr_ns1=1 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns1=1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
fi
@@ -2471,8 +2515,8 @@ add_tests()
if reset "add multiple subflows"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
- addr_nr_ns2=2 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns2=2 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
fi
@@ -2480,8 +2524,8 @@ add_tests()
if reset "add multiple subflows IPv6"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
- addr_nr_ns2=2 \
- run_tests $ns1 $ns2 dead:beef:1::1 slow
+ addr_nr_ns2=2 speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 2 2 2
fi
@@ -2489,8 +2533,8 @@ add_tests()
if reset "add multiple addresses IPv6"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
- addr_nr_ns1=2 \
- run_tests $ns1 $ns2 dead:beef:1::1 slow
+ addr_nr_ns1=2 speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 2 2 2
chk_add_nr 2 2
fi
@@ -2503,14 +2547,16 @@ ipv6_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
- run_tests $ns1 $ns2 dead:beef:1::1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 1 1 1
fi
# add_address, unused IPv6
if reset "unused signal address IPv6"; then
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
- run_tests $ns1 $ns2 dead:beef:1::1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 0 0 0
chk_add_nr 1 1
fi
@@ -2520,7 +2566,8 @@ ipv6_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
pm_nl_set_limits $ns2 1 1
- run_tests $ns1 $ns2 dead:beef:1::1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 1 1 1
chk_add_nr 1 1
fi
@@ -2530,8 +2577,8 @@ ipv6_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
pm_nl_set_limits $ns2 1 1
- addr_nr_ns1=-1 \
- run_tests $ns1 $ns2 dead:beef:1::1 slow
+ addr_nr_ns1=-1 speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_rm_nr 1 1 invert
@@ -2543,8 +2590,8 @@ ipv6_tests()
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
pm_nl_set_limits $ns2 1 2
pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
- addr_nr_ns1=-1 addr_nr_ns2=-1 \
- run_tests $ns1 $ns2 dead:beef:1::1 slow
+ addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 2 2 2
chk_add_nr 1 1
chk_rm_nr 1 1
@@ -2645,7 +2692,8 @@ mixed_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
- run_tests $ns1 $ns2 10.0.1.1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
fi
@@ -2655,7 +2703,8 @@ mixed_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
- run_tests $ns1 $ns2 dead:beef:2::1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 dead:beef:2::1
chk_join_nr 1 1 1
fi
@@ -2666,7 +2715,8 @@ mixed_tests()
pm_nl_set_limits $ns2 1 4
pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
- run_tests $ns1 $ns2 dead:beef:2::1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 dead:beef:2::1
chk_join_nr 1 1 1
fi
@@ -2678,8 +2728,8 @@ mixed_tests()
pm_nl_set_limits $ns2 2 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
- addr_nr_ns2=fullmesh_1 \
- run_tests $ns1 $ns2 dead:beef:1::1 slow
+ fullmesh=1 speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 4 4 4
fi
}
@@ -2692,8 +2742,8 @@ backup_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
- sflags=nobackup \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ sflags=nobackup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_prio_nr 0 1
fi
@@ -2704,8 +2754,8 @@ backup_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_set_limits $ns2 1 1
- sflags=backup \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ sflags=backup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_prio_nr 1 1
@@ -2717,8 +2767,8 @@ backup_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
pm_nl_set_limits $ns2 1 1
- sflags=backup \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ sflags=backup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_prio_nr 1 1
@@ -2727,7 +2777,8 @@ backup_tests()
if reset "mpc backup" &&
continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
- run_tests $ns1 $ns2 10.0.1.1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_prio_nr 0 1
fi
@@ -2736,7 +2787,8 @@ backup_tests()
continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
- run_tests $ns1 $ns2 10.0.1.1 slow
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_prio_nr 1 1
fi
@@ -2744,8 +2796,8 @@ backup_tests()
if reset "mpc switch to backup" &&
continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
- sflags=backup \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ sflags=backup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_prio_nr 0 1
fi
@@ -2754,8 +2806,8 @@ backup_tests()
continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
- sflags=backup \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ sflags=backup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_prio_nr 1 1
fi
@@ -2844,8 +2896,8 @@ add_addr_ports_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
pm_nl_set_limits $ns2 1 1
- addr_nr_ns1=-1 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns1=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1 1
chk_rm_nr 1 1 invert
@@ -2861,8 +2913,8 @@ add_addr_ports_tests()
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
pm_nl_set_limits $ns2 1 2
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- addr_nr_ns1=-1 addr_nr_ns2=-1 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_add_nr 1 1 1
chk_rm_nr 1 1
@@ -2875,8 +2927,8 @@ add_addr_ports_tests()
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
- addr_nr_ns1=-8 addr_nr_ns2=-2 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns1=-8 addr_nr_ns2=-2 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
chk_add_nr 1 1
chk_rm_nr 1 3 invert simult
@@ -3078,8 +3130,8 @@ fullmesh_tests()
pm_nl_set_limits $ns2 1 4
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh
- addr_nr_ns1=1 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns1=1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 4 4 4
chk_add_nr 1 1
fi
@@ -3091,8 +3143,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 1 3
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- addr_nr_ns2=fullmesh_1 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ fullmesh=1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
chk_add_nr 1 1
fi
@@ -3104,8 +3156,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 2 5
pm_nl_set_limits $ns2 1 5
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- addr_nr_ns2=fullmesh_2 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ fullmesh=2 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 5 5 5
chk_add_nr 1 1
fi
@@ -3118,8 +3170,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 2 4
pm_nl_set_limits $ns2 1 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- addr_nr_ns2=fullmesh_2 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ fullmesh=2 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 4 4 4
chk_add_nr 1 1
fi
@@ -3130,8 +3182,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 4 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
pm_nl_set_limits $ns2 4 4
- addr_nr_ns2=1 sflags=fullmesh \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns2=1 sflags=fullmesh speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_rm_nr 0 1
fi
@@ -3142,8 +3194,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 4 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh
pm_nl_set_limits $ns2 4 4
- addr_nr_ns2=fullmesh_1 sflags=nofullmesh \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ fullmesh=1 sflags=nofullmesh speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_rm_nr 0 1
fi
@@ -3154,8 +3206,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 4 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
pm_nl_set_limits $ns2 4 4
- addr_nr_ns2=1 sflags=backup,fullmesh \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_prio_nr 0 1
chk_rm_nr 0 1
@@ -3167,8 +3219,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 4 4
pm_nl_set_limits $ns2 4 4
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh
- sflags=nobackup,nofullmesh \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ sflags=nobackup,nofullmesh speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_prio_nr 0 1
chk_rm_nr 0 1
@@ -3178,7 +3230,7 @@ fullmesh_tests()
fastclose_tests()
{
if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
- test_linkfail=1024 addr_nr_ns2=fastclose_client \
+ test_linkfail=1024 fastclose=client \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_fclose_nr 1 1
@@ -3186,7 +3238,7 @@ fastclose_tests()
fi
if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
- test_linkfail=1024 addr_nr_ns2=fastclose_server \
+ test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_fclose_nr 1 1 invert
@@ -3343,8 +3395,8 @@ userspace_tests()
pm_nl_set_limits $ns1 1 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- sflags=backup \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ sflags=backup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 0
chk_prio_nr 0 0
fi
@@ -3357,8 +3409,8 @@ userspace_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- addr_nr_ns2=-1 \
- run_tests $ns1 $ns2 10.0.1.1 slow
+ addr_nr_ns2=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_rm_nr 0 0
fi
@@ -3368,7 +3420,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 1 1
- run_tests $ns1 $ns2 10.0.1.1 speed_10 &
+ speed=10 \
+ run_tests $ns1 $ns2 10.0.1.1 &
local tests_pid=$!
wait_mpj $ns1
userspace_pm_add_addr 10.0.2.1 10
@@ -3388,7 +3441,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- run_tests $ns1 $ns2 10.0.1.1 speed_10 &
+ speed=10 \
+ run_tests $ns1 $ns2 10.0.1.1 &
local tests_pid=$!
wait_mpj $ns2
userspace_pm_add_sf 10.0.3.2 20
@@ -3411,7 +3465,8 @@ endpoint_tests()
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- run_tests $ns1 $ns2 10.0.1.1 slow 2>/dev/null &
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1 2>/dev/null &
wait_mpj $ns1
pm_nl_check_endpoint 1 "creation" \
@@ -3434,8 +3489,8 @@ endpoint_tests()
pm_nl_set_limits $ns1 1 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
- test_linkfail=4 \
- run_tests $ns1 $ns2 10.0.1.1 speed_20 2>/dev/null &
+ test_linkfail=4 speed=20 \
+ run_tests $ns1 $ns2 10.0.1.1 2>/dev/null &
wait_mpj $ns2
chk_subflow_nr needtitle "before delete" 2
@@ -3566,4 +3621,7 @@ if [ ${ret} -ne 0 ]; then
echo
fi
+append_prev_results
+mptcp_lib_result_print_all_tap
+
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index f32045b23b89..b1a0fdd0408b 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -1,8 +1,12 @@
#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
+readonly KSFT_PASS=0
readonly KSFT_FAIL=1
readonly KSFT_SKIP=4
+readonly KSFT_TEST=$(basename "${0}" | sed 's/\.sh$//g')
+
+MPTCP_LIB_SUBTESTS=()
# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
# features using the last version of the kernel and the selftests to make sure
@@ -102,3 +106,65 @@ mptcp_lib_kversion_ge() {
mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}"
}
+
+__mptcp_lib_result_add() {
+ local result="${1}"
+ shift
+
+ local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1))
+
+ MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}")
+}
+
+# $1: test name
+mptcp_lib_result_pass() {
+ __mptcp_lib_result_add "ok" "${1}"
+}
+
+# $1: test name
+mptcp_lib_result_fail() {
+ __mptcp_lib_result_add "not ok" "${1}"
+}
+
+# $1: test name
+mptcp_lib_result_skip() {
+ __mptcp_lib_result_add "ok" "${1} # SKIP"
+}
+
+# $1: result code ; $2: test name
+mptcp_lib_result_code() {
+ local ret="${1}"
+ local name="${2}"
+
+ case "${ret}" in
+ "${KSFT_PASS}")
+ mptcp_lib_result_pass "${name}"
+ ;;
+ "${KSFT_FAIL}")
+ mptcp_lib_result_fail "${name}"
+ ;;
+ "${KSFT_SKIP}")
+ mptcp_lib_result_skip "${name}"
+ ;;
+ *)
+ echo "ERROR: wrong result code: ${ret}"
+ exit ${KSFT_FAIL}
+ ;;
+ esac
+}
+
+mptcp_lib_result_print_all_tap() {
+ local subtest
+
+ if [ ${#MPTCP_LIB_SUBTESTS[@]} -eq 0 ] ||
+ [ "${SELFTESTS_MPTCP_LIB_NO_TAP:-}" = "1" ]; then
+ return
+ fi
+
+ printf "\nTAP version 13\n"
+ printf "1..%d\n" "${#MPTCP_LIB_SUBTESTS[@]}"
+
+ for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
+ printf "%s\n" "${subtest}"
+ done
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index dc8d473fc82c..8c8694f21e7d 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -183,11 +183,13 @@ do_transfer()
local mptcp_connect="./mptcp_connect -r 20"
- local local_addr
+ local local_addr ip
if is_v6 "${connect_addr}"; then
local_addr="::"
+ ip=ipv6
else
local_addr="0.0.0.0"
+ ip=ipv4
fi
cmsg="TIMESTAMPNS"
@@ -223,6 +225,8 @@ do_transfer()
echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+ mptcp_lib_result_fail "transfer ${ip}"
+
ret=1
return 1
fi
@@ -236,9 +240,11 @@ do_transfer()
fi
check_transfer $cin $sout "file received by server"
-
rets=$?
+ mptcp_lib_result_code "${retc}" "mark ${ip}"
+ mptcp_lib_result_code "${rets}" "transfer ${ip}"
+
if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
return 0
fi
@@ -264,6 +270,7 @@ do_mptcp_sockopt_tests()
if ! mptcp_lib_kallsyms_has "mptcp_diag_fill_info$"; then
echo "INFO: MPTCP sockopt not supported: SKIP"
+ mptcp_lib_result_skip "sockopt"
return
fi
@@ -272,18 +279,22 @@ do_mptcp_sockopt_tests()
if [ $lret -ne 0 ]; then
echo "FAIL: SOL_MPTCP getsockopt" 1>&2
+ mptcp_lib_result_fail "sockopt v4"
ret=$lret
return
fi
+ mptcp_lib_result_pass "sockopt v4"
ip netns exec "$ns_sbox" ./mptcp_sockopt -6
lret=$?
if [ $lret -ne 0 ]; then
echo "FAIL: SOL_MPTCP getsockopt (ipv6)" 1>&2
+ mptcp_lib_result_fail "sockopt v6"
ret=$lret
return
fi
+ mptcp_lib_result_pass "sockopt v6"
}
run_tests()
@@ -310,10 +321,12 @@ do_tcpinq_test()
if [ $lret -ne 0 ];then
ret=$lret
echo "FAIL: mptcp_inq $@" 1>&2
+ mptcp_lib_result_fail "TCP_INQ: $*"
return $lret
fi
echo "PASS: TCP_INQ cmsg/ioctl $@"
+ mptcp_lib_result_pass "TCP_INQ: $*"
return $lret
}
@@ -323,6 +336,7 @@ do_tcpinq_tests()
if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then
echo "INFO: TCP_INQ not supported: SKIP"
+ mptcp_lib_result_skip "TCP_INQ"
return
fi
@@ -367,4 +381,6 @@ if [ $ret -eq 0 ];then
fi
do_tcpinq_tests
+
+mptcp_lib_result_print_all_tap
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index d02e0d63a8f9..f32038fe1ee5 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -58,16 +58,19 @@ check()
local out=`$cmd 2>$err`
local cmd_ret=$?
- printf "%-50s %s" "$msg"
+ printf "%-50s" "$msg"
if [ $cmd_ret -ne 0 ]; then
echo "[FAIL] command execution '$cmd' stderr "
cat $err
+ mptcp_lib_result_fail "${msg} # error ${cmd_ret}"
ret=1
elif [ "$out" = "$expected" ]; then
echo "[ OK ]"
+ mptcp_lib_result_pass "${msg}"
else
echo -n "[FAIL] "
echo "expected '$expected' got '$out'"
+ mptcp_lib_result_fail "${msg} # different output"
ret=1
fi
}
@@ -193,4 +196,5 @@ subflow 10.0.1.1" " (nofullmesh)"
subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)"
fi
+mptcp_lib_result_print_all_tap
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 36a3c9d92e20..ce9203b817f8 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -261,6 +261,7 @@ run_test()
printf "%-60s" "$msg"
do_transfer $small $large $time
lret=$?
+ mptcp_lib_result_code "${lret}" "${msg}"
if [ $lret -ne 0 ]; then
ret=$lret
[ $bail -eq 0 ] || exit $ret
@@ -269,6 +270,7 @@ run_test()
printf "%-60s" "$msg - reverse direction"
do_transfer $large $small $time
lret=$?
+ mptcp_lib_result_code "${lret}" "${msg}"
if [ $lret -ne 0 ]; then
ret=$lret
[ $bail -eq 0 ] || exit $ret
@@ -305,4 +307,6 @@ run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
run_test 30 10 0 0 "unbalanced bwidth"
run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
+
+mptcp_lib_result_print_all_tap
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index b180133a30af..23f8959a8ea8 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -1,6 +1,13 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it.
+#shellcheck disable=SC2086
+
+# Some variables are used below but indirectly, see check_expected_one()
+#shellcheck disable=SC2034
+
. "$(dirname "${0}")/mptcp_lib.sh"
mptcp_lib_check_mptcp
@@ -11,8 +18,7 @@ if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
exit ${KSFT_SKIP}
fi
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
+if ! ip -Version &> /dev/null; then
echo "SKIP: Cannot not run test without ip tool"
exit ${KSFT_SKIP}
fi
@@ -52,10 +58,54 @@ sec=$(date +%s)
rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX)
ns1="ns1-$rndh"
ns2="ns2-$rndh"
+ret=0
+test_name=""
+
+_printf() {
+ stdbuf -o0 -e0 printf "${@}"
+}
print_title()
{
- stdbuf -o0 -e0 printf "INFO: %s\n" "${1}"
+ _printf "INFO: %s\n" "${1}"
+}
+
+# $1: test name
+print_test()
+{
+ test_name="${1}"
+
+ _printf "%-63s" "${test_name}"
+}
+
+print_results()
+{
+ _printf "[%s]\n" "${1}"
+}
+
+test_pass()
+{
+ print_results " OK "
+ mptcp_lib_result_pass "${test_name}"
+}
+
+test_skip()
+{
+ print_results "SKIP"
+ mptcp_lib_result_skip "${test_name}"
+}
+
+# $1: msg
+test_fail()
+{
+ print_results "FAIL"
+ ret=1
+
+ if [ -n "${1}" ]; then
+ _printf "\t%s\n" "${1}"
+ fi
+
+ mptcp_lib_result_fail "${test_name}"
}
kill_wait()
@@ -67,6 +117,8 @@ kill_wait()
wait $1 2>/dev/null
}
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
print_title "Cleanup"
@@ -86,7 +138,7 @@ cleanup()
rm -rf $file $client_evts $server_evts
- stdbuf -o0 -e0 printf "Done\n"
+ _printf "Done\n"
}
trap cleanup EXIT
@@ -118,7 +170,8 @@ ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
ip -net "$ns2" link set ns2eth1 up
print_title "Init"
-stdbuf -o0 -e0 printf "Created network namespaces ns1, ns2 \t\t\t[OK]\n"
+print_test "Created network namespaces ns1, ns2"
+test_pass
make_file()
{
@@ -203,16 +256,14 @@ make_connection()
server_serverside=$(grep "type:1," "$server_evts" |
sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q')
- stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1 \t\t" $is_v6
+ print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
[ "$server_serverside" = 1 ]
then
- stdbuf -o0 -e0 printf "[OK]\n"
+ test_pass
else
- stdbuf -o0 -e0 printf "[FAIL]\n"
- stdbuf -o0 -e0 printf "\tExpected tokens (c:%s - s:%s) and server (c:%d - s:%d)\n" \
- "${client_token}" "${server_token}" \
- "${client_serverside}" "${server_serverside}"
+ test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
+ mptcp_lib_result_print_all_tap
exit 1
fi
@@ -246,10 +297,10 @@ check_expected_one()
if [ "${prev_ret}" = "0" ]
then
- stdbuf -o0 -e0 printf "[FAIL]\n"
+ test_fail
fi
- stdbuf -o0 -e0 printf "\tExpected value for '%s': '%s', got '%s'.\n" \
+ _printf "\tExpected value for '%s': '%s', got '%s'.\n" \
"${var}" "${!exp}" "${!var}"
return 1
}
@@ -257,21 +308,21 @@ check_expected_one()
# $@: all var names to check
check_expected()
{
- local ret=0
+ local rc=0
local var
for var in "${@}"
do
- check_expected_one "${var}" "${ret}" || ret=1
+ check_expected_one "${var}" "${rc}" || rc=1
done
- if [ ${ret} -eq 0 ]
+ if [ ${rc} -eq 0 ]
then
- stdbuf -o0 -e0 printf "[OK]\n"
+ test_pass
return 0
fi
- exit 1
+ return 1
}
verify_announce_event()
@@ -317,13 +368,12 @@ test_announce()
local type
type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
- stdbuf -o0 -e0 printf "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token \t\t"
+ print_test "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token"
if [ "$type" = "" ]
then
- stdbuf -o0 -e0 printf "[OK]\n"
+ test_pass
else
- stdbuf -o0 -e0 printf "[FAIL]\n\ttype defined: %s\n" "${type}"
- exit 1
+ test_fail "type defined: ${type}"
fi
# ADD_ADDR from the client to server machine reusing the subflow port
@@ -331,7 +381,7 @@ test_announce()
ip netns exec "$ns2"\
./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\
ns2eth1 > /dev/null 2>&1
- stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, reuse port \t\t" $client_addr_id
+ print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port"
sleep 0.5
verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \
"$client4_port"
@@ -340,7 +390,7 @@ test_announce()
:>"$server_evts"
ip netns exec "$ns2" ./pm_nl_ctl ann\
dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 > /dev/null 2>&1
- stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::2 (ns2) => ns1, reuse port\t\t" $client_addr_id
+ print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port"
sleep 0.5
verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\
"$client_addr_id" "$client6_port" "v6"
@@ -350,7 +400,7 @@ test_announce()
client_addr_id=$((client_addr_id+1))
ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
$client_addr_id dev ns2eth1 port $new4_port > /dev/null 2>&1
- stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, new port \t\t\t" $client_addr_id
+ print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port"
sleep 0.5
verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\
"$client_addr_id" "$new4_port"
@@ -361,7 +411,7 @@ test_announce()
# ADD_ADDR from the server to client machine reusing the subflow port
ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
$server_addr_id dev ns1eth2 > /dev/null 2>&1
- stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, reuse port \t\t" $server_addr_id
+ print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port"
sleep 0.5
verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
"$server_addr_id" "$app4_port"
@@ -370,7 +420,7 @@ test_announce()
:>"$client_evts"
ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
$server_addr_id dev ns1eth2 > /dev/null 2>&1
- stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::1 (ns1) => ns2, reuse port\t\t" $server_addr_id
+ print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port"
sleep 0.5
verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\
"$server_addr_id" "$app6_port" "v6"
@@ -380,7 +430,7 @@ test_announce()
server_addr_id=$((server_addr_id+1))
ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
$server_addr_id dev ns1eth2 port $new4_port > /dev/null 2>&1
- stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, new port \t\t\t" $server_addr_id
+ print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port"
sleep 0.5
verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
"$server_addr_id" "$new4_port"
@@ -414,39 +464,34 @@ test_remove()
local invalid_token=$(( client4_token - 1 ))
ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\
$client_addr_id > /dev/null 2>&1
- stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid token \t"\
- $client_addr_id
+ print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token"
local type
type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
if [ "$type" = "" ]
then
- stdbuf -o0 -e0 printf "[OK]\n"
+ test_pass
else
- stdbuf -o0 -e0 printf "[FAIL]\n"
- exit 1
+ test_fail
fi
# RM_ADDR using an invalid addr id should result in no action
local invalid_id=$(( client_addr_id + 1 ))
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
$invalid_id > /dev/null 2>&1
- stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid id \t"\
- $invalid_id
+ print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id"
type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
if [ "$type" = "" ]
then
- stdbuf -o0 -e0 printf "[OK]\n"
+ test_pass
else
- stdbuf -o0 -e0 printf "[FAIL]\n"
- exit 1
+ test_fail
fi
# RM_ADDR from the client to server machine
:>"$server_evts"
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
$client_addr_id > /dev/null 2>&1
- stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\
- $client_addr_id
+ print_test "RM_ADDR id:${client_addr_id} ns2 => ns1"
sleep 0.5
verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id"
@@ -455,8 +500,7 @@ test_remove()
client_addr_id=$(( client_addr_id - 1 ))
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
$client_addr_id > /dev/null 2>&1
- stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\
- $client_addr_id
+ print_test "RM_ADDR id:${client_addr_id} ns2 => ns1"
sleep 0.5
verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id"
@@ -464,8 +508,7 @@ test_remove()
:>"$server_evts"
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\
$client_addr_id > /dev/null 2>&1
- stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns2 => ns1 \t"\
- $client_addr_id
+ print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1"
sleep 0.5
verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id"
@@ -475,8 +518,7 @@ test_remove()
# RM_ADDR from the server to client machine
ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
$server_addr_id > /dev/null 2>&1
- stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t"\
- $server_addr_id
+ print_test "RM_ADDR id:${server_addr_id} ns1 => ns2"
sleep 0.5
verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id"
@@ -485,7 +527,7 @@ test_remove()
server_addr_id=$(( server_addr_id - 1 ))
ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
$server_addr_id > /dev/null 2>&1
- stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t" $server_addr_id
+ print_test "RM_ADDR id:${server_addr_id} ns1 => ns2"
sleep 0.5
verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id"
@@ -493,7 +535,7 @@ test_remove()
:>"$client_evts"
ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\
$server_addr_id > /dev/null 2>&1
- stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns1 => ns2 \t" $server_addr_id
+ print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2"
sleep 0.5
verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id"
}
@@ -520,25 +562,24 @@ verify_subflow_events()
local dport
local locid
local remid
+ local info
+
+ info="${e_saddr} (${e_from}) => ${e_daddr} (${e_to})"
if [ "$e_type" = "$SUB_ESTABLISHED" ]
then
if [ "$e_family" = "$AF_INET6" ]
then
- stdbuf -o0 -e0 printf "CREATE_SUBFLOW6 %s (%s) => %s (%s) "\
- "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+ print_test "CREATE_SUBFLOW6 ${info}"
else
- stdbuf -o0 -e0 printf "CREATE_SUBFLOW %s (%s) => %s (%s) \t"\
- "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+ print_test "CREATE_SUBFLOW ${info}"
fi
else
if [ "$e_family" = "$AF_INET6" ]
then
- stdbuf -o0 -e0 printf "DESTROY_SUBFLOW6 %s (%s) => %s (%s) "\
- "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+ print_test "DESTROY_SUBFLOW6 ${info}"
else
- stdbuf -o0 -e0 printf "DESTROY_SUBFLOW %s (%s) => %s (%s) \t"\
- "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+ print_test "DESTROY_SUBFLOW ${info}"
fi
fi
@@ -809,7 +850,7 @@ test_subflows_v4_v6_mix()
:>"$client_evts"
ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\
$server_addr_id dev ns1eth2 > /dev/null 2>&1
- stdbuf -o0 -e0 printf "ADD_ADDR4 id:%d 10.0.2.1 (ns1) => ns2, reuse port\t\t" $server_addr_id
+ print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port"
sleep 0.5
verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\
"$server_addr_id" "$app6_port"
@@ -854,25 +895,23 @@ test_prio()
sleep 0.5
# Check TX
- stdbuf -o0 -e0 printf "MP_PRIO TX \t"
+ print_test "MP_PRIO TX"
count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
[ -z "$count" ] && count=0
if [ $count != 1 ]; then
- stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}"
- exit 1
+ test_fail "Count != 1: ${count}"
else
- stdbuf -o0 -e0 printf "[OK]\n"
+ test_pass
fi
# Check RX
- stdbuf -o0 -e0 printf "MP_PRIO RX \t"
+ print_test "MP_PRIO RX"
count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}')
[ -z "$count" ] && count=0
if [ $count != 1 ]; then
- stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}"
- exit 1
+ test_fail "Count != 1: ${count}"
else
- stdbuf -o0 -e0 printf "[OK]\n"
+ test_pass
fi
}
@@ -889,11 +928,9 @@ verify_listener_events()
local sport
if [ $e_type = $LISTENER_CREATED ]; then
- stdbuf -o0 -e0 printf "CREATE_LISTENER %s:%s\t\t\t\t\t"\
- $e_saddr $e_sport
+ print_test "CREATE_LISTENER $e_saddr:$e_sport"
elif [ $e_type = $LISTENER_CLOSED ]; then
- stdbuf -o0 -e0 printf "CLOSE_LISTENER %s:%s\t\t\t\t\t"\
- $e_saddr $e_sport
+ print_test "CLOSE_LISTENER $e_saddr:$e_sport"
fi
type=$(grep "type:$e_type," $evt |
@@ -918,7 +955,8 @@ test_listener()
print_title "Listener tests"
if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
- stdbuf -o0 -e0 printf "LISTENER events \t[SKIP] Not supported\n"
+ print_test "LISTENER events"
+ test_skip
return
fi
@@ -961,4 +999,5 @@ test_subflows_v4_v6_mix
test_prio
test_listener
-exit 0
+mptcp_lib_result_print_all_tap
+exit ${ret}
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
index faa884385c45..6e4fef560873 100644
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -14,6 +14,8 @@
#include <arpa/inet.h>
#include <unistd.h>
+#include "kselftest.h"
+
#define DATA_LEN 100
#define DATA_CHAR 'a'
#define DATA_CHAR_1 'b'
@@ -63,7 +65,7 @@ static __maybe_unused void pair_udp_setfilter(int fd)
struct sock_fprog bpf_prog;
bpf_prog.filter = bpf_filter;
- bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+ bpf_prog.len = ARRAY_SIZE(bpf_filter);
if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
sizeof(bpf_prog))) {
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index ba286d680fd9..488f4964365e 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -21,6 +21,7 @@ ALL_TESTS="
kci_test_vrf
kci_test_encap
kci_test_macsec
+ kci_test_macsec_offload
kci_test_ipsec
kci_test_ipsec_offload
kci_test_fdb_get
@@ -643,6 +644,88 @@ kci_test_macsec()
echo "PASS: macsec"
}
+kci_test_macsec_offload()
+{
+ sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/
+ sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
+ probed=false
+ local ret=0
+
+ ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
+ if [ $? -ne 0 ]; then
+ echo "SKIP: macsec: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ # setup netdevsim since dummydev doesn't have offload support
+ if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+ modprobe -q netdevsim
+ check_err $?
+ if [ $ret -ne 0 ]; then
+ echo "SKIP: macsec_offload can't load netdevsim"
+ return $ksft_skip
+ fi
+ probed=true
+ fi
+
+ echo "0" > /sys/bus/netdevsim/new_device
+ while [ ! -d $sysfsnet ] ; do :; done
+ udevadm settle
+ dev=`ls $sysfsnet`
+
+ ip link set $dev up
+ if [ ! -d $sysfsd ] ; then
+ echo "FAIL: macsec_offload can't create device $dev"
+ return 1
+ fi
+
+ ethtool -k $dev | grep -q 'macsec-hw-offload: on'
+ if [ $? -eq 1 ] ; then
+ echo "FAIL: macsec_offload netdevsim doesn't support MACsec offload"
+ return 1
+ fi
+
+ ip link add link $dev kci_macsec1 type macsec port 4 offload mac
+ check_err $?
+
+ ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac
+ check_err $?
+
+ ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac
+ check_err $?
+
+ ip link add link $dev kci_macsec4 type macsec port 8 offload mac 2> /dev/null
+ check_fail $?
+
+ msname=kci_macsec1
+
+ ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+ check_err $?
+
+ ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+ check_err $?
+
+ ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \
+ key 00 0123456789abcdef0123456789abcdef
+ check_err $?
+
+ ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef" 2> /dev/null
+ check_fail $?
+
+ # clean up any leftovers
+ for msdev in kci_macsec{1,2,3,4} ; do
+ ip link del $msdev 2> /dev/null
+ done
+ echo 0 > /sys/bus/netdevsim/del_device
+ $probed && rmmod netdevsim
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: macsec_offload"
+ return 1
+ fi
+ echo "PASS: macsec_offload"
+}
+
#-------------------------------------------------------------------
# Example commands
# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh
new file mode 100755
index 000000000000..112cfd8a10ad
--- /dev/null
+++ b/tools/testing/selftests/net/test_bridge_backup_port.sh
@@ -0,0 +1,759 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking bridge backup port and backup nexthop ID
+# functionality. The topology consists of two bridge (VTEPs) connected using
+# VXLAN. The test checks that when the switch port (swp1) is down, traffic is
+# redirected to the VXLAN port (vx0). When a backup nexthop ID is configured,
+# the test checks that traffic is redirected with the correct nexthop
+# information.
+#
+# +------------------------------------+ +------------------------------------+
+# | + swp1 + vx0 | | + swp1 + vx0 |
+# | | | | | | | |
+# | | br0 | | | | | |
+# | +------------+-----------+ | | +------------+-----------+ |
+# | | | | | |
+# | | | | | |
+# | + | | + |
+# | br0 | | br0 |
+# | + | | + |
+# | | | | | |
+# | | | | | |
+# | + | | + |
+# | br0.10 | | br0.10 |
+# | 192.0.2.65/28 | | 192.0.2.66/28 |
+# | | | |
+# | | | |
+# | 192.0.2.33 | | 192.0.2.34 |
+# | + lo | | + lo |
+# | | | |
+# | | | |
+# | 192.0.2.49/28 | | 192.0.2.50/28 |
+# | veth0 +-------+ veth0 |
+# | | | |
+# | sw1 | | sw2 |
+# +------------------------------------+ +------------------------------------+
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# All tests in this script. Can be overridden with -t option.
+TESTS="
+ backup_port
+ backup_nhid
+ backup_nhid_invalid
+ backup_nhid_ping
+ backup_nhid_torture
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+PING_TIMEOUT=5
+
+################################################################################
+# Utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "$VERBOSE" = "1" ]; then
+ echo " rc=$rc, expected $expected"
+ fi
+
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+tc_check_packets()
+{
+ local ns=$1; shift
+ local id=$1; shift
+ local handle=$1; shift
+ local count=$1; shift
+ local pkts
+
+ sleep 0.1
+ pkts=$(tc -n $ns -j -s filter show $id \
+ | jq ".[] | select(.options.handle == $handle) | \
+ .options.actions[0].stats.packets")
+ [[ $pkts == $count ]]
+}
+
+################################################################################
+# Setup
+
+setup_topo_ns()
+{
+ local ns=$1; shift
+
+ ip netns add $ns
+ ip -n $ns link set dev lo up
+
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+setup_topo()
+{
+ local ns
+
+ for ns in sw1 sw2; do
+ setup_topo_ns $ns
+ done
+
+ ip link add name veth0 type veth peer name veth1
+ ip link set dev veth0 netns sw1 name veth0
+ ip link set dev veth1 netns sw2 name veth0
+}
+
+setup_sw_common()
+{
+ local ns=$1; shift
+ local local_addr=$1; shift
+ local remote_addr=$1; shift
+ local veth_addr=$1; shift
+ local gw_addr=$1; shift
+ local br_addr=$1; shift
+
+ ip -n $ns address add $local_addr/32 dev lo
+
+ ip -n $ns link set dev veth0 up
+ ip -n $ns address add $veth_addr/28 dev veth0
+ ip -n $ns route add default via $gw_addr
+
+ ip -n $ns link add name br0 up type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+
+ ip -n $ns link add link br0 name br0.10 up type vlan id 10
+ bridge -n $ns vlan add vid 10 dev br0 self
+ ip -n $ns address add $br_addr/28 dev br0.10
+
+ ip -n $ns link add name swp1 up type dummy
+ ip -n $ns link set dev swp1 master br0
+ bridge -n $ns vlan add vid 10 dev swp1 untagged
+
+ ip -n $ns link add name vx0 up master br0 type vxlan \
+ local $local_addr dstport 4789 nolearning external
+ bridge -n $ns link set dev vx0 vlan_tunnel on learning off
+
+ bridge -n $ns vlan add vid 10 dev vx0
+ bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010
+}
+
+setup_sw1()
+{
+ local ns=sw1
+ local local_addr=192.0.2.33
+ local remote_addr=192.0.2.34
+ local veth_addr=192.0.2.49
+ local gw_addr=192.0.2.50
+ local br_addr=192.0.2.65
+
+ setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr \
+ $br_addr
+}
+
+setup_sw2()
+{
+ local ns=sw2
+ local local_addr=192.0.2.34
+ local remote_addr=192.0.2.33
+ local veth_addr=192.0.2.50
+ local gw_addr=192.0.2.49
+ local br_addr=192.0.2.66
+
+ setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr \
+ $br_addr
+}
+
+setup()
+{
+ set -e
+
+ setup_topo
+ setup_sw1
+ setup_sw2
+
+ sleep 5
+
+ set +e
+}
+
+cleanup()
+{
+ local ns
+
+ for ns in h1 h2 sw1 sw2; do
+ ip netns del $ns &> /dev/null
+ done
+}
+
+################################################################################
+# Tests
+
+backup_port()
+{
+ local dmac=00:11:22:33:44:55
+ local smac=00:aa:bb:cc:dd:ee
+
+ echo
+ echo "Backup port"
+ echo "-----------"
+
+ run_cmd "tc -n sw1 qdisc replace dev swp1 clsact"
+ run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+ run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+ run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10"
+
+ # Initial state - check that packets are forwarded out of swp1 when it
+ # has a carrier and not forwarded out of any port when it does not have
+ # a carrier.
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 1
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 0
+ log_test $? 0 "No forwarding out of vx0"
+
+ run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 1
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 0
+ log_test $? 0 "No forwarding out of vx0"
+
+ run_cmd "ip -n sw1 link set dev swp1 carrier on"
+ log_test $? 0 "swp1 carrier on"
+
+ # Configure vx0 as the backup port of swp1 and check that packets are
+ # forwarded out of swp1 when it has a carrier and out of vx0 when swp1
+ # does not have a carrier.
+ run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ log_test $? 0 "vx0 configured as backup port of swp1"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 2
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 0
+ log_test $? 0 "No forwarding out of vx0"
+
+ run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 2
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "Forwarding out of vx0"
+
+ run_cmd "ip -n sw1 link set dev swp1 carrier on"
+ log_test $? 0 "swp1 carrier on"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 3
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "No forwarding out of vx0"
+
+ # Remove vx0 as the backup port of swp1 and check that packets are no
+ # longer forwarded out of vx0 when swp1 does not have a carrier.
+ run_cmd "bridge -n sw1 link set dev swp1 nobackup_port"
+ run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ log_test $? 1 "vx0 not configured as backup port of swp1"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 4
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "No forwarding out of vx0"
+
+ run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 4
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "No forwarding out of vx0"
+}
+
+backup_nhid()
+{
+ local dmac=00:11:22:33:44:55
+ local smac=00:aa:bb:cc:dd:ee
+
+ echo
+ echo "Backup nexthop ID"
+ echo "-----------------"
+
+ run_cmd "tc -n sw1 qdisc replace dev swp1 clsact"
+ run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+ run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+ run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+ run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb"
+
+ run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10"
+ run_cmd "bridge -n sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010"
+
+ run_cmd "ip -n sw2 address replace 192.0.2.36/32 dev lo"
+
+ # The first filter matches on packets forwarded using the backup
+ # nexthop ID and the second filter matches on packets forwarded using a
+ # regular VXLAN FDB entry.
+ run_cmd "tc -n sw2 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
+ run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass"
+
+ # Configure vx0 as the backup port of swp1 and check that packets are
+ # forwarded out of swp1 when it has a carrier and out of vx0 when swp1
+ # does not have a carrier. When packets are forwarded out of vx0, check
+ # that they are forwarded by the VXLAN FDB entry.
+ run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ log_test $? 0 "vx0 configured as backup port of swp1"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 1
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 0
+ log_test $? 0 "No forwarding out of vx0"
+
+ run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 1
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets sw2 "dev vx0 ingress" 101 0
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ tc_check_packets sw2 "dev vx0 ingress" 102 1
+ log_test $? 0 "Forwarding using VXLAN FDB entry"
+
+ run_cmd "ip -n sw1 link set dev swp1 carrier on"
+ log_test $? 0 "swp1 carrier on"
+
+ # Configure nexthop ID 10 as the backup nexthop ID of swp1 and check
+ # that when packets are forwarded out of vx0, they are forwarded using
+ # the backup nexthop ID.
+ run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
+ log_test $? 0 "nexthop ID 10 configured as backup nexthop ID of swp1"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 2
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "No forwarding out of vx0"
+
+ run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 2
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "Forwarding using backup nexthop ID"
+ tc_check_packets sw2 "dev vx0 ingress" 102 1
+ log_test $? 0 "No forwarding using VXLAN FDB entry"
+
+ run_cmd "ip -n sw1 link set dev swp1 carrier on"
+ log_test $? 0 "swp1 carrier on"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 3
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "No forwarding out of vx0"
+ tc_check_packets sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ tc_check_packets sw2 "dev vx0 ingress" 102 1
+ log_test $? 0 "No forwarding using VXLAN FDB entry"
+
+ # Reset the backup nexthop ID to 0 and check that packets are no longer
+ # forwarded using the backup nexthop ID when swp1 does not have a
+ # carrier and are instead forwarded by the VXLAN FDB.
+ run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 0"
+ run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid\""
+ log_test $? 1 "No backup nexthop ID configured for swp1"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 4
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "No forwarding out of vx0"
+ tc_check_packets sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ tc_check_packets sw2 "dev vx0 ingress" 102 1
+ log_test $? 0 "No forwarding using VXLAN FDB entry"
+
+ run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 4
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ tc_check_packets sw2 "dev vx0 ingress" 102 2
+ log_test $? 0 "Forwarding using VXLAN FDB entry"
+}
+
+backup_nhid_invalid()
+{
+ local dmac=00:11:22:33:44:55
+ local smac=00:aa:bb:cc:dd:ee
+ local tx_drop
+
+ echo
+ echo "Backup nexthop ID - invalid IDs"
+ echo "-------------------------------"
+
+ # Check that when traffic is redirected with an invalid nexthop ID, it
+ # is forwarded out of the VXLAN port, but dropped by the VXLAN driver
+ # and does not crash the host.
+
+ run_cmd "tc -n sw1 qdisc replace dev swp1 clsact"
+ run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+ run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ # Drop all other Tx traffic to avoid changes to Tx drop counter.
+ run_cmd "tc -n sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop"
+
+ tx_drop=$(ip -n sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]')
+
+ run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+ run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb"
+
+ run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10"
+
+ run_cmd "tc -n sw2 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
+
+ # First, check that redirection works.
+ run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ log_test $? 0 "vx0 configured as backup port of swp1"
+
+ run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
+ log_test $? 0 "Valid nexthop as backup nexthop"
+
+ run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 0
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "Forwarding using backup nexthop ID"
+ run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'"
+ log_test $? 0 "No Tx drop increase"
+
+ # Use a non-existent nexthop ID.
+ run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 20"
+ run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 20\""
+ log_test $? 0 "Non-existent nexthop as backup nexthop"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 0
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'"
+ log_test $? 0 "Tx drop increased"
+
+ # Use a blckhole nexthop.
+ run_cmd "ip -n sw1 nexthop replace id 30 blackhole"
+ run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 30"
+ run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 30\""
+ log_test $? 0 "Blackhole nexthop as backup nexthop"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 0
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'"
+ log_test $? 0 "Tx drop increased"
+
+ # Non-group FDB nexthop.
+ run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 1"
+ run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 1\""
+ log_test $? 0 "Non-group FDB nexthop as backup nexthop"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 0
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 4
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'"
+ log_test $? 0 "Tx drop increased"
+
+ # IPv6 address family nexthop.
+ run_cmd "ip -n sw1 nexthop replace id 100 via 2001:db8:100::1 fdb"
+ run_cmd "ip -n sw1 nexthop replace id 200 via 2001:db8:100::1 fdb"
+ run_cmd "ip -n sw1 nexthop replace id 300 group 100/200 fdb"
+ run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 300"
+ run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 300\""
+ log_test $? 0 "IPv6 address family nexthop as backup nexthop"
+
+ run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets sw1 "dev swp1 egress" 101 0
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets sw1 "dev vx0 egress" 101 5
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'"
+ log_test $? 0 "Tx drop increased"
+}
+
+backup_nhid_ping()
+{
+ local sw1_mac
+ local sw2_mac
+
+ echo
+ echo "Backup nexthop ID - ping"
+ echo "------------------------"
+
+ # Test bidirectional traffic when traffic is redirected in both VTEPs.
+ sw1_mac=$(ip -n sw1 -j -p link show br0.10 | jq -r '.[]["address"]')
+ sw2_mac=$(ip -n sw2 -j -p link show br0.10 | jq -r '.[]["address"]')
+
+ run_cmd "bridge -n sw1 fdb replace $sw2_mac dev swp1 master static vlan 10"
+ run_cmd "bridge -n sw2 fdb replace $sw1_mac dev swp1 master static vlan 10"
+
+ run_cmd "ip -n sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10"
+ run_cmd "ip -n sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10"
+
+ run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n sw2 nexthop replace id 1 via 192.0.2.33 fdb"
+ run_cmd "ip -n sw1 nexthop replace id 10 group 1 fdb"
+ run_cmd "ip -n sw2 nexthop replace id 10 group 1 fdb"
+
+ run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n sw2 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n sw2 link set dev swp1 backup_nhid 10"
+
+ run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n sw2 link set dev swp1 carrier off"
+
+ run_cmd "ip netns exec sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
+ log_test $? 0 "Ping with backup nexthop ID"
+
+ # Reset the backup nexthop ID to 0 and check that ping fails.
+ run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 0"
+ run_cmd "bridge -n sw2 link set dev swp1 backup_nhid 0"
+
+ run_cmd "ip netns exec sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
+ log_test $? 1 "Ping after disabling backup nexthop ID"
+}
+
+backup_nhid_add_del_loop()
+{
+ while true; do
+ ip -n sw1 nexthop del id 10
+ ip -n sw1 nexthop replace id 10 group 1/2 fdb
+ done >/dev/null 2>&1
+}
+
+backup_nhid_torture()
+{
+ local dmac=00:11:22:33:44:55
+ local smac=00:aa:bb:cc:dd:ee
+ local pid1
+ local pid2
+ local pid3
+
+ echo
+ echo "Backup nexthop ID - torture test"
+ echo "--------------------------------"
+
+ # Continuously send traffic through the backup nexthop while adding and
+ # deleting the group. The test is considered successful if nothing
+ # crashed.
+
+ run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+ run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb"
+
+ run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10"
+
+ run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "ip -n sw1 link set dev swp1 carrier off"
+
+ backup_nhid_add_del_loop &
+ pid1=$!
+ ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 &
+ pid2=$!
+
+ sleep 30
+ kill -9 $pid1 $pid2
+ wait $pid1 $pid2 2>/dev/null
+
+ log_test 0 0 "Torture test"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v Verbose mode (show commands and output)
+ -w Timeout for ping
+EOF
+}
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:pPvhw:" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ w) PING_TIMEOUT=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+ echo "SKIP: Could not run test without bridge tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tc)" ]; then
+ echo "SKIP: Could not run test without tc tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test without jq tool"
+ exit $ksft_skip
+fi
+
+bridge link help 2>&1 | grep -q "backup_nhid"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 bridge too old, missing backup nexthop ID support"
+ exit $ksft_skip
+fi
+
+# Start clean.
+cleanup
+
+for t in $TESTS
+do
+ setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile
index cb553eac9f41..3c4b7fa05075 100644
--- a/tools/testing/selftests/tc-testing/Makefile
+++ b/tools/testing/selftests/tc-testing/Makefile
@@ -24,7 +24,7 @@ CLANG_FLAGS = -I. -I$(APIDIR) \
$(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
- -O2 -target bpf -emit-llvm -c $< -o - | \
+ -O2 --target=bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
TEST_PROGS += ./tdc.sh
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index ac1bd3ac1533..90718c2fd4ea 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -255,35 +255,142 @@ static void test_stream_multiconn_server(const struct test_opts *opts)
close(fds[i]);
}
-static void test_stream_msg_peek_client(const struct test_opts *opts)
+#define MSG_PEEK_BUF_LEN 64
+
+static void test_msg_peek_client(const struct test_opts *opts,
+ bool seqpacket)
{
+ unsigned char buf[MSG_PEEK_BUF_LEN];
+ ssize_t send_size;
int fd;
+ int i;
+
+ if (seqpacket)
+ fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ else
+ fd = vsock_stream_connect(opts->peer_cid, 1234);
- fd = vsock_stream_connect(opts->peer_cid, 1234);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
}
- send_byte(fd, 1, 0);
+ for (i = 0; i < sizeof(buf); i++)
+ buf[i] = rand() & 0xFF;
+
+ control_expectln("SRVREADY");
+
+ send_size = send(fd, buf, sizeof(buf), 0);
+
+ if (send_size < 0) {
+ perror("send");
+ exit(EXIT_FAILURE);
+ }
+
+ if (send_size != sizeof(buf)) {
+ fprintf(stderr, "Invalid send size %zi\n", send_size);
+ exit(EXIT_FAILURE);
+ }
+
close(fd);
}
-static void test_stream_msg_peek_server(const struct test_opts *opts)
+static void test_msg_peek_server(const struct test_opts *opts,
+ bool seqpacket)
{
+ unsigned char buf_half[MSG_PEEK_BUF_LEN / 2];
+ unsigned char buf_normal[MSG_PEEK_BUF_LEN];
+ unsigned char buf_peek[MSG_PEEK_BUF_LEN];
+ ssize_t res;
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (seqpacket)
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ else
+ fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
}
- recv_byte(fd, 1, MSG_PEEK);
- recv_byte(fd, 1, 0);
+ /* Peek from empty socket. */
+ res = recv(fd, buf_peek, sizeof(buf_peek), MSG_PEEK | MSG_DONTWAIT);
+ if (res != -1) {
+ fprintf(stderr, "expected recv(2) failure, got %zi\n", res);
+ exit(EXIT_FAILURE);
+ }
+
+ if (errno != EAGAIN) {
+ perror("EAGAIN expected");
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("SRVREADY");
+
+ /* Peek part of data. */
+ res = recv(fd, buf_half, sizeof(buf_half), MSG_PEEK);
+ if (res != sizeof(buf_half)) {
+ fprintf(stderr, "recv(2) + MSG_PEEK, expected %zu, got %zi\n",
+ sizeof(buf_half), res);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Peek whole data. */
+ res = recv(fd, buf_peek, sizeof(buf_peek), MSG_PEEK);
+ if (res != sizeof(buf_peek)) {
+ fprintf(stderr, "recv(2) + MSG_PEEK, expected %zu, got %zi\n",
+ sizeof(buf_peek), res);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Compare partial and full peek. */
+ if (memcmp(buf_half, buf_peek, sizeof(buf_half))) {
+ fprintf(stderr, "Partial peek data mismatch\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (seqpacket) {
+ /* This type of socket supports MSG_TRUNC flag,
+ * so check it with MSG_PEEK. We must get length
+ * of the message.
+ */
+ res = recv(fd, buf_half, sizeof(buf_half), MSG_PEEK |
+ MSG_TRUNC);
+ if (res != sizeof(buf_peek)) {
+ fprintf(stderr,
+ "recv(2) + MSG_PEEK | MSG_TRUNC, exp %zu, got %zi\n",
+ sizeof(buf_half), res);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ res = recv(fd, buf_normal, sizeof(buf_normal), 0);
+ if (res != sizeof(buf_normal)) {
+ fprintf(stderr, "recv(2), expected %zu, got %zi\n",
+ sizeof(buf_normal), res);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Compare full peek and normal read. */
+ if (memcmp(buf_peek, buf_normal, sizeof(buf_peek))) {
+ fprintf(stderr, "Full peek data mismatch\n");
+ exit(EXIT_FAILURE);
+ }
+
close(fd);
}
+static void test_stream_msg_peek_client(const struct test_opts *opts)
+{
+ return test_msg_peek_client(opts, false);
+}
+
+static void test_stream_msg_peek_server(const struct test_opts *opts)
+{
+ return test_msg_peek_server(opts, false);
+}
+
#define SOCK_BUF_SIZE (2 * 1024 * 1024)
#define MAX_MSG_SIZE (32 * 1024)
@@ -1053,6 +1160,16 @@ static void test_stream_virtio_skb_merge_server(const struct test_opts *opts)
close(fd);
}
+static void test_seqpacket_msg_peek_client(const struct test_opts *opts)
+{
+ return test_msg_peek_client(opts, true);
+}
+
+static void test_seqpacket_msg_peek_server(const struct test_opts *opts)
+{
+ return test_msg_peek_server(opts, true);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -1128,6 +1245,11 @@ static struct test_case test_cases[] = {
.run_client = test_stream_virtio_skb_merge_client,
.run_server = test_stream_virtio_skb_merge_server,
},
+ {
+ .name = "SOCK_SEQPACKET MSG_PEEK",
+ .run_client = test_seqpacket_msg_peek_client,
+ .run_server = test_seqpacket_msg_peek_server,
+ },
{},
};