summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/sysctl/net.rst9
-rw-r--r--Documentation/bpf/btf.rst45
-rw-r--r--Documentation/bpf/instruction-set.rst215
-rw-r--r--Documentation/devicetree/bindings/net/cdns,macb.yaml56
-rw-r--r--Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fman.txt22
-rw-r--r--Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/renesas,etheravb.yaml4
-rw-r--r--Documentation/networking/ethtool-netlink.rst8
-rw-r--r--Documentation/networking/mctp.rst48
-rw-r--r--MAINTAINERS5
-rw-r--r--arch/alpha/include/uapi/asm/socket.h2
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp.dtsi8
-rw-r--r--arch/arm64/net/bpf_jit_comp.c5
-rw-r--r--arch/mips/include/uapi/asm/socket.h2
-rw-r--r--arch/parisc/include/uapi/asm/socket.h2
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c2
-rw-r--r--arch/sparc/include/uapi/asm/socket.h2
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c2
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/text-patching.h1
-rw-r--r--arch/x86/kernel/alternative.c34
-rw-r--r--arch/x86/net/bpf_jit_comp.c70
-rw-r--r--drivers/bluetooth/btintel.c11
-rw-r--r--drivers/bluetooth/btintel.h1
-rw-r--r--drivers/bluetooth/btmrvl_debugfs.c2
-rw-r--r--drivers/bluetooth/btmrvl_sdio.c2
-rw-r--r--drivers/bluetooth/btmtk.h35
-rw-r--r--drivers/bluetooth/btmtksdio.c276
-rw-r--r--drivers/bluetooth/btrtl.c8
-rw-r--r--drivers/bluetooth/btusb.c14
-rw-r--r--drivers/bluetooth/hci_h5.c5
-rw-r--r--drivers/bluetooth/hci_ll.c2
-rw-r--r--drivers/bluetooth/hci_serdev.c3
-rw-r--r--drivers/net/bonding/bond_alb.c31
-rw-r--r--drivers/net/bonding/bond_main.c27
-rw-r--r--drivers/net/bonding/bond_procfs.c1
-rw-r--r--drivers/net/dsa/Kconfig12
-rw-r--r--drivers/net/dsa/Makefile3
-rw-r--r--drivers/net/dsa/b53/b53_common.c2
-rw-r--r--drivers/net/dsa/bcm_sf2.c54
-rw-r--r--drivers/net/dsa/microchip/ksz8795.c45
-rw-r--r--drivers/net/dsa/microchip/ksz9477.c9
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c6
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h1
-rw-r--r--drivers/net/dsa/mt7530.c2
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c458
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.h7
-rw-r--r--drivers/net/dsa/mv88e6xxx/global1.h1
-rw-r--r--drivers/net/dsa/mv88e6xxx/global1_vtu.c5
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.h3
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2_scratch.c28
-rw-r--r--drivers/net/dsa/mv88e6xxx/port.c7
-rw-r--r--drivers/net/dsa/mv88e6xxx/port.h7
-rw-r--r--drivers/net/dsa/mv88e6xxx/serdes.c43
-rw-r--r--drivers/net/dsa/mv88e6xxx/smi.c35
-rw-r--r--drivers/net/dsa/qca/ar9331.c45
-rw-r--r--drivers/net/dsa/qca8k.c807
-rw-r--r--drivers/net/dsa/qca8k.h46
-rw-r--r--drivers/net/dsa/realtek-smi-core.c523
-rw-r--r--drivers/net/dsa/realtek/Kconfig40
-rw-r--r--drivers/net/dsa/realtek/Makefile6
-rw-r--r--drivers/net/dsa/realtek/realtek-mdio.c229
-rw-r--r--drivers/net/dsa/realtek/realtek-smi.c535
-rw-r--r--drivers/net/dsa/realtek/realtek.h (renamed from drivers/net/dsa/realtek-smi-core.h)80
-rw-r--r--drivers/net/dsa/realtek/rtl8365mb.c (renamed from drivers/net/dsa/rtl8365mb.c)663
-rw-r--r--drivers/net/dsa/realtek/rtl8366-core.c (renamed from drivers/net/dsa/rtl8366.c)164
-rw-r--r--drivers/net/dsa/realtek/rtl8366rb.c (renamed from drivers/net/dsa/rtl8366rb.c)457
-rw-r--r--drivers/net/dsa/xrs700x/xrs700x.c29
-rw-r--r--drivers/net/ethernet/3com/typhoon.c24
-rw-r--r--drivers/net/ethernet/agere/et131x.c14
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c5
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c36
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h499
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c152
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h5
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c2
-rw-r--r--drivers/net/ethernet/cadence/macb.h4
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c63
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_main.c4
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.c3
-rw-r--r--drivers/net/ethernet/cortina/gemini.c8
-rw-r--r--drivers/net/ethernet/dec/tulip/pnic.c2
-rw-r--r--drivers/net/ethernet/dlink/sundance.c60
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c343
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h18
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c2
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c12
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.h38
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_cbdr.c41
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pf.c14
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_qos.c103
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c2
-rw-r--r--drivers/net/ethernet/freescale/fec_ptp.c1
-rw-r--r--drivers/net/ethernet/freescale/xgmac_mdio.c91
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c79
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h6
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c11
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h8
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c11
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h8
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c22
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq.c92
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c155
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c52
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h25
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c36
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h5
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c20
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c9
-rw-r--r--drivers/net/ethernet/intel/ice/Makefile12
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h255
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c19
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c49
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch.c9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c11
-rw-r--r--drivers/net/ethernet/intel/ice/ice_flex_pipe.c291
-rw-r--r--drivers/net/ethernet/intel/ice/ice_flex_pipe.h13
-rw-r--r--drivers/net/ethernet/intel/ice/ice_flex_type.h40
-rw-r--r--drivers/net/ethernet/intel/ice/ice_fltr.c37
-rw-r--r--drivers/net/ethernet/intel/ice/ice_fltr.h10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_idc.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c380
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h17
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c328
-rw-r--r--drivers/net/ethernet/intel/ice/ice_osdep.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c38
-rw-r--r--drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h13
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c80
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.h24
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c49
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h13
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.c24
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.h30
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h19
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c202
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h19
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c1519
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h17
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vlan.h18
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vlan_mode.c439
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vlan_mode.h13
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c707
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h32
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c103
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h29
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c396
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.h27
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c4
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c38
-rw-r--r--drivers/net/ethernet/intel/igbvf/netdev.c22
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c35
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_main.c19
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c20
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c27
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c20
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c319
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.c247
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.h13
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h10
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h19
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rpm.c223
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rpm.h30
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c117
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c17
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/Makefile3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c75
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h17
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c170
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c19
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c50
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c73
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c5
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c53
-rw-r--r--drivers/net/ethernet/mediatek/mtk_star_emac.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c87
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c368
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c75
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c77
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_env.c121
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_env.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/minimal.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h59
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/resources.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c106
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h32
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c91
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c28
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c3
-rw-r--r--drivers/net/ethernet/microchip/lan743x_ethtool.c2
-rw-r--r--drivers/net/ethernet/microchip/lan966x/Makefile3
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c34
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c131
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.h54
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c45
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c9
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c618
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_regs.h121
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c61
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.c2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c10
-rw-r--r--drivers/net/ethernet/microsoft/mana/gdma_main.c4
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana.h15
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c70
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_ethtool.c35
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h81
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h3
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_port.h3
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic.h7
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c17
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.c162
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.h6
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c189
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.h2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_main.c125
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c37
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_txrx.c66
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c3
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.c90
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.h38
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c74
-rw-r--r--drivers/net/ethernet/realtek/r8169_phy_config.c71
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c13
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c18
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c2
-rw-r--r--drivers/net/ethernet/sfc/ef10.c26
-rw-r--r--drivers/net/ethernet/sfc/ef100_nic.c9
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h2
-rw-r--r--drivers/net/ethernet/sfc/nic_common.h5
-rw-r--r--drivers/net/ethernet/sfc/rx_common.c18
-rw-r--r--drivers/net/ethernet/sfc/rx_common.h6
-rw-r--r--drivers/net/ethernet/sfc/siena.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c30
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c147
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c8
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet.h2
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c108
-rw-r--r--drivers/net/fjes/fjes_main.c5
-rw-r--r--drivers/net/hyperv/netvsc.c24
-rw-r--r--drivers/net/ieee802154/atusb.c186
-rw-r--r--drivers/net/ieee802154/mac802154_hwsim.c2
-rw-r--r--drivers/net/ipa/gsi_trans.c11
-rw-r--r--drivers/net/ipa/gsi_trans.h10
-rw-r--r--drivers/net/ipa/ipa_data-v3.1.c2
-rw-r--r--drivers/net/ipa/ipa_data-v3.5.1.c2
-rw-r--r--drivers/net/ipa/ipa_data-v4.11.c2
-rw-r--r--drivers/net/ipa/ipa_data-v4.2.c2
-rw-r--r--drivers/net/ipa/ipa_data-v4.5.c2
-rw-r--r--drivers/net/ipa/ipa_data-v4.9.c2
-rw-r--r--drivers/net/ipa/ipa_data.h2
-rw-r--r--drivers/net/ipa/ipa_endpoint.c217
-rw-r--r--drivers/net/ipa/ipa_endpoint.h8
-rw-r--r--drivers/net/mdio/mdio-xgene.c3
-rw-r--r--drivers/net/pcs/pcs-xpcs.c41
-rw-r--r--drivers/net/phy/aquantia_main.c4
-rw-r--r--drivers/net/phy/at803x.c146
-rw-r--r--drivers/net/phy/phy-core.c22
-rw-r--r--drivers/net/phy/phylink.c11
-rw-r--r--drivers/net/usb/Kconfig1
-rw-r--r--drivers/net/usb/asix.h4
-rw-r--r--drivers/net/usb/asix_common.c19
-rw-r--r--drivers/net/usb/asix_devices.c22
-rw-r--r--drivers/net/usb/cdc_mbim.c1
-rw-r--r--drivers/net/usb/smsc95xx.c25
-rw-r--r--drivers/nfc/st-nci/vendor_cmds.c2
-rw-r--r--drivers/nfc/st21nfca/vendor_cmds.c4
-rw-r--r--drivers/ptp/ptp_clock.c11
-rw-r--r--drivers/ptp/ptp_pch.c195
-rw-r--r--drivers/ptp/ptp_sysfs.c4
-rw-r--r--drivers/ptp/ptp_vclock.c56
-rw-r--r--drivers/soc/fsl/dpio/qbman-portal.c8
-rw-r--r--include/linux/bpf-cgroup.h24
-rw-r--r--include/linux/bpf.h122
-rw-r--r--include/linux/bpf_verifier.h7
-rw-r--r--include/linux/btf.h85
-rw-r--r--include/linux/btf_ids.h13
-rw-r--r--include/linux/compiler_types.h3
-rw-r--r--include/linux/dsa/tag_qca.h82
-rw-r--r--include/linux/ethtool.h2
-rw-r--r--include/linux/filter.h32
-rw-r--r--include/linux/ipv6.h9
-rw-r--r--include/linux/linkmode.h5
-rw-r--r--include/linux/mii.h50
-rw-r--r--include/linux/mlx5/mlx5_ifc.h7
-rw-r--r--include/linux/net/intel/i40e_client.h10
-rw-r--r--include/linux/net/intel/iidc.h4
-rw-r--r--include/linux/netdevice.h72
-rw-r--r--include/linux/netfilter.h1
-rw-r--r--include/linux/netfilter/nf_conntrack_pptp.h38
-rw-r--r--include/linux/netlink.h9
-rw-r--r--include/linux/pcs/pcs-xpcs.h3
-rw-r--r--include/linux/phy.h2
-rw-r--r--include/linux/phylink.h1
-rw-r--r--include/linux/ref_tracker.h4
-rw-r--r--include/linux/skbuff.h44
-rw-r--r--include/linux/skmsg.h16
-rw-r--r--include/linux/sunrpc/svc_xprt.h1
-rw-r--r--include/linux/sunrpc/xprt.h1
-rw-r--r--include/linux/udp.h5
-rw-r--r--include/linux/uio.h17
-rw-r--r--include/net/ax25.h12
-rw-r--r--include/net/bluetooth/hci_core.h17
-rw-r--r--include/net/bluetooth/mgmt.h16
-rw-r--r--include/net/bonding.h14
-rw-r--r--include/net/cfg802154.h10
-rw-r--r--include/net/dsa.h37
-rw-r--r--include/net/gro.h52
-rw-r--r--include/net/inet_connection_sock.h8
-rw-r--r--include/net/inet_dscp.h57
-rw-r--r--include/net/inet_timewait_sock.h8
-rw-r--r--include/net/ip.h3
-rw-r--r--include/net/ip_fib.h3
-rw-r--r--include/net/ipv6.h21
-rw-r--r--include/net/ipv6_frag.h1
-rw-r--r--include/net/mac802154.h12
-rw-r--r--include/net/mctp.h16
-rw-r--r--include/net/net_namespace.h8
-rw-r--r--include/net/netfilter/nf_conntrack_acct.h1
-rw-r--r--include/net/netfilter/nf_conntrack_bpf.h23
-rw-r--r--include/net/netfilter/nf_conntrack_ecache.h15
-rw-r--r--include/net/netfilter/nf_conntrack_extend.h18
-rw-r--r--include/net/netfilter/nf_conntrack_labels.h3
-rw-r--r--include/net/netfilter/nf_conntrack_seqadj.h3
-rw-r--r--include/net/netfilter/nf_conntrack_timeout.h20
-rw-r--r--include/net/netfilter/nf_conntrack_timestamp.h13
-rw-r--r--include/net/netfilter/nf_tables_core.h9
-rw-r--r--include/net/netns/core.h1
-rw-r--r--include/net/netns/ipv4.h11
-rw-r--r--include/net/netns/ipv6.h6
-rw-r--r--include/net/page_pool.h82
-rw-r--r--include/net/pkt_cls.h11
-rw-r--r--include/net/pkt_sched.h6
-rw-r--r--include/net/request_sock.h2
-rw-r--r--include/net/sch_generic.h5
-rw-r--r--include/net/sock.h28
-rw-r--r--include/net/tcp.h2
-rw-r--r--include/net/udplite.h43
-rw-r--r--include/net/xdp.h108
-rw-r--r--include/net/xdp_sock_drv.h5
-rw-r--r--include/net/xsk_buff_pool.h1
-rw-r--r--include/trace/events/mctp.h5
-rw-r--r--include/trace/events/skb.h11
-rw-r--r--include/uapi/asm-generic/socket.h2
-rw-r--r--include/uapi/linux/bpf.h80
-rw-r--r--include/uapi/linux/ethtool_netlink.h7
-rw-r--r--include/uapi/linux/ioam6_iptunnel.h9
-rw-r--r--include/uapi/linux/mctp.h18
-rw-r--r--include/uapi/linux/net_dropmon.h1
-rw-r--r--include/uapi/linux/netfilter/nfnetlink_queue.h1
-rw-r--r--include/uapi/linux/socket.h4
-rw-r--r--init/Kconfig4
-rw-r--r--init/main.c2
-rw-r--r--kernel/bpf/arraymap.c4
-rw-r--r--kernel/bpf/bpf_iter.c20
-rw-r--r--kernel/bpf/btf.c543
-rw-r--r--kernel/bpf/cgroup.c179
-rw-r--r--kernel/bpf/core.c317
-rw-r--r--kernel/bpf/cpumap.c8
-rw-r--r--kernel/bpf/devmap.c3
-rw-r--r--kernel/bpf/helpers.c34
-rw-r--r--kernel/bpf/preload/Makefile28
-rw-r--r--kernel/bpf/preload/iterators/Makefile6
-rw-r--r--kernel/bpf/preload/iterators/iterators.c28
-rw-r--r--kernel/bpf/preload/iterators/iterators.lskel.h428
-rw-r--r--kernel/bpf/preload/iterators/iterators.skel.h412
-rw-r--r--kernel/bpf/syscall.c24
-rw-r--r--kernel/bpf/trampoline.c6
-rw-r--r--kernel/bpf/verifier.c232
-rw-r--r--kernel/trace/bpf_trace.c5
-rw-r--r--lib/Kconfig.debug12
-rw-r--r--lib/ref_tracker.c19
-rw-r--r--net/6lowpan/core.c1
-rw-r--r--net/ax25/ax25_route.c5
-rw-r--r--net/batman-adv/multicast.c2
-rw-r--r--net/bluetooth/hci_conn.c1
-rw-r--r--net/bluetooth/hci_core.c5
-rw-r--r--net/bluetooth/hci_event.c89
-rw-r--r--net/bluetooth/hci_sync.c7
-rw-r--r--net/bluetooth/mgmt.c117
-rw-r--r--net/bluetooth/msft.c170
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c6
-rw-r--r--net/bpf/test_run.c277
-rw-r--r--net/caif/caif_dev.c2
-rw-r--r--net/can/gw.c9
-rw-r--r--net/core/dev.c48
-rw-r--r--net/core/drop_monitor.c41
-rw-r--r--net/core/filter.c273
-rw-r--r--net/core/gro.c16
-rw-r--r--net/core/link_watch.c6
-rw-r--r--net/core/net_namespace.c20
-rw-r--r--net/core/page_pool.c23
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/sock.c18
-rw-r--r--net/core/sock_map.c77
-rw-r--r--net/core/sysctl_net_core.c14
-rw-r--r--net/core/xdp.c78
-rw-r--r--net/dccp/dccp.h5
-rw-r--r--net/dccp/ipv4.c6
-rw-r--r--net/dccp/ipv6.c6
-rw-r--r--net/dccp/minisocks.c1
-rw-r--r--net/dsa/dsa2.c74
-rw-r--r--net/dsa/dsa_priv.h13
-rw-r--r--net/dsa/slave.c32
-rw-r--r--net/dsa/switch.c55
-rw-r--r--net/dsa/tag_qca.c85
-rw-r--r--net/ethtool/rings.c15
-rw-r--r--net/hsr/hsr_debugfs.c40
-rw-r--r--net/hsr/hsr_device.c10
-rw-r--r--net/hsr/hsr_forward.c7
-rw-r--r--net/hsr/hsr_framereg.c200
-rw-r--r--net/hsr/hsr_framereg.h8
-rw-r--r--net/hsr/hsr_main.h14
-rw-r--r--net/hsr/hsr_netlink.c4
-rw-r--r--net/ieee802154/6lowpan/core.c1
-rw-r--r--net/ieee802154/nl-phy.c4
-rw-r--r--net/ipv4/bpf_tcp_ca.c28
-rw-r--r--net/ipv4/fib_frontend.c30
-rw-r--r--net/ipv4/fib_lookup.h3
-rw-r--r--net/ipv4/fib_rules.c19
-rw-r--r--net/ipv4/fib_semantics.c58
-rw-r--r--net/ipv4/fib_trie.c58
-rw-r--r--net/ipv4/icmp.c91
-rw-r--r--net/ipv4/inet_connection_sock.c8
-rw-r--r--net/ipv4/inet_hashtables.c53
-rw-r--r--net/ipv4/inet_timewait_sock.c75
-rw-r--r--net/ipv4/ip_input.c31
-rw-r--r--net/ipv4/ip_options.c31
-rw-r--r--net/ipv4/ip_output.c6
-rw-r--r--net/ipv4/ipmr.c20
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c24
-rw-r--r--net/ipv4/nexthop.c12
-rw-r--r--net/ipv4/proc.c4
-rw-r--r--net/ipv4/route.c27
-rw-r--r--net/ipv4/sysctl_net_ipv4.c20
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_bbr.c18
-rw-r--r--net/ipv4/tcp_cubic.c17
-rw-r--r--net/ipv4/tcp_dctcp.c18
-rw-r--r--net/ipv4/tcp_input.c8
-rw-r--r--net/ipv4/tcp_ipv4.c75
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/udp.c22
-rw-r--r--net/ipv6/addrconf.c145
-rw-r--r--net/ipv6/exthdrs.c8
-rw-r--r--net/ipv6/fib6_rules.c30
-rw-r--r--net/ipv6/icmp.c62
-rw-r--r--net/ipv6/inet6_hashtables.c5
-rw-r--r--net/ipv6/ioam6_iptunnel.c59
-rw-r--r--net/ipv6/ip6_input.c2
-rw-r--r--net/ipv6/ip6_offload.c5
-rw-r--r--net/ipv6/ip6_output.c99
-rw-r--r--net/ipv6/ip6_tunnel.c8
-rw-r--r--net/ipv6/ip6mr.c35
-rw-r--r--net/ipv6/ping.c14
-rw-r--r--net/ipv6/tcp_ipv6.c9
-rw-r--r--net/ipv6/udp.c103
-rw-r--r--net/mctp/af_mctp.c189
-rw-r--r--net/mctp/device.c1
-rw-r--r--net/mctp/route.c124
-rw-r--r--net/mctp/test/route-test.c157
-rw-r--r--net/mptcp/options.c64
-rw-r--r--net/mptcp/pm_netlink.c50
-rw-r--r--net/netfilter/Makefile5
-rw-r--r--net/netfilter/core.c3
-rw-r--r--net/netfilter/nf_conntrack_acct.c19
-rw-r--r--net/netfilter/nf_conntrack_bpf.c257
-rw-r--r--net/netfilter/nf_conntrack_core.c102
-rw-r--r--net/netfilter/nf_conntrack_ecache.c47
-rw-r--r--net/netfilter/nf_conntrack_extend.c132
-rw-r--r--net/netfilter/nf_conntrack_helper.c17
-rw-r--r--net/netfilter/nf_conntrack_labels.c20
-rw-r--r--net/netfilter/nf_conntrack_netlink.c36
-rw-r--r--net/netfilter/nf_conntrack_pptp.c60
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c4
-rw-r--r--net/netfilter/nf_conntrack_seqadj.c16
-rw-r--r--net/netfilter/nf_conntrack_timeout.c50
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c20
-rw-r--r--net/netfilter/nf_nat_core.c28
-rw-r--r--net/netfilter/nf_synproxy_core.c24
-rw-r--r--net/netfilter/nf_tables_core.c16
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c11
-rw-r--r--net/netfilter/nfnetlink_queue.c13
-rw-r--r--net/netfilter/nft_cmp.c102
-rw-r--r--net/netfilter/nft_compat.c9
-rw-r--r--net/netfilter/nft_exthdr.c96
-rw-r--r--net/openvswitch/datapath.c18
-rw-r--r--net/openvswitch/datapath.h2
-rw-r--r--net/openvswitch/flow.c3
-rw-r--r--net/sched/act_ct.c13
-rw-r--r--net/sched/cls_api.c45
-rw-r--r--net/smc/af_smc.c8
-rw-r--r--net/smc/smc_tx.c59
-rw-r--r--net/smc/smc_tx.h3
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c10
-rw-r--r--net/sunrpc/svc_xprt.c4
-rw-r--r--net/sunrpc/xprt.c4
-rw-r--r--net/switchdev/switchdev.c2
-rw-r--r--net/tipc/msg.h23
-rw-r--r--net/tls/tls_sw.c3
-rw-r--r--net/unix/af_unix.c250
-rw-r--r--net/xdp/xsk.c13
-rw-r--r--net/xdp/xsk_buff_pool.c7
-rw-r--r--net/xdp/xsk_queue.h19
-rw-r--r--samples/bpf/map_perf_test_user.c2
-rw-r--r--samples/bpf/xdp1_user.c24
-rw-r--r--samples/bpf/xdp_adjust_tail_user.c25
-rw-r--r--samples/bpf/xdp_fwd_user.c19
-rw-r--r--samples/bpf/xdp_redirect_cpu.bpf.c8
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c2
-rw-r--r--samples/bpf/xdp_redirect_map.bpf.c2
-rw-r--r--samples/bpf/xdp_redirect_map_multi.bpf.c2
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c27
-rw-r--r--samples/bpf/xdp_rxq_info_user.c34
-rw-r--r--samples/bpf/xdp_sample_pkts_user.c8
-rw-r--r--samples/bpf/xdp_sample_user.c11
-rw-r--r--samples/bpf/xdp_sample_user.h2
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c27
-rw-r--r--samples/bpf/xdpsock_ctrl_proc.c2
-rw-r--r--samples/bpf/xdpsock_user.c10
-rw-r--r--samples/bpf/xsk_fwd.c4
-rwxr-xr-xscripts/bpf_doc.py124
-rwxr-xr-xscripts/pahole-flags.sh2
-rwxr-xr-xscripts/pahole-version.sh13
-rw-r--r--security/device_cgroup.c2
-rw-r--r--tools/bpf/bpftool/btf.c2
-rw-r--r--tools/bpf/bpftool/cgroup.c6
-rw-r--r--tools/bpf/bpftool/common.c44
-rw-r--r--tools/bpf/bpftool/feature.c29
-rw-r--r--tools/bpf/bpftool/gen.c23
-rw-r--r--tools/bpf/bpftool/link.c3
-rw-r--r--tools/bpf/bpftool/main.c6
-rw-r--r--tools/bpf/bpftool/main.h4
-rw-r--r--tools/bpf/bpftool/map.c2
-rw-r--r--tools/bpf/bpftool/net.c2
-rw-r--r--tools/bpf/bpftool/pids.c3
-rw-r--r--tools/bpf/bpftool/prog.c43
-rw-r--r--tools/bpf/bpftool/struct_ops.c4
-rw-r--r--tools/bpf/resolve_btfids/Makefile6
-rw-r--r--tools/include/uapi/linux/bpf.h80
-rw-r--r--tools/lib/bpf/Makefile4
-rw-r--r--tools/lib/bpf/bpf.c9
-rw-r--r--tools/lib/bpf/bpf.h8
-rw-r--r--tools/lib/bpf/bpf_helpers.h2
-rw-r--r--tools/lib/bpf/bpf_tracing.h103
-rw-r--r--tools/lib/bpf/btf.c31
-rw-r--r--tools/lib/bpf/btf.h34
-rw-r--r--tools/lib/bpf/btf_dump.c6
-rw-r--r--tools/lib/bpf/hashmap.c3
-rw-r--r--tools/lib/bpf/libbpf.c70
-rw-r--r--tools/lib/bpf/libbpf.h73
-rw-r--r--tools/lib/bpf/libbpf.map7
-rw-r--r--tools/lib/bpf/libbpf_internal.h3
-rw-r--r--tools/lib/bpf/libbpf_legacy.h22
-rw-r--r--tools/lib/bpf/netlink.c117
-rw-r--r--tools/lib/bpf/skel_internal.h70
-rw-r--r--tools/perf/tests/llvm.c2
-rw-r--r--tools/perf/util/bpf-loader.c74
-rw-r--r--tools/perf/util/bpf_map.c28
-rw-r--r--tools/testing/selftests/bpf/Makefile6
-rw-r--r--tools/testing/selftests/bpf/README.rst2
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c2
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c6
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c46
-rw-r--r--tools/testing/selftests/bpf/config5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/atomics.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bind_perm.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c100
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c230
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_tag.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c481
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_kern.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c27
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_stress.c22
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c33
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_args_test.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data.c26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data_init.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_func_args.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfree_skb.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_module.c27
-rw-r--r--tools/testing/selftests/bpf/prog_tests/l4lb_all.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_buf.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_lock.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_ptr.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/modify_return.c33
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_access.c26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_md_access.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_opts.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c83
-rw-r--r--tools/testing/selftests/bpf/prog_tests/queue_stack_map.c46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/signal_pending.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_ctx.c81
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_helpers.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_fields.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c86
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_sk.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spinlock.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/syscall.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c274
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_pt_regs.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c73
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_profiler.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_ext.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c110
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c251
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_attach.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c141
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c63
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_info.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_link.c26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_noinline.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_perf.c19
-rw-r--r--tools/testing/selftests/bpf/progs/bloom_filter_bench.c7
-rw-r--r--tools/testing/selftests/bpf/progs/bloom_filter_map.c5
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c60
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task.c54
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_unix.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_loop.c9
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_loop_bench.c3
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h19
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_mod_race.c100
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_syscall_macro.c84
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h2
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag_user.c40
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c45
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c52
-rw-r--r--tools/testing/selftests/bpf/progs/core_kern.c16
-rw-r--r--tools/testing/selftests/bpf/progs/core_kern_overflow.c22
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_sleep.c9
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_cls_redirect.c12
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_race.c14
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test.c52
-rw-r--r--tools/testing/selftests/bpf/progs/ksym_race.c13
-rw-r--r--tools/testing/selftests/bpf/progs/perfbuf_bench.c3
-rw-r--r--tools/testing/selftests/bpf/progs/ringbuf_bench.c3
-rw-r--r--tools/testing/selftests/bpf/progs/sample_map_ret0.c24
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_parse_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c35
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf.c118
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_decl_tag.c (renamed from tools/testing/selftests/bpf/progs/btf_decl_tag.c)0
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_haskv.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_newkv.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_nokv.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_probe_user.c15
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields.c41
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_edt.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_update_frags.c42
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c9
-rw-r--r--tools/testing/selftests/bpf/progs/trace_printk.c3
-rw-r--r--tools/testing/selftests/bpf/progs/trace_vprintk.c3
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c9
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c2
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c11
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh170
-rw-r--r--tools/testing/selftests/bpf/test_maps.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_tcp_check_syncookie.sh5
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c48
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_meta.sh38
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect.sh30
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect_multi.sh60
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_veth.sh39
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan.sh66
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c70
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h3
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c75
-rw-r--r--tools/testing/selftests/bpf/verifier/sock.c81
-rw-r--r--tools/testing/selftests/bpf/xdp_redirect_multi.c8
-rw-r--r--tools/testing/selftests/bpf/xdping.c4
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.c85
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.h2
-rw-r--r--tools/testing/selftests/net/.gitignore2
-rw-r--r--tools/testing/selftests/net/Makefile3
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c380
-rw-r--r--tools/testing/selftests/net/cmsg_so_mark.c67
-rwxr-xr-xtools/testing/selftests/net/cmsg_so_mark.sh32
-rwxr-xr-xtools/testing/selftests/net/cmsg_time.sh83
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh86
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh76
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh5
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh5
-rw-r--r--tools/testing/selftests/net/forwarding/fib_offload_lib.sh12
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample2
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_ip.sh201
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh900
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh18
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c81
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh4
-rw-r--r--tools/testing/selftests/net/timestamping.c4
791 files changed, 27288 insertions, 10229 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 4150f74c521a..f86b5e1623c6 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -365,6 +365,15 @@ new netns has been created.
Default : 0 (for compatibility reasons)
+txrehash
+--------
+
+Controls default hash rethink behaviour on listening socket when SO_TXREHASH
+option is set to SOCK_TXREHASH_DEFAULT (i. e. not overridden by setsockopt).
+
+If set to 1 (default), hash rethink is performed on listening socket.
+If set to 0, hash rethink is not performed.
+
2. /proc/sys/net/unix - Parameters for Unix domain sockets
----------------------------------------------------------
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 1ebf4c5c7ddc..7940da9bc6c1 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -503,6 +503,19 @@ valid index (starting from 0) pointing to a member or an argument.
* ``info.vlen``: 0
* ``type``: the type with ``btf_type_tag`` attribute
+Currently, ``BTF_KIND_TYPE_TAG`` is only emitted for pointer types.
+It has the following btf type chain:
+::
+
+ ptr -> [type_tag]*
+ -> [const | volatile | restrict | typedef]*
+ -> base_type
+
+Basically, a pointer type points to zero or more
+type_tag, then zero or more const/volatile/restrict/typedef
+and finally the base type. The base type is one of
+int, ptr, array, struct, union, enum, func_proto and float types.
+
3. BTF Kernel API
=================
@@ -565,18 +578,15 @@ A map can be created with ``btf_fd`` and specified key/value type id.::
In libbpf, the map can be defined with extra annotation like below:
::
- struct bpf_map_def SEC("maps") btf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct ipv_counts),
- .max_entries = 4,
- };
- BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct ipv_counts);
+ __uint(max_entries, 4);
+ } btf_map SEC(".maps");
-Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and
-value types for the map. During ELF parsing, libbpf is able to extract
-key/value type_id's and assign them to BPF_MAP_CREATE attributes
-automatically.
+During ELF parsing, libbpf is able to extract key/value type_id's and assign
+them to BPF_MAP_CREATE attributes automatically.
.. _BPF_Prog_Load:
@@ -824,13 +834,12 @@ structure has bitfields. For example, for the following map,::
___A b1:4;
enum A b2:4;
};
- struct bpf_map_def SEC("maps") tmpmap = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct tmp_t),
- .max_entries = 1,
- };
- BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct tmp_t);
+ __uint(max_entries, 1);
+ } tmpmap SEC(".maps");
bpftool is able to pretty print like below:
::
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index 3704836fe6df..5300837ac2c9 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -22,7 +22,13 @@ necessary across calls.
Instruction encoding
====================
-eBPF uses 64-bit instructions with the following encoding:
+eBPF has two instruction encodings:
+
+ * the basic instruction encoding, which uses 64 bits to encode an instruction
+ * the wide instruction encoding, which appends a second 64-bit immediate value
+ (imm64) after the basic instruction for a total of 128 bits.
+
+The basic instruction encoding looks as follows:
============= ======= =============== ==================== ============
32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB)
@@ -82,9 +88,9 @@ BPF_ALU uses 32-bit wide operands while BPF_ALU64 uses 64-bit wide operands for
otherwise identical operations.
The code field encodes the operation as below:
- ======== ===== ==========================
+ ======== ===== =================================================
code value description
- ======== ===== ==========================
+ ======== ===== =================================================
BPF_ADD 0x00 dst += src
BPF_SUB 0x10 dst -= src
BPF_MUL 0x20 dst \*= src
@@ -98,8 +104,8 @@ The code field encodes the operation as below:
BPF_XOR 0xa0 dst ^= src
BPF_MOV 0xb0 dst = src
BPF_ARSH 0xc0 sign extending shift right
- BPF_END 0xd0 endianness conversion
- ======== ===== ==========================
+ BPF_END 0xd0 byte swap operations (see separate section below)
+ ======== ===== =================================================
BPF_ADD | BPF_X | BPF_ALU means::
@@ -118,6 +124,42 @@ BPF_XOR | BPF_K | BPF_ALU64 means::
src_reg = src_reg ^ imm32
+Byte swap instructions
+----------------------
+
+The byte swap instructions use an instruction class of ``BFP_ALU`` and a 4-bit
+code field of ``BPF_END``.
+
+The byte swap instructions instructions operate on the destination register
+only and do not use a separate source register or immediate value.
+
+The 1-bit source operand field in the opcode is used to to select what byte
+order the operation convert from or to:
+
+ ========= ===== =================================================
+ source value description
+ ========= ===== =================================================
+ BPF_TO_LE 0x00 convert between host byte order and little endian
+ BPF_TO_BE 0x08 convert between host byte order and big endian
+ ========= ===== =================================================
+
+The imm field encodes the width of the swap operations. The following widths
+are supported: 16, 32 and 64.
+
+Examples:
+
+``BPF_ALU | BPF_TO_LE | BPF_END`` with imm = 16 means::
+
+ dst_reg = htole16(dst_reg)
+
+``BPF_ALU | BPF_TO_BE | BPF_END`` with imm = 64 means::
+
+ dst_reg = htobe64(dst_reg)
+
+``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and
+``BPF_TO_LE`` respetively.
+
+
Jump instructions
-----------------
@@ -176,63 +218,96 @@ The mode modifier is one of:
============= ===== ====================================
mode modifier value description
============= ===== ====================================
- BPF_IMM 0x00 used for 64-bit mov
- BPF_ABS 0x20 legacy BPF packet access
- BPF_IND 0x40 legacy BPF packet access
- BPF_MEM 0x60 all normal load and store operations
+ BPF_IMM 0x00 64-bit immediate instructions
+ BPF_ABS 0x20 legacy BPF packet access (absolute)
+ BPF_IND 0x40 legacy BPF packet access (indirect)
+ BPF_MEM 0x60 regular load and store operations
BPF_ATOMIC 0xc0 atomic operations
============= ===== ====================================
-BPF_MEM | <size> | BPF_STX means::
+
+Regular load and store operations
+---------------------------------
+
+The ``BPF_MEM`` mode modifier is used to encode regular load and store
+instructions that transfer data between a register and memory.
+
+``BPF_MEM | <size> | BPF_STX`` means::
*(size *) (dst_reg + off) = src_reg
-BPF_MEM | <size> | BPF_ST means::
+``BPF_MEM | <size> | BPF_ST`` means::
*(size *) (dst_reg + off) = imm32
-BPF_MEM | <size> | BPF_LDX means::
+``BPF_MEM | <size> | BPF_LDX`` means::
dst_reg = *(size *) (src_reg + off)
-Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.
+Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW``.
Atomic operations
-----------------
-eBPF includes atomic operations, which use the immediate field for extra
-encoding::
+Atomic operations are operations that operate on memory and can not be
+interrupted or corrupted by other access to the same memory region
+by other eBPF programs or means outside of this specification.
+
+All atomic operations supported by eBPF are encoded as store operations
+that use the ``BPF_ATOMIC`` mode modifier as follows:
+
+ * ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations
+ * ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations
+ * 8-bit and 16-bit wide atomic operations are not supported.
- .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
- .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
+The imm field is used to encode the actual atomic operation.
+Simple atomic operation use a subset of the values defined to encode
+arithmetic operations in the imm field to encode the atomic operation:
-The basic atomic operations supported are::
+ ======== ===== ===========
+ imm value description
+ ======== ===== ===========
+ BPF_ADD 0x00 atomic add
+ BPF_OR 0x40 atomic or
+ BPF_AND 0x50 atomic and
+ BPF_XOR 0xa0 atomic xor
+ ======== ===== ===========
- BPF_ADD
- BPF_AND
- BPF_OR
- BPF_XOR
-Each having equivalent semantics with the ``BPF_ADD`` example, that is: the
-memory location addresed by ``dst_reg + off`` is atomically modified, with
-``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the
-immediate, then these operations also overwrite ``src_reg`` with the
-value that was in memory before it was modified.
+``BPF_ATOMIC | BPF_W | BPF_STX`` with imm = BPF_ADD means::
-The more special operations are::
+ *(u32 *)(dst_reg + off16) += src_reg
- BPF_XCHG
+``BPF_ATOMIC | BPF_DW | BPF_STX`` with imm = BPF ADD means::
-This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
-off``. ::
+ *(u64 *)(dst_reg + off16) += src_reg
- BPF_CMPXCHG
+``BPF_XADD`` is a deprecated name for ``BPF_ATOMIC | BPF_ADD``.
-This atomically compares the value addressed by ``dst_reg + off`` with
-``R0``. If they match it is replaced with ``src_reg``. In either case, the
-value that was there before is zero-extended and loaded back to ``R0``.
+In addition to the simple atomic operations, there also is a modifier and
+two complex atomic operations:
-Note that 1 and 2 byte atomic operations are not supported.
+ =========== ================ ===========================
+ imm value description
+ =========== ================ ===========================
+ BPF_FETCH 0x01 modifier: return old value
+ BPF_XCHG 0xe0 | BPF_FETCH atomic exchange
+ BPF_CMPXCHG 0xf0 | BPF_FETCH atomic compare and exchange
+ =========== ================ ===========================
+
+The ``BPF_FETCH`` modifier is optional for simple atomic operations, and
+always set for the complex atomic operations. If the ``BPF_FETCH`` flag
+is set, then the operation also overwrites ``src_reg`` with the value that
+was in memory before it was modified.
+
+The ``BPF_XCHG`` operation atomically exchanges ``src_reg`` with the value
+addressed by ``dst_reg + off``.
+
+The ``BPF_CMPXCHG`` operation atomically compares the value addressed by
+``dst_reg + off`` with ``R0``. If they match, the value addressed by
+``dst_reg + off`` is replaced with ``src_reg``. In either case, the
+value that was at ``dst_reg + off`` before the operation is zero-extended
+and loaded back to ``R0``.
Clang can generate atomic instructions by default when ``-mcpu=v3`` is
enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction
@@ -240,40 +315,52 @@ Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to enable
the atomics features, while keeping a lower ``-mcpu`` version, you can use
``-Xclang -target-feature -Xclang +alu32``.
-You may encounter ``BPF_XADD`` - this is a legacy name for ``BPF_ATOMIC``,
-referring to the exclusive-add operation encoded when the immediate field is
-zero.
+64-bit immediate instructions
+-----------------------------
-16-byte instructions
---------------------
+Instructions with the ``BPF_IMM`` mode modifier use the wide instruction
+encoding for an extra imm64 value.
-eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists
-of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single
-instruction that loads 64-bit immediate value into a dst_reg.
+There is currently only one such instruction.
-Packet access instructions
---------------------------
+``BPF_LD | BPF_DW | BPF_IMM`` means::
-eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
-(BPF_IND | <size> | BPF_LD) which are used to access packet data.
+ dst_reg = imm64
-They had to be carried over from classic BPF to have strong performance of
-socket filters running in eBPF interpreter. These instructions can only
-be used when interpreter context is a pointer to ``struct sk_buff`` and
-have seven implicit operands. Register R6 is an implicit input that must
-contain pointer to sk_buff. Register R0 is an implicit output which contains
-the data fetched from the packet. Registers R1-R5 are scratch registers
-and must not be used to store the data across BPF_ABS | BPF_LD or
-BPF_IND | BPF_LD instructions.
-These instructions have implicit program exit condition as well. When
-eBPF program is trying to access the data beyond the packet boundary,
-the interpreter will abort the execution of the program. JIT compilers
-therefore must preserve this property. src_reg and imm32 fields are
-explicit inputs to these instructions.
+Legacy BPF Packet access instructions
+-------------------------------------
-For example, BPF_IND | BPF_W | BPF_LD means::
+eBPF has special instructions for access to packet data that have been
+carried over from classic BPF to retain the performance of legacy socket
+filters running in the eBPF interpreter.
- R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
+The instructions come in two forms: ``BPF_ABS | <size> | BPF_LD`` and
+``BPF_IND | <size> | BPF_LD``.
-and R1 - R5 are clobbered.
+These instructions are used to access packet data and can only be used when
+the program context is a pointer to networking packet. ``BPF_ABS``
+accesses packet data at an absolute offset specified by the immediate data
+and ``BPF_IND`` access packet data at an offset that includes the value of
+a register in addition to the immediate data.
+
+These instructions have seven implicit operands:
+
+ * Register R6 is an implicit input that must contain pointer to a
+ struct sk_buff.
+ * Register R0 is an implicit output which contains the data fetched from
+ the packet.
+ * Registers R1-R5 are scratch registers that are clobbered after a call to
+ ``BPF_ABS | BPF_LD`` or ``BPF_IND`` | BPF_LD instructions.
+
+These instructions have an implicit program exit condition as well. When an
+eBPF program is trying to access the data beyond the packet boundary, the
+program execution will be aborted.
+
+``BPF_ABS | BPF_W | BPF_LD`` means::
+
+ R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + imm32))
+
+``BPF_IND | BPF_W | BPF_LD`` means::
+
+ R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
diff --git a/Documentation/devicetree/bindings/net/cdns,macb.yaml b/Documentation/devicetree/bindings/net/cdns,macb.yaml
index 8dd06db34169..6cd3d853dcba 100644
--- a/Documentation/devicetree/bindings/net/cdns,macb.yaml
+++ b/Documentation/devicetree/bindings/net/cdns,macb.yaml
@@ -81,6 +81,25 @@ properties:
phy-handle: true
+ phys:
+ maxItems: 1
+
+ phy-names:
+ const: sgmii-phy
+ description:
+ Required with ZynqMP SoC when in SGMII mode.
+ Should reference PS-GTR generic PHY device for this controller
+ instance. See ZynqMP example.
+
+ resets:
+ maxItems: 1
+ description:
+ Recommended with ZynqMP, specify reset control for this
+ controller instance with zynqmp-reset driver.
+
+ reset-names:
+ maxItems: 1
+
fixed-link: true
iommus:
@@ -157,3 +176,40 @@ examples:
reset-gpios = <&pioE 6 1>;
};
};
+
+ - |
+ #include <dt-bindings/clock/xlnx-zynqmp-clk.h>
+ #include <dt-bindings/power/xlnx-zynqmp-power.h>
+ #include <dt-bindings/reset/xlnx-zynqmp-resets.h>
+ #include <dt-bindings/phy/phy.h>
+
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ gem1: ethernet@ff0c0000 {
+ compatible = "cdns,zynqmp-gem", "cdns,gem";
+ interrupt-parent = <&gic>;
+ interrupts = <0 59 4>, <0 59 4>;
+ reg = <0x0 0xff0c0000 0x0 0x1000>;
+ clocks = <&zynqmp_clk LPD_LSBUS>, <&zynqmp_clk GEM1_REF>,
+ <&zynqmp_clk GEM1_TX>, <&zynqmp_clk GEM1_RX>,
+ <&zynqmp_clk GEM_TSU>;
+ clock-names = "pclk", "hclk", "tx_clk", "rx_clk", "tsu_clk";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ #stream-id-cells = <1>;
+ iommus = <&smmu 0x875>;
+ power-domains = <&zynqmp_firmware PD_ETH_1>;
+ resets = <&zynqmp_reset ZYNQMP_RESET_GEM1>;
+ reset-names = "gem1_rst";
+ status = "okay";
+ phy-mode = "sgmii";
+ phy-names = "sgmii-phy";
+ phys = <&psgtr 1 PHY_TYPE_SGMII 1 1>;
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
index 84985f53bffd..184152087b60 100644
--- a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
@@ -42,6 +42,12 @@ properties:
description:
Set if the output SYNCLKO frequency should be set to 125MHz instead of 25MHz.
+ microchip,synclko-disable:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Set if the output SYNCLKO clock should be disabled. Do not mix with
+ microchip,synclko-125.
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt
index 020337f3c05f..801efc7d6818 100644
--- a/Documentation/devicetree/bindings/net/fsl-fman.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fman.txt
@@ -388,14 +388,24 @@ PROPERTIES
Value type: <prop-encoded-array>
Definition: A standard property.
-- bus-frequency
+- clocks
+ Usage: optional
+ Value type: <phandle>
+ Definition: A reference to the input clock of the controller
+ from which the MDC frequency is derived.
+
+- clock-frequency
Usage: optional
Value type: <u32>
- Definition: Specifies the external MDIO bus clock speed to
- be used, if different from the standard 2.5 MHz.
- This may be due to the standard speed being unsupported (e.g.
- due to a hardware problem), or to advertise that all relevant
- components in the system support a faster speed.
+ Definition: Specifies the external MDC frequency, in Hertz, to
+ be used. Requires that the input clock is specified in the
+ "clocks" property. See also: mdio.yaml.
+
+- suppress-preamble
+ Usage: optional
+ Value type: <boolean>
+ Definition: Disable generation of preamble bits. See also:
+ mdio.yaml.
- interrupts
Usage: required for external MDIO
diff --git a/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml b/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml
index e79e4e166ad8..13812768b923 100644
--- a/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml
+++ b/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml
@@ -38,6 +38,7 @@ properties:
- description: register based extraction
- description: frame dma based extraction
- description: analyzer interrupt
+ - description: ptp interrupt
interrupt-names:
minItems: 1
@@ -45,6 +46,7 @@ properties:
- const: xtr
- const: fdma
- const: ana
+ - const: ptp
resets:
items:
diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
index bda821065a2b..ee2ccacc39ff 100644
--- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
@@ -45,8 +45,10 @@ properties:
- items:
- enum:
+ - renesas,r9a07g043-gbeth # RZ/G2UL
- renesas,r9a07g044-gbeth # RZ/G2{L,LC}
- - const: renesas,rzg2l-gbeth # RZ/G2L
+ - renesas,r9a07g054-gbeth # RZ/V2L
+ - const: renesas,rzg2l-gbeth # RZ/{G2L,G2UL,V2L} family
reg: true
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 9d98e0511249..cae28af7a476 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -860,8 +860,16 @@ Kernel response contents:
``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring
``ETHTOOL_A_RINGS_TX`` u32 size of TX ring
``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring
+ ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` u8 TCP header / data split
==================================== ====== ===========================
+``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with
+page-flipping TCP zero-copy receive (``getsockopt(TCP_ZEROCOPY_RECEIVE)``).
+If enabled the device is configured to place frame headers and data into
+separate buffers. The device configuration must make it possible to receive
+full memory pages of data, for example because MTU is high enough or through
+HW-GRO.
+
RINGS_SET
=========
diff --git a/Documentation/networking/mctp.rst b/Documentation/networking/mctp.rst
index 46f74bffce0f..c628cb5406d2 100644
--- a/Documentation/networking/mctp.rst
+++ b/Documentation/networking/mctp.rst
@@ -212,6 +212,54 @@ remote address is already known, or the message does not require a reply.
Like the send calls, sockets will only receive responses to requests they have
sent (TO=1) and may only respond (TO=0) to requests they have received.
+``ioctl(SIOCMCTPALLOCTAG)`` and ``ioctl(SIOCMCTPDROPTAG)``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+These tags give applications more control over MCTP message tags, by allocating
+(and dropping) tag values explicitly, rather than the kernel automatically
+allocating a per-message tag at ``sendmsg()`` time.
+
+In general, you will only need to use these ioctls if your MCTP protocol does
+not fit the usual request/response model. For example, if you need to persist
+tags across multiple requests, or a request may generate more than one response.
+In these cases, the ioctls allow you to decouple the tag allocation (and
+release) from individual message send and receive operations.
+
+Both ioctls are passed a pointer to a ``struct mctp_ioc_tag_ctl``:
+
+.. code-block:: C
+
+ struct mctp_ioc_tag_ctl {
+ mctp_eid_t peer_addr;
+ __u8 tag;
+ __u16 flags;
+ };
+
+``SIOCMCTPALLOCTAG`` allocates a tag for a specific peer, which an application
+can use in future ``sendmsg()`` calls. The application populates the
+``peer_addr`` member with the remote EID. Other fields must be zero.
+
+On return, the ``tag`` member will be populated with the allocated tag value.
+The allocated tag will have the following tag bits set:
+
+ - ``MCTP_TAG_OWNER``: it only makes sense to allocate tags if you're the tag
+ owner
+
+ - ``MCTP_TAG_PREALLOC``: to indicate to ``sendmsg()`` that this is a
+ preallocated tag.
+
+ - ... and the actual tag value, within the least-significant three bits
+ (``MCTP_TAG_MASK``). Note that zero is a valid tag value.
+
+The tag value should be used as-is for the ``smctp_tag`` member of ``struct
+sockaddr_mctp``.
+
+``SIOCMCTPDROPTAG`` releases a tag that has been previously allocated by a
+``SIOCMCTPALLOCTAG`` ioctl. The ``peer_addr`` must be the same as used for the
+allocation, and the ``tag`` value must match exactly the tag returned from the
+allocation (including the ``MCTP_TAG_OWNER`` and ``MCTP_TAG_PREALLOC`` bits).
+The ``flags`` field must be zero.
+
Kernel internals
================
diff --git a/MAINTAINERS b/MAINTAINERS
index 40ff05f05d35..25eb3097baae 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3523,6 +3523,8 @@ F: net/sched/act_bpf.c
F: net/sched/cls_bpf.c
F: samples/bpf/
F: scripts/bpf_doc.py
+F: scripts/pahole-flags.sh
+F: scripts/pahole-version.sh
F: tools/bpf/
F: tools/lib/bpf/
F: tools/testing/selftests/bpf/
@@ -16357,8 +16359,7 @@ REALTEK RTL83xx SMI DSA ROUTER CHIPS
M: Linus Walleij <linus.walleij@linaro.org>
S: Maintained
F: Documentation/devicetree/bindings/net/dsa/realtek-smi.txt
-F: drivers/net/dsa/realtek-smi*
-F: drivers/net/dsa/rtl83*
+F: drivers/net/dsa/realtek/*
REALTEK WIRELESS DRIVER (rtlwifi family)
M: Ping-Ke Shih <pkshih@realtek.com>
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 284d28755b8d..7d81535893af 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -133,6 +133,8 @@
#define SO_RESERVE_MEM 73
+#define SO_TXREHASH 74
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
index 74e66443e4ce..9bec3ba20c69 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
+++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
@@ -512,6 +512,8 @@
#stream-id-cells = <1>;
iommus = <&smmu 0x874>;
power-domains = <&zynqmp_firmware PD_ETH_0>;
+ resets = <&zynqmp_reset ZYNQMP_RESET_GEM0>;
+ reset-names = "gem0_rst";
};
gem1: ethernet@ff0c0000 {
@@ -526,6 +528,8 @@
#stream-id-cells = <1>;
iommus = <&smmu 0x875>;
power-domains = <&zynqmp_firmware PD_ETH_1>;
+ resets = <&zynqmp_reset ZYNQMP_RESET_GEM1>;
+ reset-names = "gem1_rst";
};
gem2: ethernet@ff0d0000 {
@@ -540,6 +544,8 @@
#stream-id-cells = <1>;
iommus = <&smmu 0x876>;
power-domains = <&zynqmp_firmware PD_ETH_2>;
+ resets = <&zynqmp_reset ZYNQMP_RESET_GEM2>;
+ reset-names = "gem2_rst";
};
gem3: ethernet@ff0e0000 {
@@ -554,6 +560,8 @@
#stream-id-cells = <1>;
iommus = <&smmu 0x877>;
power-domains = <&zynqmp_firmware PD_ETH_3>;
+ resets = <&zynqmp_reset ZYNQMP_RESET_GEM3>;
+ reset-names = "gem3_rst";
};
gpio: gpio@ff0a0000 {
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index e96d4d87291f..74f9a9b6a053 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1143,6 +1143,11 @@ out:
return prog;
}
+bool bpf_jit_supports_kfunc_call(void)
+{
+ return true;
+}
+
u64 bpf_jit_alloc_exec_limit(void)
{
return VMALLOC_END - VMALLOC_START;
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 24e0efb360f6..1d55e57b8466 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -144,6 +144,8 @@
#define SO_RESERVE_MEM 73
+#define SO_TXREHASH 74
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 845ddc63c882..654061e0964e 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -125,6 +125,8 @@
#define SO_RESERVE_MEM 0x4047
+#define SO_TXREHASH 0x4048
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 56dd1f4e3e44..a4f4d347e6bd 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -264,7 +264,7 @@ skip_codegen_passes:
fp->jited = 1;
fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
- bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+ bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size);
if (!fp->is_func || extra_pass) {
bpf_jit_binary_lock_ro(bpf_hdr);
bpf_prog_fill_jited_linfo(fp, addrs);
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 2672dd03faf3..666f81e617ea 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -126,6 +126,8 @@
#define SO_RESERVE_MEM 0x0052
+#define SO_TXREHASH 0x0053
+
#if !defined(__KERNEL__)
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index b1e38784eb23..fa0759bfe498 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1599,7 +1599,7 @@ skip_init_ctx:
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, image_size, pass, ctx.image);
- bpf_flush_icache(header, (u8 *)header + (header->pages * PAGE_SIZE));
+ bpf_flush_icache(header, (u8 *)header + header->size);
if (!prog->is_func || extra_pass) {
bpf_jit_binary_lock_ro(header);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9f5bd41bf660..995f2dc28631 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -158,6 +158,7 @@ config X86
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
+ select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if X86_64
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index b7421780e4e9..4cc18ba1b75e 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -44,6 +44,7 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
+extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5007c3ffe96f..018b61febf0e 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1102,6 +1102,40 @@ void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
return __text_poke(addr, opcode, len);
}
+/**
+ * text_poke_copy - Copy instructions into (an unused part of) RX memory
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy, could be more than 2x PAGE_SIZE
+ *
+ * Not safe against concurrent execution; useful for JITs to dump
+ * new code blocks into unused regions of RX memory. Can be used in
+ * conjunction with synchronize_rcu_tasks() to wait for existing
+ * execution to quiesce after having made sure no existing functions
+ * pointers are live.
+ */
+void *text_poke_copy(void *addr, const void *opcode, size_t len)
+{
+ unsigned long start = (unsigned long)addr;
+ size_t patched = 0;
+
+ if (WARN_ON_ONCE(core_kernel_text(start)))
+ return NULL;
+
+ mutex_lock(&text_mutex);
+ while (patched < len) {
+ unsigned long ptr = start + patched;
+ size_t s;
+
+ s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);
+
+ __text_poke((void *)ptr, opcode + patched, s);
+ patched += s;
+ }
+ mutex_unlock(&text_mutex);
+ return addr;
+}
+
static void do_sync_core(void *info)
{
sync_core();
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 2b1e266ff95c..c7db0fe4de2f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -330,8 +330,7 @@ static int emit_jump(u8 **pprog, void *func, void *ip)
}
static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
- void *old_addr, void *new_addr,
- const bool text_live)
+ void *old_addr, void *new_addr)
{
const u8 *nop_insn = x86_nops[5];
u8 old_insn[X86_PATCH_SIZE];
@@ -365,10 +364,7 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
goto out;
ret = 1;
if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
- if (text_live)
- text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
- else
- memcpy(ip, new_insn, X86_PATCH_SIZE);
+ text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
ret = 0;
}
out:
@@ -384,7 +380,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
/* BPF poking in modules is not supported */
return -EINVAL;
- return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
+ return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
}
#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
@@ -558,24 +554,15 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
mutex_lock(&array->aux->poke_mutex);
target = array->ptrs[poke->tail_call.key];
if (target) {
- /* Plain memcpy is used when image is not live yet
- * and still not locked as read-only. Once poke
- * location is active (poke->tailcall_target_stable),
- * any parallel bpf_arch_text_poke() might occur
- * still on the read-write image until we finally
- * locked it as read-only. Both modifications on
- * the given image are under text_mutex to avoid
- * interference.
- */
ret = __bpf_arch_text_poke(poke->tailcall_target,
BPF_MOD_JUMP, NULL,
(u8 *)target->bpf_func +
- poke->adj_off, false);
+ poke->adj_off);
BUG_ON(ret < 0);
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
BPF_MOD_JUMP,
(u8 *)poke->tailcall_target +
- X86_PATCH_SIZE, NULL, false);
+ X86_PATCH_SIZE, NULL);
BUG_ON(ret < 0);
}
WRITE_ONCE(poke->tailcall_target_stable, true);
@@ -787,7 +774,6 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* emit opcode */
switch (atomic_op) {
case BPF_ADD:
- case BPF_SUB:
case BPF_AND:
case BPF_OR:
case BPF_XOR:
@@ -867,7 +853,7 @@ static void emit_nops(u8 **pprog, int len)
#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
-static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
int oldproglen, struct jit_context *ctx, bool jmp_padding)
{
bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
@@ -894,8 +880,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
push_callee_regs(&prog, callee_regs_used);
ilen = prog - temp;
- if (image)
- memcpy(image + proglen, temp, ilen);
+ if (rw_image)
+ memcpy(rw_image + proglen, temp, ilen);
proglen += ilen;
addrs[0] = proglen;
prog = temp;
@@ -1324,6 +1310,9 @@ st: if (is_imm8(insn->off))
pr_err("extable->insn doesn't fit into 32-bit\n");
return -EFAULT;
}
+ /* switch ex to rw buffer for writes */
+ ex = (void *)rw_image + ((void *)ex - (void *)image);
+
ex->insn = delta;
ex->data = EX_TYPE_BPF;
@@ -1706,7 +1695,7 @@ emit_jmp:
pr_err("bpf_jit: fatal error\n");
return -EFAULT;
}
- memcpy(image + proglen, temp, ilen);
+ memcpy(rw_image + proglen, temp, ilen);
}
proglen += ilen;
addrs[i] = proglen;
@@ -2247,6 +2236,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
}
struct x64_jit_data {
+ struct bpf_binary_header *rw_header;
struct bpf_binary_header *header;
int *addrs;
u8 *image;
@@ -2259,6 +2249,7 @@ struct x64_jit_data {
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
+ struct bpf_binary_header *rw_header = NULL;
struct bpf_binary_header *header = NULL;
struct bpf_prog *tmp, *orig_prog = prog;
struct x64_jit_data *jit_data;
@@ -2267,6 +2258,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bool tmp_blinded = false;
bool extra_pass = false;
bool padding = false;
+ u8 *rw_image = NULL;
u8 *image = NULL;
int *addrs;
int pass;
@@ -2302,6 +2294,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
oldproglen = jit_data->proglen;
image = jit_data->image;
header = jit_data->header;
+ rw_header = jit_data->rw_header;
+ rw_image = (void *)rw_header + ((void *)image - (void *)header);
extra_pass = true;
padding = true;
goto skip_init_addrs;
@@ -2332,12 +2326,12 @@ skip_init_addrs:
for (pass = 0; pass < MAX_PASSES || image; pass++) {
if (!padding && pass >= PADDING_PASSES)
padding = true;
- proglen = do_jit(prog, addrs, image, oldproglen, &ctx, padding);
+ proglen = do_jit(prog, addrs, image, rw_image, oldproglen, &ctx, padding);
if (proglen <= 0) {
out_image:
image = NULL;
if (header)
- bpf_jit_binary_free(header);
+ bpf_jit_binary_pack_free(header, rw_header);
prog = orig_prog;
goto out_addrs;
}
@@ -2361,8 +2355,9 @@ out_image:
sizeof(struct exception_table_entry);
/* allocate module memory for x86 insns and extable */
- header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size,
- &image, align, jit_fill_hole);
+ header = bpf_jit_binary_pack_alloc(roundup(proglen, align) + extable_size,
+ &image, align, &rw_header, &rw_image,
+ jit_fill_hole);
if (!header) {
prog = orig_prog;
goto out_addrs;
@@ -2378,14 +2373,26 @@ out_image:
if (image) {
if (!prog->is_func || extra_pass) {
+ /*
+ * bpf_jit_binary_pack_finalize fails in two scenarios:
+ * 1) header is not pointing to proper module memory;
+ * 2) the arch doesn't support bpf_arch_text_copy().
+ *
+ * Both cases are serious bugs and justify WARN_ON.
+ */
+ if (WARN_ON(bpf_jit_binary_pack_finalize(prog, header, rw_header))) {
+ prog = orig_prog;
+ goto out_addrs;
+ }
+
bpf_tail_call_direct_fixup(prog);
- bpf_jit_binary_lock_ro(header);
} else {
jit_data->addrs = addrs;
jit_data->ctx = ctx;
jit_data->proglen = proglen;
jit_data->image = image;
jit_data->header = header;
+ jit_data->rw_header = rw_header;
}
prog->bpf_func = (void *)image;
prog->jited = 1;
@@ -2413,3 +2420,10 @@ bool bpf_jit_supports_kfunc_call(void)
{
return true;
}
+
+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+ if (text_poke_copy(dst, src, len) == NULL)
+ return ERR_PTR(-EINVAL);
+ return dst;
+}
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index 1a4f8b227eac..06514ed66022 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -2428,10 +2428,15 @@ static int btintel_setup_combined(struct hci_dev *hdev)
/* Apply the device specific HCI quirks
*
- * WBS for SdP - SdP and Stp have a same hw_varaint but
- * different fw_variant
+ * WBS for SdP - For the Legacy ROM products, only SdP
+ * supports the WBS. But the version information is not
+ * enough to use here because the StP2 and SdP have same
+ * hw_variant and fw_variant. So, this flag is set by
+ * the transport driver (btusb) based on the HW info
+ * (idProduct)
*/
- if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22)
+ if (!btintel_test_flag(hdev,
+ INTEL_ROM_LEGACY_NO_WBS_SUPPORT))
set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
&hdev->quirks);
diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h
index c9b24e9299e2..e0060e58573c 100644
--- a/drivers/bluetooth/btintel.h
+++ b/drivers/bluetooth/btintel.h
@@ -152,6 +152,7 @@ enum {
INTEL_BROKEN_INITIAL_NCMD,
INTEL_BROKEN_SHUTDOWN_LED,
INTEL_ROM_LEGACY,
+ INTEL_ROM_LEGACY_NO_WBS_SUPPORT,
__INTEL_NUM_FLAGS,
};
diff --git a/drivers/bluetooth/btmrvl_debugfs.c b/drivers/bluetooth/btmrvl_debugfs.c
index c4867576be00..db35b917aecf 100644
--- a/drivers/bluetooth/btmrvl_debugfs.c
+++ b/drivers/bluetooth/btmrvl_debugfs.c
@@ -1,4 +1,4 @@
-/**
+/*
* Marvell Bluetooth driver: debugfs related functions
*
* Copyright (C) 2009, Marvell International Ltd.
diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index 68378b42ea7f..b8ef66f89fc1 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -1,4 +1,4 @@
-/**
+/*
* Marvell BT-over-SDIO driver: SDIO interface related functions.
*
* Copyright (C) 2009, Marvell International Ltd.
diff --git a/drivers/bluetooth/btmtk.h b/drivers/bluetooth/btmtk.h
index 6e7b0c7567c0..fb76d9765ce0 100644
--- a/drivers/bluetooth/btmtk.h
+++ b/drivers/bluetooth/btmtk.h
@@ -7,8 +7,12 @@
#define HCI_WMT_MAX_EVENT_SIZE 64
+#define BTMTK_WMT_REG_WRITE 0x1
#define BTMTK_WMT_REG_READ 0x2
+#define MT7921_PINMUX_0 0x70005050
+#define MT7921_PINMUX_1 0x70005054
+
enum {
BTMTK_WMT_PATCH_DWNLD = 0x1,
BTMTK_WMT_TEST = 0x2,
@@ -68,6 +72,37 @@ struct btmtk_tci_sleep {
u8 time_compensation;
} __packed;
+struct btmtk_wakeon {
+ u8 mode;
+ u8 gpo;
+ u8 active_high;
+ __le16 enable_delay;
+ __le16 wakeup_delay;
+} __packed;
+
+struct btmtk_sco {
+ u8 clock_config;
+ u8 transmit_format_config;
+ u8 channel_format_config;
+ u8 channel_select_config;
+} __packed;
+
+struct reg_read_cmd {
+ u8 type;
+ u8 rsv;
+ u8 num;
+ __le32 addr;
+} __packed;
+
+struct reg_write_cmd {
+ u8 type;
+ u8 rsv;
+ u8 num;
+ __le32 addr;
+ __le32 data;
+ __le32 mask;
+} __packed;
+
struct btmtk_hci_wmt_params {
u8 op;
u8 flag;
diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c
index b5ea8d3bffaa..8be763ab3bf4 100644
--- a/drivers/bluetooth/btmtksdio.c
+++ b/drivers/bluetooth/btmtksdio.c
@@ -31,28 +31,32 @@
#define VERSION "0.1"
-#define MTKBTSDIO_AUTOSUSPEND_DELAY 8000
+#define MTKBTSDIO_AUTOSUSPEND_DELAY 1000
-static bool enable_autosuspend;
+static bool enable_autosuspend = true;
struct btmtksdio_data {
const char *fwname;
u16 chipid;
+ bool lp_mbox_supported;
};
static const struct btmtksdio_data mt7663_data = {
.fwname = FIRMWARE_MT7663,
.chipid = 0x7663,
+ .lp_mbox_supported = false,
};
static const struct btmtksdio_data mt7668_data = {
.fwname = FIRMWARE_MT7668,
.chipid = 0x7668,
+ .lp_mbox_supported = false,
};
static const struct btmtksdio_data mt7921_data = {
.fwname = FIRMWARE_MT7961,
.chipid = 0x7921,
+ .lp_mbox_supported = true,
};
static const struct sdio_device_id btmtksdio_table[] = {
@@ -87,8 +91,17 @@ MODULE_DEVICE_TABLE(sdio, btmtksdio_table);
#define RX_DONE_INT BIT(1)
#define TX_EMPTY BIT(2)
#define TX_FIFO_OVERFLOW BIT(8)
+#define FW_MAILBOX_INT BIT(15)
+#define INT_MASK GENMASK(15, 0)
#define RX_PKT_LEN GENMASK(31, 16)
+#define MTK_REG_CSICR 0xc0
+#define CSICR_CLR_MBOX_ACK BIT(0)
+#define MTK_REG_PH2DSM0R 0xc4
+#define PH2DSM0R_DRIVER_OWN BIT(0)
+#define MTK_REG_PD2HRM0R 0xdc
+#define PD2HRM0R_DRV_OWN BIT(0)
+
#define MTK_REG_CTDR 0x18
#define MTK_REG_CRDR 0x1c
@@ -100,6 +113,7 @@ MODULE_DEVICE_TABLE(sdio, btmtksdio_table);
#define BTMTKSDIO_TX_WAIT_VND_EVT 1
#define BTMTKSDIO_HW_TX_READY 2
#define BTMTKSDIO_FUNC_ENABLED 3
+#define BTMTKSDIO_PATCH_ENABLED 4
struct mtkbtsdio_hdr {
__le16 len;
@@ -278,6 +292,78 @@ static u32 btmtksdio_drv_own_query(struct btmtksdio_dev *bdev)
return sdio_readl(bdev->func, MTK_REG_CHLPCR, NULL);
}
+static u32 btmtksdio_drv_own_query_79xx(struct btmtksdio_dev *bdev)
+{
+ return sdio_readl(bdev->func, MTK_REG_PD2HRM0R, NULL);
+}
+
+static int btmtksdio_fw_pmctrl(struct btmtksdio_dev *bdev)
+{
+ u32 status;
+ int err;
+
+ sdio_claim_host(bdev->func);
+
+ if (bdev->data->lp_mbox_supported &&
+ test_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state)) {
+ sdio_writel(bdev->func, CSICR_CLR_MBOX_ACK, MTK_REG_CSICR,
+ &err);
+ err = readx_poll_timeout(btmtksdio_drv_own_query_79xx, bdev,
+ status, !(status & PD2HRM0R_DRV_OWN),
+ 2000, 1000000);
+ if (err < 0) {
+ bt_dev_err(bdev->hdev, "mailbox ACK not cleared");
+ goto out;
+ }
+ }
+
+ /* Return ownership to the device */
+ sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err);
+ if (err < 0)
+ goto out;
+
+ err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
+ !(status & C_COM_DRV_OWN), 2000, 1000000);
+
+out:
+ sdio_release_host(bdev->func);
+
+ if (err < 0)
+ bt_dev_err(bdev->hdev, "Cannot return ownership to device");
+
+ return err;
+}
+
+static int btmtksdio_drv_pmctrl(struct btmtksdio_dev *bdev)
+{
+ u32 status;
+ int err;
+
+ sdio_claim_host(bdev->func);
+
+ /* Get ownership from the device */
+ sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err);
+ if (err < 0)
+ goto out;
+
+ err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
+ status & C_COM_DRV_OWN, 2000, 1000000);
+
+ if (!err && bdev->data->lp_mbox_supported &&
+ test_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state))
+ err = readx_poll_timeout(btmtksdio_drv_own_query_79xx, bdev,
+ status, status & PD2HRM0R_DRV_OWN,
+ 2000, 1000000);
+
+out:
+ sdio_release_host(bdev->func);
+
+ if (err < 0)
+ bt_dev_err(bdev->hdev, "Cannot get ownership from device");
+
+ return err;
+}
+
static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb)
{
struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
@@ -480,6 +566,13 @@ static void btmtksdio_txrx_work(struct work_struct *work)
* FIFO.
*/
sdio_writel(bdev->func, int_status, MTK_REG_CHISR, NULL);
+ int_status &= INT_MASK;
+
+ if ((int_status & FW_MAILBOX_INT) &&
+ bdev->data->chipid == 0x7921) {
+ sdio_writel(bdev->func, PH2DSM0R_DRIVER_OWN,
+ MTK_REG_PH2DSM0R, 0);
+ }
if (int_status & FW_OWN_BACK_INT)
bt_dev_dbg(bdev->hdev, "Get fw own back");
@@ -531,7 +624,7 @@ static void btmtksdio_interrupt(struct sdio_func *func)
static int btmtksdio_open(struct hci_dev *hdev)
{
struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
- u32 status, val;
+ u32 val;
int err;
sdio_claim_host(bdev->func);
@@ -542,18 +635,10 @@ static int btmtksdio_open(struct hci_dev *hdev)
set_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state);
- /* Get ownership from the device */
- sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err);
+ err = btmtksdio_drv_pmctrl(bdev);
if (err < 0)
goto err_disable_func;
- err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
- status & C_COM_DRV_OWN, 2000, 1000000);
- if (err < 0) {
- bt_dev_err(bdev->hdev, "Cannot get ownership from device");
- goto err_disable_func;
- }
-
/* Disable interrupt & mask out all interrupt sources */
sdio_writel(bdev->func, C_INT_EN_CLR, MTK_REG_CHLPCR, &err);
if (err < 0)
@@ -623,8 +708,6 @@ err_release_host:
static int btmtksdio_close(struct hci_dev *hdev)
{
struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
- u32 status;
- int err;
sdio_claim_host(bdev->func);
@@ -635,13 +718,7 @@ static int btmtksdio_close(struct hci_dev *hdev)
cancel_work_sync(&bdev->txrx_work);
- /* Return ownership to the device */
- sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, NULL);
-
- err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
- !(status & C_COM_DRV_OWN), 2000, 1000000);
- if (err < 0)
- bt_dev_err(bdev->hdev, "Cannot return ownership to device");
+ btmtksdio_fw_pmctrl(bdev);
clear_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state);
sdio_disable_func(bdev->func);
@@ -686,6 +763,7 @@ static int btmtksdio_func_query(struct hci_dev *hdev)
static int mt76xx_setup(struct hci_dev *hdev, const char *fwname)
{
+ struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
struct btmtk_hci_wmt_params wmt_params;
struct btmtk_tci_sleep tci_sleep;
struct sk_buff *skb;
@@ -746,6 +824,8 @@ ignore_setup_fw:
return err;
}
+ set_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state);
+
ignore_func_on:
/* Apply the low power environment setup */
tci_sleep.mode = 0x5;
@@ -768,6 +848,7 @@ ignore_func_on:
static int mt79xx_setup(struct hci_dev *hdev, const char *fwname)
{
+ struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
struct btmtk_hci_wmt_params wmt_params;
u8 param = 0x1;
int err;
@@ -793,19 +874,15 @@ static int mt79xx_setup(struct hci_dev *hdev, const char *fwname)
hci_set_msft_opcode(hdev, 0xFD30);
hci_set_aosp_capable(hdev);
+ set_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state);
return err;
}
-static int btsdio_mtk_reg_read(struct hci_dev *hdev, u32 reg, u32 *val)
+static int btmtksdio_mtk_reg_read(struct hci_dev *hdev, u32 reg, u32 *val)
{
struct btmtk_hci_wmt_params wmt_params;
- struct reg_read_cmd {
- u8 type;
- u8 rsv;
- u8 num;
- __le32 addr;
- } __packed reg_read = {
+ struct reg_read_cmd reg_read = {
.type = 1,
.num = 1,
};
@@ -821,7 +898,7 @@ static int btsdio_mtk_reg_read(struct hci_dev *hdev, u32 reg, u32 *val)
err = mtk_hci_wmt_sync(hdev, &wmt_params);
if (err < 0) {
- bt_dev_err(hdev, "Failed to read reg(%d)", err);
+ bt_dev_err(hdev, "Failed to read reg (%d)", err);
return err;
}
@@ -830,6 +907,66 @@ static int btsdio_mtk_reg_read(struct hci_dev *hdev, u32 reg, u32 *val)
return err;
}
+static int btmtksdio_mtk_reg_write(struct hci_dev *hdev, u32 reg, u32 val, u32 mask)
+{
+ struct btmtk_hci_wmt_params wmt_params;
+ const struct reg_write_cmd reg_write = {
+ .type = 1,
+ .num = 1,
+ .addr = cpu_to_le32(reg),
+ .data = cpu_to_le32(val),
+ .mask = cpu_to_le32(mask),
+ };
+ int err, status;
+
+ wmt_params.op = BTMTK_WMT_REGISTER;
+ wmt_params.flag = BTMTK_WMT_REG_WRITE;
+ wmt_params.dlen = sizeof(reg_write);
+ wmt_params.data = &reg_write;
+ wmt_params.status = &status;
+
+ err = mtk_hci_wmt_sync(hdev, &wmt_params);
+ if (err < 0)
+ bt_dev_err(hdev, "Failed to write reg (%d)", err);
+
+ return err;
+}
+
+static int btmtksdio_sco_setting(struct hci_dev *hdev)
+{
+ const struct btmtk_sco sco_setting = {
+ .clock_config = 0x49,
+ .channel_format_config = 0x80,
+ };
+ struct sk_buff *skb;
+ u32 val;
+ int err;
+
+ /* Enable SCO over I2S/PCM for MediaTek chipset */
+ skb = __hci_cmd_sync(hdev, 0xfc72, sizeof(sco_setting),
+ &sco_setting, HCI_CMD_TIMEOUT);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ kfree_skb(skb);
+
+ err = btmtksdio_mtk_reg_read(hdev, MT7921_PINMUX_0, &val);
+ if (err < 0)
+ return err;
+
+ val |= 0x11000000;
+ err = btmtksdio_mtk_reg_write(hdev, MT7921_PINMUX_0, val, ~0);
+ if (err < 0)
+ return err;
+
+ err = btmtksdio_mtk_reg_read(hdev, MT7921_PINMUX_1, &val);
+ if (err < 0)
+ return err;
+
+ val |= 0x00000101;
+ return btmtksdio_mtk_reg_write(hdev, MT7921_PINMUX_1, val, ~0);
+}
+
static int btmtksdio_setup(struct hci_dev *hdev)
{
struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
@@ -844,13 +981,13 @@ static int btmtksdio_setup(struct hci_dev *hdev)
switch (bdev->data->chipid) {
case 0x7921:
- err = btsdio_mtk_reg_read(hdev, 0x70010200, &dev_id);
+ err = btmtksdio_mtk_reg_read(hdev, 0x70010200, &dev_id);
if (err < 0) {
bt_dev_err(hdev, "Failed to get device id (%d)", err);
return err;
}
- err = btsdio_mtk_reg_read(hdev, 0x80021004, &fw_version);
+ err = btmtksdio_mtk_reg_read(hdev, 0x80021004, &fw_version);
if (err < 0) {
bt_dev_err(hdev, "Failed to get fw version (%d)", err);
return err;
@@ -862,6 +999,22 @@ static int btmtksdio_setup(struct hci_dev *hdev)
err = mt79xx_setup(hdev, fwname);
if (err < 0)
return err;
+
+ err = btmtksdio_fw_pmctrl(bdev);
+ if (err < 0)
+ return err;
+
+ err = btmtksdio_drv_pmctrl(bdev);
+ if (err < 0)
+ return err;
+
+ /* Enable SCO over I2S/PCM */
+ err = btmtksdio_sco_setting(hdev);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to enable SCO setting (%d)", err);
+ return err;
+ }
+
break;
case 0x7663:
case 0x7668:
@@ -958,6 +1111,32 @@ static int btmtksdio_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
return 0;
}
+static bool btmtksdio_sdio_wakeup(struct hci_dev *hdev)
+{
+ struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
+ bool may_wakeup = device_may_wakeup(bdev->dev);
+ const struct btmtk_wakeon bt_awake = {
+ .mode = 0x1,
+ .gpo = 0,
+ .active_high = 0x1,
+ .enable_delay = cpu_to_le16(0xc80),
+ .wakeup_delay = cpu_to_le16(0x20),
+ };
+
+ if (may_wakeup && bdev->data->chipid == 0x7921) {
+ struct sk_buff *skb;
+
+ skb = __hci_cmd_sync(hdev, 0xfc27, sizeof(bt_awake),
+ &bt_awake, HCI_CMD_TIMEOUT);
+ if (IS_ERR(skb))
+ may_wakeup = false;
+
+ kfree_skb(skb);
+ }
+
+ return may_wakeup;
+}
+
static int btmtksdio_probe(struct sdio_func *func,
const struct sdio_device_id *id)
{
@@ -997,6 +1176,7 @@ static int btmtksdio_probe(struct sdio_func *func,
hdev->setup = btmtksdio_setup;
hdev->shutdown = btmtksdio_shutdown;
hdev->send = btmtksdio_send_frame;
+ hdev->wakeup = btmtksdio_sdio_wakeup;
hdev->set_bdaddr = btmtk_set_bdaddr;
SET_HCIDEV_DEV(hdev, &func->dev);
@@ -1032,7 +1212,11 @@ static int btmtksdio_probe(struct sdio_func *func,
*/
pm_runtime_put_noidle(bdev->dev);
- return 0;
+ err = device_init_wakeup(bdev->dev, true);
+ if (err)
+ bt_dev_err(hdev, "failed to initialize device wakeup");
+
+ return err;
}
static void btmtksdio_remove(struct sdio_func *func)
@@ -1058,7 +1242,6 @@ static int btmtksdio_runtime_suspend(struct device *dev)
{
struct sdio_func *func = dev_to_sdio_func(dev);
struct btmtksdio_dev *bdev;
- u32 status;
int err;
bdev = sdio_get_drvdata(func);
@@ -1070,18 +1253,9 @@ static int btmtksdio_runtime_suspend(struct device *dev)
sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER);
- sdio_claim_host(bdev->func);
+ err = btmtksdio_fw_pmctrl(bdev);
- sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err);
- if (err < 0)
- goto out;
-
- err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
- !(status & C_COM_DRV_OWN), 2000, 1000000);
-out:
- bt_dev_info(bdev->hdev, "status (%d) return ownership to device", err);
-
- sdio_release_host(bdev->func);
+ bt_dev_dbg(bdev->hdev, "status (%d) return ownership to device", err);
return err;
}
@@ -1090,7 +1264,6 @@ static int btmtksdio_runtime_resume(struct device *dev)
{
struct sdio_func *func = dev_to_sdio_func(dev);
struct btmtksdio_dev *bdev;
- u32 status;
int err;
bdev = sdio_get_drvdata(func);
@@ -1100,18 +1273,9 @@ static int btmtksdio_runtime_resume(struct device *dev)
if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state))
return 0;
- sdio_claim_host(bdev->func);
+ err = btmtksdio_drv_pmctrl(bdev);
- sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err);
- if (err < 0)
- goto out;
-
- err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
- status & C_COM_DRV_OWN, 2000, 1000000);
-out:
- bt_dev_info(bdev->hdev, "status (%d) get ownership from device", err);
-
- sdio_release_host(bdev->func);
+ bt_dev_dbg(bdev->hdev, "status (%d) get ownership from device", err);
return err;
}
diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index c2bdd1e6060e..c2030f7e25b4 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -149,6 +149,14 @@ static const struct id_table ic_id_table[] = {
.cfg_name = "rtl_bt/rtl8761bu_config" },
/* 8822C with UART interface */
+ { IC_INFO(RTL_ROM_LMP_8822B, 0xc, 0x8, HCI_UART),
+ .config_needed = true,
+ .has_rom_version = true,
+ .has_msft_ext = true,
+ .fw_name = "rtl_bt/rtl8822cs_fw.bin",
+ .cfg_name = "rtl_bt/rtl8822cs_config" },
+
+ /* 8822C with UART interface */
{ IC_INFO(RTL_ROM_LMP_8822B, 0xc, 0xa, HCI_UART),
.config_needed = true,
.has_rom_version = true,
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index c30d131da784..aefa0ee293f3 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -62,6 +62,7 @@ static struct usb_driver btusb_driver;
#define BTUSB_QCA_WCN6855 0x1000000
#define BTUSB_INTEL_BROKEN_SHUTDOWN_LED 0x2000000
#define BTUSB_INTEL_BROKEN_INITIAL_NCMD 0x4000000
+#define BTUSB_INTEL_NO_WBS_SUPPORT 0x8000000
static const struct usb_device_id btusb_table[] = {
/* Generic Bluetooth USB device */
@@ -385,9 +386,11 @@ static const struct usb_device_id blacklist_table[] = {
{ USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_COMBINED },
{ USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
{ USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL_COMBINED |
+ BTUSB_INTEL_NO_WBS_SUPPORT |
BTUSB_INTEL_BROKEN_INITIAL_NCMD |
BTUSB_INTEL_BROKEN_SHUTDOWN_LED },
{ USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL_COMBINED |
+ BTUSB_INTEL_NO_WBS_SUPPORT |
BTUSB_INTEL_BROKEN_SHUTDOWN_LED },
{ USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_COMBINED },
{ USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL_COMBINED |
@@ -405,6 +408,8 @@ static const struct usb_device_id blacklist_table[] = {
BTUSB_WIDEBAND_SPEECH },
/* Realtek 8852AE Bluetooth devices */
+ { USB_DEVICE(0x0bda, 0x2852), .driver_info = BTUSB_REALTEK |
+ BTUSB_WIDEBAND_SPEECH },
{ USB_DEVICE(0x0bda, 0xc852), .driver_info = BTUSB_REALTEK |
BTUSB_WIDEBAND_SPEECH },
{ USB_DEVICE(0x0bda, 0x385a), .driver_info = BTUSB_REALTEK |
@@ -2057,10 +2062,10 @@ static int btusb_setup_csr(struct hci_dev *hdev)
* These controllers are really messed-up.
*
* 1. Their bulk RX endpoint will never report any data unless
- * the device was suspended at least once (yes, really).
+ * the device was suspended at least once (yes, really).
* 2. They will not wakeup when autosuspended and receiving data
- * on their bulk RX endpoint from e.g. a keyboard or mouse
- * (IOW remote-wakeup support is broken for the bulk endpoint).
+ * on their bulk RX endpoint from e.g. a keyboard or mouse
+ * (IOW remote-wakeup support is broken for the bulk endpoint).
*
* To fix 1. enable runtime-suspend, force-suspend the
* HCI and then wake-it up by disabling runtime-suspend.
@@ -3737,6 +3742,9 @@ static int btusb_probe(struct usb_interface *intf,
hdev->send = btusb_send_frame_intel;
hdev->cmd_timeout = btusb_intel_cmd_timeout;
+ if (id->driver_info & BTUSB_INTEL_NO_WBS_SUPPORT)
+ btintel_set_flag(hdev, INTEL_ROM_LEGACY_NO_WBS_SUPPORT);
+
if (id->driver_info & BTUSB_INTEL_BROKEN_INITIAL_NCMD)
btintel_set_flag(hdev, INTEL_BROKEN_INITIAL_NCMD);
diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
index 34286ffe0568..fdf504b0d265 100644
--- a/drivers/bluetooth/hci_h5.c
+++ b/drivers/bluetooth/hci_h5.c
@@ -966,6 +966,11 @@ static void h5_btrtl_open(struct h5 *h5)
pm_runtime_enable(&h5->hu->serdev->dev);
}
+ /* The controller needs reset to startup */
+ gpiod_set_value_cansleep(h5->enable_gpio, 0);
+ gpiod_set_value_cansleep(h5->device_wake_gpio, 0);
+ msleep(100);
+
/* The controller needs up to 500ms to wakeup */
gpiod_set_value_cansleep(h5->enable_gpio, 1);
gpiod_set_value_cansleep(h5->device_wake_gpio, 1);
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index eb1e736efeeb..4eb420a9ed04 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -509,7 +509,7 @@ static int send_command_from_firmware(struct ll_device *lldev,
return 0;
}
-/**
+/*
* download_firmware -
* internal function which parses through the .bts firmware
* script file intreprets SEND, DELAY actions only as of now
diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c
index 3b00d82d36cf..4cda890ce647 100644
--- a/drivers/bluetooth/hci_serdev.c
+++ b/drivers/bluetooth/hci_serdev.c
@@ -305,6 +305,8 @@ int hci_uart_register_device(struct hci_uart *hu,
if (err)
return err;
+ percpu_init_rwsem(&hu->proto_lock);
+
err = p->open(hu);
if (err)
goto err_open;
@@ -327,7 +329,6 @@ int hci_uart_register_device(struct hci_uart *hu,
INIT_WORK(&hu->init_ready, hci_uart_init_work);
INIT_WORK(&hu->write_work, hci_uart_write_work);
- percpu_init_rwsem(&hu->proto_lock);
/* Only when vendor specific setup callback is provided, consider
* the manufacturer information valid. This avoids filling in the
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 533e476988f2..303c8d32d451 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -19,6 +19,7 @@
#include <linux/in.h>
#include <net/arp.h>
#include <net/ipv6.h>
+#include <net/ndisc.h>
#include <asm/byteorder.h>
#include <net/bonding.h>
#include <net/bond_alb.h>
@@ -1269,6 +1270,27 @@ unwind:
return res;
}
+/* determine if the packet is NA or NS */
+static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond)
+{
+ struct ipv6hdr *ip6hdr;
+ struct icmp6hdr *hdr;
+
+ if (!pskb_network_may_pull(skb, sizeof(*ip6hdr)))
+ return true;
+
+ ip6hdr = ipv6_hdr(skb);
+ if (ip6hdr->nexthdr != IPPROTO_ICMPV6)
+ return false;
+
+ if (!pskb_network_may_pull(skb, sizeof(*ip6hdr) + sizeof(*hdr)))
+ return true;
+
+ hdr = icmp6_hdr(skb);
+ return hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
+ hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION;
+}
+
/************************ exported alb functions ************************/
int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
@@ -1348,8 +1370,11 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
/* Do not TX balance any multicast or broadcast */
if (!is_multicast_ether_addr(eth_data->h_dest)) {
switch (skb->protocol) {
- case htons(ETH_P_IP):
case htons(ETH_P_IPV6):
+ if (alb_determine_nd(skb, bond))
+ break;
+ fallthrough;
+ case htons(ETH_P_IP):
hash_index = bond_xmit_hash(bond, skb);
if (bond->params.tlb_dynamic_lb) {
tx_slave = tlb_choose_channel(bond,
@@ -1432,10 +1457,12 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
break;
}
- if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) {
+ if (alb_determine_nd(skb, bond)) {
do_tx_balance = false;
break;
}
+
+ /* The IPv6 header is pulled by alb_determine_nd */
/* Additionally, DAD probes should not be tx-balanced as that
* will lead to false positives for duplicate addresses and
* prevent address configuration from working.
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 238b56d77c36..617c2bf8c5a7 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -6048,27 +6048,38 @@ static int __net_init bond_net_init(struct net *net)
return 0;
}
-static void __net_exit bond_net_exit(struct net *net)
+static void __net_exit bond_net_exit_batch(struct list_head *net_list)
{
- struct bond_net *bn = net_generic(net, bond_net_id);
- struct bonding *bond, *tmp_bond;
+ struct bond_net *bn;
+ struct net *net;
LIST_HEAD(list);
- bond_destroy_sysfs(bn);
+ list_for_each_entry(net, net_list, exit_list) {
+ bn = net_generic(net, bond_net_id);
+ bond_destroy_sysfs(bn);
+ }
/* Kill off any bonds created after unregistering bond rtnl ops */
rtnl_lock();
- list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
- unregister_netdevice_queue(bond->dev, &list);
+ list_for_each_entry(net, net_list, exit_list) {
+ struct bonding *bond, *tmp_bond;
+
+ bn = net_generic(net, bond_net_id);
+ list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
+ unregister_netdevice_queue(bond->dev, &list);
+ }
unregister_netdevice_many(&list);
rtnl_unlock();
- bond_destroy_proc_dir(bn);
+ list_for_each_entry(net, net_list, exit_list) {
+ bn = net_generic(net, bond_net_id);
+ bond_destroy_proc_dir(bn);
+ }
}
static struct pernet_operations bond_net_ops = {
.init = bond_net_init,
- .exit = bond_net_exit,
+ .exit_batch = bond_net_exit_batch,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
};
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 46b150e6289e..cfe37be42be4 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -307,7 +307,6 @@ void __net_init bond_create_proc_dir(struct bond_net *bn)
}
/* Destroy the bonding directory under /proc/net, if empty.
- * Caller must hold rtnl_lock.
*/
void __net_exit bond_destroy_proc_dir(struct bond_net *bn)
{
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index c0c91440340a..8d51c1019dcd 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -68,17 +68,7 @@ config NET_DSA_QCA8K
This enables support for the Qualcomm Atheros QCA8K Ethernet
switch chips.
-config NET_DSA_REALTEK_SMI
- tristate "Realtek SMI Ethernet switch family support"
- select NET_DSA_TAG_RTL4_A
- select NET_DSA_TAG_RTL8_4
- select FIXED_PHY
- select IRQ_DOMAIN
- select REALTEK_PHY
- select REGMAP
- help
- This enables support for the Realtek SMI-based switch
- chips, currently only RTL8366RB.
+source "drivers/net/dsa/realtek/Kconfig"
config NET_DSA_SMSC_LAN9303
tristate
diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile
index 8da1569a34e6..e73838c12256 100644
--- a/drivers/net/dsa/Makefile
+++ b/drivers/net/dsa/Makefile
@@ -9,8 +9,6 @@ obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o
obj-$(CONFIG_NET_DSA_MT7530) += mt7530.o
obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o
-obj-$(CONFIG_NET_DSA_REALTEK_SMI) += realtek-smi.o
-realtek-smi-objs := realtek-smi-core.o rtl8366.o rtl8366rb.o rtl8365mb.o
obj-$(CONFIG_NET_DSA_SMSC_LAN9303) += lan9303-core.o
obj-$(CONFIG_NET_DSA_SMSC_LAN9303_I2C) += lan9303_i2c.o
obj-$(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) += lan9303_mdio.o
@@ -23,5 +21,6 @@ obj-y += microchip/
obj-y += mv88e6xxx/
obj-y += ocelot/
obj-y += qca/
+obj-y += realtek/
obj-y += sja1105/
obj-y += xrs700x/
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 3867f3d4545f..a3b98992f180 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -2186,7 +2186,7 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy)
{
int ret;
- ret = phy_init_eee(phy, 0);
+ ret = phy_init_eee(phy, false);
if (ret)
return 0;
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 6afb5db8244c..cf82b1fa9725 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -712,49 +712,25 @@ static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port)
PHY_BRCM_IDDQ_SUSPEND;
}
-static void bcm_sf2_sw_validate(struct dsa_switch *ds, int port,
- unsigned long *supported,
- struct phylink_link_state *state)
+static void bcm_sf2_sw_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
{
+ unsigned long *interfaces = config->supported_interfaces;
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
-
- if (!phy_interface_mode_is_rgmii(state->interface) &&
- state->interface != PHY_INTERFACE_MODE_MII &&
- state->interface != PHY_INTERFACE_MODE_REVMII &&
- state->interface != PHY_INTERFACE_MODE_GMII &&
- state->interface != PHY_INTERFACE_MODE_INTERNAL &&
- state->interface != PHY_INTERFACE_MODE_MOCA) {
- linkmode_zero(supported);
- if (port != core_readl(priv, CORE_IMP0_PRT_ID))
- dev_err(ds->dev,
- "Unsupported interface: %d for port %d\n",
- state->interface, port);
- return;
- }
-
- /* Allow all the expected bits */
- phylink_set(mask, Autoneg);
- phylink_set_port_modes(mask);
- phylink_set(mask, Pause);
- phylink_set(mask, Asym_Pause);
- /* With the exclusion of MII and Reverse MII, we support Gigabit,
- * including Half duplex
- */
- if (state->interface != PHY_INTERFACE_MODE_MII &&
- state->interface != PHY_INTERFACE_MODE_REVMII) {
- phylink_set(mask, 1000baseT_Full);
- phylink_set(mask, 1000baseT_Half);
+ if (priv->int_phy_mask & BIT(port)) {
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL, interfaces);
+ } else if (priv->moca_port == port) {
+ __set_bit(PHY_INTERFACE_MODE_MOCA, interfaces);
+ } else {
+ __set_bit(PHY_INTERFACE_MODE_MII, interfaces);
+ __set_bit(PHY_INTERFACE_MODE_REVMII, interfaces);
+ __set_bit(PHY_INTERFACE_MODE_GMII, interfaces);
+ phy_interface_set_rgmii(interfaces);
}
- phylink_set(mask, 10baseT_Half);
- phylink_set(mask, 10baseT_Full);
- phylink_set(mask, 100baseT_Half);
- phylink_set(mask, 100baseT_Full);
-
- linkmode_and(supported, supported, mask);
- linkmode_and(state->advertising, state->advertising, mask);
+ config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000;
}
static void bcm_sf2_sw_mac_config(struct dsa_switch *ds, int port,
@@ -1221,7 +1197,7 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
.get_sset_count = bcm_sf2_sw_get_sset_count,
.get_ethtool_phy_stats = b53_get_ethtool_phy_stats,
.get_phy_flags = bcm_sf2_sw_get_phy_flags,
- .phylink_validate = bcm_sf2_sw_validate,
+ .phylink_get_caps = bcm_sf2_sw_get_caps,
.phylink_mac_config = bcm_sf2_sw_mac_config,
.phylink_mac_link_down = bcm_sf2_sw_mac_link_down,
.phylink_mac_link_up = bcm_sf2_sw_mac_link_up,
diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 991b9c6b6ce7..5dc9899bc0a6 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -1461,27 +1461,22 @@ static int ksz8_setup(struct dsa_switch *ds)
return 0;
}
-static void ksz8_validate(struct dsa_switch *ds, int port,
- unsigned long *supported,
- struct phylink_link_state *state)
+static void ksz8_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
struct ksz_device *dev = ds->priv;
if (port == dev->cpu_port) {
- if (state->interface != PHY_INTERFACE_MODE_RMII &&
- state->interface != PHY_INTERFACE_MODE_MII &&
- state->interface != PHY_INTERFACE_MODE_NA)
- goto unsupported;
+ __set_bit(PHY_INTERFACE_MODE_RMII,
+ config->supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_MII,
+ config->supported_interfaces);
} else {
- if (state->interface != PHY_INTERFACE_MODE_INTERNAL &&
- state->interface != PHY_INTERFACE_MODE_NA)
- goto unsupported;
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+ config->supported_interfaces);
}
- /* Allow all the expected bits */
- phylink_set_port_modes(mask);
- phylink_set(mask, Autoneg);
+ config->mac_capabilities = MAC_10 | MAC_100;
/* Silicon Errata Sheet (DS80000830A):
* "Port 1 does not respond to received flow control PAUSE frames"
@@ -1489,27 +1484,11 @@ static void ksz8_validate(struct dsa_switch *ds, int port,
* switches.
*/
if (!ksz_is_ksz88x3(dev) || port)
- phylink_set(mask, Pause);
+ config->mac_capabilities |= MAC_SYM_PAUSE;
/* Asym pause is not supported on KSZ8863 and KSZ8873 */
if (!ksz_is_ksz88x3(dev))
- phylink_set(mask, Asym_Pause);
-
- /* 10M and 100M are only supported */
- phylink_set(mask, 10baseT_Half);
- phylink_set(mask, 10baseT_Full);
- phylink_set(mask, 100baseT_Half);
- phylink_set(mask, 100baseT_Full);
-
- linkmode_and(supported, supported, mask);
- linkmode_and(state->advertising, state->advertising, mask);
-
- return;
-
-unsupported:
- linkmode_zero(supported);
- dev_err(ds->dev, "Unsupported interface: %s, port: %d\n",
- phy_modes(state->interface), port);
+ config->mac_capabilities |= MAC_ASYM_PAUSE;
}
static const struct dsa_switch_ops ksz8_switch_ops = {
@@ -1518,7 +1497,7 @@ static const struct dsa_switch_ops ksz8_switch_ops = {
.setup = ksz8_setup,
.phy_read = ksz_phy_read16,
.phy_write = ksz_phy_write16,
- .phylink_validate = ksz8_validate,
+ .phylink_get_caps = ksz8_get_caps,
.phylink_mac_link_down = ksz_mac_link_down,
.port_enable = ksz_enable_port,
.get_strings = ksz8_get_strings,
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 353b5f981740..a85d990896b0 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -222,9 +222,12 @@ static int ksz9477_reset_switch(struct ksz_device *dev)
(BROADCAST_STORM_VALUE *
BROADCAST_STORM_PROT_RATE) / 100);
- if (dev->synclko_125)
- ksz_write8(dev, REG_SW_GLOBAL_OUTPUT_CTRL__1,
- SW_ENABLE_REFCLKO | SW_REFCLKO_IS_125MHZ);
+ data8 = SW_ENABLE_REFCLKO;
+ if (dev->synclko_disable)
+ data8 = 0;
+ else if (dev->synclko_125)
+ data8 = SW_ENABLE_REFCLKO | SW_REFCLKO_IS_125MHZ;
+ ksz_write8(dev, REG_SW_GLOBAL_OUTPUT_CTRL__1, data8);
return 0;
}
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 55dbda04ea62..7e33ec73f803 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -434,6 +434,12 @@ int ksz_switch_register(struct ksz_device *dev,
}
dev->synclko_125 = of_property_read_bool(dev->dev->of_node,
"microchip,synclko-125");
+ dev->synclko_disable = of_property_read_bool(dev->dev->of_node,
+ "microchip,synclko-disable");
+ if (dev->synclko_125 && dev->synclko_disable) {
+ dev_err(dev->dev, "inconsistent synclko settings\n");
+ return -EINVAL;
+ }
}
ret = dsa_register_switch(dev->ds);
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index df8ae59c8525..3db63f62f0a1 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -75,6 +75,7 @@ struct ksz_device {
u32 regs_size;
bool phy_errata_9477;
bool synclko_125;
+ bool synclko_disable;
struct vlan_table *vlan_cache;
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index ff3c267d0f26..f74f25f479ed 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -2846,7 +2846,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port,
mcr |= PMCR_RX_FC_EN;
}
- if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, 0) >= 0) {
+ if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) {
switch (speed) {
case SPEED_1000:
mcr |= PMCR_FORCE_EEE1G;
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 8530dbe403f4..5344d0c0647e 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -86,12 +86,16 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val)
int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
u16 mask, u16 val)
{
+ const unsigned long timeout = jiffies + msecs_to_jiffies(50);
u16 data;
int err;
int i;
- /* There's no bus specific operation to wait for a mask */
- for (i = 0; i < 16; i++) {
+ /* There's no bus specific operation to wait for a mask. Even
+ * if the initial poll takes longer than 50ms, always do at
+ * least one more attempt.
+ */
+ for (i = 0; time_before(jiffies, timeout) || (i < 2); i++) {
err = mv88e6xxx_read(chip, addr, reg, &data);
if (err)
return err;
@@ -99,7 +103,10 @@ int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
if ((data & mask) == val)
return 0;
- usleep_range(1000, 2000);
+ if (i < 2)
+ cpu_relax();
+ else
+ usleep_range(1000, 2000);
}
dev_err(chip->dev, "Timeout while waiting for switch\n");
@@ -563,133 +570,249 @@ static int mv88e6xxx_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
return 0;
}
-static void mv88e6065_phylink_validate(struct mv88e6xxx_chip *chip, int port,
- unsigned long *mask,
- struct phylink_link_state *state)
+static const u8 mv88e6185_phy_interface_modes[] = {
+ [MV88E6185_PORT_STS_CMODE_GMII_FD] = PHY_INTERFACE_MODE_GMII,
+ [MV88E6185_PORT_STS_CMODE_MII_100_FD_PS] = PHY_INTERFACE_MODE_MII,
+ [MV88E6185_PORT_STS_CMODE_MII_100] = PHY_INTERFACE_MODE_MII,
+ [MV88E6185_PORT_STS_CMODE_MII_10] = PHY_INTERFACE_MODE_MII,
+ [MV88E6185_PORT_STS_CMODE_SERDES] = PHY_INTERFACE_MODE_1000BASEX,
+ [MV88E6185_PORT_STS_CMODE_1000BASE_X] = PHY_INTERFACE_MODE_1000BASEX,
+ [MV88E6185_PORT_STS_CMODE_PHY] = PHY_INTERFACE_MODE_SGMII,
+};
+
+static void mv88e6185_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+ struct phylink_config *config)
{
- if (!phy_interface_mode_is_8023z(state->interface)) {
- /* 10M and 100M are only supported in non-802.3z mode */
- phylink_set(mask, 10baseT_Half);
- phylink_set(mask, 10baseT_Full);
- phylink_set(mask, 100baseT_Half);
- phylink_set(mask, 100baseT_Full);
- }
+ u8 cmode = chip->ports[port].cmode;
+
+ if (cmode < ARRAY_SIZE(mv88e6185_phy_interface_modes) &&
+ mv88e6185_phy_interface_modes[cmode])
+ __set_bit(mv88e6185_phy_interface_modes[cmode],
+ config->supported_interfaces);
+
+ config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+ MAC_1000FD;
}
-static void mv88e6185_phylink_validate(struct mv88e6xxx_chip *chip, int port,
- unsigned long *mask,
- struct phylink_link_state *state)
-{
- /* FIXME: if the port is in 1000Base-X mode, then it only supports
- * 1000M FD speeds. In this case, CMODE will indicate 5.
+static const u8 mv88e6xxx_phy_interface_modes[] = {
+ [MV88E6XXX_PORT_STS_CMODE_MII_PHY] = PHY_INTERFACE_MODE_MII,
+ [MV88E6XXX_PORT_STS_CMODE_MII] = PHY_INTERFACE_MODE_MII,
+ [MV88E6XXX_PORT_STS_CMODE_GMII] = PHY_INTERFACE_MODE_GMII,
+ [MV88E6XXX_PORT_STS_CMODE_RMII_PHY] = PHY_INTERFACE_MODE_RMII,
+ [MV88E6XXX_PORT_STS_CMODE_RMII] = PHY_INTERFACE_MODE_RMII,
+ [MV88E6XXX_PORT_STS_CMODE_100BASEX] = PHY_INTERFACE_MODE_100BASEX,
+ [MV88E6XXX_PORT_STS_CMODE_1000BASEX] = PHY_INTERFACE_MODE_1000BASEX,
+ [MV88E6XXX_PORT_STS_CMODE_SGMII] = PHY_INTERFACE_MODE_SGMII,
+ /* higher interface modes are not needed here, since ports supporting
+ * them are writable, and so the supported interfaces are filled in the
+ * corresponding .phylink_set_interfaces() implementation below
*/
- phylink_set(mask, 1000baseT_Full);
- phylink_set(mask, 1000baseX_Full);
+};
- mv88e6065_phylink_validate(chip, port, mask, state);
+static void mv88e6xxx_translate_cmode(u8 cmode, unsigned long *supported)
+{
+ if (cmode < ARRAY_SIZE(mv88e6xxx_phy_interface_modes) &&
+ mv88e6xxx_phy_interface_modes[cmode])
+ __set_bit(mv88e6xxx_phy_interface_modes[cmode], supported);
+ else if (cmode == MV88E6XXX_PORT_STS_CMODE_RGMII)
+ phy_interface_set_rgmii(supported);
}
-static void mv88e6341_phylink_validate(struct mv88e6xxx_chip *chip, int port,
- unsigned long *mask,
- struct phylink_link_state *state)
+static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+ struct phylink_config *config)
{
- if (port >= 5)
- phylink_set(mask, 2500baseX_Full);
+ unsigned long *supported = config->supported_interfaces;
- /* No ethtool bits for 200Mbps */
- phylink_set(mask, 1000baseT_Full);
- phylink_set(mask, 1000baseX_Full);
+ /* Translate the default cmode */
+ mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
- mv88e6065_phylink_validate(chip, port, mask, state);
+ config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100;
}
-static void mv88e6352_phylink_validate(struct mv88e6xxx_chip *chip, int port,
- unsigned long *mask,
- struct phylink_link_state *state)
+static int mv88e6352_get_port4_serdes_cmode(struct mv88e6xxx_chip *chip)
{
- /* No ethtool bits for 200Mbps */
- phylink_set(mask, 1000baseT_Full);
- phylink_set(mask, 1000baseX_Full);
+ u16 reg, val;
+ int err;
+
+ err = mv88e6xxx_port_read(chip, 4, MV88E6XXX_PORT_STS, &reg);
+ if (err)
+ return err;
+
+ /* If PHY_DETECT is zero, then we are not in auto-media mode */
+ if (!(reg & MV88E6XXX_PORT_STS_PHY_DETECT))
+ return 0xf;
+
+ val = reg & ~MV88E6XXX_PORT_STS_PHY_DETECT;
+ err = mv88e6xxx_port_write(chip, 4, MV88E6XXX_PORT_STS, val);
+ if (err)
+ return err;
+
+ err = mv88e6xxx_port_read(chip, 4, MV88E6XXX_PORT_STS, &val);
+ if (err)
+ return err;
+
+ /* Restore PHY_DETECT value */
+ err = mv88e6xxx_port_write(chip, 4, MV88E6XXX_PORT_STS, reg);
+ if (err)
+ return err;
- mv88e6065_phylink_validate(chip, port, mask, state);
+ return val & MV88E6XXX_PORT_STS_CMODE_MASK;
}
-static void mv88e6390_phylink_validate(struct mv88e6xxx_chip *chip, int port,
- unsigned long *mask,
- struct phylink_link_state *state)
+static void mv88e6352_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+ struct phylink_config *config)
{
- if (port >= 9) {
- phylink_set(mask, 2500baseX_Full);
- phylink_set(mask, 2500baseT_Full);
+ unsigned long *supported = config->supported_interfaces;
+ int err, cmode;
+
+ /* Translate the default cmode */
+ mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
+
+ config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+ MAC_1000FD;
+
+ /* Port 4 supports automedia if the serdes is associated with it. */
+ if (port == 4) {
+ mv88e6xxx_reg_lock(chip);
+ err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+ if (err < 0)
+ dev_err(chip->dev, "p%d: failed to read scratch\n",
+ port);
+ if (err <= 0)
+ goto unlock;
+
+ cmode = mv88e6352_get_port4_serdes_cmode(chip);
+ if (cmode < 0)
+ dev_err(chip->dev, "p%d: failed to read serdes cmode\n",
+ port);
+ else
+ mv88e6xxx_translate_cmode(cmode, supported);
+unlock:
+ mv88e6xxx_reg_unlock(chip);
}
+}
+
+static void mv88e6341_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+ struct phylink_config *config)
+{
+ unsigned long *supported = config->supported_interfaces;
+
+ /* Translate the default cmode */
+ mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
/* No ethtool bits for 200Mbps */
- phylink_set(mask, 1000baseT_Full);
- phylink_set(mask, 1000baseX_Full);
+ config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+ MAC_1000FD;
+
+ /* The C_Mode field is programmable on port 5 */
+ if (port == 5) {
+ __set_bit(PHY_INTERFACE_MODE_SGMII, supported);
+ __set_bit(PHY_INTERFACE_MODE_1000BASEX, supported);
+ __set_bit(PHY_INTERFACE_MODE_2500BASEX, supported);
- mv88e6065_phylink_validate(chip, port, mask, state);
+ config->mac_capabilities |= MAC_2500FD;
+ }
}
-static void mv88e6390x_phylink_validate(struct mv88e6xxx_chip *chip, int port,
- unsigned long *mask,
- struct phylink_link_state *state)
+static void mv88e6390_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+ struct phylink_config *config)
{
- if (port >= 9) {
- phylink_set(mask, 10000baseT_Full);
- phylink_set(mask, 10000baseKR_Full);
+ unsigned long *supported = config->supported_interfaces;
+
+ /* Translate the default cmode */
+ mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
+
+ /* No ethtool bits for 200Mbps */
+ config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+ MAC_1000FD;
+
+ /* The C_Mode field is programmable on ports 9 and 10 */
+ if (port == 9 || port == 10) {
+ __set_bit(PHY_INTERFACE_MODE_SGMII, supported);
+ __set_bit(PHY_INTERFACE_MODE_1000BASEX, supported);
+ __set_bit(PHY_INTERFACE_MODE_2500BASEX, supported);
+
+ config->mac_capabilities |= MAC_2500FD;
}
+}
+
+static void mv88e6390x_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+ struct phylink_config *config)
+{
+ unsigned long *supported = config->supported_interfaces;
- mv88e6390_phylink_validate(chip, port, mask, state);
+ mv88e6390_phylink_get_caps(chip, port, config);
+
+ /* For the 6x90X, ports 2-7 can be in automedia mode.
+ * (Note that 6x90 doesn't support RXAUI nor XAUI).
+ *
+ * Port 2 can also support 1000BASE-X in automedia mode if port 9 is
+ * configured for 1000BASE-X, SGMII or 2500BASE-X.
+ * Port 3-4 can also support 1000BASE-X in automedia mode if port 9 is
+ * configured for RXAUI, 1000BASE-X, SGMII or 2500BASE-X.
+ *
+ * Port 5 can also support 1000BASE-X in automedia mode if port 10 is
+ * configured for 1000BASE-X, SGMII or 2500BASE-X.
+ * Port 6-7 can also support 1000BASE-X in automedia mode if port 10 is
+ * configured for RXAUI, 1000BASE-X, SGMII or 2500BASE-X.
+ *
+ * For now, be permissive (as the old code was) and allow 1000BASE-X
+ * on ports 2..7.
+ */
+ if (port >= 2 && port <= 7)
+ __set_bit(PHY_INTERFACE_MODE_1000BASEX, supported);
+
+ /* The C_Mode field can also be programmed for 10G speeds */
+ if (port == 9 || port == 10) {
+ __set_bit(PHY_INTERFACE_MODE_XAUI, supported);
+ __set_bit(PHY_INTERFACE_MODE_RXAUI, supported);
+
+ config->mac_capabilities |= MAC_10000FD;
+ }
}
-static void mv88e6393x_phylink_validate(struct mv88e6xxx_chip *chip, int port,
- unsigned long *mask,
- struct phylink_link_state *state)
+static void mv88e6393x_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+ struct phylink_config *config)
{
+ unsigned long *supported = config->supported_interfaces;
bool is_6191x =
chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6191X;
- if (((port == 0 || port == 9) && !is_6191x) || port == 10) {
- phylink_set(mask, 10000baseT_Full);
- phylink_set(mask, 10000baseKR_Full);
- phylink_set(mask, 10000baseCR_Full);
- phylink_set(mask, 10000baseSR_Full);
- phylink_set(mask, 10000baseLR_Full);
- phylink_set(mask, 10000baseLRM_Full);
- phylink_set(mask, 10000baseER_Full);
- phylink_set(mask, 5000baseT_Full);
- phylink_set(mask, 2500baseX_Full);
- phylink_set(mask, 2500baseT_Full);
- }
+ mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
+
+ config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+ MAC_1000FD;
- phylink_set(mask, 1000baseT_Full);
- phylink_set(mask, 1000baseX_Full);
+ /* The C_Mode field can be programmed for ports 0, 9 and 10 */
+ if (port == 0 || port == 9 || port == 10) {
+ __set_bit(PHY_INTERFACE_MODE_SGMII, supported);
+ __set_bit(PHY_INTERFACE_MODE_1000BASEX, supported);
- mv88e6065_phylink_validate(chip, port, mask, state);
+ /* 6191X supports >1G modes only on port 10 */
+ if (!is_6191x || port == 10) {
+ __set_bit(PHY_INTERFACE_MODE_2500BASEX, supported);
+ __set_bit(PHY_INTERFACE_MODE_5GBASER, supported);
+ __set_bit(PHY_INTERFACE_MODE_10GBASER, supported);
+ /* FIXME: USXGMII is not supported yet */
+ /* __set_bit(PHY_INTERFACE_MODE_USXGMII, supported); */
+
+ config->mac_capabilities |= MAC_2500FD | MAC_5000FD |
+ MAC_10000FD;
+ }
+ }
}
-static void mv88e6xxx_validate(struct dsa_switch *ds, int port,
- unsigned long *supported,
- struct phylink_link_state *state)
+static void mv88e6xxx_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
struct mv88e6xxx_chip *chip = ds->priv;
- /* Allow all the expected bits */
- phylink_set(mask, Autoneg);
- phylink_set(mask, Pause);
- phylink_set_port_modes(mask);
+ chip->info->ops->phylink_get_caps(chip, port, config);
- if (chip->info->ops->phylink_validate)
- chip->info->ops->phylink_validate(chip, port, mask, state);
-
- linkmode_and(supported, supported, mask);
- linkmode_and(state->advertising, state->advertising, mask);
-
- /* We can only operate at 2500BaseX or 1000BaseX. If requested
- * to advertise both, only report advertising at 2500BaseX.
- */
- phylink_helper_basex_speed(state);
+ /* Internal ports need GMII for PHYLIB */
+ if (mv88e6xxx_phy_is_internal(ds, port))
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ config->supported_interfaces);
}
static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port,
@@ -1283,8 +1406,15 @@ static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port)
pvlan = 0;
- /* Frames from user ports can egress any local DSA links and CPU ports,
- * as well as any local member of their bridge group.
+ /* Frames from standalone user ports can only egress on the
+ * upstream port.
+ */
+ if (!dsa_port_bridge_dev_get(dp))
+ return BIT(dsa_switch_upstream_port(ds));
+
+ /* Frames from bridged user ports can egress any local DSA
+ * links and CPU ports, as well as any local member of their
+ * bridge group.
*/
dsa_switch_for_each_port(other_dp, ds)
if (other_dp->type == DSA_PORT_TYPE_CPU ||
@@ -1616,21 +1746,11 @@ static int mv88e6xxx_fid_map_vlan(struct mv88e6xxx_chip *chip,
int mv88e6xxx_fid_map(struct mv88e6xxx_chip *chip, unsigned long *fid_bitmap)
{
- int i, err;
- u16 fid;
-
bitmap_zero(fid_bitmap, MV88E6XXX_N_FID);
- /* Set every FID bit used by the (un)bridged ports */
- for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
- err = mv88e6xxx_port_get_fid(chip, i, &fid);
- if (err)
- return err;
-
- set_bit(fid, fid_bitmap);
- }
-
- /* Set every FID bit used by the VLAN entries */
+ /* Every FID has an associated VID, so walking the VTU
+ * will discover the full set of FIDs in use.
+ */
return mv88e6xxx_vtu_walk(chip, mv88e6xxx_fid_map_vlan, fid_bitmap);
}
@@ -1643,10 +1763,7 @@ static int mv88e6xxx_atu_new(struct mv88e6xxx_chip *chip, u16 *fid)
if (err)
return err;
- /* The reset value 0x000 is used to indicate that multiple address
- * databases are not needed. Return the next positive available.
- */
- *fid = find_next_zero_bit(fid_bitmap, MV88E6XXX_N_FID, 1);
+ *fid = find_first_zero_bit(fid_bitmap, MV88E6XXX_N_FID);
if (unlikely(*fid >= mv88e6xxx_num_databases(chip)))
return -ENOSPC;
@@ -2138,6 +2255,9 @@ static int mv88e6xxx_port_vlan_join(struct mv88e6xxx_chip *chip, int port,
if (!vlan.valid) {
memset(&vlan, 0, sizeof(vlan));
+ if (vid == MV88E6XXX_VID_STANDALONE)
+ vlan.policy = true;
+
err = mv88e6xxx_atu_new(chip, &vlan.fid);
if (err)
return err;
@@ -2480,6 +2600,10 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
if (err)
goto unlock;
+ err = mv88e6xxx_port_set_map_da(chip, port, true);
+ if (err)
+ goto unlock;
+
err = mv88e6xxx_port_commit_pvid(chip, port);
if (err)
goto unlock;
@@ -2514,6 +2638,12 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port,
mv88e6xxx_port_vlan_map(chip, port))
dev_err(ds->dev, "failed to remap in-chip Port VLAN\n");
+ err = mv88e6xxx_port_set_map_da(chip, port, false);
+ if (err)
+ dev_err(ds->dev,
+ "port %d failed to restore map-DA: %pe\n",
+ port, ERR_PTR(err));
+
err = mv88e6xxx_port_commit_pvid(chip, port);
if (err)
dev_err(ds->dev,
@@ -2911,12 +3041,13 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
return err;
/* Port Control 2: don't force a good FCS, set the MTU size to
- * 10222 bytes, disable 802.1q tags checking, don't discard tagged or
- * untagged frames on this port, do a destination address lookup on all
- * received packets as usual, disable ARP mirroring and don't send a
- * copy of all transmitted/received frames on this port to the CPU.
+ * 10222 bytes, disable 802.1q tags checking, don't discard
+ * tagged or untagged frames on this port, skip destination
+ * address lookup on user ports, disable ARP mirroring and don't
+ * send a copy of all transmitted/received frames on this port
+ * to the CPU.
*/
- err = mv88e6xxx_port_set_map_da(chip, port);
+ err = mv88e6xxx_port_set_map_da(chip, port, !dsa_is_user_port(ds, port));
if (err)
return err;
@@ -2924,8 +3055,44 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
if (err)
return err;
+ /* On chips that support it, set all downstream DSA ports'
+ * VLAN policy to TRAP. In combination with loading
+ * MV88E6XXX_VID_STANDALONE as a policy entry in the VTU, this
+ * provides a better isolation barrier between standalone
+ * ports, as the ATU is bypassed on any intermediate switches
+ * between the incoming port and the CPU.
+ */
+ if (dsa_is_downstream_port(ds, port) &&
+ chip->info->ops->port_set_policy) {
+ err = chip->info->ops->port_set_policy(chip, port,
+ MV88E6XXX_POLICY_MAPPING_VTU,
+ MV88E6XXX_POLICY_ACTION_TRAP);
+ if (err)
+ return err;
+ }
+
+ /* User ports start out in standalone mode and 802.1Q is
+ * therefore disabled. On DSA ports, all valid VIDs are always
+ * loaded in the VTU - therefore, enable 802.1Q in order to take
+ * advantage of VLAN policy on chips that supports it.
+ */
err = mv88e6xxx_port_set_8021q_mode(chip, port,
- MV88E6XXX_PORT_CTL2_8021Q_MODE_DISABLED);
+ dsa_is_user_port(ds, port) ?
+ MV88E6XXX_PORT_CTL2_8021Q_MODE_DISABLED :
+ MV88E6XXX_PORT_CTL2_8021Q_MODE_SECURE);
+ if (err)
+ return err;
+
+ /* Bind MV88E6XXX_VID_STANDALONE to MV88E6XXX_FID_STANDALONE by
+ * virtue of the fact that mv88e6xxx_atu_new() will pick it as
+ * the first free FID. This will be used as the private PVID for
+ * unbridged ports. Shared (DSA and CPU) ports must also be
+ * members of this VID, in order to trap all frames assigned to
+ * it to the CPU.
+ */
+ err = mv88e6xxx_port_vlan_join(chip, port, MV88E6XXX_VID_STANDALONE,
+ MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNMODIFIED,
+ false);
if (err)
return err;
@@ -2938,7 +3105,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
* relying on their port default FID.
*/
err = mv88e6xxx_port_vlan_join(chip, port, MV88E6XXX_VID_BRIDGED,
- MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNTAGGED,
+ MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNMODIFIED,
false);
if (err)
return err;
@@ -3582,7 +3749,7 @@ static const struct mv88e6xxx_ops mv88e6085_ops = {
.rmu_disable = mv88e6085_g1_rmu_disable,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
.set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
@@ -3616,7 +3783,7 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
.reset = mv88e6185_g1_reset,
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
.set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
@@ -3632,6 +3799,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
.port_sync_link = mv88e6185_port_sync_link,
.port_set_speed_duplex = mv88e6185_port_set_speed_duplex,
.port_tag_remap = mv88e6095_port_tag_remap,
+ .port_set_policy = mv88e6352_port_set_policy,
.port_set_frame_mode = mv88e6351_port_set_frame_mode,
.port_set_ucast_flood = mv88e6352_port_set_ucast_flood,
.port_set_mcast_flood = mv88e6352_port_set_mcast_flood,
@@ -3662,7 +3830,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
.rmu_disable = mv88e6085_g1_rmu_disable,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
.set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
@@ -3699,7 +3867,7 @@ static const struct mv88e6xxx_ops mv88e6123_ops = {
.atu_set_hash = mv88e6165_g1_atu_set_hash,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
.set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
@@ -3740,7 +3908,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = {
.reset = mv88e6185_g1_reset,
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6141_ops = {
@@ -3804,7 +3972,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
.serdes_get_stats = mv88e6390_serdes_get_stats,
.serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
.serdes_get_regs = mv88e6390_serdes_get_regs,
- .phylink_validate = mv88e6341_phylink_validate,
+ .phylink_get_caps = mv88e6341_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6161_ops = {
@@ -3846,7 +4014,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.avb_ops = &mv88e6165_avb_ops,
.ptp_ops = &mv88e6165_ptp_ops,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
.set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
@@ -3882,7 +4050,7 @@ static const struct mv88e6xxx_ops mv88e6165_ops = {
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.avb_ops = &mv88e6165_avb_ops,
.ptp_ops = &mv88e6165_ptp_ops,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6171_ops = {
@@ -3924,7 +4092,7 @@ static const struct mv88e6xxx_ops mv88e6171_ops = {
.atu_set_hash = mv88e6165_g1_atu_set_hash,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6172_ops = {
@@ -3979,7 +4147,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
.serdes_get_regs_len = mv88e6352_serdes_get_regs_len,
.serdes_get_regs = mv88e6352_serdes_get_regs,
.gpio_ops = &mv88e6352_gpio_ops,
- .phylink_validate = mv88e6352_phylink_validate,
+ .phylink_get_caps = mv88e6352_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6175_ops = {
@@ -4021,7 +4189,7 @@ static const struct mv88e6xxx_ops mv88e6175_ops = {
.atu_set_hash = mv88e6165_g1_atu_set_hash,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6176_ops = {
@@ -4079,7 +4247,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
.serdes_get_regs_len = mv88e6352_serdes_get_regs_len,
.serdes_get_regs = mv88e6352_serdes_get_regs,
.gpio_ops = &mv88e6352_gpio_ops,
- .phylink_validate = mv88e6352_phylink_validate,
+ .phylink_get_caps = mv88e6352_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6185_ops = {
@@ -4118,7 +4286,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = {
.reset = mv88e6185_g1_reset,
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
.set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
@@ -4180,7 +4348,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
.serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
.serdes_get_regs = mv88e6390_serdes_get_regs,
.gpio_ops = &mv88e6352_gpio_ops,
- .phylink_validate = mv88e6390_phylink_validate,
+ .phylink_get_caps = mv88e6390_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6190x_ops = {
@@ -4241,7 +4409,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
.serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
.serdes_get_regs = mv88e6390_serdes_get_regs,
.gpio_ops = &mv88e6352_gpio_ops,
- .phylink_validate = mv88e6390x_phylink_validate,
+ .phylink_get_caps = mv88e6390x_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6191_ops = {
@@ -4301,7 +4469,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
.serdes_get_regs = mv88e6390_serdes_get_regs,
.avb_ops = &mv88e6390_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
- .phylink_validate = mv88e6390_phylink_validate,
+ .phylink_get_caps = mv88e6390_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6240_ops = {
@@ -4361,7 +4529,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
.gpio_ops = &mv88e6352_gpio_ops,
.avb_ops = &mv88e6352_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
- .phylink_validate = mv88e6352_phylink_validate,
+ .phylink_get_caps = mv88e6352_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6250_ops = {
@@ -4401,7 +4569,7 @@ static const struct mv88e6xxx_ops mv88e6250_ops = {
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
.avb_ops = &mv88e6352_avb_ops,
.ptp_ops = &mv88e6250_ptp_ops,
- .phylink_validate = mv88e6065_phylink_validate,
+ .phylink_get_caps = mv88e6250_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6290_ops = {
@@ -4463,7 +4631,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
.gpio_ops = &mv88e6352_gpio_ops,
.avb_ops = &mv88e6390_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
- .phylink_validate = mv88e6390_phylink_validate,
+ .phylink_get_caps = mv88e6390_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6320_ops = {
@@ -4507,7 +4675,7 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
.gpio_ops = &mv88e6352_gpio_ops,
.avb_ops = &mv88e6352_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6321_ops = {
@@ -4549,7 +4717,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
.gpio_ops = &mv88e6352_gpio_ops,
.avb_ops = &mv88e6352_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6341_ops = {
@@ -4615,7 +4783,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
.serdes_get_stats = mv88e6390_serdes_get_stats,
.serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
.serdes_get_regs = mv88e6390_serdes_get_regs,
- .phylink_validate = mv88e6341_phylink_validate,
+ .phylink_get_caps = mv88e6341_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6350_ops = {
@@ -4657,7 +4825,7 @@ static const struct mv88e6xxx_ops mv88e6350_ops = {
.atu_set_hash = mv88e6165_g1_atu_set_hash,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6351_ops = {
@@ -4701,7 +4869,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.avb_ops = &mv88e6352_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
- .phylink_validate = mv88e6185_phylink_validate,
+ .phylink_get_caps = mv88e6185_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6352_ops = {
@@ -4764,7 +4932,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
.serdes_get_stats = mv88e6352_serdes_get_stats,
.serdes_get_regs_len = mv88e6352_serdes_get_regs_len,
.serdes_get_regs = mv88e6352_serdes_get_regs,
- .phylink_validate = mv88e6352_phylink_validate,
+ .phylink_get_caps = mv88e6352_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6390_ops = {
@@ -4829,7 +4997,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
.serdes_get_stats = mv88e6390_serdes_get_stats,
.serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
.serdes_get_regs = mv88e6390_serdes_get_regs,
- .phylink_validate = mv88e6390_phylink_validate,
+ .phylink_get_caps = mv88e6390_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6390x_ops = {
@@ -4893,7 +5061,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
.gpio_ops = &mv88e6352_gpio_ops,
.avb_ops = &mv88e6390_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
- .phylink_validate = mv88e6390x_phylink_validate,
+ .phylink_get_caps = mv88e6390x_phylink_get_caps,
};
static const struct mv88e6xxx_ops mv88e6393x_ops = {
@@ -4957,7 +5125,7 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = {
.gpio_ops = &mv88e6352_gpio_ops,
.avb_ops = &mv88e6390_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
- .phylink_validate = mv88e6393x_phylink_validate,
+ .phylink_get_caps = mv88e6393x_phylink_get_caps,
};
static const struct mv88e6xxx_info mv88e6xxx_table[] = {
@@ -6226,7 +6394,7 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.teardown = mv88e6xxx_teardown,
.port_setup = mv88e6xxx_port_setup,
.port_teardown = mv88e6xxx_port_teardown,
- .phylink_validate = mv88e6xxx_validate,
+ .phylink_get_caps = mv88e6xxx_get_caps,
.phylink_mac_link_state = mv88e6xxx_serdes_pcs_get_state,
.phylink_mac_config = mv88e6xxx_mac_config,
.phylink_mac_an_restart = mv88e6xxx_serdes_pcs_an_restart,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 8271b8aa7b71..12aa637779f5 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -179,6 +179,7 @@ struct mv88e6xxx_vtu_entry {
u16 fid;
u8 sid;
bool valid;
+ bool policy;
u8 member[DSA_MAX_PORTS];
u8 state[DSA_MAX_PORTS];
};
@@ -392,6 +393,7 @@ struct mv88e6xxx_chip {
struct mv88e6xxx_bus_ops {
int (*read)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
int (*write)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
+ int (*init)(struct mv88e6xxx_chip *chip);
};
struct mv88e6xxx_mdio_bus {
@@ -609,9 +611,8 @@ struct mv88e6xxx_ops {
const struct mv88e6xxx_ptp_ops *ptp_ops;
/* Phylink */
- void (*phylink_validate)(struct mv88e6xxx_chip *chip, int port,
- unsigned long *mask,
- struct phylink_link_state *state);
+ void (*phylink_get_caps)(struct mv88e6xxx_chip *chip, int port,
+ struct phylink_config *config);
/* Max Frame Size */
int (*set_max_frame_size)(struct mv88e6xxx_chip *chip, int mtu);
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index 4f3dbb015f77..2c1607c858a1 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -46,6 +46,7 @@
/* Offset 0x02: VTU FID Register */
#define MV88E6352_G1_VTU_FID 0x02
+#define MV88E6352_G1_VTU_FID_VID_POLICY 0x1000
#define MV88E6352_G1_VTU_FID_MASK 0x0fff
/* Offset 0x03: VTU SID Register */
diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
index ae12c981923e..b1bd9274a562 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
@@ -27,7 +27,7 @@ static int mv88e6xxx_g1_vtu_fid_read(struct mv88e6xxx_chip *chip,
return err;
entry->fid = val & MV88E6352_G1_VTU_FID_MASK;
-
+ entry->policy = !!(val & MV88E6352_G1_VTU_FID_VID_POLICY);
return 0;
}
@@ -36,6 +36,9 @@ static int mv88e6xxx_g1_vtu_fid_write(struct mv88e6xxx_chip *chip,
{
u16 val = entry->fid & MV88E6352_G1_VTU_FID_MASK;
+ if (entry->policy)
+ val |= MV88E6352_G1_VTU_FID_VID_POLICY;
+
return mv88e6xxx_g1_write(chip, MV88E6352_G1_VTU_FID, val);
}
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index f3e27573a386..807aeaad9830 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -299,6 +299,8 @@
#define MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU BIT(2)
#define MV88E6352_G2_SCRATCH_CONFIG_DATA2 0x72
#define MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK 0x3
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA3 0x73
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA3_S_SEL BIT(1)
#define MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO 0
#define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG 1
@@ -370,6 +372,7 @@ extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops;
int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
bool external);
+int mv88e6352_g2_scratch_port_has_serdes(struct mv88e6xxx_chip *chip, int port);
int mv88e6xxx_g2_atu_stats_set(struct mv88e6xxx_chip *chip, u16 kind, u16 bin);
int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip, u16 *stats);
diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
index eda710062933..a9d6e40321a2 100644
--- a/drivers/net/dsa/mv88e6xxx/global2_scratch.c
+++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
@@ -289,3 +289,31 @@ int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
return mv88e6xxx_g2_scratch_write(chip, misc_cfg, val);
}
+
+/**
+ * mv88e6352_g2_scratch_port_has_serdes - indicate if a port can have a serdes
+ * @chip: chip private data
+ * @port: port number to check for serdes
+ *
+ * Indicates whether the port may have a serdes attached according to the
+ * pin strapping. Returns negative error number, 0 if the port is not
+ * configured to have a serdes, and 1 if the port is configured to have a
+ * serdes attached.
+ */
+int mv88e6352_g2_scratch_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
+{
+ u8 config3, p;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_read(chip, MV88E6352_G2_SCRATCH_CONFIG_DATA3,
+ &config3);
+ if (err)
+ return err;
+
+ if (config3 & MV88E6352_G2_SCRATCH_CONFIG_DATA3_S_SEL)
+ p = 5;
+ else
+ p = 4;
+
+ return port == p;
+}
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index ab41619a809b..ceb450113f88 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -1278,7 +1278,7 @@ int mv88e6xxx_port_drop_untagged(struct mv88e6xxx_chip *chip, int port,
return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL2, new);
}
-int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port)
+int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port, bool map)
{
u16 reg;
int err;
@@ -1287,7 +1287,10 @@ int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port)
if (err)
return err;
- reg |= MV88E6XXX_PORT_CTL2_MAP_DA;
+ if (map)
+ reg |= MV88E6XXX_PORT_CTL2_MAP_DA;
+ else
+ reg &= ~MV88E6XXX_PORT_CTL2_MAP_DA;
return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL2, reg);
}
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index 03382b66f800..3a13db2ec27b 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -42,6 +42,11 @@
#define MV88E6XXX_PORT_STS_TX_PAUSED 0x0020
#define MV88E6XXX_PORT_STS_FLOW_CTL 0x0010
#define MV88E6XXX_PORT_STS_CMODE_MASK 0x000f
+#define MV88E6XXX_PORT_STS_CMODE_MII_PHY 0x0001
+#define MV88E6XXX_PORT_STS_CMODE_MII 0x0002
+#define MV88E6XXX_PORT_STS_CMODE_GMII 0x0003
+#define MV88E6XXX_PORT_STS_CMODE_RMII_PHY 0x0004
+#define MV88E6XXX_PORT_STS_CMODE_RMII 0x0005
#define MV88E6XXX_PORT_STS_CMODE_RGMII 0x0007
#define MV88E6XXX_PORT_STS_CMODE_100BASEX 0x0008
#define MV88E6XXX_PORT_STS_CMODE_1000BASEX 0x0009
@@ -425,7 +430,7 @@ int mv88e6185_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode);
int mv88e6352_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode);
int mv88e6xxx_port_drop_untagged(struct mv88e6xxx_chip *chip, int port,
bool drop_untagged);
-int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port);
+int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port, bool map);
int mv88e6095_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port,
int upstream_port);
int mv88e6xxx_port_set_mirror(struct mv88e6xxx_chip *chip, int port,
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 2b05ead515cd..6a177bf654ee 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -272,14 +272,6 @@ int mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
return lane;
}
-static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
-{
- if (mv88e6xxx_serdes_get_lane(chip, port) >= 0)
- return true;
-
- return false;
-}
-
struct mv88e6352_serdes_hw_stat {
char string[ETH_GSTRING_LEN];
int sizeof_stat;
@@ -293,20 +285,24 @@ static struct mv88e6352_serdes_hw_stat mv88e6352_serdes_hw_stats[] = {
int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port)
{
- if (mv88e6352_port_has_serdes(chip, port))
- return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
+ int err;
- return 0;
+ err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+ if (err <= 0)
+ return err;
+
+ return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
}
int mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
int port, uint8_t *data)
{
struct mv88e6352_serdes_hw_stat *stat;
- int i;
+ int err, i;
- if (!mv88e6352_port_has_serdes(chip, port))
- return 0;
+ err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+ if (err <= 0)
+ return err;
for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
stat = &mv88e6352_serdes_hw_stats[i];
@@ -348,11 +344,12 @@ int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
{
struct mv88e6xxx_port *mv88e6xxx_port = &chip->ports[port];
struct mv88e6352_serdes_hw_stat *stat;
+ int i, err;
u64 value;
- int i;
- if (!mv88e6352_port_has_serdes(chip, port))
- return 0;
+ err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+ if (err <= 0)
+ return err;
BUILD_BUG_ON(ARRAY_SIZE(mv88e6352_serdes_hw_stats) >
ARRAY_SIZE(mv88e6xxx_port->serdes_stats));
@@ -419,8 +416,13 @@ unsigned int mv88e6352_serdes_irq_mapping(struct mv88e6xxx_chip *chip, int port)
int mv88e6352_serdes_get_regs_len(struct mv88e6xxx_chip *chip, int port)
{
- if (!mv88e6352_port_has_serdes(chip, port))
- return 0;
+ int err;
+
+ mv88e6xxx_reg_lock(chip);
+ err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+ mv88e6xxx_reg_unlock(chip);
+ if (err <= 0)
+ return err;
return 32 * sizeof(u16);
}
@@ -432,7 +434,8 @@ void mv88e6352_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p)
int err;
int i;
- if (!mv88e6352_port_has_serdes(chip, port))
+ err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+ if (err <= 0)
return;
for (i = 0 ; i < 32; i++) {
diff --git a/drivers/net/dsa/mv88e6xxx/smi.c b/drivers/net/dsa/mv88e6xxx/smi.c
index 282fe08db050..a990271b7482 100644
--- a/drivers/net/dsa/mv88e6xxx/smi.c
+++ b/drivers/net/dsa/mv88e6xxx/smi.c
@@ -55,11 +55,15 @@ static int mv88e6xxx_smi_direct_write(struct mv88e6xxx_chip *chip,
static int mv88e6xxx_smi_direct_wait(struct mv88e6xxx_chip *chip,
int dev, int reg, int bit, int val)
{
+ const unsigned long timeout = jiffies + msecs_to_jiffies(50);
u16 data;
int err;
int i;
- for (i = 0; i < 16; i++) {
+ /* Even if the initial poll takes longer than 50ms, always do
+ * at least one more attempt.
+ */
+ for (i = 0; time_before(jiffies, timeout) || (i < 2); i++) {
err = mv88e6xxx_smi_direct_read(chip, dev, reg, &data);
if (err)
return err;
@@ -67,7 +71,10 @@ static int mv88e6xxx_smi_direct_wait(struct mv88e6xxx_chip *chip,
if (!!(data & BIT(bit)) == !!val)
return 0;
- usleep_range(1000, 2000);
+ if (i < 2)
+ cpu_relax();
+ else
+ usleep_range(1000, 2000);
}
return -ETIMEDOUT;
@@ -104,11 +111,6 @@ static int mv88e6xxx_smi_indirect_read(struct mv88e6xxx_chip *chip,
{
int err;
- err = mv88e6xxx_smi_direct_wait(chip, chip->sw_addr,
- MV88E6XXX_SMI_CMD, 15, 0);
- if (err)
- return err;
-
err = mv88e6xxx_smi_direct_write(chip, chip->sw_addr,
MV88E6XXX_SMI_CMD,
MV88E6XXX_SMI_CMD_BUSY |
@@ -132,11 +134,6 @@ static int mv88e6xxx_smi_indirect_write(struct mv88e6xxx_chip *chip,
{
int err;
- err = mv88e6xxx_smi_direct_wait(chip, chip->sw_addr,
- MV88E6XXX_SMI_CMD, 15, 0);
- if (err)
- return err;
-
err = mv88e6xxx_smi_direct_write(chip, chip->sw_addr,
MV88E6XXX_SMI_DATA, data);
if (err)
@@ -155,9 +152,20 @@ static int mv88e6xxx_smi_indirect_write(struct mv88e6xxx_chip *chip,
MV88E6XXX_SMI_CMD, 15, 0);
}
+static int mv88e6xxx_smi_indirect_init(struct mv88e6xxx_chip *chip)
+{
+ /* Ensure that the chip starts out in the ready state. As both
+ * reads and writes always ensure this on return, they can
+ * safely depend on the chip not being busy on entry.
+ */
+ return mv88e6xxx_smi_direct_wait(chip, chip->sw_addr,
+ MV88E6XXX_SMI_CMD, 15, 0);
+}
+
static const struct mv88e6xxx_bus_ops mv88e6xxx_smi_indirect_ops = {
.read = mv88e6xxx_smi_indirect_read,
.write = mv88e6xxx_smi_indirect_write,
+ .init = mv88e6xxx_smi_indirect_init,
};
int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
@@ -175,5 +183,8 @@ int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
chip->bus = bus;
chip->sw_addr = sw_addr;
+ if (chip->smi_ops->init)
+ return chip->smi_ops->init(chip);
+
return 0;
}
diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c
index c39de2a4c1fe..e5098cfe44bc 100644
--- a/drivers/net/dsa/qca/ar9331.c
+++ b/drivers/net/dsa/qca/ar9331.c
@@ -499,52 +499,27 @@ static enum dsa_tag_protocol ar9331_sw_get_tag_protocol(struct dsa_switch *ds,
return DSA_TAG_PROTO_AR9331;
}
-static void ar9331_sw_phylink_validate(struct dsa_switch *ds, int port,
- unsigned long *supported,
- struct phylink_link_state *state)
+static void ar9331_sw_phylink_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+ config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100;
switch (port) {
case 0:
- if (state->interface != PHY_INTERFACE_MODE_GMII)
- goto unsupported;
-
- phylink_set(mask, 1000baseT_Full);
- phylink_set(mask, 1000baseT_Half);
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ config->supported_interfaces);
+ config->mac_capabilities |= MAC_1000;
break;
case 1:
case 2:
case 3:
case 4:
case 5:
- if (state->interface != PHY_INTERFACE_MODE_INTERNAL)
- goto unsupported;
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+ config->supported_interfaces);
break;
- default:
- linkmode_zero(supported);
- dev_err(ds->dev, "Unsupported port: %i\n", port);
- return;
}
-
- phylink_set_port_modes(mask);
- phylink_set(mask, Pause);
- phylink_set(mask, Asym_Pause);
-
- phylink_set(mask, 10baseT_Half);
- phylink_set(mask, 10baseT_Full);
- phylink_set(mask, 100baseT_Half);
- phylink_set(mask, 100baseT_Full);
-
- linkmode_and(supported, supported, mask);
- linkmode_and(state->advertising, state->advertising, mask);
-
- return;
-
-unsupported:
- linkmode_zero(supported);
- dev_err(ds->dev, "Unsupported interface: %d, port: %d\n",
- state->interface, port);
}
static void ar9331_sw_phylink_mac_config(struct dsa_switch *ds, int port,
@@ -697,7 +672,7 @@ static const struct dsa_switch_ops ar9331_sw_ops = {
.get_tag_protocol = ar9331_sw_get_tag_protocol,
.setup = ar9331_sw_setup,
.port_disable = ar9331_sw_port_disable,
- .phylink_validate = ar9331_sw_phylink_validate,
+ .phylink_get_caps = ar9331_sw_phylink_get_caps,
.phylink_mac_config = ar9331_sw_phylink_mac_config,
.phylink_mac_link_down = ar9331_sw_phylink_mac_link_down,
.phylink_mac_link_up = ar9331_sw_phylink_mac_link_up,
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 039694518788..c09d1569e66b 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -20,6 +20,7 @@
#include <linux/phylink.h>
#include <linux/gpio/consumer.h>
#include <linux/etherdevice.h>
+#include <linux/dsa/tag_qca.h>
#include "qca8k.h"
@@ -74,12 +75,6 @@ static const struct qca8k_mib_desc ar8327_mib[] = {
MIB_DESC(1, 0xac, "TXUnicast"),
};
-/* The 32bit switch registers are accessed indirectly. To achieve this we need
- * to set the page of the register. Track the last page that was set to reduce
- * mdio writes
- */
-static u16 qca8k_current_page = 0xffff;
-
static void
qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page)
{
@@ -94,6 +89,44 @@ qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page)
}
static int
+qca8k_set_lo(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 lo)
+{
+ u16 *cached_lo = &priv->mdio_cache.lo;
+ struct mii_bus *bus = priv->bus;
+ int ret;
+
+ if (lo == *cached_lo)
+ return 0;
+
+ ret = bus->write(bus, phy_id, regnum, lo);
+ if (ret < 0)
+ dev_err_ratelimited(&bus->dev,
+ "failed to write qca8k 32bit lo register\n");
+
+ *cached_lo = lo;
+ return 0;
+}
+
+static int
+qca8k_set_hi(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 hi)
+{
+ u16 *cached_hi = &priv->mdio_cache.hi;
+ struct mii_bus *bus = priv->bus;
+ int ret;
+
+ if (hi == *cached_hi)
+ return 0;
+
+ ret = bus->write(bus, phy_id, regnum, hi);
+ if (ret < 0)
+ dev_err_ratelimited(&bus->dev,
+ "failed to write qca8k 32bit hi register\n");
+
+ *cached_hi = hi;
+ return 0;
+}
+
+static int
qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val)
{
int ret;
@@ -116,7 +149,7 @@ qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val)
}
static void
-qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val)
+qca8k_mii_write32(struct qca8k_priv *priv, int phy_id, u32 regnum, u32 val)
{
u16 lo, hi;
int ret;
@@ -124,20 +157,19 @@ qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val)
lo = val & 0xffff;
hi = (u16)(val >> 16);
- ret = bus->write(bus, phy_id, regnum, lo);
+ ret = qca8k_set_lo(priv, phy_id, regnum, lo);
if (ret >= 0)
- ret = bus->write(bus, phy_id, regnum + 1, hi);
- if (ret < 0)
- dev_err_ratelimited(&bus->dev,
- "failed to write qca8k 32bit register\n");
+ ret = qca8k_set_hi(priv, phy_id, regnum + 1, hi);
}
static int
-qca8k_set_page(struct mii_bus *bus, u16 page)
+qca8k_set_page(struct qca8k_priv *priv, u16 page)
{
+ u16 *cached_page = &priv->mdio_cache.page;
+ struct mii_bus *bus = priv->bus;
int ret;
- if (page == qca8k_current_page)
+ if (page == *cached_page)
return 0;
ret = bus->write(bus, 0x18, 0, page);
@@ -147,7 +179,7 @@ qca8k_set_page(struct mii_bus *bus, u16 page)
return ret;
}
- qca8k_current_page = page;
+ *cached_page = page;
usleep_range(1000, 2000);
return 0;
}
@@ -170,6 +202,252 @@ qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 write_val)
return regmap_update_bits(priv->regmap, reg, mask, write_val);
}
+static void qca8k_rw_reg_ack_handler(struct dsa_switch *ds, struct sk_buff *skb)
+{
+ struct qca8k_mgmt_eth_data *mgmt_eth_data;
+ struct qca8k_priv *priv = ds->priv;
+ struct qca_mgmt_ethhdr *mgmt_ethhdr;
+ u8 len, cmd;
+
+ mgmt_ethhdr = (struct qca_mgmt_ethhdr *)skb_mac_header(skb);
+ mgmt_eth_data = &priv->mgmt_eth_data;
+
+ cmd = FIELD_GET(QCA_HDR_MGMT_CMD, mgmt_ethhdr->command);
+ len = FIELD_GET(QCA_HDR_MGMT_LENGTH, mgmt_ethhdr->command);
+
+ /* Make sure the seq match the requested packet */
+ if (mgmt_ethhdr->seq == mgmt_eth_data->seq)
+ mgmt_eth_data->ack = true;
+
+ if (cmd == MDIO_READ) {
+ mgmt_eth_data->data[0] = mgmt_ethhdr->mdio_data;
+
+ /* Get the rest of the 12 byte of data.
+ * The read/write function will extract the requested data.
+ */
+ if (len > QCA_HDR_MGMT_DATA1_LEN)
+ memcpy(mgmt_eth_data->data + 1, skb->data,
+ QCA_HDR_MGMT_DATA2_LEN);
+ }
+
+ complete(&mgmt_eth_data->rw_done);
+}
+
+static struct sk_buff *qca8k_alloc_mdio_header(enum mdio_cmd cmd, u32 reg, u32 *val,
+ int priority, unsigned int len)
+{
+ struct qca_mgmt_ethhdr *mgmt_ethhdr;
+ unsigned int real_len;
+ struct sk_buff *skb;
+ u32 *data2;
+ u16 hdr;
+
+ skb = dev_alloc_skb(QCA_HDR_MGMT_PKT_LEN);
+ if (!skb)
+ return NULL;
+
+ /* Max value for len reg is 15 (0xf) but the switch actually return 16 byte
+ * Actually for some reason the steps are:
+ * 0: nothing
+ * 1-4: first 4 byte
+ * 5-6: first 12 byte
+ * 7-15: all 16 byte
+ */
+ if (len == 16)
+ real_len = 15;
+ else
+ real_len = len;
+
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, skb->len);
+
+ mgmt_ethhdr = skb_push(skb, QCA_HDR_MGMT_HEADER_LEN + QCA_HDR_LEN);
+
+ hdr = FIELD_PREP(QCA_HDR_XMIT_VERSION, QCA_HDR_VERSION);
+ hdr |= FIELD_PREP(QCA_HDR_XMIT_PRIORITY, priority);
+ hdr |= QCA_HDR_XMIT_FROM_CPU;
+ hdr |= FIELD_PREP(QCA_HDR_XMIT_DP_BIT, BIT(0));
+ hdr |= FIELD_PREP(QCA_HDR_XMIT_CONTROL, QCA_HDR_XMIT_TYPE_RW_REG);
+
+ mgmt_ethhdr->command = FIELD_PREP(QCA_HDR_MGMT_ADDR, reg);
+ mgmt_ethhdr->command |= FIELD_PREP(QCA_HDR_MGMT_LENGTH, real_len);
+ mgmt_ethhdr->command |= FIELD_PREP(QCA_HDR_MGMT_CMD, cmd);
+ mgmt_ethhdr->command |= FIELD_PREP(QCA_HDR_MGMT_CHECK_CODE,
+ QCA_HDR_MGMT_CHECK_CODE_VAL);
+
+ if (cmd == MDIO_WRITE)
+ mgmt_ethhdr->mdio_data = *val;
+
+ mgmt_ethhdr->hdr = htons(hdr);
+
+ data2 = skb_put_zero(skb, QCA_HDR_MGMT_DATA2_LEN + QCA_HDR_MGMT_PADDING_LEN);
+ if (cmd == MDIO_WRITE && len > QCA_HDR_MGMT_DATA1_LEN)
+ memcpy(data2, val + 1, len - QCA_HDR_MGMT_DATA1_LEN);
+
+ return skb;
+}
+
+static void qca8k_mdio_header_fill_seq_num(struct sk_buff *skb, u32 seq_num)
+{
+ struct qca_mgmt_ethhdr *mgmt_ethhdr;
+
+ mgmt_ethhdr = (struct qca_mgmt_ethhdr *)skb->data;
+ mgmt_ethhdr->seq = FIELD_PREP(QCA_HDR_MGMT_SEQ_NUM, seq_num);
+}
+
+static int qca8k_read_eth(struct qca8k_priv *priv, u32 reg, u32 *val, int len)
+{
+ struct qca8k_mgmt_eth_data *mgmt_eth_data = &priv->mgmt_eth_data;
+ struct sk_buff *skb;
+ bool ack;
+ int ret;
+
+ skb = qca8k_alloc_mdio_header(MDIO_READ, reg, NULL,
+ QCA8K_ETHERNET_MDIO_PRIORITY, len);
+ if (!skb)
+ return -ENOMEM;
+
+ mutex_lock(&mgmt_eth_data->mutex);
+
+ /* Check mgmt_master if is operational */
+ if (!priv->mgmt_master) {
+ kfree_skb(skb);
+ mutex_unlock(&mgmt_eth_data->mutex);
+ return -EINVAL;
+ }
+
+ skb->dev = priv->mgmt_master;
+
+ reinit_completion(&mgmt_eth_data->rw_done);
+
+ /* Increment seq_num and set it in the mdio pkt */
+ mgmt_eth_data->seq++;
+ qca8k_mdio_header_fill_seq_num(skb, mgmt_eth_data->seq);
+ mgmt_eth_data->ack = false;
+
+ dev_queue_xmit(skb);
+
+ ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done,
+ msecs_to_jiffies(QCA8K_ETHERNET_TIMEOUT));
+
+ *val = mgmt_eth_data->data[0];
+ if (len > QCA_HDR_MGMT_DATA1_LEN)
+ memcpy(val + 1, mgmt_eth_data->data + 1, len - QCA_HDR_MGMT_DATA1_LEN);
+
+ ack = mgmt_eth_data->ack;
+
+ mutex_unlock(&mgmt_eth_data->mutex);
+
+ if (ret <= 0)
+ return -ETIMEDOUT;
+
+ if (!ack)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int qca8k_write_eth(struct qca8k_priv *priv, u32 reg, u32 *val, int len)
+{
+ struct qca8k_mgmt_eth_data *mgmt_eth_data = &priv->mgmt_eth_data;
+ struct sk_buff *skb;
+ bool ack;
+ int ret;
+
+ skb = qca8k_alloc_mdio_header(MDIO_WRITE, reg, val,
+ QCA8K_ETHERNET_MDIO_PRIORITY, len);
+ if (!skb)
+ return -ENOMEM;
+
+ mutex_lock(&mgmt_eth_data->mutex);
+
+ /* Check mgmt_master if is operational */
+ if (!priv->mgmt_master) {
+ kfree_skb(skb);
+ mutex_unlock(&mgmt_eth_data->mutex);
+ return -EINVAL;
+ }
+
+ skb->dev = priv->mgmt_master;
+
+ reinit_completion(&mgmt_eth_data->rw_done);
+
+ /* Increment seq_num and set it in the mdio pkt */
+ mgmt_eth_data->seq++;
+ qca8k_mdio_header_fill_seq_num(skb, mgmt_eth_data->seq);
+ mgmt_eth_data->ack = false;
+
+ dev_queue_xmit(skb);
+
+ ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done,
+ msecs_to_jiffies(QCA8K_ETHERNET_TIMEOUT));
+
+ ack = mgmt_eth_data->ack;
+
+ mutex_unlock(&mgmt_eth_data->mutex);
+
+ if (ret <= 0)
+ return -ETIMEDOUT;
+
+ if (!ack)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+qca8k_regmap_update_bits_eth(struct qca8k_priv *priv, u32 reg, u32 mask, u32 write_val)
+{
+ u32 val = 0;
+ int ret;
+
+ ret = qca8k_read_eth(priv, reg, &val, sizeof(val));
+ if (ret)
+ return ret;
+
+ val &= ~mask;
+ val |= write_val;
+
+ return qca8k_write_eth(priv, reg, &val, sizeof(val));
+}
+
+static int
+qca8k_bulk_read(struct qca8k_priv *priv, u32 reg, u32 *val, int len)
+{
+ int i, count = len / sizeof(u32), ret;
+
+ if (priv->mgmt_master && !qca8k_read_eth(priv, reg, val, len))
+ return 0;
+
+ for (i = 0; i < count; i++) {
+ ret = regmap_read(priv->regmap, reg + (i * 4), val + i);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+qca8k_bulk_write(struct qca8k_priv *priv, u32 reg, u32 *val, int len)
+{
+ int i, count = len / sizeof(u32), ret;
+ u32 tmp;
+
+ if (priv->mgmt_master && !qca8k_write_eth(priv, reg, val, len))
+ return 0;
+
+ for (i = 0; i < count; i++) {
+ tmp = val[i];
+
+ ret = regmap_write(priv->regmap, reg + (i * 4), tmp);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
static int
qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val)
{
@@ -178,11 +456,14 @@ qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val)
u16 r1, r2, page;
int ret;
+ if (!qca8k_read_eth(priv, reg, val, sizeof(*val)))
+ return 0;
+
qca8k_split_addr(reg, &r1, &r2, &page);
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
- ret = qca8k_set_page(bus, page);
+ ret = qca8k_set_page(priv, page);
if (ret < 0)
goto exit;
@@ -201,15 +482,18 @@ qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val)
u16 r1, r2, page;
int ret;
+ if (!qca8k_write_eth(priv, reg, &val, sizeof(val)))
+ return 0;
+
qca8k_split_addr(reg, &r1, &r2, &page);
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
- ret = qca8k_set_page(bus, page);
+ ret = qca8k_set_page(priv, page);
if (ret < 0)
goto exit;
- qca8k_mii_write32(bus, 0x10 | r2, r1, val);
+ qca8k_mii_write32(priv, 0x10 | r2, r1, val);
exit:
mutex_unlock(&bus->mdio_lock);
@@ -225,11 +509,14 @@ qca8k_regmap_update_bits(void *ctx, uint32_t reg, uint32_t mask, uint32_t write_
u32 val;
int ret;
+ if (!qca8k_regmap_update_bits_eth(priv, reg, mask, write_val))
+ return 0;
+
qca8k_split_addr(reg, &r1, &r2, &page);
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
- ret = qca8k_set_page(bus, page);
+ ret = qca8k_set_page(priv, page);
if (ret < 0)
goto exit;
@@ -239,7 +526,7 @@ qca8k_regmap_update_bits(void *ctx, uint32_t reg, uint32_t mask, uint32_t write_
val &= ~mask;
val |= write_val;
- qca8k_mii_write32(bus, 0x10 | r2, r1, val);
+ qca8k_mii_write32(priv, 0x10 | r2, r1, val);
exit:
mutex_unlock(&bus->mdio_lock);
@@ -296,17 +583,13 @@ qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask)
static int
qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb)
{
- u32 reg[4], val;
- int i, ret;
+ u32 reg[3];
+ int ret;
/* load the ARL table into an array */
- for (i = 0; i < 4; i++) {
- ret = qca8k_read(priv, QCA8K_REG_ATU_DATA0 + (i * 4), &val);
- if (ret < 0)
- return ret;
-
- reg[i] = val;
- }
+ ret = qca8k_bulk_read(priv, QCA8K_REG_ATU_DATA0, reg, sizeof(reg));
+ if (ret)
+ return ret;
/* vid - 83:72 */
fdb->vid = FIELD_GET(QCA8K_ATU_VID_MASK, reg[2]);
@@ -330,7 +613,6 @@ qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, const u8 *mac,
u8 aging)
{
u32 reg[3] = { 0 };
- int i;
/* vid - 83:72 */
reg[2] = FIELD_PREP(QCA8K_ATU_VID_MASK, vid);
@@ -347,8 +629,7 @@ qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, const u8 *mac,
reg[0] |= FIELD_PREP(QCA8K_ATU_ADDR5_MASK, mac[5]);
/* load the array into the ARL table */
- for (i = 0; i < 3; i++)
- qca8k_write(priv, QCA8K_REG_ATU_DATA0 + (i * 4), reg[i]);
+ qca8k_bulk_write(priv, QCA8K_REG_ATU_DATA0, reg, sizeof(reg));
}
static int
@@ -632,7 +913,10 @@ qca8k_mib_init(struct qca8k_priv *priv)
int ret;
mutex_lock(&priv->reg_mutex);
- ret = regmap_set_bits(priv->regmap, QCA8K_REG_MIB, QCA8K_MIB_FLUSH | QCA8K_MIB_BUSY);
+ ret = regmap_update_bits(priv->regmap, QCA8K_REG_MIB,
+ QCA8K_MIB_FUNC | QCA8K_MIB_BUSY,
+ FIELD_PREP(QCA8K_MIB_FUNC, QCA8K_MIB_FLUSH) |
+ QCA8K_MIB_BUSY);
if (ret)
goto exit;
@@ -666,6 +950,199 @@ qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable)
regmap_clear_bits(priv->regmap, QCA8K_REG_PORT_STATUS(port), mask);
}
+static int
+qca8k_phy_eth_busy_wait(struct qca8k_mgmt_eth_data *mgmt_eth_data,
+ struct sk_buff *read_skb, u32 *val)
+{
+ struct sk_buff *skb = skb_copy(read_skb, GFP_KERNEL);
+ bool ack;
+ int ret;
+
+ reinit_completion(&mgmt_eth_data->rw_done);
+
+ /* Increment seq_num and set it in the copy pkt */
+ mgmt_eth_data->seq++;
+ qca8k_mdio_header_fill_seq_num(skb, mgmt_eth_data->seq);
+ mgmt_eth_data->ack = false;
+
+ dev_queue_xmit(skb);
+
+ ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done,
+ QCA8K_ETHERNET_TIMEOUT);
+
+ ack = mgmt_eth_data->ack;
+
+ if (ret <= 0)
+ return -ETIMEDOUT;
+
+ if (!ack)
+ return -EINVAL;
+
+ *val = mgmt_eth_data->data[0];
+
+ return 0;
+}
+
+static int
+qca8k_phy_eth_command(struct qca8k_priv *priv, bool read, int phy,
+ int regnum, u16 data)
+{
+ struct sk_buff *write_skb, *clear_skb, *read_skb;
+ struct qca8k_mgmt_eth_data *mgmt_eth_data;
+ u32 write_val, clear_val = 0, val;
+ struct net_device *mgmt_master;
+ int ret, ret1;
+ bool ack;
+
+ if (regnum >= QCA8K_MDIO_MASTER_MAX_REG)
+ return -EINVAL;
+
+ mgmt_eth_data = &priv->mgmt_eth_data;
+
+ write_val = QCA8K_MDIO_MASTER_BUSY | QCA8K_MDIO_MASTER_EN |
+ QCA8K_MDIO_MASTER_PHY_ADDR(phy) |
+ QCA8K_MDIO_MASTER_REG_ADDR(regnum);
+
+ if (read) {
+ write_val |= QCA8K_MDIO_MASTER_READ;
+ } else {
+ write_val |= QCA8K_MDIO_MASTER_WRITE;
+ write_val |= QCA8K_MDIO_MASTER_DATA(data);
+ }
+
+ /* Prealloc all the needed skb before the lock */
+ write_skb = qca8k_alloc_mdio_header(MDIO_WRITE, QCA8K_MDIO_MASTER_CTRL, &write_val,
+ QCA8K_ETHERNET_PHY_PRIORITY, sizeof(write_val));
+ if (!write_skb)
+ return -ENOMEM;
+
+ clear_skb = qca8k_alloc_mdio_header(MDIO_WRITE, QCA8K_MDIO_MASTER_CTRL, &clear_val,
+ QCA8K_ETHERNET_PHY_PRIORITY, sizeof(clear_val));
+ if (!clear_skb) {
+ ret = -ENOMEM;
+ goto err_clear_skb;
+ }
+
+ read_skb = qca8k_alloc_mdio_header(MDIO_READ, QCA8K_MDIO_MASTER_CTRL, &clear_val,
+ QCA8K_ETHERNET_PHY_PRIORITY, sizeof(clear_val));
+ if (!read_skb) {
+ ret = -ENOMEM;
+ goto err_read_skb;
+ }
+
+ /* Actually start the request:
+ * 1. Send mdio master packet
+ * 2. Busy Wait for mdio master command
+ * 3. Get the data if we are reading
+ * 4. Reset the mdio master (even with error)
+ */
+ mutex_lock(&mgmt_eth_data->mutex);
+
+ /* Check if mgmt_master is operational */
+ mgmt_master = priv->mgmt_master;
+ if (!mgmt_master) {
+ mutex_unlock(&mgmt_eth_data->mutex);
+ ret = -EINVAL;
+ goto err_mgmt_master;
+ }
+
+ read_skb->dev = mgmt_master;
+ clear_skb->dev = mgmt_master;
+ write_skb->dev = mgmt_master;
+
+ reinit_completion(&mgmt_eth_data->rw_done);
+
+ /* Increment seq_num and set it in the write pkt */
+ mgmt_eth_data->seq++;
+ qca8k_mdio_header_fill_seq_num(write_skb, mgmt_eth_data->seq);
+ mgmt_eth_data->ack = false;
+
+ dev_queue_xmit(write_skb);
+
+ ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done,
+ QCA8K_ETHERNET_TIMEOUT);
+
+ ack = mgmt_eth_data->ack;
+
+ if (ret <= 0) {
+ ret = -ETIMEDOUT;
+ kfree_skb(read_skb);
+ goto exit;
+ }
+
+ if (!ack) {
+ ret = -EINVAL;
+ kfree_skb(read_skb);
+ goto exit;
+ }
+
+ ret = read_poll_timeout(qca8k_phy_eth_busy_wait, ret1,
+ !(val & QCA8K_MDIO_MASTER_BUSY), 0,
+ QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC, false,
+ mgmt_eth_data, read_skb, &val);
+
+ if (ret < 0 && ret1 < 0) {
+ ret = ret1;
+ goto exit;
+ }
+
+ if (read) {
+ reinit_completion(&mgmt_eth_data->rw_done);
+
+ /* Increment seq_num and set it in the read pkt */
+ mgmt_eth_data->seq++;
+ qca8k_mdio_header_fill_seq_num(read_skb, mgmt_eth_data->seq);
+ mgmt_eth_data->ack = false;
+
+ dev_queue_xmit(read_skb);
+
+ ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done,
+ QCA8K_ETHERNET_TIMEOUT);
+
+ ack = mgmt_eth_data->ack;
+
+ if (ret <= 0) {
+ ret = -ETIMEDOUT;
+ goto exit;
+ }
+
+ if (!ack) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ ret = mgmt_eth_data->data[0] & QCA8K_MDIO_MASTER_DATA_MASK;
+ } else {
+ kfree_skb(read_skb);
+ }
+exit:
+ reinit_completion(&mgmt_eth_data->rw_done);
+
+ /* Increment seq_num and set it in the clear pkt */
+ mgmt_eth_data->seq++;
+ qca8k_mdio_header_fill_seq_num(clear_skb, mgmt_eth_data->seq);
+ mgmt_eth_data->ack = false;
+
+ dev_queue_xmit(clear_skb);
+
+ wait_for_completion_timeout(&mgmt_eth_data->rw_done,
+ QCA8K_ETHERNET_TIMEOUT);
+
+ mutex_unlock(&mgmt_eth_data->mutex);
+
+ return ret;
+
+ /* Error handling before lock */
+err_mgmt_master:
+ kfree_skb(read_skb);
+err_read_skb:
+ kfree_skb(clear_skb);
+err_clear_skb:
+ kfree_skb(write_skb);
+
+ return ret;
+}
+
static u32
qca8k_port_to_phy(int port)
{
@@ -704,8 +1181,9 @@ qca8k_mdio_busy_wait(struct mii_bus *bus, u32 reg, u32 mask)
}
static int
-qca8k_mdio_write(struct mii_bus *bus, int phy, int regnum, u16 data)
+qca8k_mdio_write(struct qca8k_priv *priv, int phy, int regnum, u16 data)
{
+ struct mii_bus *bus = priv->bus;
u16 r1, r2, page;
u32 val;
int ret;
@@ -722,18 +1200,18 @@ qca8k_mdio_write(struct mii_bus *bus, int phy, int regnum, u16 data)
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
- ret = qca8k_set_page(bus, page);
+ ret = qca8k_set_page(priv, page);
if (ret)
goto exit;
- qca8k_mii_write32(bus, 0x10 | r2, r1, val);
+ qca8k_mii_write32(priv, 0x10 | r2, r1, val);
ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL,
QCA8K_MDIO_MASTER_BUSY);
exit:
/* even if the busy_wait timeouts try to clear the MASTER_EN */
- qca8k_mii_write32(bus, 0x10 | r2, r1, 0);
+ qca8k_mii_write32(priv, 0x10 | r2, r1, 0);
mutex_unlock(&bus->mdio_lock);
@@ -741,8 +1219,9 @@ exit:
}
static int
-qca8k_mdio_read(struct mii_bus *bus, int phy, int regnum)
+qca8k_mdio_read(struct qca8k_priv *priv, int phy, int regnum)
{
+ struct mii_bus *bus = priv->bus;
u16 r1, r2, page;
u32 val;
int ret;
@@ -758,11 +1237,11 @@ qca8k_mdio_read(struct mii_bus *bus, int phy, int regnum)
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
- ret = qca8k_set_page(bus, page);
+ ret = qca8k_set_page(priv, page);
if (ret)
goto exit;
- qca8k_mii_write32(bus, 0x10 | r2, r1, val);
+ qca8k_mii_write32(priv, 0x10 | r2, r1, val);
ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL,
QCA8K_MDIO_MASTER_BUSY);
@@ -773,7 +1252,7 @@ qca8k_mdio_read(struct mii_bus *bus, int phy, int regnum)
exit:
/* even if the busy_wait timeouts try to clear the MASTER_EN */
- qca8k_mii_write32(bus, 0x10 | r2, r1, 0);
+ qca8k_mii_write32(priv, 0x10 | r2, r1, 0);
mutex_unlock(&bus->mdio_lock);
@@ -787,24 +1266,35 @@ static int
qca8k_internal_mdio_write(struct mii_bus *slave_bus, int phy, int regnum, u16 data)
{
struct qca8k_priv *priv = slave_bus->priv;
- struct mii_bus *bus = priv->bus;
+ int ret;
- return qca8k_mdio_write(bus, phy, regnum, data);
+ /* Use mdio Ethernet when available, fallback to legacy one on error */
+ ret = qca8k_phy_eth_command(priv, false, phy, regnum, data);
+ if (!ret)
+ return 0;
+
+ return qca8k_mdio_write(priv, phy, regnum, data);
}
static int
qca8k_internal_mdio_read(struct mii_bus *slave_bus, int phy, int regnum)
{
struct qca8k_priv *priv = slave_bus->priv;
- struct mii_bus *bus = priv->bus;
+ int ret;
- return qca8k_mdio_read(bus, phy, regnum);
+ /* Use mdio Ethernet when available, fallback to legacy one on error */
+ ret = qca8k_phy_eth_command(priv, true, phy, regnum, 0);
+ if (ret >= 0)
+ return ret;
+
+ return qca8k_mdio_read(priv, phy, regnum);
}
static int
qca8k_phy_write(struct dsa_switch *ds, int port, int regnum, u16 data)
{
struct qca8k_priv *priv = ds->priv;
+ int ret;
/* Check if the legacy mapping should be used and the
* port is not correctly mapped to the right PHY in the
@@ -813,7 +1303,12 @@ qca8k_phy_write(struct dsa_switch *ds, int port, int regnum, u16 data)
if (priv->legacy_phy_port_mapping)
port = qca8k_port_to_phy(port) % PHY_MAX_ADDR;
- return qca8k_mdio_write(priv->bus, port, regnum, data);
+ /* Use mdio Ethernet when available, fallback to legacy one on error */
+ ret = qca8k_phy_eth_command(priv, false, port, regnum, 0);
+ if (!ret)
+ return ret;
+
+ return qca8k_mdio_write(priv, port, regnum, data);
}
static int
@@ -829,7 +1324,12 @@ qca8k_phy_read(struct dsa_switch *ds, int port, int regnum)
if (priv->legacy_phy_port_mapping)
port = qca8k_port_to_phy(port) % PHY_MAX_ADDR;
- ret = qca8k_mdio_read(priv->bus, port, regnum);
+ /* Use mdio Ethernet when available, fallback to legacy one on error */
+ ret = qca8k_phy_eth_command(priv, true, port, regnum, 0);
+ if (ret >= 0)
+ return ret;
+
+ ret = qca8k_mdio_read(priv, port, regnum);
if (ret < 0)
return 0xffff;
@@ -1531,67 +2031,39 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
}
}
-static void
-qca8k_phylink_validate(struct dsa_switch *ds, int port,
- unsigned long *supported,
- struct phylink_link_state *state)
+static void qca8k_phylink_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
-
switch (port) {
case 0: /* 1st CPU port */
- if (state->interface != PHY_INTERFACE_MODE_NA &&
- state->interface != PHY_INTERFACE_MODE_RGMII &&
- state->interface != PHY_INTERFACE_MODE_RGMII_ID &&
- state->interface != PHY_INTERFACE_MODE_RGMII_TXID &&
- state->interface != PHY_INTERFACE_MODE_RGMII_RXID &&
- state->interface != PHY_INTERFACE_MODE_SGMII)
- goto unsupported;
+ phy_interface_set_rgmii(config->supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_SGMII,
+ config->supported_interfaces);
break;
+
case 1:
case 2:
case 3:
case 4:
case 5:
/* Internal PHY */
- if (state->interface != PHY_INTERFACE_MODE_NA &&
- state->interface != PHY_INTERFACE_MODE_GMII &&
- state->interface != PHY_INTERFACE_MODE_INTERNAL)
- goto unsupported;
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ config->supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+ config->supported_interfaces);
break;
+
case 6: /* 2nd CPU port / external PHY */
- if (state->interface != PHY_INTERFACE_MODE_NA &&
- state->interface != PHY_INTERFACE_MODE_RGMII &&
- state->interface != PHY_INTERFACE_MODE_RGMII_ID &&
- state->interface != PHY_INTERFACE_MODE_RGMII_TXID &&
- state->interface != PHY_INTERFACE_MODE_RGMII_RXID &&
- state->interface != PHY_INTERFACE_MODE_SGMII &&
- state->interface != PHY_INTERFACE_MODE_1000BASEX)
- goto unsupported;
+ phy_interface_set_rgmii(config->supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_SGMII,
+ config->supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_1000BASEX,
+ config->supported_interfaces);
break;
- default:
-unsupported:
- linkmode_zero(supported);
- return;
}
- phylink_set_port_modes(mask);
- phylink_set(mask, Autoneg);
-
- phylink_set(mask, 1000baseT_Full);
- phylink_set(mask, 10baseT_Half);
- phylink_set(mask, 10baseT_Full);
- phylink_set(mask, 100baseT_Half);
- phylink_set(mask, 100baseT_Full);
-
- if (state->interface == PHY_INTERFACE_MODE_1000BASEX)
- phylink_set(mask, 1000baseX_Full);
-
- phylink_set(mask, Pause);
- phylink_set(mask, Asym_Pause);
-
- linkmode_and(supported, supported, mask);
- linkmode_and(state->advertising, state->advertising, mask);
+ config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000FD;
}
static int
@@ -1703,6 +2175,97 @@ qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data)
ETH_GSTRING_LEN);
}
+static void qca8k_mib_autocast_handler(struct dsa_switch *ds, struct sk_buff *skb)
+{
+ const struct qca8k_match_data *match_data;
+ struct qca8k_mib_eth_data *mib_eth_data;
+ struct qca8k_priv *priv = ds->priv;
+ const struct qca8k_mib_desc *mib;
+ struct mib_ethhdr *mib_ethhdr;
+ int i, mib_len, offset = 0;
+ u64 *data;
+ u8 port;
+
+ mib_ethhdr = (struct mib_ethhdr *)skb_mac_header(skb);
+ mib_eth_data = &priv->mib_eth_data;
+
+ /* The switch autocast every port. Ignore other packet and
+ * parse only the requested one.
+ */
+ port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, ntohs(mib_ethhdr->hdr));
+ if (port != mib_eth_data->req_port)
+ goto exit;
+
+ match_data = device_get_match_data(priv->dev);
+ data = mib_eth_data->data;
+
+ for (i = 0; i < match_data->mib_count; i++) {
+ mib = &ar8327_mib[i];
+
+ /* First 3 mib are present in the skb head */
+ if (i < 3) {
+ data[i] = mib_ethhdr->data[i];
+ continue;
+ }
+
+ mib_len = sizeof(uint32_t);
+
+ /* Some mib are 64 bit wide */
+ if (mib->size == 2)
+ mib_len = sizeof(uint64_t);
+
+ /* Copy the mib value from packet to the */
+ memcpy(data + i, skb->data + offset, mib_len);
+
+ /* Set the offset for the next mib */
+ offset += mib_len;
+ }
+
+exit:
+ /* Complete on receiving all the mib packet */
+ if (refcount_dec_and_test(&mib_eth_data->port_parsed))
+ complete(&mib_eth_data->rw_done);
+}
+
+static int
+qca8k_get_ethtool_stats_eth(struct dsa_switch *ds, int port, u64 *data)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct qca8k_mib_eth_data *mib_eth_data;
+ struct qca8k_priv *priv = ds->priv;
+ int ret;
+
+ mib_eth_data = &priv->mib_eth_data;
+
+ mutex_lock(&mib_eth_data->mutex);
+
+ reinit_completion(&mib_eth_data->rw_done);
+
+ mib_eth_data->req_port = dp->index;
+ mib_eth_data->data = data;
+ refcount_set(&mib_eth_data->port_parsed, QCA8K_NUM_PORTS);
+
+ mutex_lock(&priv->reg_mutex);
+
+ /* Send mib autocast request */
+ ret = regmap_update_bits(priv->regmap, QCA8K_REG_MIB,
+ QCA8K_MIB_FUNC | QCA8K_MIB_BUSY,
+ FIELD_PREP(QCA8K_MIB_FUNC, QCA8K_MIB_CAST) |
+ QCA8K_MIB_BUSY);
+
+ mutex_unlock(&priv->reg_mutex);
+
+ if (ret)
+ goto exit;
+
+ ret = wait_for_completion_timeout(&mib_eth_data->rw_done, QCA8K_ETHERNET_TIMEOUT);
+
+exit:
+ mutex_unlock(&mib_eth_data->mutex);
+
+ return ret;
+}
+
static void
qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
uint64_t *data)
@@ -1714,6 +2277,10 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
u32 hi = 0;
int ret;
+ if (priv->mgmt_master &&
+ qca8k_get_ethtool_stats_eth(ds, port, data) > 0)
+ return;
+
match_data = of_device_get_match_data(priv->dev);
for (i = 0; i < match_data->mib_count; i++) {
@@ -2383,6 +2950,46 @@ qca8k_port_lag_leave(struct dsa_switch *ds, int port,
return qca8k_lag_refresh_portmap(ds, port, lag, true);
}
+static void
+qca8k_master_change(struct dsa_switch *ds, const struct net_device *master,
+ bool operational)
+{
+ struct dsa_port *dp = master->dsa_ptr;
+ struct qca8k_priv *priv = ds->priv;
+
+ /* Ethernet MIB/MDIO is only supported for CPU port 0 */
+ if (dp->index != 0)
+ return;
+
+ mutex_lock(&priv->mgmt_eth_data.mutex);
+ mutex_lock(&priv->mib_eth_data.mutex);
+
+ priv->mgmt_master = operational ? (struct net_device *)master : NULL;
+
+ mutex_unlock(&priv->mib_eth_data.mutex);
+ mutex_unlock(&priv->mgmt_eth_data.mutex);
+}
+
+static int qca8k_connect_tag_protocol(struct dsa_switch *ds,
+ enum dsa_tag_protocol proto)
+{
+ struct qca_tagger_data *tagger_data;
+
+ switch (proto) {
+ case DSA_TAG_PROTO_QCA:
+ tagger_data = ds->tagger_data;
+
+ tagger_data->rw_reg_ack_handler = qca8k_rw_reg_ack_handler;
+ tagger_data->mib_autocast_handler = qca8k_mib_autocast_handler;
+
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static const struct dsa_switch_ops qca8k_switch_ops = {
.get_tag_protocol = qca8k_get_tag_protocol,
.setup = qca8k_setup,
@@ -2410,7 +3017,7 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
.port_vlan_filtering = qca8k_port_vlan_filtering,
.port_vlan_add = qca8k_port_vlan_add,
.port_vlan_del = qca8k_port_vlan_del,
- .phylink_validate = qca8k_phylink_validate,
+ .phylink_get_caps = qca8k_phylink_get_caps,
.phylink_mac_link_state = qca8k_phylink_mac_link_state,
.phylink_mac_config = qca8k_phylink_mac_config,
.phylink_mac_link_down = qca8k_phylink_mac_link_down,
@@ -2418,6 +3025,8 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
.get_phy_flags = qca8k_get_phy_flags,
.port_lag_join = qca8k_port_lag_join,
.port_lag_leave = qca8k_port_lag_leave,
+ .master_state_change = qca8k_master_change,
+ .connect_tag_protocol = qca8k_connect_tag_protocol,
};
static int qca8k_read_switch_id(struct qca8k_priv *priv)
@@ -2488,6 +3097,10 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
return PTR_ERR(priv->regmap);
}
+ priv->mdio_cache.page = 0xffff;
+ priv->mdio_cache.lo = 0xffff;
+ priv->mdio_cache.hi = 0xffff;
+
/* Check the detected switch id */
ret = qca8k_read_switch_id(priv);
if (ret)
@@ -2497,6 +3110,12 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
if (!priv->ds)
return -ENOMEM;
+ mutex_init(&priv->mgmt_eth_data.mutex);
+ init_completion(&priv->mgmt_eth_data.rw_done);
+
+ mutex_init(&priv->mib_eth_data.mutex);
+ init_completion(&priv->mib_eth_data.rw_done);
+
priv->ds->dev = &mdiodev->dev;
priv->ds->num_ports = QCA8K_NUM_PORTS;
priv->ds->priv = priv;
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index ab4a417b25a9..c3d3c2269b1d 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -11,6 +11,11 @@
#include <linux/delay.h>
#include <linux/regmap.h>
#include <linux/gpio.h>
+#include <linux/dsa/tag_qca.h>
+
+#define QCA8K_ETHERNET_MDIO_PRIORITY 7
+#define QCA8K_ETHERNET_PHY_PRIORITY 6
+#define QCA8K_ETHERNET_TIMEOUT 100
#define QCA8K_NUM_PORTS 7
#define QCA8K_NUM_CPU_PORTS 2
@@ -63,7 +68,7 @@
#define QCA8K_REG_MODULE_EN 0x030
#define QCA8K_MODULE_EN_MIB BIT(0)
#define QCA8K_REG_MIB 0x034
-#define QCA8K_MIB_FLUSH BIT(24)
+#define QCA8K_MIB_FUNC GENMASK(26, 24)
#define QCA8K_MIB_CPU_KEEP BIT(20)
#define QCA8K_MIB_BUSY BIT(17)
#define QCA8K_MDIO_MASTER_CTRL 0x3c
@@ -313,6 +318,12 @@ enum qca8k_vlan_cmd {
QCA8K_VLAN_READ = 6,
};
+enum qca8k_mid_cmd {
+ QCA8K_MIB_FLUSH = 1,
+ QCA8K_MIB_FLUSH_PORT = 2,
+ QCA8K_MIB_CAST = 3,
+};
+
struct ar8xxx_port_status {
int enabled;
};
@@ -328,6 +339,22 @@ enum {
QCA8K_CPU_PORT6,
};
+struct qca8k_mgmt_eth_data {
+ struct completion rw_done;
+ struct mutex mutex; /* Enforce one mdio read/write at time */
+ bool ack;
+ u32 seq;
+ u32 data[4];
+};
+
+struct qca8k_mib_eth_data {
+ struct completion rw_done;
+ struct mutex mutex; /* Process one command at time */
+ refcount_t port_parsed; /* Counter to track parsed port */
+ u8 req_port;
+ u64 *data; /* pointer to ethtool data */
+};
+
struct qca8k_ports_config {
bool sgmii_rx_clk_falling_edge;
bool sgmii_tx_clk_falling_edge;
@@ -336,6 +363,19 @@ struct qca8k_ports_config {
u8 rgmii_tx_delay[QCA8K_NUM_CPU_PORTS]; /* 0: CPU port0, 1: CPU port6 */
};
+struct qca8k_mdio_cache {
+/* The 32bit switch registers are accessed indirectly. To achieve this we need
+ * to set the page of the register. Track the last page that was set to reduce
+ * mdio writes
+ */
+ u16 page;
+/* lo and hi can also be cached and from Documentation we can skip one
+ * extra mdio write if lo or hi is didn't change.
+ */
+ u16 lo;
+ u16 hi;
+};
+
struct qca8k_priv {
u8 switch_id;
u8 switch_revision;
@@ -353,6 +393,10 @@ struct qca8k_priv {
struct dsa_switch_ops ops;
struct gpio_desc *reset_gpio;
unsigned int port_mtu[QCA8K_NUM_PORTS];
+ struct net_device *mgmt_master; /* Track if mdio/mib Ethernet is available */
+ struct qca8k_mgmt_eth_data mgmt_eth_data;
+ struct qca8k_mib_eth_data mib_eth_data;
+ struct qca8k_mdio_cache mdio_cache;
};
struct qca8k_mib_desc {
diff --git a/drivers/net/dsa/realtek-smi-core.c b/drivers/net/dsa/realtek-smi-core.c
deleted file mode 100644
index aae46ada8d83..000000000000
--- a/drivers/net/dsa/realtek-smi-core.c
+++ /dev/null
@@ -1,523 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/* Realtek Simple Management Interface (SMI) driver
- * It can be discussed how "simple" this interface is.
- *
- * The SMI protocol piggy-backs the MDIO MDC and MDIO signals levels
- * but the protocol is not MDIO at all. Instead it is a Realtek
- * pecularity that need to bit-bang the lines in a special way to
- * communicate with the switch.
- *
- * ASICs we intend to support with this driver:
- *
- * RTL8366 - The original version, apparently
- * RTL8369 - Similar enough to have the same datsheet as RTL8366
- * RTL8366RB - Probably reads out "RTL8366 revision B", has a quite
- * different register layout from the other two
- * RTL8366S - Is this "RTL8366 super"?
- * RTL8367 - Has an OpenWRT driver as well
- * RTL8368S - Seems to be an alternative name for RTL8366RB
- * RTL8370 - Also uses SMI
- *
- * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
- * Copyright (C) 2010 Antti Seppälä <a.seppala@gmail.com>
- * Copyright (C) 2010 Roman Yeryomin <roman@advem.lv>
- * Copyright (C) 2011 Colin Leitner <colin.leitner@googlemail.com>
- * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_mdio.h>
-#include <linux/delay.h>
-#include <linux/gpio/consumer.h>
-#include <linux/platform_device.h>
-#include <linux/regmap.h>
-#include <linux/bitops.h>
-#include <linux/if_bridge.h>
-
-#include "realtek-smi-core.h"
-
-#define REALTEK_SMI_ACK_RETRY_COUNT 5
-#define REALTEK_SMI_HW_STOP_DELAY 25 /* msecs */
-#define REALTEK_SMI_HW_START_DELAY 100 /* msecs */
-
-static inline void realtek_smi_clk_delay(struct realtek_smi *smi)
-{
- ndelay(smi->clk_delay);
-}
-
-static void realtek_smi_start(struct realtek_smi *smi)
-{
- /* Set GPIO pins to output mode, with initial state:
- * SCK = 0, SDA = 1
- */
- gpiod_direction_output(smi->mdc, 0);
- gpiod_direction_output(smi->mdio, 1);
- realtek_smi_clk_delay(smi);
-
- /* CLK 1: 0 -> 1, 1 -> 0 */
- gpiod_set_value(smi->mdc, 1);
- realtek_smi_clk_delay(smi);
- gpiod_set_value(smi->mdc, 0);
- realtek_smi_clk_delay(smi);
-
- /* CLK 2: */
- gpiod_set_value(smi->mdc, 1);
- realtek_smi_clk_delay(smi);
- gpiod_set_value(smi->mdio, 0);
- realtek_smi_clk_delay(smi);
- gpiod_set_value(smi->mdc, 0);
- realtek_smi_clk_delay(smi);
- gpiod_set_value(smi->mdio, 1);
-}
-
-static void realtek_smi_stop(struct realtek_smi *smi)
-{
- realtek_smi_clk_delay(smi);
- gpiod_set_value(smi->mdio, 0);
- gpiod_set_value(smi->mdc, 1);
- realtek_smi_clk_delay(smi);
- gpiod_set_value(smi->mdio, 1);
- realtek_smi_clk_delay(smi);
- gpiod_set_value(smi->mdc, 1);
- realtek_smi_clk_delay(smi);
- gpiod_set_value(smi->mdc, 0);
- realtek_smi_clk_delay(smi);
- gpiod_set_value(smi->mdc, 1);
-
- /* Add a click */
- realtek_smi_clk_delay(smi);
- gpiod_set_value(smi->mdc, 0);
- realtek_smi_clk_delay(smi);
- gpiod_set_value(smi->mdc, 1);
-
- /* Set GPIO pins to input mode */
- gpiod_direction_input(smi->mdio);
- gpiod_direction_input(smi->mdc);
-}
-
-static void realtek_smi_write_bits(struct realtek_smi *smi, u32 data, u32 len)
-{
- for (; len > 0; len--) {
- realtek_smi_clk_delay(smi);
-
- /* Prepare data */
- gpiod_set_value(smi->mdio, !!(data & (1 << (len - 1))));
- realtek_smi_clk_delay(smi);
-
- /* Clocking */
- gpiod_set_value(smi->mdc, 1);
- realtek_smi_clk_delay(smi);
- gpiod_set_value(smi->mdc, 0);
- }
-}
-
-static void realtek_smi_read_bits(struct realtek_smi *smi, u32 len, u32 *data)
-{
- gpiod_direction_input(smi->mdio);
-
- for (*data = 0; len > 0; len--) {
- u32 u;
-
- realtek_smi_clk_delay(smi);
-
- /* Clocking */
- gpiod_set_value(smi->mdc, 1);
- realtek_smi_clk_delay(smi);
- u = !!gpiod_get_value(smi->mdio);
- gpiod_set_value(smi->mdc, 0);
-
- *data |= (u << (len - 1));
- }
-
- gpiod_direction_output(smi->mdio, 0);
-}
-
-static int realtek_smi_wait_for_ack(struct realtek_smi *smi)
-{
- int retry_cnt;
-
- retry_cnt = 0;
- do {
- u32 ack;
-
- realtek_smi_read_bits(smi, 1, &ack);
- if (ack == 0)
- break;
-
- if (++retry_cnt > REALTEK_SMI_ACK_RETRY_COUNT) {
- dev_err(smi->dev, "ACK timeout\n");
- return -ETIMEDOUT;
- }
- } while (1);
-
- return 0;
-}
-
-static int realtek_smi_write_byte(struct realtek_smi *smi, u8 data)
-{
- realtek_smi_write_bits(smi, data, 8);
- return realtek_smi_wait_for_ack(smi);
-}
-
-static int realtek_smi_write_byte_noack(struct realtek_smi *smi, u8 data)
-{
- realtek_smi_write_bits(smi, data, 8);
- return 0;
-}
-
-static int realtek_smi_read_byte0(struct realtek_smi *smi, u8 *data)
-{
- u32 t;
-
- /* Read data */
- realtek_smi_read_bits(smi, 8, &t);
- *data = (t & 0xff);
-
- /* Send an ACK */
- realtek_smi_write_bits(smi, 0x00, 1);
-
- return 0;
-}
-
-static int realtek_smi_read_byte1(struct realtek_smi *smi, u8 *data)
-{
- u32 t;
-
- /* Read data */
- realtek_smi_read_bits(smi, 8, &t);
- *data = (t & 0xff);
-
- /* Send an ACK */
- realtek_smi_write_bits(smi, 0x01, 1);
-
- return 0;
-}
-
-static int realtek_smi_read_reg(struct realtek_smi *smi, u32 addr, u32 *data)
-{
- unsigned long flags;
- u8 lo = 0;
- u8 hi = 0;
- int ret;
-
- spin_lock_irqsave(&smi->lock, flags);
-
- realtek_smi_start(smi);
-
- /* Send READ command */
- ret = realtek_smi_write_byte(smi, smi->cmd_read);
- if (ret)
- goto out;
-
- /* Set ADDR[7:0] */
- ret = realtek_smi_write_byte(smi, addr & 0xff);
- if (ret)
- goto out;
-
- /* Set ADDR[15:8] */
- ret = realtek_smi_write_byte(smi, addr >> 8);
- if (ret)
- goto out;
-
- /* Read DATA[7:0] */
- realtek_smi_read_byte0(smi, &lo);
- /* Read DATA[15:8] */
- realtek_smi_read_byte1(smi, &hi);
-
- *data = ((u32)lo) | (((u32)hi) << 8);
-
- ret = 0;
-
- out:
- realtek_smi_stop(smi);
- spin_unlock_irqrestore(&smi->lock, flags);
-
- return ret;
-}
-
-static int realtek_smi_write_reg(struct realtek_smi *smi,
- u32 addr, u32 data, bool ack)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&smi->lock, flags);
-
- realtek_smi_start(smi);
-
- /* Send WRITE command */
- ret = realtek_smi_write_byte(smi, smi->cmd_write);
- if (ret)
- goto out;
-
- /* Set ADDR[7:0] */
- ret = realtek_smi_write_byte(smi, addr & 0xff);
- if (ret)
- goto out;
-
- /* Set ADDR[15:8] */
- ret = realtek_smi_write_byte(smi, addr >> 8);
- if (ret)
- goto out;
-
- /* Write DATA[7:0] */
- ret = realtek_smi_write_byte(smi, data & 0xff);
- if (ret)
- goto out;
-
- /* Write DATA[15:8] */
- if (ack)
- ret = realtek_smi_write_byte(smi, data >> 8);
- else
- ret = realtek_smi_write_byte_noack(smi, data >> 8);
- if (ret)
- goto out;
-
- ret = 0;
-
- out:
- realtek_smi_stop(smi);
- spin_unlock_irqrestore(&smi->lock, flags);
-
- return ret;
-}
-
-/* There is one single case when we need to use this accessor and that
- * is when issueing soft reset. Since the device reset as soon as we write
- * that bit, no ACK will come back for natural reasons.
- */
-int realtek_smi_write_reg_noack(struct realtek_smi *smi, u32 addr,
- u32 data)
-{
- return realtek_smi_write_reg(smi, addr, data, false);
-}
-EXPORT_SYMBOL_GPL(realtek_smi_write_reg_noack);
-
-/* Regmap accessors */
-
-static int realtek_smi_write(void *ctx, u32 reg, u32 val)
-{
- struct realtek_smi *smi = ctx;
-
- return realtek_smi_write_reg(smi, reg, val, true);
-}
-
-static int realtek_smi_read(void *ctx, u32 reg, u32 *val)
-{
- struct realtek_smi *smi = ctx;
-
- return realtek_smi_read_reg(smi, reg, val);
-}
-
-static const struct regmap_config realtek_smi_mdio_regmap_config = {
- .reg_bits = 10, /* A4..A0 R4..R0 */
- .val_bits = 16,
- .reg_stride = 1,
- /* PHY regs are at 0x8000 */
- .max_register = 0xffff,
- .reg_format_endian = REGMAP_ENDIAN_BIG,
- .reg_read = realtek_smi_read,
- .reg_write = realtek_smi_write,
- .cache_type = REGCACHE_NONE,
-};
-
-static int realtek_smi_mdio_read(struct mii_bus *bus, int addr, int regnum)
-{
- struct realtek_smi *smi = bus->priv;
-
- return smi->ops->phy_read(smi, addr, regnum);
-}
-
-static int realtek_smi_mdio_write(struct mii_bus *bus, int addr, int regnum,
- u16 val)
-{
- struct realtek_smi *smi = bus->priv;
-
- return smi->ops->phy_write(smi, addr, regnum, val);
-}
-
-int realtek_smi_setup_mdio(struct realtek_smi *smi)
-{
- struct device_node *mdio_np;
- int ret;
-
- mdio_np = of_get_compatible_child(smi->dev->of_node, "realtek,smi-mdio");
- if (!mdio_np) {
- dev_err(smi->dev, "no MDIO bus node\n");
- return -ENODEV;
- }
-
- smi->slave_mii_bus = devm_mdiobus_alloc(smi->dev);
- if (!smi->slave_mii_bus) {
- ret = -ENOMEM;
- goto err_put_node;
- }
- smi->slave_mii_bus->priv = smi;
- smi->slave_mii_bus->name = "SMI slave MII";
- smi->slave_mii_bus->read = realtek_smi_mdio_read;
- smi->slave_mii_bus->write = realtek_smi_mdio_write;
- snprintf(smi->slave_mii_bus->id, MII_BUS_ID_SIZE, "SMI-%d",
- smi->ds->index);
- smi->slave_mii_bus->dev.of_node = mdio_np;
- smi->slave_mii_bus->parent = smi->dev;
- smi->ds->slave_mii_bus = smi->slave_mii_bus;
-
- ret = devm_of_mdiobus_register(smi->dev, smi->slave_mii_bus, mdio_np);
- if (ret) {
- dev_err(smi->dev, "unable to register MDIO bus %s\n",
- smi->slave_mii_bus->id);
- goto err_put_node;
- }
-
- return 0;
-
-err_put_node:
- of_node_put(mdio_np);
-
- return ret;
-}
-
-static int realtek_smi_probe(struct platform_device *pdev)
-{
- const struct realtek_smi_variant *var;
- struct device *dev = &pdev->dev;
- struct realtek_smi *smi;
- struct device_node *np;
- int ret;
-
- var = of_device_get_match_data(dev);
- np = dev->of_node;
-
- smi = devm_kzalloc(dev, sizeof(*smi) + var->chip_data_sz, GFP_KERNEL);
- if (!smi)
- return -ENOMEM;
- smi->chip_data = (void *)smi + sizeof(*smi);
- smi->map = devm_regmap_init(dev, NULL, smi,
- &realtek_smi_mdio_regmap_config);
- if (IS_ERR(smi->map)) {
- ret = PTR_ERR(smi->map);
- dev_err(dev, "regmap init failed: %d\n", ret);
- return ret;
- }
-
- /* Link forward and backward */
- smi->dev = dev;
- smi->clk_delay = var->clk_delay;
- smi->cmd_read = var->cmd_read;
- smi->cmd_write = var->cmd_write;
- smi->ops = var->ops;
-
- dev_set_drvdata(dev, smi);
- spin_lock_init(&smi->lock);
-
- /* TODO: if power is software controlled, set up any regulators here */
-
- /* Assert then deassert RESET */
- smi->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
- if (IS_ERR(smi->reset)) {
- dev_err(dev, "failed to get RESET GPIO\n");
- return PTR_ERR(smi->reset);
- }
- msleep(REALTEK_SMI_HW_STOP_DELAY);
- gpiod_set_value(smi->reset, 0);
- msleep(REALTEK_SMI_HW_START_DELAY);
- dev_info(dev, "deasserted RESET\n");
-
- /* Fetch MDIO pins */
- smi->mdc = devm_gpiod_get_optional(dev, "mdc", GPIOD_OUT_LOW);
- if (IS_ERR(smi->mdc))
- return PTR_ERR(smi->mdc);
- smi->mdio = devm_gpiod_get_optional(dev, "mdio", GPIOD_OUT_LOW);
- if (IS_ERR(smi->mdio))
- return PTR_ERR(smi->mdio);
-
- smi->leds_disabled = of_property_read_bool(np, "realtek,disable-leds");
-
- ret = smi->ops->detect(smi);
- if (ret) {
- dev_err(dev, "unable to detect switch\n");
- return ret;
- }
-
- smi->ds = devm_kzalloc(dev, sizeof(*smi->ds), GFP_KERNEL);
- if (!smi->ds)
- return -ENOMEM;
-
- smi->ds->dev = dev;
- smi->ds->num_ports = smi->num_ports;
- smi->ds->priv = smi;
-
- smi->ds->ops = var->ds_ops;
- ret = dsa_register_switch(smi->ds);
- if (ret) {
- dev_err_probe(dev, ret, "unable to register switch\n");
- return ret;
- }
- return 0;
-}
-
-static int realtek_smi_remove(struct platform_device *pdev)
-{
- struct realtek_smi *smi = platform_get_drvdata(pdev);
-
- if (!smi)
- return 0;
-
- dsa_unregister_switch(smi->ds);
- if (smi->slave_mii_bus)
- of_node_put(smi->slave_mii_bus->dev.of_node);
- gpiod_set_value(smi->reset, 1);
-
- platform_set_drvdata(pdev, NULL);
-
- return 0;
-}
-
-static void realtek_smi_shutdown(struct platform_device *pdev)
-{
- struct realtek_smi *smi = platform_get_drvdata(pdev);
-
- if (!smi)
- return;
-
- dsa_switch_shutdown(smi->ds);
-
- platform_set_drvdata(pdev, NULL);
-}
-
-static const struct of_device_id realtek_smi_of_match[] = {
- {
- .compatible = "realtek,rtl8366rb",
- .data = &rtl8366rb_variant,
- },
- {
- /* FIXME: add support for RTL8366S and more */
- .compatible = "realtek,rtl8366s",
- .data = NULL,
- },
- {
- .compatible = "realtek,rtl8365mb",
- .data = &rtl8365mb_variant,
- },
- { /* sentinel */ },
-};
-MODULE_DEVICE_TABLE(of, realtek_smi_of_match);
-
-static struct platform_driver realtek_smi_driver = {
- .driver = {
- .name = "realtek-smi",
- .of_match_table = of_match_ptr(realtek_smi_of_match),
- },
- .probe = realtek_smi_probe,
- .remove = realtek_smi_remove,
- .shutdown = realtek_smi_shutdown,
-};
-module_platform_driver(realtek_smi_driver);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig
new file mode 100644
index 000000000000..b7427a8292b2
--- /dev/null
+++ b/drivers/net/dsa/realtek/Kconfig
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0-only
+menuconfig NET_DSA_REALTEK
+ tristate "Realtek Ethernet switch family support"
+ depends on NET_DSA
+ select FIXED_PHY
+ select IRQ_DOMAIN
+ select REALTEK_PHY
+ select REGMAP
+ help
+ Select to enable support for Realtek Ethernet switch chips.
+
+config NET_DSA_REALTEK_MDIO
+ tristate "Realtek MDIO connected switch driver"
+ depends on NET_DSA_REALTEK
+ help
+ Select to enable support for registering switches configured
+ through MDIO.
+
+config NET_DSA_REALTEK_SMI
+ tristate "Realtek SMI connected switch driver"
+ depends on NET_DSA_REALTEK
+ help
+ Select to enable support for registering switches connected
+ through SMI.
+
+config NET_DSA_REALTEK_RTL8365MB
+ tristate "Realtek RTL8365MB switch subdriver"
+ depends on NET_DSA_REALTEK
+ depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO
+ select NET_DSA_TAG_RTL8_4
+ help
+ Select to enable support for Realtek RTL8365MB-VC and RTL8367S.
+
+config NET_DSA_REALTEK_RTL8366RB
+ tristate "Realtek RTL8366RB switch subdriver"
+ depends on NET_DSA_REALTEK
+ depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO
+ select NET_DSA_TAG_RTL4_A
+ help
+ Select to enable support for Realtek RTL8366RB
diff --git a/drivers/net/dsa/realtek/Makefile b/drivers/net/dsa/realtek/Makefile
new file mode 100644
index 000000000000..0aab57252a7c
--- /dev/null
+++ b/drivers/net/dsa/realtek/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_NET_DSA_REALTEK_MDIO) += realtek-mdio.o
+obj-$(CONFIG_NET_DSA_REALTEK_SMI) += realtek-smi.o
+obj-$(CONFIG_NET_DSA_REALTEK_RTL8366RB) += rtl8366.o
+rtl8366-objs := rtl8366-core.o rtl8366rb.o
+obj-$(CONFIG_NET_DSA_REALTEK_RTL8365MB) += rtl8365mb.o
diff --git a/drivers/net/dsa/realtek/realtek-mdio.c b/drivers/net/dsa/realtek/realtek-mdio.c
new file mode 100644
index 000000000000..0c5f2bdced9d
--- /dev/null
+++ b/drivers/net/dsa/realtek/realtek-mdio.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Realtek MDIO interface driver
+ *
+ * ASICs we intend to support with this driver:
+ *
+ * RTL8366 - The original version, apparently
+ * RTL8369 - Similar enough to have the same datsheet as RTL8366
+ * RTL8366RB - Probably reads out "RTL8366 revision B", has a quite
+ * different register layout from the other two
+ * RTL8366S - Is this "RTL8366 super"?
+ * RTL8367 - Has an OpenWRT driver as well
+ * RTL8368S - Seems to be an alternative name for RTL8366RB
+ * RTL8370 - Also uses SMI
+ *
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ * Copyright (C) 2010 Antti Seppälä <a.seppala@gmail.com>
+ * Copyright (C) 2010 Roman Yeryomin <roman@advem.lv>
+ * Copyright (C) 2011 Colin Leitner <colin.leitner@googlemail.com>
+ * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org>
+ */
+
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+
+#include "realtek.h"
+
+/* Read/write via mdiobus */
+#define REALTEK_MDIO_CTRL0_REG 31
+#define REALTEK_MDIO_START_REG 29
+#define REALTEK_MDIO_CTRL1_REG 21
+#define REALTEK_MDIO_ADDRESS_REG 23
+#define REALTEK_MDIO_DATA_WRITE_REG 24
+#define REALTEK_MDIO_DATA_READ_REG 25
+
+#define REALTEK_MDIO_START_OP 0xFFFF
+#define REALTEK_MDIO_ADDR_OP 0x000E
+#define REALTEK_MDIO_READ_OP 0x0001
+#define REALTEK_MDIO_WRITE_OP 0x0003
+
+static int realtek_mdio_write(void *ctx, u32 reg, u32 val)
+{
+ struct realtek_priv *priv = ctx;
+ struct mii_bus *bus = priv->bus;
+ int ret;
+
+ mutex_lock(&bus->mdio_lock);
+
+ ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_CTRL0_REG, REALTEK_MDIO_ADDR_OP);
+ if (ret)
+ goto out_unlock;
+
+ ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_ADDRESS_REG, reg);
+ if (ret)
+ goto out_unlock;
+
+ ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_DATA_WRITE_REG, val);
+ if (ret)
+ goto out_unlock;
+
+ ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_CTRL1_REG, REALTEK_MDIO_WRITE_OP);
+
+out_unlock:
+ mutex_unlock(&bus->mdio_lock);
+
+ return ret;
+}
+
+static int realtek_mdio_read(void *ctx, u32 reg, u32 *val)
+{
+ struct realtek_priv *priv = ctx;
+ struct mii_bus *bus = priv->bus;
+ int ret;
+
+ mutex_lock(&bus->mdio_lock);
+
+ ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_CTRL0_REG, REALTEK_MDIO_ADDR_OP);
+ if (ret)
+ goto out_unlock;
+
+ ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_ADDRESS_REG, reg);
+ if (ret)
+ goto out_unlock;
+
+ ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_CTRL1_REG, REALTEK_MDIO_READ_OP);
+ if (ret)
+ goto out_unlock;
+
+ ret = bus->read(bus, priv->mdio_addr, REALTEK_MDIO_DATA_READ_REG);
+ if (ret >= 0) {
+ *val = ret;
+ ret = 0;
+ }
+
+out_unlock:
+ mutex_unlock(&bus->mdio_lock);
+
+ return ret;
+}
+
+static const struct regmap_config realtek_mdio_regmap_config = {
+ .reg_bits = 10, /* A4..A0 R4..R0 */
+ .val_bits = 16,
+ .reg_stride = 1,
+ /* PHY regs are at 0x8000 */
+ .max_register = 0xffff,
+ .reg_format_endian = REGMAP_ENDIAN_BIG,
+ .reg_read = realtek_mdio_read,
+ .reg_write = realtek_mdio_write,
+ .cache_type = REGCACHE_NONE,
+};
+
+static int realtek_mdio_probe(struct mdio_device *mdiodev)
+{
+ struct realtek_priv *priv;
+ struct device *dev = &mdiodev->dev;
+ const struct realtek_variant *var;
+ int ret;
+ struct device_node *np;
+
+ var = of_device_get_match_data(dev);
+ if (!var)
+ return -EINVAL;
+
+ priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->map = devm_regmap_init(dev, NULL, priv, &realtek_mdio_regmap_config);
+ if (IS_ERR(priv->map)) {
+ ret = PTR_ERR(priv->map);
+ dev_err(dev, "regmap init failed: %d\n", ret);
+ return ret;
+ }
+
+ priv->mdio_addr = mdiodev->addr;
+ priv->bus = mdiodev->bus;
+ priv->dev = &mdiodev->dev;
+ priv->chip_data = (void *)priv + sizeof(*priv);
+
+ priv->clk_delay = var->clk_delay;
+ priv->cmd_read = var->cmd_read;
+ priv->cmd_write = var->cmd_write;
+ priv->ops = var->ops;
+
+ priv->write_reg_noack = realtek_mdio_write;
+
+ np = dev->of_node;
+
+ dev_set_drvdata(dev, priv);
+
+ /* TODO: if power is software controlled, set up any regulators here */
+ priv->leds_disabled = of_property_read_bool(np, "realtek,disable-leds");
+
+ ret = priv->ops->detect(priv);
+ if (ret) {
+ dev_err(dev, "unable to detect switch\n");
+ return ret;
+ }
+
+ priv->ds = devm_kzalloc(dev, sizeof(*priv->ds), GFP_KERNEL);
+ if (!priv->ds)
+ return -ENOMEM;
+
+ priv->ds->dev = dev;
+ priv->ds->num_ports = priv->num_ports;
+ priv->ds->priv = priv;
+ priv->ds->ops = var->ds_ops_mdio;
+
+ ret = dsa_register_switch(priv->ds);
+ if (ret) {
+ dev_err(priv->dev, "unable to register switch ret = %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void realtek_mdio_remove(struct mdio_device *mdiodev)
+{
+ struct realtek_priv *priv = dev_get_drvdata(&mdiodev->dev);
+
+ if (!priv)
+ return;
+
+ dsa_unregister_switch(priv->ds);
+
+ dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static void realtek_mdio_shutdown(struct mdio_device *mdiodev)
+{
+ struct realtek_priv *priv = dev_get_drvdata(&mdiodev->dev);
+
+ if (!priv)
+ return;
+
+ dsa_switch_shutdown(priv->ds);
+
+ dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static const struct of_device_id realtek_mdio_of_match[] = {
+#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8366RB)
+ { .compatible = "realtek,rtl8366rb", .data = &rtl8366rb_variant, },
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB)
+ { .compatible = "realtek,rtl8365mb", .data = &rtl8365mb_variant, },
+ { .compatible = "realtek,rtl8367s", .data = &rtl8365mb_variant, },
+#endif
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, realtek_mdio_of_match);
+
+static struct mdio_driver realtek_mdio_driver = {
+ .mdiodrv.driver = {
+ .name = "realtek-mdio",
+ .of_match_table = of_match_ptr(realtek_mdio_of_match),
+ },
+ .probe = realtek_mdio_probe,
+ .remove = realtek_mdio_remove,
+ .shutdown = realtek_mdio_shutdown,
+};
+
+mdio_module_driver(realtek_mdio_driver);
+
+MODULE_AUTHOR("Luiz Angelo Daros de Luca <luizluca@gmail.com>");
+MODULE_DESCRIPTION("Driver for Realtek ethernet switch connected via MDIO interface");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c
new file mode 100644
index 000000000000..946fbbd70153
--- /dev/null
+++ b/drivers/net/dsa/realtek/realtek-smi.c
@@ -0,0 +1,535 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Realtek Simple Management Interface (SMI) driver
+ * It can be discussed how "simple" this interface is.
+ *
+ * The SMI protocol piggy-backs the MDIO MDC and MDIO signals levels
+ * but the protocol is not MDIO at all. Instead it is a Realtek
+ * pecularity that need to bit-bang the lines in a special way to
+ * communicate with the switch.
+ *
+ * ASICs we intend to support with this driver:
+ *
+ * RTL8366 - The original version, apparently
+ * RTL8369 - Similar enough to have the same datsheet as RTL8366
+ * RTL8366RB - Probably reads out "RTL8366 revision B", has a quite
+ * different register layout from the other two
+ * RTL8366S - Is this "RTL8366 super"?
+ * RTL8367 - Has an OpenWRT driver as well
+ * RTL8368S - Seems to be an alternative name for RTL8366RB
+ * RTL8370 - Also uses SMI
+ *
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ * Copyright (C) 2010 Antti Seppälä <a.seppala@gmail.com>
+ * Copyright (C) 2010 Roman Yeryomin <roman@advem.lv>
+ * Copyright (C) 2011 Colin Leitner <colin.leitner@googlemail.com>
+ * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/bitops.h>
+#include <linux/if_bridge.h>
+
+#include "realtek.h"
+
+#define REALTEK_SMI_ACK_RETRY_COUNT 5
+#define REALTEK_SMI_HW_STOP_DELAY 25 /* msecs */
+#define REALTEK_SMI_HW_START_DELAY 100 /* msecs */
+
+static inline void realtek_smi_clk_delay(struct realtek_priv *priv)
+{
+ ndelay(priv->clk_delay);
+}
+
+static void realtek_smi_start(struct realtek_priv *priv)
+{
+ /* Set GPIO pins to output mode, with initial state:
+ * SCK = 0, SDA = 1
+ */
+ gpiod_direction_output(priv->mdc, 0);
+ gpiod_direction_output(priv->mdio, 1);
+ realtek_smi_clk_delay(priv);
+
+ /* CLK 1: 0 -> 1, 1 -> 0 */
+ gpiod_set_value(priv->mdc, 1);
+ realtek_smi_clk_delay(priv);
+ gpiod_set_value(priv->mdc, 0);
+ realtek_smi_clk_delay(priv);
+
+ /* CLK 2: */
+ gpiod_set_value(priv->mdc, 1);
+ realtek_smi_clk_delay(priv);
+ gpiod_set_value(priv->mdio, 0);
+ realtek_smi_clk_delay(priv);
+ gpiod_set_value(priv->mdc, 0);
+ realtek_smi_clk_delay(priv);
+ gpiod_set_value(priv->mdio, 1);
+}
+
+static void realtek_smi_stop(struct realtek_priv *priv)
+{
+ realtek_smi_clk_delay(priv);
+ gpiod_set_value(priv->mdio, 0);
+ gpiod_set_value(priv->mdc, 1);
+ realtek_smi_clk_delay(priv);
+ gpiod_set_value(priv->mdio, 1);
+ realtek_smi_clk_delay(priv);
+ gpiod_set_value(priv->mdc, 1);
+ realtek_smi_clk_delay(priv);
+ gpiod_set_value(priv->mdc, 0);
+ realtek_smi_clk_delay(priv);
+ gpiod_set_value(priv->mdc, 1);
+
+ /* Add a click */
+ realtek_smi_clk_delay(priv);
+ gpiod_set_value(priv->mdc, 0);
+ realtek_smi_clk_delay(priv);
+ gpiod_set_value(priv->mdc, 1);
+
+ /* Set GPIO pins to input mode */
+ gpiod_direction_input(priv->mdio);
+ gpiod_direction_input(priv->mdc);
+}
+
+static void realtek_smi_write_bits(struct realtek_priv *priv, u32 data, u32 len)
+{
+ for (; len > 0; len--) {
+ realtek_smi_clk_delay(priv);
+
+ /* Prepare data */
+ gpiod_set_value(priv->mdio, !!(data & (1 << (len - 1))));
+ realtek_smi_clk_delay(priv);
+
+ /* Clocking */
+ gpiod_set_value(priv->mdc, 1);
+ realtek_smi_clk_delay(priv);
+ gpiod_set_value(priv->mdc, 0);
+ }
+}
+
+static void realtek_smi_read_bits(struct realtek_priv *priv, u32 len, u32 *data)
+{
+ gpiod_direction_input(priv->mdio);
+
+ for (*data = 0; len > 0; len--) {
+ u32 u;
+
+ realtek_smi_clk_delay(priv);
+
+ /* Clocking */
+ gpiod_set_value(priv->mdc, 1);
+ realtek_smi_clk_delay(priv);
+ u = !!gpiod_get_value(priv->mdio);
+ gpiod_set_value(priv->mdc, 0);
+
+ *data |= (u << (len - 1));
+ }
+
+ gpiod_direction_output(priv->mdio, 0);
+}
+
+static int realtek_smi_wait_for_ack(struct realtek_priv *priv)
+{
+ int retry_cnt;
+
+ retry_cnt = 0;
+ do {
+ u32 ack;
+
+ realtek_smi_read_bits(priv, 1, &ack);
+ if (ack == 0)
+ break;
+
+ if (++retry_cnt > REALTEK_SMI_ACK_RETRY_COUNT) {
+ dev_err(priv->dev, "ACK timeout\n");
+ return -ETIMEDOUT;
+ }
+ } while (1);
+
+ return 0;
+}
+
+static int realtek_smi_write_byte(struct realtek_priv *priv, u8 data)
+{
+ realtek_smi_write_bits(priv, data, 8);
+ return realtek_smi_wait_for_ack(priv);
+}
+
+static int realtek_smi_write_byte_noack(struct realtek_priv *priv, u8 data)
+{
+ realtek_smi_write_bits(priv, data, 8);
+ return 0;
+}
+
+static int realtek_smi_read_byte0(struct realtek_priv *priv, u8 *data)
+{
+ u32 t;
+
+ /* Read data */
+ realtek_smi_read_bits(priv, 8, &t);
+ *data = (t & 0xff);
+
+ /* Send an ACK */
+ realtek_smi_write_bits(priv, 0x00, 1);
+
+ return 0;
+}
+
+static int realtek_smi_read_byte1(struct realtek_priv *priv, u8 *data)
+{
+ u32 t;
+
+ /* Read data */
+ realtek_smi_read_bits(priv, 8, &t);
+ *data = (t & 0xff);
+
+ /* Send an ACK */
+ realtek_smi_write_bits(priv, 0x01, 1);
+
+ return 0;
+}
+
+static int realtek_smi_read_reg(struct realtek_priv *priv, u32 addr, u32 *data)
+{
+ unsigned long flags;
+ u8 lo = 0;
+ u8 hi = 0;
+ int ret;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ realtek_smi_start(priv);
+
+ /* Send READ command */
+ ret = realtek_smi_write_byte(priv, priv->cmd_read);
+ if (ret)
+ goto out;
+
+ /* Set ADDR[7:0] */
+ ret = realtek_smi_write_byte(priv, addr & 0xff);
+ if (ret)
+ goto out;
+
+ /* Set ADDR[15:8] */
+ ret = realtek_smi_write_byte(priv, addr >> 8);
+ if (ret)
+ goto out;
+
+ /* Read DATA[7:0] */
+ realtek_smi_read_byte0(priv, &lo);
+ /* Read DATA[15:8] */
+ realtek_smi_read_byte1(priv, &hi);
+
+ *data = ((u32)lo) | (((u32)hi) << 8);
+
+ ret = 0;
+
+ out:
+ realtek_smi_stop(priv);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return ret;
+}
+
+static int realtek_smi_write_reg(struct realtek_priv *priv,
+ u32 addr, u32 data, bool ack)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ realtek_smi_start(priv);
+
+ /* Send WRITE command */
+ ret = realtek_smi_write_byte(priv, priv->cmd_write);
+ if (ret)
+ goto out;
+
+ /* Set ADDR[7:0] */
+ ret = realtek_smi_write_byte(priv, addr & 0xff);
+ if (ret)
+ goto out;
+
+ /* Set ADDR[15:8] */
+ ret = realtek_smi_write_byte(priv, addr >> 8);
+ if (ret)
+ goto out;
+
+ /* Write DATA[7:0] */
+ ret = realtek_smi_write_byte(priv, data & 0xff);
+ if (ret)
+ goto out;
+
+ /* Write DATA[15:8] */
+ if (ack)
+ ret = realtek_smi_write_byte(priv, data >> 8);
+ else
+ ret = realtek_smi_write_byte_noack(priv, data >> 8);
+ if (ret)
+ goto out;
+
+ ret = 0;
+
+ out:
+ realtek_smi_stop(priv);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return ret;
+}
+
+/* There is one single case when we need to use this accessor and that
+ * is when issueing soft reset. Since the device reset as soon as we write
+ * that bit, no ACK will come back for natural reasons.
+ */
+static int realtek_smi_write_reg_noack(void *ctx, u32 reg, u32 val)
+{
+ return realtek_smi_write_reg(ctx, reg, val, false);
+}
+
+/* Regmap accessors */
+
+static int realtek_smi_write(void *ctx, u32 reg, u32 val)
+{
+ struct realtek_priv *priv = ctx;
+
+ return realtek_smi_write_reg(priv, reg, val, true);
+}
+
+static int realtek_smi_read(void *ctx, u32 reg, u32 *val)
+{
+ struct realtek_priv *priv = ctx;
+
+ return realtek_smi_read_reg(priv, reg, val);
+}
+
+static const struct regmap_config realtek_smi_mdio_regmap_config = {
+ .reg_bits = 10, /* A4..A0 R4..R0 */
+ .val_bits = 16,
+ .reg_stride = 1,
+ /* PHY regs are at 0x8000 */
+ .max_register = 0xffff,
+ .reg_format_endian = REGMAP_ENDIAN_BIG,
+ .reg_read = realtek_smi_read,
+ .reg_write = realtek_smi_write,
+ .cache_type = REGCACHE_NONE,
+};
+
+static int realtek_smi_mdio_read(struct mii_bus *bus, int addr, int regnum)
+{
+ struct realtek_priv *priv = bus->priv;
+
+ return priv->ops->phy_read(priv, addr, regnum);
+}
+
+static int realtek_smi_mdio_write(struct mii_bus *bus, int addr, int regnum,
+ u16 val)
+{
+ struct realtek_priv *priv = bus->priv;
+
+ return priv->ops->phy_write(priv, addr, regnum, val);
+}
+
+static int realtek_smi_setup_mdio(struct dsa_switch *ds)
+{
+ struct realtek_priv *priv = ds->priv;
+ struct device_node *mdio_np;
+ int ret;
+
+ mdio_np = of_get_compatible_child(priv->dev->of_node, "realtek,smi-mdio");
+ if (!mdio_np) {
+ dev_err(priv->dev, "no MDIO bus node\n");
+ return -ENODEV;
+ }
+
+ priv->slave_mii_bus = devm_mdiobus_alloc(priv->dev);
+ if (!priv->slave_mii_bus) {
+ ret = -ENOMEM;
+ goto err_put_node;
+ }
+ priv->slave_mii_bus->priv = priv;
+ priv->slave_mii_bus->name = "SMI slave MII";
+ priv->slave_mii_bus->read = realtek_smi_mdio_read;
+ priv->slave_mii_bus->write = realtek_smi_mdio_write;
+ snprintf(priv->slave_mii_bus->id, MII_BUS_ID_SIZE, "SMI-%d",
+ ds->index);
+ priv->slave_mii_bus->dev.of_node = mdio_np;
+ priv->slave_mii_bus->parent = priv->dev;
+ ds->slave_mii_bus = priv->slave_mii_bus;
+
+ ret = devm_of_mdiobus_register(priv->dev, priv->slave_mii_bus, mdio_np);
+ if (ret) {
+ dev_err(priv->dev, "unable to register MDIO bus %s\n",
+ priv->slave_mii_bus->id);
+ goto err_put_node;
+ }
+
+ return 0;
+
+err_put_node:
+ of_node_put(mdio_np);
+
+ return ret;
+}
+
+static int realtek_smi_probe(struct platform_device *pdev)
+{
+ const struct realtek_variant *var;
+ struct device *dev = &pdev->dev;
+ struct realtek_priv *priv;
+ struct device_node *np;
+ int ret;
+
+ var = of_device_get_match_data(dev);
+ np = dev->of_node;
+
+ priv = devm_kzalloc(dev, sizeof(*priv) + var->chip_data_sz, GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+ priv->chip_data = (void *)priv + sizeof(*priv);
+ priv->map = devm_regmap_init(dev, NULL, priv,
+ &realtek_smi_mdio_regmap_config);
+ if (IS_ERR(priv->map)) {
+ ret = PTR_ERR(priv->map);
+ dev_err(dev, "regmap init failed: %d\n", ret);
+ return ret;
+ }
+
+ /* Link forward and backward */
+ priv->dev = dev;
+ priv->clk_delay = var->clk_delay;
+ priv->cmd_read = var->cmd_read;
+ priv->cmd_write = var->cmd_write;
+ priv->ops = var->ops;
+
+ priv->setup_interface = realtek_smi_setup_mdio;
+ priv->write_reg_noack = realtek_smi_write_reg_noack;
+
+ dev_set_drvdata(dev, priv);
+ spin_lock_init(&priv->lock);
+
+ /* TODO: if power is software controlled, set up any regulators here */
+
+ /* Assert then deassert RESET */
+ priv->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+ if (IS_ERR(priv->reset)) {
+ dev_err(dev, "failed to get RESET GPIO\n");
+ return PTR_ERR(priv->reset);
+ }
+ msleep(REALTEK_SMI_HW_STOP_DELAY);
+ gpiod_set_value(priv->reset, 0);
+ msleep(REALTEK_SMI_HW_START_DELAY);
+ dev_info(dev, "deasserted RESET\n");
+
+ /* Fetch MDIO pins */
+ priv->mdc = devm_gpiod_get_optional(dev, "mdc", GPIOD_OUT_LOW);
+ if (IS_ERR(priv->mdc))
+ return PTR_ERR(priv->mdc);
+ priv->mdio = devm_gpiod_get_optional(dev, "mdio", GPIOD_OUT_LOW);
+ if (IS_ERR(priv->mdio))
+ return PTR_ERR(priv->mdio);
+
+ priv->leds_disabled = of_property_read_bool(np, "realtek,disable-leds");
+
+ ret = priv->ops->detect(priv);
+ if (ret) {
+ dev_err(dev, "unable to detect switch\n");
+ return ret;
+ }
+
+ priv->ds = devm_kzalloc(dev, sizeof(*priv->ds), GFP_KERNEL);
+ if (!priv->ds)
+ return -ENOMEM;
+
+ priv->ds->dev = dev;
+ priv->ds->num_ports = priv->num_ports;
+ priv->ds->priv = priv;
+
+ priv->ds->ops = var->ds_ops_smi;
+ ret = dsa_register_switch(priv->ds);
+ if (ret) {
+ dev_err_probe(dev, ret, "unable to register switch\n");
+ return ret;
+ }
+ return 0;
+}
+
+static int realtek_smi_remove(struct platform_device *pdev)
+{
+ struct realtek_priv *priv = platform_get_drvdata(pdev);
+
+ if (!priv)
+ return 0;
+
+ dsa_unregister_switch(priv->ds);
+ if (priv->slave_mii_bus)
+ of_node_put(priv->slave_mii_bus->dev.of_node);
+ gpiod_set_value(priv->reset, 1);
+
+ platform_set_drvdata(pdev, NULL);
+
+ return 0;
+}
+
+static void realtek_smi_shutdown(struct platform_device *pdev)
+{
+ struct realtek_priv *priv = platform_get_drvdata(pdev);
+
+ if (!priv)
+ return;
+
+ dsa_switch_shutdown(priv->ds);
+
+ platform_set_drvdata(pdev, NULL);
+}
+
+static const struct of_device_id realtek_smi_of_match[] = {
+#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8366RB)
+ {
+ .compatible = "realtek,rtl8366rb",
+ .data = &rtl8366rb_variant,
+ },
+#endif
+ {
+ /* FIXME: add support for RTL8366S and more */
+ .compatible = "realtek,rtl8366s",
+ .data = NULL,
+ },
+#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB)
+ {
+ .compatible = "realtek,rtl8365mb",
+ .data = &rtl8365mb_variant,
+ },
+ {
+ .compatible = "realtek,rtl8367s",
+ .data = &rtl8365mb_variant,
+ },
+#endif
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, realtek_smi_of_match);
+
+static struct platform_driver realtek_smi_driver = {
+ .driver = {
+ .name = "realtek-smi",
+ .of_match_table = of_match_ptr(realtek_smi_of_match),
+ },
+ .probe = realtek_smi_probe,
+ .remove = realtek_smi_remove,
+ .shutdown = realtek_smi_shutdown,
+};
+module_platform_driver(realtek_smi_driver);
+
+MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+MODULE_DESCRIPTION("Driver for Realtek ethernet switch connected via SMI interface");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/realtek-smi-core.h b/drivers/net/dsa/realtek/realtek.h
index 5bfa53e2480a..ed5abf6cb3d6 100644
--- a/drivers/net/dsa/realtek-smi-core.h
+++ b/drivers/net/dsa/realtek/realtek.h
@@ -13,7 +13,7 @@
#include <linux/gpio/consumer.h>
#include <net/dsa.h>
-struct realtek_smi_ops;
+struct realtek_ops;
struct dentry;
struct inode;
struct file;
@@ -25,7 +25,7 @@ struct rtl8366_mib_counter {
const char *name;
};
-/**
+/*
* struct rtl8366_vlan_mc - Virtual LAN member configuration
*/
struct rtl8366_vlan_mc {
@@ -43,13 +43,15 @@ struct rtl8366_vlan_4k {
u8 fid;
};
-struct realtek_smi {
+struct realtek_priv {
struct device *dev;
struct gpio_desc *reset;
struct gpio_desc *mdc;
struct gpio_desc *mdio;
struct regmap *map;
struct mii_bus *slave_mii_bus;
+ struct mii_bus *bus;
+ int mdio_addr;
unsigned int clk_delay;
u8 cmd_read;
@@ -65,7 +67,9 @@ struct realtek_smi {
unsigned int num_mib_counters;
struct rtl8366_mib_counter *mib_counters;
- const struct realtek_smi_ops *ops;
+ const struct realtek_ops *ops;
+ int (*setup_interface)(struct dsa_switch *ds);
+ int (*write_reg_noack)(void *ctx, u32 addr, u32 data);
int vlan_enabled;
int vlan4k_enabled;
@@ -74,61 +78,57 @@ struct realtek_smi {
void *chip_data; /* Per-chip extra variant data */
};
-/**
- * struct realtek_smi_ops - vtable for the per-SMI-chiptype operations
+/*
+ * struct realtek_ops - vtable for the per-SMI-chiptype operations
* @detect: detects the chiptype
*/
-struct realtek_smi_ops {
- int (*detect)(struct realtek_smi *smi);
- int (*reset_chip)(struct realtek_smi *smi);
- int (*setup)(struct realtek_smi *smi);
- void (*cleanup)(struct realtek_smi *smi);
- int (*get_mib_counter)(struct realtek_smi *smi,
+struct realtek_ops {
+ int (*detect)(struct realtek_priv *priv);
+ int (*reset_chip)(struct realtek_priv *priv);
+ int (*setup)(struct realtek_priv *priv);
+ void (*cleanup)(struct realtek_priv *priv);
+ int (*get_mib_counter)(struct realtek_priv *priv,
int port,
struct rtl8366_mib_counter *mib,
u64 *mibvalue);
- int (*get_vlan_mc)(struct realtek_smi *smi, u32 index,
+ int (*get_vlan_mc)(struct realtek_priv *priv, u32 index,
struct rtl8366_vlan_mc *vlanmc);
- int (*set_vlan_mc)(struct realtek_smi *smi, u32 index,
+ int (*set_vlan_mc)(struct realtek_priv *priv, u32 index,
const struct rtl8366_vlan_mc *vlanmc);
- int (*get_vlan_4k)(struct realtek_smi *smi, u32 vid,
+ int (*get_vlan_4k)(struct realtek_priv *priv, u32 vid,
struct rtl8366_vlan_4k *vlan4k);
- int (*set_vlan_4k)(struct realtek_smi *smi,
+ int (*set_vlan_4k)(struct realtek_priv *priv,
const struct rtl8366_vlan_4k *vlan4k);
- int (*get_mc_index)(struct realtek_smi *smi, int port, int *val);
- int (*set_mc_index)(struct realtek_smi *smi, int port, int index);
- bool (*is_vlan_valid)(struct realtek_smi *smi, unsigned int vlan);
- int (*enable_vlan)(struct realtek_smi *smi, bool enable);
- int (*enable_vlan4k)(struct realtek_smi *smi, bool enable);
- int (*enable_port)(struct realtek_smi *smi, int port, bool enable);
- int (*phy_read)(struct realtek_smi *smi, int phy, int regnum);
- int (*phy_write)(struct realtek_smi *smi, int phy, int regnum,
+ int (*get_mc_index)(struct realtek_priv *priv, int port, int *val);
+ int (*set_mc_index)(struct realtek_priv *priv, int port, int index);
+ bool (*is_vlan_valid)(struct realtek_priv *priv, unsigned int vlan);
+ int (*enable_vlan)(struct realtek_priv *priv, bool enable);
+ int (*enable_vlan4k)(struct realtek_priv *priv, bool enable);
+ int (*enable_port)(struct realtek_priv *priv, int port, bool enable);
+ int (*phy_read)(struct realtek_priv *priv, int phy, int regnum);
+ int (*phy_write)(struct realtek_priv *priv, int phy, int regnum,
u16 val);
};
-struct realtek_smi_variant {
- const struct dsa_switch_ops *ds_ops;
- const struct realtek_smi_ops *ops;
+struct realtek_variant {
+ const struct dsa_switch_ops *ds_ops_smi;
+ const struct dsa_switch_ops *ds_ops_mdio;
+ const struct realtek_ops *ops;
unsigned int clk_delay;
u8 cmd_read;
u8 cmd_write;
size_t chip_data_sz;
};
-/* SMI core calls */
-int realtek_smi_write_reg_noack(struct realtek_smi *smi, u32 addr,
- u32 data);
-int realtek_smi_setup_mdio(struct realtek_smi *smi);
-
/* RTL8366 library helpers */
-int rtl8366_mc_is_used(struct realtek_smi *smi, int mc_index, int *used);
-int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
+int rtl8366_mc_is_used(struct realtek_priv *priv, int mc_index, int *used);
+int rtl8366_set_vlan(struct realtek_priv *priv, int vid, u32 member,
u32 untag, u32 fid);
-int rtl8366_set_pvid(struct realtek_smi *smi, unsigned int port,
+int rtl8366_set_pvid(struct realtek_priv *priv, unsigned int port,
unsigned int vid);
-int rtl8366_enable_vlan4k(struct realtek_smi *smi, bool enable);
-int rtl8366_enable_vlan(struct realtek_smi *smi, bool enable);
-int rtl8366_reset_vlan(struct realtek_smi *smi);
+int rtl8366_enable_vlan4k(struct realtek_priv *priv, bool enable);
+int rtl8366_enable_vlan(struct realtek_priv *priv, bool enable);
+int rtl8366_reset_vlan(struct realtek_priv *priv);
int rtl8366_vlan_add(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan,
struct netlink_ext_ack *extack);
@@ -139,7 +139,7 @@ void rtl8366_get_strings(struct dsa_switch *ds, int port, u32 stringset,
int rtl8366_get_sset_count(struct dsa_switch *ds, int port, int sset);
void rtl8366_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data);
-extern const struct realtek_smi_variant rtl8366rb_variant;
-extern const struct realtek_smi_variant rtl8365mb_variant;
+extern const struct realtek_variant rtl8366rb_variant;
+extern const struct realtek_variant rtl8365mb_variant;
#endif /* _REALTEK_SMI_H */
diff --git a/drivers/net/dsa/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c
index 3b729544798b..2ed592147c20 100644
--- a/drivers/net/dsa/rtl8365mb.c
+++ b/drivers/net/dsa/realtek/rtl8365mb.c
@@ -99,18 +99,28 @@
#include <linux/regmap.h>
#include <linux/if_bridge.h>
-#include "realtek-smi-core.h"
+#include "realtek.h"
/* Chip-specific data and limits */
-#define RTL8365MB_CHIP_ID_8365MB_VC 0x6367
-#define RTL8365MB_CPU_PORT_NUM_8365MB_VC 6
-#define RTL8365MB_LEARN_LIMIT_MAX_8365MB_VC 2112
+#define RTL8365MB_CHIP_ID_8365MB_VC 0x6367
+#define RTL8365MB_CHIP_VER_8365MB_VC 0x0040
+
+#define RTL8365MB_CHIP_ID_8367S 0x6367
+#define RTL8365MB_CHIP_VER_8367S 0x00A0
+
+#define RTL8365MB_CHIP_ID_8367RB 0x6367
+#define RTL8365MB_CHIP_VER_8367RB 0x0020
/* Family-specific data and limits */
-#define RTL8365MB_PHYADDRMAX 7
-#define RTL8365MB_NUM_PHYREGS 32
-#define RTL8365MB_PHYREGMAX (RTL8365MB_NUM_PHYREGS - 1)
-#define RTL8365MB_MAX_NUM_PORTS (RTL8365MB_CPU_PORT_NUM_8365MB_VC + 1)
+#define RTL8365MB_PHYADDRMAX 7
+#define RTL8365MB_NUM_PHYREGS 32
+#define RTL8365MB_PHYREGMAX (RTL8365MB_NUM_PHYREGS - 1)
+/* RTL8370MB and RTL8310SR, possibly suportable by this driver, have 10 ports */
+#define RTL8365MB_MAX_NUM_PORTS 10
+#define RTL8365MB_LEARN_LIMIT_MAX 2112
+
+/* valid for all 6-port or less variants */
+static const int rtl8365mb_extint_port_map[] = { -1, -1, -1, -1, -1, -1, 1, 2, -1, -1};
/* Chip identification registers */
#define RTL8365MB_CHIP_ID_REG 0x1300
@@ -191,7 +201,7 @@
/* The PHY OCP addresses of PHY registers 0~31 start here */
#define RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE 0xA400
-/* EXT port interface mode values - used in DIGITAL_INTERFACE_SELECT */
+/* EXT interface port mode values - used in DIGITAL_INTERFACE_SELECT */
#define RTL8365MB_EXT_PORT_MODE_DISABLE 0
#define RTL8365MB_EXT_PORT_MODE_RGMII 1
#define RTL8365MB_EXT_PORT_MODE_MII_MAC 2
@@ -207,39 +217,56 @@
#define RTL8365MB_EXT_PORT_MODE_1000X 12
#define RTL8365MB_EXT_PORT_MODE_100FX 13
-/* EXT port interface mode configuration registers 0~1 */
-#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 0x1305
-#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 0x13C3
-#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(_extport) \
- (RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 + \
- ((_extport) >> 1) * (0x13C3 - 0x1305))
-#define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(_extport) \
- (0xF << (((_extport) % 2)))
-#define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(_extport) \
- (((_extport) % 2) * 4)
-
-/* EXT port RGMII TX/RX delay configuration registers 1~2 */
-#define RTL8365MB_EXT_RGMXF_REG1 0x1307
-#define RTL8365MB_EXT_RGMXF_REG2 0x13C5
-#define RTL8365MB_EXT_RGMXF_REG(_extport) \
- (RTL8365MB_EXT_RGMXF_REG1 + \
- (((_extport) >> 1) * (0x13C5 - 0x1307)))
+/* Realtek docs and driver uses logic number as EXT_PORT0=16, EXT_PORT1=17,
+ * EXT_PORT2=18, to interact with switch ports. That logic number is internally
+ * converted to either a physical port number (0..9) or an external interface id (0..2),
+ * depending on which function was called. The external interface id is calculated as
+ * (ext_id=logic_port-15), while the logical to physical map depends on the chip id/version.
+ *
+ * EXT_PORT0 mentioned in datasheets and rtl8367c driver is used in this driver
+ * as extid==1, EXT_PORT2, mentioned in Realtek rtl8367c driver for 10-port switches,
+ * would have an ext_id of 3 (out of range for most extint macros) and ext_id 0 does
+ * not seem to be used as well for this family.
+ */
+
+/* EXT interface mode configuration registers 0~1 */
+#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 0x1305 /* EXT1 */
+#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 0x13C3 /* EXT2 */
+#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(_extint) \
+ ((_extint) == 1 ? RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 : \
+ (_extint) == 2 ? RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 : \
+ 0x0)
+#define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(_extint) \
+ (0xF << (((_extint) % 2)))
+#define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(_extint) \
+ (((_extint) % 2) * 4)
+
+/* EXT interface RGMII TX/RX delay configuration registers 0~2 */
+#define RTL8365MB_EXT_RGMXF_REG0 0x1306 /* EXT0 */
+#define RTL8365MB_EXT_RGMXF_REG1 0x1307 /* EXT1 */
+#define RTL8365MB_EXT_RGMXF_REG2 0x13C5 /* EXT2 */
+#define RTL8365MB_EXT_RGMXF_REG(_extint) \
+ ((_extint) == 0 ? RTL8365MB_EXT_RGMXF_REG0 : \
+ (_extint) == 1 ? RTL8365MB_EXT_RGMXF_REG1 : \
+ (_extint) == 2 ? RTL8365MB_EXT_RGMXF_REG2 : \
+ 0x0)
#define RTL8365MB_EXT_RGMXF_RXDELAY_MASK 0x0007
#define RTL8365MB_EXT_RGMXF_TXDELAY_MASK 0x0008
-/* External port speed values - used in DIGITAL_INTERFACE_FORCE */
+/* External interface port speed values - used in DIGITAL_INTERFACE_FORCE */
#define RTL8365MB_PORT_SPEED_10M 0
#define RTL8365MB_PORT_SPEED_100M 1
#define RTL8365MB_PORT_SPEED_1000M 2
-/* EXT port force configuration registers 0~2 */
-#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 0x1310
-#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG1 0x1311
-#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG2 0x13C4
-#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(_extport) \
- (RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 + \
- ((_extport) & 0x1) + \
- ((((_extport) >> 1) & 0x1) * (0x13C4 - 0x1310)))
+/* EXT interface force configuration registers 0~2 */
+#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 0x1310 /* EXT0 */
+#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG1 0x1311 /* EXT1 */
+#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG2 0x13C4 /* EXT2 */
+#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(_extint) \
+ ((_extint) == 0 ? RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 : \
+ (_extint) == 1 ? RTL8365MB_DIGITAL_INTERFACE_FORCE_REG1 : \
+ (_extint) == 2 ? RTL8365MB_DIGITAL_INTERFACE_FORCE_REG2 : \
+ 0x0)
#define RTL8365MB_DIGITAL_INTERFACE_FORCE_EN_MASK 0x1000
#define RTL8365MB_DIGITAL_INTERFACE_FORCE_NWAY_MASK 0x0080
#define RTL8365MB_DIGITAL_INTERFACE_FORCE_TXPAUSE_MASK 0x0040
@@ -516,7 +543,7 @@ struct rtl8365mb_cpu {
/**
* struct rtl8365mb_port - private per-port data
- * @smi: pointer to parent realtek_smi data
+ * @priv: pointer to parent realtek_priv data
* @index: DSA port index, same as dsa_port::index
* @stats: link statistics populated by rtl8365mb_stats_poll, ready for atomic
* access via rtl8365mb_get_stats64
@@ -524,7 +551,7 @@ struct rtl8365mb_cpu {
* @mib_work: delayed work for polling MIB counters
*/
struct rtl8365mb_port {
- struct realtek_smi *smi;
+ struct realtek_priv *priv;
unsigned int index;
struct rtnl_link_stats64 stats;
spinlock_t stats_lock;
@@ -533,13 +560,12 @@ struct rtl8365mb_port {
/**
* struct rtl8365mb - private chip-specific driver data
- * @smi: pointer to parent realtek_smi data
+ * @priv: pointer to parent realtek_priv data
* @irq: registered IRQ or zero
* @chip_id: chip identifier
* @chip_ver: chip silicon revision
* @port_mask: mask of all ports
* @learn_limit_max: maximum number of L2 addresses the chip can learn
- * @cpu: CPU tagging and CPU port configuration for this chip
* @mib_lock: prevent concurrent reads of MIB counters
* @ports: per-port data
* @jam_table: chip-specific initialization jam table
@@ -548,29 +574,28 @@ struct rtl8365mb_port {
* Private data for this driver.
*/
struct rtl8365mb {
- struct realtek_smi *smi;
+ struct realtek_priv *priv;
int irq;
u32 chip_id;
u32 chip_ver;
u32 port_mask;
u32 learn_limit_max;
- struct rtl8365mb_cpu cpu;
struct mutex mib_lock;
struct rtl8365mb_port ports[RTL8365MB_MAX_NUM_PORTS];
const struct rtl8365mb_jam_tbl_entry *jam_table;
size_t jam_size;
};
-static int rtl8365mb_phy_poll_busy(struct realtek_smi *smi)
+static int rtl8365mb_phy_poll_busy(struct realtek_priv *priv)
{
u32 val;
- return regmap_read_poll_timeout(smi->map,
+ return regmap_read_poll_timeout(priv->map,
RTL8365MB_INDIRECT_ACCESS_STATUS_REG,
val, !val, 10, 100);
}
-static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy,
+static int rtl8365mb_phy_ocp_prepare(struct realtek_priv *priv, int phy,
u32 ocp_addr)
{
u32 val;
@@ -579,7 +604,7 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy,
/* Set OCP prefix */
val = FIELD_GET(RTL8365MB_PHY_OCP_ADDR_PREFIX_MASK, ocp_addr);
ret = regmap_update_bits(
- smi->map, RTL8365MB_GPHY_OCP_MSB_0_REG,
+ priv->map, RTL8365MB_GPHY_OCP_MSB_0_REG,
RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK,
FIELD_PREP(RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK, val));
if (ret)
@@ -592,7 +617,7 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy,
ocp_addr >> 1);
val |= FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_9_6_MASK,
ocp_addr >> 6);
- ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG,
+ ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG,
val);
if (ret)
return ret;
@@ -600,17 +625,17 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy,
return 0;
}
-static int rtl8365mb_phy_ocp_read(struct realtek_smi *smi, int phy,
+static int rtl8365mb_phy_ocp_read(struct realtek_priv *priv, int phy,
u32 ocp_addr, u16 *data)
{
u32 val;
int ret;
- ret = rtl8365mb_phy_poll_busy(smi);
+ ret = rtl8365mb_phy_poll_busy(priv);
if (ret)
return ret;
- ret = rtl8365mb_phy_ocp_prepare(smi, phy, ocp_addr);
+ ret = rtl8365mb_phy_ocp_prepare(priv, phy, ocp_addr);
if (ret)
return ret;
@@ -619,16 +644,16 @@ static int rtl8365mb_phy_ocp_read(struct realtek_smi *smi, int phy,
RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE) |
FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK,
RTL8365MB_INDIRECT_ACCESS_CTRL_RW_READ);
- ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val);
+ ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val);
if (ret)
return ret;
- ret = rtl8365mb_phy_poll_busy(smi);
+ ret = rtl8365mb_phy_poll_busy(priv);
if (ret)
return ret;
/* Get PHY register data */
- ret = regmap_read(smi->map, RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG,
+ ret = regmap_read(priv->map, RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG,
&val);
if (ret)
return ret;
@@ -638,22 +663,22 @@ static int rtl8365mb_phy_ocp_read(struct realtek_smi *smi, int phy,
return 0;
}
-static int rtl8365mb_phy_ocp_write(struct realtek_smi *smi, int phy,
+static int rtl8365mb_phy_ocp_write(struct realtek_priv *priv, int phy,
u32 ocp_addr, u16 data)
{
u32 val;
int ret;
- ret = rtl8365mb_phy_poll_busy(smi);
+ ret = rtl8365mb_phy_poll_busy(priv);
if (ret)
return ret;
- ret = rtl8365mb_phy_ocp_prepare(smi, phy, ocp_addr);
+ ret = rtl8365mb_phy_ocp_prepare(priv, phy, ocp_addr);
if (ret)
return ret;
/* Set PHY register data */
- ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG,
+ ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG,
data);
if (ret)
return ret;
@@ -663,18 +688,18 @@ static int rtl8365mb_phy_ocp_write(struct realtek_smi *smi, int phy,
RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE) |
FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK,
RTL8365MB_INDIRECT_ACCESS_CTRL_RW_WRITE);
- ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val);
+ ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val);
if (ret)
return ret;
- ret = rtl8365mb_phy_poll_busy(smi);
+ ret = rtl8365mb_phy_poll_busy(priv);
if (ret)
return ret;
return 0;
}
-static int rtl8365mb_phy_read(struct realtek_smi *smi, int phy, int regnum)
+static int rtl8365mb_phy_read(struct realtek_priv *priv, int phy, int regnum)
{
u32 ocp_addr;
u16 val;
@@ -688,21 +713,21 @@ static int rtl8365mb_phy_read(struct realtek_smi *smi, int phy, int regnum)
ocp_addr = RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE + regnum * 2;
- ret = rtl8365mb_phy_ocp_read(smi, phy, ocp_addr, &val);
+ ret = rtl8365mb_phy_ocp_read(priv, phy, ocp_addr, &val);
if (ret) {
- dev_err(smi->dev,
+ dev_err(priv->dev,
"failed to read PHY%d reg %02x @ %04x, ret %d\n", phy,
regnum, ocp_addr, ret);
return ret;
}
- dev_dbg(smi->dev, "read PHY%d register 0x%02x @ %04x, val <- %04x\n",
+ dev_dbg(priv->dev, "read PHY%d register 0x%02x @ %04x, val <- %04x\n",
phy, regnum, ocp_addr, val);
return val;
}
-static int rtl8365mb_phy_write(struct realtek_smi *smi, int phy, int regnum,
+static int rtl8365mb_phy_write(struct realtek_priv *priv, int phy, int regnum,
u16 val)
{
u32 ocp_addr;
@@ -716,20 +741,31 @@ static int rtl8365mb_phy_write(struct realtek_smi *smi, int phy, int regnum,
ocp_addr = RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE + regnum * 2;
- ret = rtl8365mb_phy_ocp_write(smi, phy, ocp_addr, val);
+ ret = rtl8365mb_phy_ocp_write(priv, phy, ocp_addr, val);
if (ret) {
- dev_err(smi->dev,
+ dev_err(priv->dev,
"failed to write PHY%d reg %02x @ %04x, ret %d\n", phy,
regnum, ocp_addr, ret);
return ret;
}
- dev_dbg(smi->dev, "write PHY%d register 0x%02x @ %04x, val -> %04x\n",
+ dev_dbg(priv->dev, "write PHY%d register 0x%02x @ %04x, val -> %04x\n",
phy, regnum, ocp_addr, val);
return 0;
}
+static int rtl8365mb_dsa_phy_read(struct dsa_switch *ds, int phy, int regnum)
+{
+ return rtl8365mb_phy_read(ds->priv, phy, regnum);
+}
+
+static int rtl8365mb_dsa_phy_write(struct dsa_switch *ds, int phy, int regnum,
+ u16 val)
+{
+ return rtl8365mb_phy_write(ds->priv, phy, regnum, val);
+}
+
static enum dsa_tag_protocol
rtl8365mb_get_tag_protocol(struct dsa_switch *ds, int port,
enum dsa_tag_protocol mp)
@@ -737,25 +773,25 @@ rtl8365mb_get_tag_protocol(struct dsa_switch *ds, int port,
return DSA_TAG_PROTO_RTL8_4;
}
-static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port,
+static int rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port,
phy_interface_t interface)
{
struct device_node *dn;
struct dsa_port *dp;
int tx_delay = 0;
int rx_delay = 0;
- int ext_port;
+ int ext_int;
u32 val;
int ret;
- if (port == smi->cpu_port) {
- ext_port = 1;
- } else {
- dev_err(smi->dev, "only one EXT port is currently supported\n");
+ ext_int = rtl8365mb_extint_port_map[port];
+
+ if (ext_int <= 0) {
+ dev_err(priv->dev, "Port %d is not an external interface port\n", port);
return -EINVAL;
}
- dp = dsa_to_port(smi->ds, port);
+ dp = dsa_to_port(priv->ds, port);
dn = dp->dn;
/* Set the RGMII TX/RX delay
@@ -786,8 +822,8 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port,
if (val == 0 || val == 2)
tx_delay = val / 2;
else
- dev_warn(smi->dev,
- "EXT port TX delay must be 0 or 2 ns\n");
+ dev_warn(priv->dev,
+ "EXT interface TX delay must be 0 or 2 ns\n");
}
if (!of_property_read_u32(dn, "rx-internal-delay-ps", &val)) {
@@ -796,12 +832,12 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port,
if (val <= 7)
rx_delay = val;
else
- dev_warn(smi->dev,
- "EXT port RX delay must be 0 to 2.1 ns\n");
+ dev_warn(priv->dev,
+ "EXT interface RX delay must be 0 to 2.1 ns\n");
}
ret = regmap_update_bits(
- smi->map, RTL8365MB_EXT_RGMXF_REG(ext_port),
+ priv->map, RTL8365MB_EXT_RGMXF_REG(ext_int),
RTL8365MB_EXT_RGMXF_TXDELAY_MASK |
RTL8365MB_EXT_RGMXF_RXDELAY_MASK,
FIELD_PREP(RTL8365MB_EXT_RGMXF_TXDELAY_MASK, tx_delay) |
@@ -810,18 +846,18 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port,
return ret;
ret = regmap_update_bits(
- smi->map, RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(ext_port),
- RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(ext_port),
+ priv->map, RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(ext_int),
+ RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(ext_int),
RTL8365MB_EXT_PORT_MODE_RGMII
<< RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(
- ext_port));
+ ext_int));
if (ret)
return ret;
return 0;
}
-static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port,
+static int rtl8365mb_ext_config_forcemode(struct realtek_priv *priv, int port,
bool link, int speed, int duplex,
bool tx_pause, bool rx_pause)
{
@@ -830,14 +866,14 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port,
u32 r_duplex;
u32 r_speed;
u32 r_link;
- int ext_port;
+ int ext_int;
int val;
int ret;
- if (port == smi->cpu_port) {
- ext_port = 1;
- } else {
- dev_err(smi->dev, "only one EXT port is currently supported\n");
+ ext_int = rtl8365mb_extint_port_map[port];
+
+ if (ext_int <= 0) {
+ dev_err(priv->dev, "Port %d is not an external interface port\n", port);
return -EINVAL;
}
@@ -854,7 +890,7 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port,
} else if (speed == SPEED_10) {
r_speed = RTL8365MB_PORT_SPEED_10M;
} else {
- dev_err(smi->dev, "unsupported port speed %s\n",
+ dev_err(priv->dev, "unsupported port speed %s\n",
phy_speed_to_str(speed));
return -EINVAL;
}
@@ -864,7 +900,7 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port,
} else if (duplex == DUPLEX_HALF) {
r_duplex = 0;
} else {
- dev_err(smi->dev, "unsupported duplex %s\n",
+ dev_err(priv->dev, "unsupported duplex %s\n",
phy_duplex_to_str(duplex));
return -EINVAL;
}
@@ -886,8 +922,8 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port,
FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_DUPLEX_MASK,
r_duplex) |
FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_SPEED_MASK, r_speed);
- ret = regmap_write(smi->map,
- RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(ext_port),
+ ret = regmap_write(priv->map,
+ RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(ext_int),
val);
if (ret)
return ret;
@@ -898,13 +934,17 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port,
static bool rtl8365mb_phy_mode_supported(struct dsa_switch *ds, int port,
phy_interface_t interface)
{
- if (dsa_is_user_port(ds, port) &&
+ int ext_int;
+
+ ext_int = rtl8365mb_extint_port_map[port];
+
+ if (ext_int < 0 &&
(interface == PHY_INTERFACE_MODE_NA ||
interface == PHY_INTERFACE_MODE_INTERNAL ||
interface == PHY_INTERFACE_MODE_GMII))
/* Internal PHY */
return true;
- else if (dsa_is_cpu_port(ds, port) &&
+ else if ((ext_int >= 1) &&
phy_interface_mode_is_rgmii(interface))
/* Extension MAC */
return true;
@@ -912,65 +952,43 @@ static bool rtl8365mb_phy_mode_supported(struct dsa_switch *ds, int port,
return false;
}
-static void rtl8365mb_phylink_validate(struct dsa_switch *ds, int port,
- unsigned long *supported,
- struct phylink_link_state *state)
+static void rtl8365mb_phylink_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
{
- struct realtek_smi *smi = ds->priv;
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0 };
-
- /* include/linux/phylink.h says:
- * When @state->interface is %PHY_INTERFACE_MODE_NA, phylink
- * expects the MAC driver to return all supported link modes.
- */
- if (state->interface != PHY_INTERFACE_MODE_NA &&
- !rtl8365mb_phy_mode_supported(ds, port, state->interface)) {
- dev_err(smi->dev, "phy mode %s is unsupported on port %d\n",
- phy_modes(state->interface), port);
- linkmode_zero(supported);
- return;
- }
-
- phylink_set_port_modes(mask);
-
- phylink_set(mask, Autoneg);
- phylink_set(mask, Pause);
- phylink_set(mask, Asym_Pause);
-
- phylink_set(mask, 10baseT_Half);
- phylink_set(mask, 10baseT_Full);
- phylink_set(mask, 100baseT_Half);
- phylink_set(mask, 100baseT_Full);
- phylink_set(mask, 1000baseT_Full);
-
- linkmode_and(supported, supported, mask);
- linkmode_and(state->advertising, state->advertising, mask);
+ if (dsa_is_user_port(ds, port))
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+ config->supported_interfaces);
+ else if (dsa_is_cpu_port(ds, port))
+ phy_interface_set_rgmii(config->supported_interfaces);
+
+ config->mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000FD;
}
static void rtl8365mb_phylink_mac_config(struct dsa_switch *ds, int port,
unsigned int mode,
const struct phylink_link_state *state)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
int ret;
if (!rtl8365mb_phy_mode_supported(ds, port, state->interface)) {
- dev_err(smi->dev, "phy mode %s is unsupported on port %d\n",
+ dev_err(priv->dev, "phy mode %s is unsupported on port %d\n",
phy_modes(state->interface), port);
return;
}
if (mode != MLO_AN_PHY && mode != MLO_AN_FIXED) {
- dev_err(smi->dev,
+ dev_err(priv->dev,
"port %d supports only conventional PHY or fixed-link\n",
port);
return;
}
if (phy_interface_mode_is_rgmii(state->interface)) {
- ret = rtl8365mb_ext_config_rgmii(smi, port, state->interface);
+ ret = rtl8365mb_ext_config_rgmii(priv, port, state->interface);
if (ret)
- dev_err(smi->dev,
+ dev_err(priv->dev,
"failed to configure RGMII mode on port %d: %d\n",
port, ret);
return;
@@ -985,20 +1003,20 @@ static void rtl8365mb_phylink_mac_link_down(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
struct rtl8365mb_port *p;
struct rtl8365mb *mb;
int ret;
- mb = smi->chip_data;
+ mb = priv->chip_data;
p = &mb->ports[port];
cancel_delayed_work_sync(&p->mib_work);
if (phy_interface_mode_is_rgmii(interface)) {
- ret = rtl8365mb_ext_config_forcemode(smi, port, false, 0, 0,
+ ret = rtl8365mb_ext_config_forcemode(priv, port, false, 0, 0,
false, false);
if (ret)
- dev_err(smi->dev,
+ dev_err(priv->dev,
"failed to reset forced mode on port %d: %d\n",
port, ret);
@@ -1013,21 +1031,21 @@ static void rtl8365mb_phylink_mac_link_up(struct dsa_switch *ds, int port,
int duplex, bool tx_pause,
bool rx_pause)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
struct rtl8365mb_port *p;
struct rtl8365mb *mb;
int ret;
- mb = smi->chip_data;
+ mb = priv->chip_data;
p = &mb->ports[port];
schedule_delayed_work(&p->mib_work, 0);
if (phy_interface_mode_is_rgmii(interface)) {
- ret = rtl8365mb_ext_config_forcemode(smi, port, true, speed,
+ ret = rtl8365mb_ext_config_forcemode(priv, port, true, speed,
duplex, tx_pause,
rx_pause);
if (ret)
- dev_err(smi->dev,
+ dev_err(priv->dev,
"failed to force mode on port %d: %d\n", port,
ret);
@@ -1038,7 +1056,7 @@ static void rtl8365mb_phylink_mac_link_up(struct dsa_switch *ds, int port,
static void rtl8365mb_port_stp_state_set(struct dsa_switch *ds, int port,
u8 state)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
enum rtl8365mb_stp_state val;
int msti = 0;
@@ -1057,36 +1075,36 @@ static void rtl8365mb_port_stp_state_set(struct dsa_switch *ds, int port,
val = RTL8365MB_STP_STATE_FORWARDING;
break;
default:
- dev_err(smi->dev, "invalid STP state: %u\n", state);
+ dev_err(priv->dev, "invalid STP state: %u\n", state);
return;
}
- regmap_update_bits(smi->map, RTL8365MB_MSTI_CTRL_REG(msti, port),
+ regmap_update_bits(priv->map, RTL8365MB_MSTI_CTRL_REG(msti, port),
RTL8365MB_MSTI_CTRL_PORT_STATE_MASK(port),
val << RTL8365MB_MSTI_CTRL_PORT_STATE_OFFSET(port));
}
-static int rtl8365mb_port_set_learning(struct realtek_smi *smi, int port,
+static int rtl8365mb_port_set_learning(struct realtek_priv *priv, int port,
bool enable)
{
- struct rtl8365mb *mb = smi->chip_data;
+ struct rtl8365mb *mb = priv->chip_data;
/* Enable/disable learning by limiting the number of L2 addresses the
* port can learn. Realtek documentation states that a limit of zero
* disables learning. When enabling learning, set it to the chip's
* maximum.
*/
- return regmap_write(smi->map, RTL8365MB_LUT_PORT_LEARN_LIMIT_REG(port),
+ return regmap_write(priv->map, RTL8365MB_LUT_PORT_LEARN_LIMIT_REG(port),
enable ? mb->learn_limit_max : 0);
}
-static int rtl8365mb_port_set_isolation(struct realtek_smi *smi, int port,
+static int rtl8365mb_port_set_isolation(struct realtek_priv *priv, int port,
u32 mask)
{
- return regmap_write(smi->map, RTL8365MB_PORT_ISOLATION_REG(port), mask);
+ return regmap_write(priv->map, RTL8365MB_PORT_ISOLATION_REG(port), mask);
}
-static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port,
+static int rtl8365mb_mib_counter_read(struct realtek_priv *priv, int port,
u32 offset, u32 length, u64 *mibvalue)
{
u64 tmpvalue = 0;
@@ -1098,13 +1116,13 @@ static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port,
* and then poll the control register before reading the value from some
* counter registers.
*/
- ret = regmap_write(smi->map, RTL8365MB_MIB_ADDRESS_REG,
+ ret = regmap_write(priv->map, RTL8365MB_MIB_ADDRESS_REG,
RTL8365MB_MIB_ADDRESS(port, offset));
if (ret)
return ret;
/* Poll for completion */
- ret = regmap_read_poll_timeout(smi->map, RTL8365MB_MIB_CTRL0_REG, val,
+ ret = regmap_read_poll_timeout(priv->map, RTL8365MB_MIB_CTRL0_REG, val,
!(val & RTL8365MB_MIB_CTRL0_BUSY_MASK),
10, 100);
if (ret)
@@ -1126,7 +1144,7 @@ static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port,
/* Read the MIB counter 16 bits at a time */
for (i = 0; i < length; i++) {
- ret = regmap_read(smi->map,
+ ret = regmap_read(priv->map,
RTL8365MB_MIB_COUNTER_REG(offset - i), &val);
if (ret)
return ret;
@@ -1142,21 +1160,21 @@ static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port,
static void rtl8365mb_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
struct rtl8365mb *mb;
int ret;
int i;
- mb = smi->chip_data;
+ mb = priv->chip_data;
mutex_lock(&mb->mib_lock);
for (i = 0; i < RTL8365MB_MIB_END; i++) {
struct rtl8365mb_mib_counter *mib = &rtl8365mb_mib_counters[i];
- ret = rtl8365mb_mib_counter_read(smi, port, mib->offset,
+ ret = rtl8365mb_mib_counter_read(priv, port, mib->offset,
mib->length, &data[i]);
if (ret) {
- dev_err(smi->dev,
+ dev_err(priv->dev,
"failed to read port %d counters: %d\n", port,
ret);
break;
@@ -1190,15 +1208,15 @@ static int rtl8365mb_get_sset_count(struct dsa_switch *ds, int port, int sset)
static void rtl8365mb_get_phy_stats(struct dsa_switch *ds, int port,
struct ethtool_eth_phy_stats *phy_stats)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
struct rtl8365mb_mib_counter *mib;
struct rtl8365mb *mb;
- mb = smi->chip_data;
+ mb = priv->chip_data;
mib = &rtl8365mb_mib_counters[RTL8365MB_MIB_dot3StatsSymbolErrors];
mutex_lock(&mb->mib_lock);
- rtl8365mb_mib_counter_read(smi, port, mib->offset, mib->length,
+ rtl8365mb_mib_counter_read(priv, port, mib->offset, mib->length,
&phy_stats->SymbolErrorDuringCarrier);
mutex_unlock(&mb->mib_lock);
}
@@ -1226,12 +1244,12 @@ static void rtl8365mb_get_mac_stats(struct dsa_switch *ds, int port,
[RTL8365MB_MIB_dot3StatsExcessiveCollisions] = 1,
};
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
struct rtl8365mb *mb;
int ret;
int i;
- mb = smi->chip_data;
+ mb = priv->chip_data;
mutex_lock(&mb->mib_lock);
for (i = 0; i < RTL8365MB_MIB_END; i++) {
@@ -1241,7 +1259,7 @@ static void rtl8365mb_get_mac_stats(struct dsa_switch *ds, int port,
if (!cnt[i])
continue;
- ret = rtl8365mb_mib_counter_read(smi, port, mib->offset,
+ ret = rtl8365mb_mib_counter_read(priv, port, mib->offset,
mib->length, &cnt[i]);
if (ret)
break;
@@ -1291,20 +1309,20 @@ static void rtl8365mb_get_mac_stats(struct dsa_switch *ds, int port,
static void rtl8365mb_get_ctrl_stats(struct dsa_switch *ds, int port,
struct ethtool_eth_ctrl_stats *ctrl_stats)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
struct rtl8365mb_mib_counter *mib;
struct rtl8365mb *mb;
- mb = smi->chip_data;
+ mb = priv->chip_data;
mib = &rtl8365mb_mib_counters[RTL8365MB_MIB_dot3ControlInUnknownOpcodes];
mutex_lock(&mb->mib_lock);
- rtl8365mb_mib_counter_read(smi, port, mib->offset, mib->length,
+ rtl8365mb_mib_counter_read(priv, port, mib->offset, mib->length,
&ctrl_stats->UnsupportedOpcodesReceived);
mutex_unlock(&mb->mib_lock);
}
-static void rtl8365mb_stats_update(struct realtek_smi *smi, int port)
+static void rtl8365mb_stats_update(struct realtek_priv *priv, int port)
{
u64 cnt[RTL8365MB_MIB_END] = {
[RTL8365MB_MIB_ifOutOctets] = 1,
@@ -1323,7 +1341,7 @@ static void rtl8365mb_stats_update(struct realtek_smi *smi, int port)
[RTL8365MB_MIB_dot3StatsFCSErrors] = 1,
[RTL8365MB_MIB_dot3StatsLateCollisions] = 1,
};
- struct rtl8365mb *mb = smi->chip_data;
+ struct rtl8365mb *mb = priv->chip_data;
struct rtnl_link_stats64 *stats;
int ret;
int i;
@@ -1338,7 +1356,7 @@ static void rtl8365mb_stats_update(struct realtek_smi *smi, int port)
if (!cnt[i])
continue;
- ret = rtl8365mb_mib_counter_read(smi, port, c->offset,
+ ret = rtl8365mb_mib_counter_read(priv, port, c->offset,
c->length, &cnt[i]);
if (ret)
break;
@@ -1388,9 +1406,9 @@ static void rtl8365mb_stats_poll(struct work_struct *work)
struct rtl8365mb_port *p = container_of(to_delayed_work(work),
struct rtl8365mb_port,
mib_work);
- struct realtek_smi *smi = p->smi;
+ struct realtek_priv *priv = p->priv;
- rtl8365mb_stats_update(smi, p->index);
+ rtl8365mb_stats_update(priv, p->index);
schedule_delayed_work(&p->mib_work, RTL8365MB_STATS_INTERVAL_JIFFIES);
}
@@ -1398,11 +1416,11 @@ static void rtl8365mb_stats_poll(struct work_struct *work)
static void rtl8365mb_get_stats64(struct dsa_switch *ds, int port,
struct rtnl_link_stats64 *s)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
struct rtl8365mb_port *p;
struct rtl8365mb *mb;
- mb = smi->chip_data;
+ mb = priv->chip_data;
p = &mb->ports[port];
spin_lock(&p->stats_lock);
@@ -1410,9 +1428,9 @@ static void rtl8365mb_get_stats64(struct dsa_switch *ds, int port,
spin_unlock(&p->stats_lock);
}
-static void rtl8365mb_stats_setup(struct realtek_smi *smi)
+static void rtl8365mb_stats_setup(struct realtek_priv *priv)
{
- struct rtl8365mb *mb = smi->chip_data;
+ struct rtl8365mb *mb = priv->chip_data;
int i;
/* Per-chip global mutex to protect MIB counter access, since doing
@@ -1420,10 +1438,10 @@ static void rtl8365mb_stats_setup(struct realtek_smi *smi)
*/
mutex_init(&mb->mib_lock);
- for (i = 0; i < smi->num_ports; i++) {
+ for (i = 0; i < priv->num_ports; i++) {
struct rtl8365mb_port *p = &mb->ports[i];
- if (dsa_is_unused_port(smi->ds, i))
+ if (dsa_is_unused_port(priv->ds, i))
continue;
/* Per-port spinlock to protect the stats64 data */
@@ -1436,45 +1454,45 @@ static void rtl8365mb_stats_setup(struct realtek_smi *smi)
}
}
-static void rtl8365mb_stats_teardown(struct realtek_smi *smi)
+static void rtl8365mb_stats_teardown(struct realtek_priv *priv)
{
- struct rtl8365mb *mb = smi->chip_data;
+ struct rtl8365mb *mb = priv->chip_data;
int i;
- for (i = 0; i < smi->num_ports; i++) {
+ for (i = 0; i < priv->num_ports; i++) {
struct rtl8365mb_port *p = &mb->ports[i];
- if (dsa_is_unused_port(smi->ds, i))
+ if (dsa_is_unused_port(priv->ds, i))
continue;
cancel_delayed_work_sync(&p->mib_work);
}
}
-static int rtl8365mb_get_and_clear_status_reg(struct realtek_smi *smi, u32 reg,
+static int rtl8365mb_get_and_clear_status_reg(struct realtek_priv *priv, u32 reg,
u32 *val)
{
int ret;
- ret = regmap_read(smi->map, reg, val);
+ ret = regmap_read(priv->map, reg, val);
if (ret)
return ret;
- return regmap_write(smi->map, reg, *val);
+ return regmap_write(priv->map, reg, *val);
}
static irqreturn_t rtl8365mb_irq(int irq, void *data)
{
- struct realtek_smi *smi = data;
+ struct realtek_priv *priv = data;
unsigned long line_changes = 0;
struct rtl8365mb *mb;
u32 stat;
int line;
int ret;
- mb = smi->chip_data;
+ mb = priv->chip_data;
- ret = rtl8365mb_get_and_clear_status_reg(smi, RTL8365MB_INTR_STATUS_REG,
+ ret = rtl8365mb_get_and_clear_status_reg(priv, RTL8365MB_INTR_STATUS_REG,
&stat);
if (ret)
goto out_error;
@@ -1485,14 +1503,14 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data)
u32 val;
ret = rtl8365mb_get_and_clear_status_reg(
- smi, RTL8365MB_PORT_LINKUP_IND_REG, &val);
+ priv, RTL8365MB_PORT_LINKUP_IND_REG, &val);
if (ret)
goto out_error;
linkup_ind = FIELD_GET(RTL8365MB_PORT_LINKUP_IND_MASK, val);
ret = rtl8365mb_get_and_clear_status_reg(
- smi, RTL8365MB_PORT_LINKDOWN_IND_REG, &val);
+ priv, RTL8365MB_PORT_LINKDOWN_IND_REG, &val);
if (ret)
goto out_error;
@@ -1504,8 +1522,8 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data)
if (!line_changes)
goto out_none;
- for_each_set_bit(line, &line_changes, smi->num_ports) {
- int child_irq = irq_find_mapping(smi->irqdomain, line);
+ for_each_set_bit(line, &line_changes, priv->num_ports) {
+ int child_irq = irq_find_mapping(priv->irqdomain, line);
handle_nested_irq(child_irq);
}
@@ -1513,7 +1531,7 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data)
return IRQ_HANDLED;
out_error:
- dev_err(smi->dev, "failed to read interrupt status: %d\n", ret);
+ dev_err(priv->dev, "failed to read interrupt status: %d\n", ret);
out_none:
return IRQ_NONE;
@@ -1548,27 +1566,27 @@ static const struct irq_domain_ops rtl8365mb_irqdomain_ops = {
.xlate = irq_domain_xlate_onecell,
};
-static int rtl8365mb_set_irq_enable(struct realtek_smi *smi, bool enable)
+static int rtl8365mb_set_irq_enable(struct realtek_priv *priv, bool enable)
{
- return regmap_update_bits(smi->map, RTL8365MB_INTR_CTRL_REG,
+ return regmap_update_bits(priv->map, RTL8365MB_INTR_CTRL_REG,
RTL8365MB_INTR_LINK_CHANGE_MASK,
FIELD_PREP(RTL8365MB_INTR_LINK_CHANGE_MASK,
enable ? 1 : 0));
}
-static int rtl8365mb_irq_enable(struct realtek_smi *smi)
+static int rtl8365mb_irq_enable(struct realtek_priv *priv)
{
- return rtl8365mb_set_irq_enable(smi, true);
+ return rtl8365mb_set_irq_enable(priv, true);
}
-static int rtl8365mb_irq_disable(struct realtek_smi *smi)
+static int rtl8365mb_irq_disable(struct realtek_priv *priv)
{
- return rtl8365mb_set_irq_enable(smi, false);
+ return rtl8365mb_set_irq_enable(priv, false);
}
-static int rtl8365mb_irq_setup(struct realtek_smi *smi)
+static int rtl8365mb_irq_setup(struct realtek_priv *priv)
{
- struct rtl8365mb *mb = smi->chip_data;
+ struct rtl8365mb *mb = priv->chip_data;
struct device_node *intc;
u32 irq_trig;
int virq;
@@ -1577,9 +1595,9 @@ static int rtl8365mb_irq_setup(struct realtek_smi *smi)
int ret;
int i;
- intc = of_get_child_by_name(smi->dev->of_node, "interrupt-controller");
+ intc = of_get_child_by_name(priv->dev->of_node, "interrupt-controller");
if (!intc) {
- dev_err(smi->dev, "missing child interrupt-controller node\n");
+ dev_err(priv->dev, "missing child interrupt-controller node\n");
return -EINVAL;
}
@@ -1587,24 +1605,24 @@ static int rtl8365mb_irq_setup(struct realtek_smi *smi)
irq = of_irq_get(intc, 0);
if (irq <= 0) {
if (irq != -EPROBE_DEFER)
- dev_err(smi->dev, "failed to get parent irq: %d\n",
+ dev_err(priv->dev, "failed to get parent irq: %d\n",
irq);
ret = irq ? irq : -EINVAL;
goto out_put_node;
}
- smi->irqdomain = irq_domain_add_linear(intc, smi->num_ports,
- &rtl8365mb_irqdomain_ops, smi);
- if (!smi->irqdomain) {
- dev_err(smi->dev, "failed to add irq domain\n");
+ priv->irqdomain = irq_domain_add_linear(intc, priv->num_ports,
+ &rtl8365mb_irqdomain_ops, priv);
+ if (!priv->irqdomain) {
+ dev_err(priv->dev, "failed to add irq domain\n");
ret = -ENOMEM;
goto out_put_node;
}
- for (i = 0; i < smi->num_ports; i++) {
- virq = irq_create_mapping(smi->irqdomain, i);
+ for (i = 0; i < priv->num_ports; i++) {
+ virq = irq_create_mapping(priv->irqdomain, i);
if (!virq) {
- dev_err(smi->dev,
+ dev_err(priv->dev,
"failed to create irq domain mapping\n");
ret = -EINVAL;
goto out_remove_irqdomain;
@@ -1625,40 +1643,40 @@ static int rtl8365mb_irq_setup(struct realtek_smi *smi)
val = RTL8365MB_INTR_POLARITY_LOW;
break;
default:
- dev_err(smi->dev, "unsupported irq trigger type %u\n",
+ dev_err(priv->dev, "unsupported irq trigger type %u\n",
irq_trig);
ret = -EINVAL;
goto out_remove_irqdomain;
}
- ret = regmap_update_bits(smi->map, RTL8365MB_INTR_POLARITY_REG,
+ ret = regmap_update_bits(priv->map, RTL8365MB_INTR_POLARITY_REG,
RTL8365MB_INTR_POLARITY_MASK,
FIELD_PREP(RTL8365MB_INTR_POLARITY_MASK, val));
if (ret)
goto out_remove_irqdomain;
/* Disable the interrupt in case the chip has it enabled on reset */
- ret = rtl8365mb_irq_disable(smi);
+ ret = rtl8365mb_irq_disable(priv);
if (ret)
goto out_remove_irqdomain;
/* Clear the interrupt status register */
- ret = regmap_write(smi->map, RTL8365MB_INTR_STATUS_REG,
+ ret = regmap_write(priv->map, RTL8365MB_INTR_STATUS_REG,
RTL8365MB_INTR_ALL_MASK);
if (ret)
goto out_remove_irqdomain;
ret = request_threaded_irq(irq, NULL, rtl8365mb_irq, IRQF_ONESHOT,
- "rtl8365mb", smi);
+ "rtl8365mb", priv);
if (ret) {
- dev_err(smi->dev, "failed to request irq: %d\n", ret);
+ dev_err(priv->dev, "failed to request irq: %d\n", ret);
goto out_remove_irqdomain;
}
/* Store the irq so that we know to free it during teardown */
mb->irq = irq;
- ret = rtl8365mb_irq_enable(smi);
+ ret = rtl8365mb_irq_enable(priv);
if (ret)
goto out_free_irq;
@@ -1667,17 +1685,17 @@ static int rtl8365mb_irq_setup(struct realtek_smi *smi)
return 0;
out_free_irq:
- free_irq(mb->irq, smi);
+ free_irq(mb->irq, priv);
mb->irq = 0;
out_remove_irqdomain:
- for (i = 0; i < smi->num_ports; i++) {
- virq = irq_find_mapping(smi->irqdomain, i);
+ for (i = 0; i < priv->num_ports; i++) {
+ virq = irq_find_mapping(priv->irqdomain, i);
irq_dispose_mapping(virq);
}
- irq_domain_remove(smi->irqdomain);
- smi->irqdomain = NULL;
+ irq_domain_remove(priv->irqdomain);
+ priv->irqdomain = NULL;
out_put_node:
of_node_put(intc);
@@ -1685,36 +1703,34 @@ out_put_node:
return ret;
}
-static void rtl8365mb_irq_teardown(struct realtek_smi *smi)
+static void rtl8365mb_irq_teardown(struct realtek_priv *priv)
{
- struct rtl8365mb *mb = smi->chip_data;
+ struct rtl8365mb *mb = priv->chip_data;
int virq;
int i;
if (mb->irq) {
- free_irq(mb->irq, smi);
+ free_irq(mb->irq, priv);
mb->irq = 0;
}
- if (smi->irqdomain) {
- for (i = 0; i < smi->num_ports; i++) {
- virq = irq_find_mapping(smi->irqdomain, i);
+ if (priv->irqdomain) {
+ for (i = 0; i < priv->num_ports; i++) {
+ virq = irq_find_mapping(priv->irqdomain, i);
irq_dispose_mapping(virq);
}
- irq_domain_remove(smi->irqdomain);
- smi->irqdomain = NULL;
+ irq_domain_remove(priv->irqdomain);
+ priv->irqdomain = NULL;
}
}
-static int rtl8365mb_cpu_config(struct realtek_smi *smi)
+static int rtl8365mb_cpu_config(struct realtek_priv *priv, const struct rtl8365mb_cpu *cpu)
{
- struct rtl8365mb *mb = smi->chip_data;
- struct rtl8365mb_cpu *cpu = &mb->cpu;
u32 val;
int ret;
- ret = regmap_update_bits(smi->map, RTL8365MB_CPU_PORT_MASK_REG,
+ ret = regmap_update_bits(priv->map, RTL8365MB_CPU_PORT_MASK_REG,
RTL8365MB_CPU_PORT_MASK_MASK,
FIELD_PREP(RTL8365MB_CPU_PORT_MASK_MASK,
cpu->mask));
@@ -1726,26 +1742,26 @@ static int rtl8365mb_cpu_config(struct realtek_smi *smi)
FIELD_PREP(RTL8365MB_CPU_CTRL_TAG_POSITION_MASK, cpu->position) |
FIELD_PREP(RTL8365MB_CPU_CTRL_RXBYTECOUNT_MASK, cpu->rx_length) |
FIELD_PREP(RTL8365MB_CPU_CTRL_TAG_FORMAT_MASK, cpu->format) |
- FIELD_PREP(RTL8365MB_CPU_CTRL_TRAP_PORT_MASK, cpu->trap_port) |
+ FIELD_PREP(RTL8365MB_CPU_CTRL_TRAP_PORT_MASK, cpu->trap_port & 0x7) |
FIELD_PREP(RTL8365MB_CPU_CTRL_TRAP_PORT_EXT_MASK,
- cpu->trap_port >> 3);
- ret = regmap_write(smi->map, RTL8365MB_CPU_CTRL_REG, val);
+ cpu->trap_port >> 3 & 0x1);
+ ret = regmap_write(priv->map, RTL8365MB_CPU_CTRL_REG, val);
if (ret)
return ret;
return 0;
}
-static int rtl8365mb_switch_init(struct realtek_smi *smi)
+static int rtl8365mb_switch_init(struct realtek_priv *priv)
{
- struct rtl8365mb *mb = smi->chip_data;
+ struct rtl8365mb *mb = priv->chip_data;
int ret;
int i;
/* Do any chip-specific init jam before getting to the common stuff */
if (mb->jam_table) {
for (i = 0; i < mb->jam_size; i++) {
- ret = regmap_write(smi->map, mb->jam_table[i].reg,
+ ret = regmap_write(priv->map, mb->jam_table[i].reg,
mb->jam_table[i].val);
if (ret)
return ret;
@@ -1754,7 +1770,7 @@ static int rtl8365mb_switch_init(struct realtek_smi *smi)
/* Common init jam */
for (i = 0; i < ARRAY_SIZE(rtl8365mb_init_jam_common); i++) {
- ret = regmap_write(smi->map, rtl8365mb_init_jam_common[i].reg,
+ ret = regmap_write(priv->map, rtl8365mb_init_jam_common[i].reg,
rtl8365mb_init_jam_common[i].val);
if (ret)
return ret;
@@ -1763,75 +1779,86 @@ static int rtl8365mb_switch_init(struct realtek_smi *smi)
return 0;
}
-static int rtl8365mb_reset_chip(struct realtek_smi *smi)
+static int rtl8365mb_reset_chip(struct realtek_priv *priv)
{
u32 val;
- realtek_smi_write_reg_noack(smi, RTL8365MB_CHIP_RESET_REG,
- FIELD_PREP(RTL8365MB_CHIP_RESET_HW_MASK,
- 1));
+ priv->write_reg_noack(priv, RTL8365MB_CHIP_RESET_REG,
+ FIELD_PREP(RTL8365MB_CHIP_RESET_HW_MASK, 1));
/* Realtek documentation says the chip needs 1 second to reset. Sleep
* for 100 ms before accessing any registers to prevent ACK timeouts.
*/
msleep(100);
- return regmap_read_poll_timeout(smi->map, RTL8365MB_CHIP_RESET_REG, val,
+ return regmap_read_poll_timeout(priv->map, RTL8365MB_CHIP_RESET_REG, val,
!(val & RTL8365MB_CHIP_RESET_HW_MASK),
20000, 1e6);
}
static int rtl8365mb_setup(struct dsa_switch *ds)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
+ struct rtl8365mb_cpu cpu = {0};
+ struct dsa_port *cpu_dp;
struct rtl8365mb *mb;
int ret;
int i;
- mb = smi->chip_data;
+ mb = priv->chip_data;
- ret = rtl8365mb_reset_chip(smi);
+ ret = rtl8365mb_reset_chip(priv);
if (ret) {
- dev_err(smi->dev, "failed to reset chip: %d\n", ret);
+ dev_err(priv->dev, "failed to reset chip: %d\n", ret);
goto out_error;
}
/* Configure switch to vendor-defined initial state */
- ret = rtl8365mb_switch_init(smi);
+ ret = rtl8365mb_switch_init(priv);
if (ret) {
- dev_err(smi->dev, "failed to initialize switch: %d\n", ret);
+ dev_err(priv->dev, "failed to initialize switch: %d\n", ret);
goto out_error;
}
/* Set up cascading IRQs */
- ret = rtl8365mb_irq_setup(smi);
+ ret = rtl8365mb_irq_setup(priv);
if (ret == -EPROBE_DEFER)
return ret;
else if (ret)
- dev_info(smi->dev, "no interrupt support\n");
+ dev_info(priv->dev, "no interrupt support\n");
/* Configure CPU tagging */
- ret = rtl8365mb_cpu_config(smi);
+ cpu.trap_port = RTL8365MB_MAX_NUM_PORTS;
+ dsa_switch_for_each_cpu_port(cpu_dp, priv->ds) {
+ cpu.mask |= BIT(cpu_dp->index);
+
+ if (cpu.trap_port == RTL8365MB_MAX_NUM_PORTS)
+ cpu.trap_port = cpu_dp->index;
+ }
+
+ cpu.enable = cpu.mask > 0;
+ cpu.insert = RTL8365MB_CPU_INSERT_TO_ALL;
+ cpu.position = RTL8365MB_CPU_POS_AFTER_SA;
+ cpu.rx_length = RTL8365MB_CPU_RXLEN_64BYTES;
+ cpu.format = RTL8365MB_CPU_FORMAT_8BYTES;
+
+ ret = rtl8365mb_cpu_config(priv, &cpu);
if (ret)
goto out_teardown_irq;
/* Configure ports */
- for (i = 0; i < smi->num_ports; i++) {
+ for (i = 0; i < priv->num_ports; i++) {
struct rtl8365mb_port *p = &mb->ports[i];
- if (dsa_is_unused_port(smi->ds, i))
+ if (dsa_is_unused_port(priv->ds, i))
continue;
- /* Set up per-port private data */
- p->smi = smi;
- p->index = i;
-
/* Forward only to the CPU */
- ret = rtl8365mb_port_set_isolation(smi, i, BIT(smi->cpu_port));
+ ret = rtl8365mb_port_set_isolation(priv, i, cpu.mask);
if (ret)
goto out_teardown_irq;
/* Disable learning */
- ret = rtl8365mb_port_set_learning(smi, i, false);
+ ret = rtl8365mb_port_set_learning(priv, i, false);
if (ret)
goto out_teardown_irq;
@@ -1839,29 +1866,35 @@ static int rtl8365mb_setup(struct dsa_switch *ds)
* ports will still forward frames to the CPU despite being
* administratively down by default.
*/
- rtl8365mb_port_stp_state_set(smi->ds, i, BR_STATE_DISABLED);
+ rtl8365mb_port_stp_state_set(priv->ds, i, BR_STATE_DISABLED);
+
+ /* Set up per-port private data */
+ p->priv = priv;
+ p->index = i;
}
/* Set maximum packet length to 1536 bytes */
- ret = regmap_update_bits(smi->map, RTL8365MB_CFG0_MAX_LEN_REG,
+ ret = regmap_update_bits(priv->map, RTL8365MB_CFG0_MAX_LEN_REG,
RTL8365MB_CFG0_MAX_LEN_MASK,
FIELD_PREP(RTL8365MB_CFG0_MAX_LEN_MASK, 1536));
if (ret)
goto out_teardown_irq;
- ret = realtek_smi_setup_mdio(smi);
- if (ret) {
- dev_err(smi->dev, "could not set up MDIO bus\n");
- goto out_teardown_irq;
+ if (priv->setup_interface) {
+ ret = priv->setup_interface(ds);
+ if (ret) {
+ dev_err(priv->dev, "could not set up MDIO bus\n");
+ goto out_teardown_irq;
+ }
}
/* Start statistics counter polling */
- rtl8365mb_stats_setup(smi);
+ rtl8365mb_stats_setup(priv);
return 0;
out_teardown_irq:
- rtl8365mb_irq_teardown(smi);
+ rtl8365mb_irq_teardown(priv);
out_error:
return ret;
@@ -1869,10 +1902,10 @@ out_error:
static void rtl8365mb_teardown(struct dsa_switch *ds)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
- rtl8365mb_stats_teardown(smi);
- rtl8365mb_irq_teardown(smi);
+ rtl8365mb_stats_teardown(priv);
+ rtl8365mb_irq_teardown(priv);
}
static int rtl8365mb_get_chip_id_and_ver(struct regmap *map, u32 *id, u32 *ver)
@@ -1902,48 +1935,57 @@ static int rtl8365mb_get_chip_id_and_ver(struct regmap *map, u32 *id, u32 *ver)
return 0;
}
-static int rtl8365mb_detect(struct realtek_smi *smi)
+static int rtl8365mb_detect(struct realtek_priv *priv)
{
- struct rtl8365mb *mb = smi->chip_data;
+ struct rtl8365mb *mb = priv->chip_data;
u32 chip_id;
u32 chip_ver;
int ret;
- ret = rtl8365mb_get_chip_id_and_ver(smi->map, &chip_id, &chip_ver);
+ ret = rtl8365mb_get_chip_id_and_ver(priv->map, &chip_id, &chip_ver);
if (ret) {
- dev_err(smi->dev, "failed to read chip id and version: %d\n",
+ dev_err(priv->dev, "failed to read chip id and version: %d\n",
ret);
return ret;
}
switch (chip_id) {
case RTL8365MB_CHIP_ID_8365MB_VC:
- dev_info(smi->dev,
- "found an RTL8365MB-VC switch (ver=0x%04x)\n",
- chip_ver);
+ switch (chip_ver) {
+ case RTL8365MB_CHIP_VER_8365MB_VC:
+ dev_info(priv->dev,
+ "found an RTL8365MB-VC switch (ver=0x%04x)\n",
+ chip_ver);
+ break;
+ case RTL8365MB_CHIP_VER_8367RB:
+ dev_info(priv->dev,
+ "found an RTL8367RB-VB switch (ver=0x%04x)\n",
+ chip_ver);
+ break;
+ case RTL8365MB_CHIP_VER_8367S:
+ dev_info(priv->dev,
+ "found an RTL8367S switch (ver=0x%04x)\n",
+ chip_ver);
+ break;
+ default:
+ dev_err(priv->dev, "unrecognized switch version (ver=0x%04x)",
+ chip_ver);
+ return -ENODEV;
+ }
- smi->cpu_port = RTL8365MB_CPU_PORT_NUM_8365MB_VC;
- smi->num_ports = smi->cpu_port + 1;
+ priv->num_ports = RTL8365MB_MAX_NUM_PORTS;
- mb->smi = smi;
+ mb->priv = priv;
mb->chip_id = chip_id;
mb->chip_ver = chip_ver;
- mb->port_mask = BIT(smi->num_ports) - 1;
- mb->learn_limit_max = RTL8365MB_LEARN_LIMIT_MAX_8365MB_VC;
+ mb->port_mask = GENMASK(priv->num_ports - 1, 0);
+ mb->learn_limit_max = RTL8365MB_LEARN_LIMIT_MAX;
mb->jam_table = rtl8365mb_init_jam_8365mb_vc;
mb->jam_size = ARRAY_SIZE(rtl8365mb_init_jam_8365mb_vc);
- mb->cpu.enable = 1;
- mb->cpu.mask = BIT(smi->cpu_port);
- mb->cpu.trap_port = smi->cpu_port;
- mb->cpu.insert = RTL8365MB_CPU_INSERT_TO_ALL;
- mb->cpu.position = RTL8365MB_CPU_POS_AFTER_SA;
- mb->cpu.rx_length = RTL8365MB_CPU_RXLEN_64BYTES;
- mb->cpu.format = RTL8365MB_CPU_FORMAT_8BYTES;
-
break;
default:
- dev_err(smi->dev,
+ dev_err(priv->dev,
"found an unknown Realtek switch (id=0x%04x, ver=0x%04x)\n",
chip_id, chip_ver);
return -ENODEV;
@@ -1952,14 +1994,34 @@ static int rtl8365mb_detect(struct realtek_smi *smi)
return 0;
}
-static const struct dsa_switch_ops rtl8365mb_switch_ops = {
+static const struct dsa_switch_ops rtl8365mb_switch_ops_smi = {
+ .get_tag_protocol = rtl8365mb_get_tag_protocol,
+ .setup = rtl8365mb_setup,
+ .teardown = rtl8365mb_teardown,
+ .phylink_get_caps = rtl8365mb_phylink_get_caps,
+ .phylink_mac_config = rtl8365mb_phylink_mac_config,
+ .phylink_mac_link_down = rtl8365mb_phylink_mac_link_down,
+ .phylink_mac_link_up = rtl8365mb_phylink_mac_link_up,
+ .port_stp_state_set = rtl8365mb_port_stp_state_set,
+ .get_strings = rtl8365mb_get_strings,
+ .get_ethtool_stats = rtl8365mb_get_ethtool_stats,
+ .get_sset_count = rtl8365mb_get_sset_count,
+ .get_eth_phy_stats = rtl8365mb_get_phy_stats,
+ .get_eth_mac_stats = rtl8365mb_get_mac_stats,
+ .get_eth_ctrl_stats = rtl8365mb_get_ctrl_stats,
+ .get_stats64 = rtl8365mb_get_stats64,
+};
+
+static const struct dsa_switch_ops rtl8365mb_switch_ops_mdio = {
.get_tag_protocol = rtl8365mb_get_tag_protocol,
.setup = rtl8365mb_setup,
.teardown = rtl8365mb_teardown,
- .phylink_validate = rtl8365mb_phylink_validate,
+ .phylink_get_caps = rtl8365mb_phylink_get_caps,
.phylink_mac_config = rtl8365mb_phylink_mac_config,
.phylink_mac_link_down = rtl8365mb_phylink_mac_link_down,
.phylink_mac_link_up = rtl8365mb_phylink_mac_link_up,
+ .phy_read = rtl8365mb_dsa_phy_read,
+ .phy_write = rtl8365mb_dsa_phy_write,
.port_stp_state_set = rtl8365mb_port_stp_state_set,
.get_strings = rtl8365mb_get_strings,
.get_ethtool_stats = rtl8365mb_get_ethtool_stats,
@@ -1970,18 +2032,23 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops = {
.get_stats64 = rtl8365mb_get_stats64,
};
-static const struct realtek_smi_ops rtl8365mb_smi_ops = {
+static const struct realtek_ops rtl8365mb_ops = {
.detect = rtl8365mb_detect,
.phy_read = rtl8365mb_phy_read,
.phy_write = rtl8365mb_phy_write,
};
-const struct realtek_smi_variant rtl8365mb_variant = {
- .ds_ops = &rtl8365mb_switch_ops,
- .ops = &rtl8365mb_smi_ops,
+const struct realtek_variant rtl8365mb_variant = {
+ .ds_ops_smi = &rtl8365mb_switch_ops_smi,
+ .ds_ops_mdio = &rtl8365mb_switch_ops_mdio,
+ .ops = &rtl8365mb_ops,
.clk_delay = 10,
.cmd_read = 0xb9,
.cmd_write = 0xb8,
.chip_data_sz = sizeof(struct rtl8365mb),
};
EXPORT_SYMBOL_GPL(rtl8365mb_variant);
+
+MODULE_AUTHOR("Alvin Å ipraga <alsi@bang-olufsen.dk>");
+MODULE_DESCRIPTION("Driver for RTL8365MB-VC ethernet switch");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/realtek/rtl8366-core.c
index bdb8d8d34880..dc5f75be3017 100644
--- a/drivers/net/dsa/rtl8366.c
+++ b/drivers/net/dsa/realtek/rtl8366-core.c
@@ -11,18 +11,18 @@
#include <linux/if_bridge.h>
#include <net/dsa.h>
-#include "realtek-smi-core.h"
+#include "realtek.h"
-int rtl8366_mc_is_used(struct realtek_smi *smi, int mc_index, int *used)
+int rtl8366_mc_is_used(struct realtek_priv *priv, int mc_index, int *used)
{
int ret;
int i;
*used = 0;
- for (i = 0; i < smi->num_ports; i++) {
+ for (i = 0; i < priv->num_ports; i++) {
int index = 0;
- ret = smi->ops->get_mc_index(smi, i, &index);
+ ret = priv->ops->get_mc_index(priv, i, &index);
if (ret)
return ret;
@@ -38,13 +38,13 @@ EXPORT_SYMBOL_GPL(rtl8366_mc_is_used);
/**
* rtl8366_obtain_mc() - retrieve or allocate a VLAN member configuration
- * @smi: the Realtek SMI device instance
+ * @priv: the Realtek SMI device instance
* @vid: the VLAN ID to look up or allocate
* @vlanmc: the pointer will be assigned to a pointer to a valid member config
* if successful
* @return: index of a new member config or negative error number
*/
-static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid,
+static int rtl8366_obtain_mc(struct realtek_priv *priv, int vid,
struct rtl8366_vlan_mc *vlanmc)
{
struct rtl8366_vlan_4k vlan4k;
@@ -52,10 +52,10 @@ static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid,
int i;
/* Try to find an existing member config entry for this VID */
- for (i = 0; i < smi->num_vlan_mc; i++) {
- ret = smi->ops->get_vlan_mc(smi, i, vlanmc);
+ for (i = 0; i < priv->num_vlan_mc; i++) {
+ ret = priv->ops->get_vlan_mc(priv, i, vlanmc);
if (ret) {
- dev_err(smi->dev, "error searching for VLAN MC %d for VID %d\n",
+ dev_err(priv->dev, "error searching for VLAN MC %d for VID %d\n",
i, vid);
return ret;
}
@@ -65,19 +65,19 @@ static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid,
}
/* We have no MC entry for this VID, try to find an empty one */
- for (i = 0; i < smi->num_vlan_mc; i++) {
- ret = smi->ops->get_vlan_mc(smi, i, vlanmc);
+ for (i = 0; i < priv->num_vlan_mc; i++) {
+ ret = priv->ops->get_vlan_mc(priv, i, vlanmc);
if (ret) {
- dev_err(smi->dev, "error searching for VLAN MC %d for VID %d\n",
+ dev_err(priv->dev, "error searching for VLAN MC %d for VID %d\n",
i, vid);
return ret;
}
if (vlanmc->vid == 0 && vlanmc->member == 0) {
/* Update the entry from the 4K table */
- ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
+ ret = priv->ops->get_vlan_4k(priv, vid, &vlan4k);
if (ret) {
- dev_err(smi->dev, "error looking for 4K VLAN MC %d for VID %d\n",
+ dev_err(priv->dev, "error looking for 4K VLAN MC %d for VID %d\n",
i, vid);
return ret;
}
@@ -86,30 +86,30 @@ static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid,
vlanmc->member = vlan4k.member;
vlanmc->untag = vlan4k.untag;
vlanmc->fid = vlan4k.fid;
- ret = smi->ops->set_vlan_mc(smi, i, vlanmc);
+ ret = priv->ops->set_vlan_mc(priv, i, vlanmc);
if (ret) {
- dev_err(smi->dev, "unable to set/update VLAN MC %d for VID %d\n",
+ dev_err(priv->dev, "unable to set/update VLAN MC %d for VID %d\n",
i, vid);
return ret;
}
- dev_dbg(smi->dev, "created new MC at index %d for VID %d\n",
+ dev_dbg(priv->dev, "created new MC at index %d for VID %d\n",
i, vid);
return i;
}
}
/* MC table is full, try to find an unused entry and replace it */
- for (i = 0; i < smi->num_vlan_mc; i++) {
+ for (i = 0; i < priv->num_vlan_mc; i++) {
int used;
- ret = rtl8366_mc_is_used(smi, i, &used);
+ ret = rtl8366_mc_is_used(priv, i, &used);
if (ret)
return ret;
if (!used) {
/* Update the entry from the 4K table */
- ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
+ ret = priv->ops->get_vlan_4k(priv, vid, &vlan4k);
if (ret)
return ret;
@@ -117,23 +117,23 @@ static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid,
vlanmc->member = vlan4k.member;
vlanmc->untag = vlan4k.untag;
vlanmc->fid = vlan4k.fid;
- ret = smi->ops->set_vlan_mc(smi, i, vlanmc);
+ ret = priv->ops->set_vlan_mc(priv, i, vlanmc);
if (ret) {
- dev_err(smi->dev, "unable to set/update VLAN MC %d for VID %d\n",
+ dev_err(priv->dev, "unable to set/update VLAN MC %d for VID %d\n",
i, vid);
return ret;
}
- dev_dbg(smi->dev, "recycled MC at index %i for VID %d\n",
+ dev_dbg(priv->dev, "recycled MC at index %i for VID %d\n",
i, vid);
return i;
}
}
- dev_err(smi->dev, "all VLAN member configurations are in use\n");
+ dev_err(priv->dev, "all VLAN member configurations are in use\n");
return -ENOSPC;
}
-int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
+int rtl8366_set_vlan(struct realtek_priv *priv, int vid, u32 member,
u32 untag, u32 fid)
{
struct rtl8366_vlan_mc vlanmc;
@@ -141,31 +141,31 @@ int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
int mc;
int ret;
- if (!smi->ops->is_vlan_valid(smi, vid))
+ if (!priv->ops->is_vlan_valid(priv, vid))
return -EINVAL;
- dev_dbg(smi->dev,
+ dev_dbg(priv->dev,
"setting VLAN%d 4k members: 0x%02x, untagged: 0x%02x\n",
vid, member, untag);
/* Update the 4K table */
- ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
+ ret = priv->ops->get_vlan_4k(priv, vid, &vlan4k);
if (ret)
return ret;
vlan4k.member |= member;
vlan4k.untag |= untag;
vlan4k.fid = fid;
- ret = smi->ops->set_vlan_4k(smi, &vlan4k);
+ ret = priv->ops->set_vlan_4k(priv, &vlan4k);
if (ret)
return ret;
- dev_dbg(smi->dev,
+ dev_dbg(priv->dev,
"resulting VLAN%d 4k members: 0x%02x, untagged: 0x%02x\n",
vid, vlan4k.member, vlan4k.untag);
/* Find or allocate a member config for this VID */
- ret = rtl8366_obtain_mc(smi, vid, &vlanmc);
+ ret = rtl8366_obtain_mc(priv, vid, &vlanmc);
if (ret < 0)
return ret;
mc = ret;
@@ -176,12 +176,12 @@ int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
vlanmc.fid = fid;
/* Commit updates to the MC entry */
- ret = smi->ops->set_vlan_mc(smi, mc, &vlanmc);
+ ret = priv->ops->set_vlan_mc(priv, mc, &vlanmc);
if (ret)
- dev_err(smi->dev, "failed to commit changes to VLAN MC index %d for VID %d\n",
+ dev_err(priv->dev, "failed to commit changes to VLAN MC index %d for VID %d\n",
mc, vid);
else
- dev_dbg(smi->dev,
+ dev_dbg(priv->dev,
"resulting VLAN%d MC members: 0x%02x, untagged: 0x%02x\n",
vid, vlanmc.member, vlanmc.untag);
@@ -189,37 +189,37 @@ int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
}
EXPORT_SYMBOL_GPL(rtl8366_set_vlan);
-int rtl8366_set_pvid(struct realtek_smi *smi, unsigned int port,
+int rtl8366_set_pvid(struct realtek_priv *priv, unsigned int port,
unsigned int vid)
{
struct rtl8366_vlan_mc vlanmc;
int mc;
int ret;
- if (!smi->ops->is_vlan_valid(smi, vid))
+ if (!priv->ops->is_vlan_valid(priv, vid))
return -EINVAL;
/* Find or allocate a member config for this VID */
- ret = rtl8366_obtain_mc(smi, vid, &vlanmc);
+ ret = rtl8366_obtain_mc(priv, vid, &vlanmc);
if (ret < 0)
return ret;
mc = ret;
- ret = smi->ops->set_mc_index(smi, port, mc);
+ ret = priv->ops->set_mc_index(priv, port, mc);
if (ret) {
- dev_err(smi->dev, "set PVID: failed to set MC index %d for port %d\n",
+ dev_err(priv->dev, "set PVID: failed to set MC index %d for port %d\n",
mc, port);
return ret;
}
- dev_dbg(smi->dev, "set PVID: the PVID for port %d set to %d using existing MC index %d\n",
+ dev_dbg(priv->dev, "set PVID: the PVID for port %d set to %d using existing MC index %d\n",
port, vid, mc);
return 0;
}
EXPORT_SYMBOL_GPL(rtl8366_set_pvid);
-int rtl8366_enable_vlan4k(struct realtek_smi *smi, bool enable)
+int rtl8366_enable_vlan4k(struct realtek_priv *priv, bool enable)
{
int ret;
@@ -229,52 +229,52 @@ int rtl8366_enable_vlan4k(struct realtek_smi *smi, bool enable)
*/
if (enable) {
/* Make sure VLAN is ON */
- ret = smi->ops->enable_vlan(smi, true);
+ ret = priv->ops->enable_vlan(priv, true);
if (ret)
return ret;
- smi->vlan_enabled = true;
+ priv->vlan_enabled = true;
}
- ret = smi->ops->enable_vlan4k(smi, enable);
+ ret = priv->ops->enable_vlan4k(priv, enable);
if (ret)
return ret;
- smi->vlan4k_enabled = enable;
+ priv->vlan4k_enabled = enable;
return 0;
}
EXPORT_SYMBOL_GPL(rtl8366_enable_vlan4k);
-int rtl8366_enable_vlan(struct realtek_smi *smi, bool enable)
+int rtl8366_enable_vlan(struct realtek_priv *priv, bool enable)
{
int ret;
- ret = smi->ops->enable_vlan(smi, enable);
+ ret = priv->ops->enable_vlan(priv, enable);
if (ret)
return ret;
- smi->vlan_enabled = enable;
+ priv->vlan_enabled = enable;
/* If we turn VLAN off, make sure that we turn off
* 4k VLAN as well, if that happened to be on.
*/
if (!enable) {
- smi->vlan4k_enabled = false;
- ret = smi->ops->enable_vlan4k(smi, false);
+ priv->vlan4k_enabled = false;
+ ret = priv->ops->enable_vlan4k(priv, false);
}
return ret;
}
EXPORT_SYMBOL_GPL(rtl8366_enable_vlan);
-int rtl8366_reset_vlan(struct realtek_smi *smi)
+int rtl8366_reset_vlan(struct realtek_priv *priv)
{
struct rtl8366_vlan_mc vlanmc;
int ret;
int i;
- rtl8366_enable_vlan(smi, false);
- rtl8366_enable_vlan4k(smi, false);
+ rtl8366_enable_vlan(priv, false);
+ rtl8366_enable_vlan4k(priv, false);
/* Clear the 16 VLAN member configurations */
vlanmc.vid = 0;
@@ -282,8 +282,8 @@ int rtl8366_reset_vlan(struct realtek_smi *smi)
vlanmc.member = 0;
vlanmc.untag = 0;
vlanmc.fid = 0;
- for (i = 0; i < smi->num_vlan_mc; i++) {
- ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
+ for (i = 0; i < priv->num_vlan_mc; i++) {
+ ret = priv->ops->set_vlan_mc(priv, i, &vlanmc);
if (ret)
return ret;
}
@@ -298,12 +298,12 @@ int rtl8366_vlan_add(struct dsa_switch *ds, int port,
{
bool untagged = !!(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED);
bool pvid = !!(vlan->flags & BRIDGE_VLAN_INFO_PVID);
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
u32 member = 0;
u32 untag = 0;
int ret;
- if (!smi->ops->is_vlan_valid(smi, vlan->vid)) {
+ if (!priv->ops->is_vlan_valid(priv, vlan->vid)) {
NL_SET_ERR_MSG_MOD(extack, "VLAN ID not valid");
return -EINVAL;
}
@@ -312,13 +312,13 @@ int rtl8366_vlan_add(struct dsa_switch *ds, int port,
* FIXME: what's with this 4k business?
* Just rtl8366_enable_vlan() seems inconclusive.
*/
- ret = rtl8366_enable_vlan4k(smi, true);
+ ret = rtl8366_enable_vlan4k(priv, true);
if (ret) {
NL_SET_ERR_MSG_MOD(extack, "Failed to enable VLAN 4K");
return ret;
}
- dev_dbg(smi->dev, "add VLAN %d on port %d, %s, %s\n",
+ dev_dbg(priv->dev, "add VLAN %d on port %d, %s, %s\n",
vlan->vid, port, untagged ? "untagged" : "tagged",
pvid ? "PVID" : "no PVID");
@@ -327,18 +327,18 @@ int rtl8366_vlan_add(struct dsa_switch *ds, int port,
if (untagged)
untag |= BIT(port);
- ret = rtl8366_set_vlan(smi, vlan->vid, member, untag, 0);
+ ret = rtl8366_set_vlan(priv, vlan->vid, member, untag, 0);
if (ret) {
- dev_err(smi->dev, "failed to set up VLAN %04x", vlan->vid);
+ dev_err(priv->dev, "failed to set up VLAN %04x", vlan->vid);
return ret;
}
if (!pvid)
return 0;
- ret = rtl8366_set_pvid(smi, port, vlan->vid);
+ ret = rtl8366_set_pvid(priv, port, vlan->vid);
if (ret) {
- dev_err(smi->dev, "failed to set PVID on port %d to VLAN %04x",
+ dev_err(priv->dev, "failed to set PVID on port %d to VLAN %04x",
port, vlan->vid);
return ret;
}
@@ -350,15 +350,15 @@ EXPORT_SYMBOL_GPL(rtl8366_vlan_add);
int rtl8366_vlan_del(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
int ret, i;
- dev_dbg(smi->dev, "del VLAN %d on port %d\n", vlan->vid, port);
+ dev_dbg(priv->dev, "del VLAN %d on port %d\n", vlan->vid, port);
- for (i = 0; i < smi->num_vlan_mc; i++) {
+ for (i = 0; i < priv->num_vlan_mc; i++) {
struct rtl8366_vlan_mc vlanmc;
- ret = smi->ops->get_vlan_mc(smi, i, &vlanmc);
+ ret = priv->ops->get_vlan_mc(priv, i, &vlanmc);
if (ret)
return ret;
@@ -376,9 +376,9 @@ int rtl8366_vlan_del(struct dsa_switch *ds, int port,
vlanmc.priority = 0;
vlanmc.fid = 0;
}
- ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
+ ret = priv->ops->set_vlan_mc(priv, i, &vlanmc);
if (ret) {
- dev_err(smi->dev,
+ dev_err(priv->dev,
"failed to remove VLAN %04x\n",
vlan->vid);
return ret;
@@ -394,15 +394,15 @@ EXPORT_SYMBOL_GPL(rtl8366_vlan_del);
void rtl8366_get_strings(struct dsa_switch *ds, int port, u32 stringset,
uint8_t *data)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
struct rtl8366_mib_counter *mib;
int i;
- if (port >= smi->num_ports)
+ if (port >= priv->num_ports)
return;
- for (i = 0; i < smi->num_mib_counters; i++) {
- mib = &smi->mib_counters[i];
+ for (i = 0; i < priv->num_mib_counters; i++) {
+ mib = &priv->mib_counters[i];
strncpy(data + i * ETH_GSTRING_LEN,
mib->name, ETH_GSTRING_LEN);
}
@@ -411,35 +411,35 @@ EXPORT_SYMBOL_GPL(rtl8366_get_strings);
int rtl8366_get_sset_count(struct dsa_switch *ds, int port, int sset)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
/* We only support SS_STATS */
if (sset != ETH_SS_STATS)
return 0;
- if (port >= smi->num_ports)
+ if (port >= priv->num_ports)
return -EINVAL;
- return smi->num_mib_counters;
+ return priv->num_mib_counters;
}
EXPORT_SYMBOL_GPL(rtl8366_get_sset_count);
void rtl8366_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
int i;
int ret;
- if (port >= smi->num_ports)
+ if (port >= priv->num_ports)
return;
- for (i = 0; i < smi->num_mib_counters; i++) {
+ for (i = 0; i < priv->num_mib_counters; i++) {
struct rtl8366_mib_counter *mib;
u64 mibvalue = 0;
- mib = &smi->mib_counters[i];
- ret = smi->ops->get_mib_counter(smi, port, mib, &mibvalue);
+ mib = &priv->mib_counters[i];
+ ret = priv->ops->get_mib_counter(priv, port, mib, &mibvalue);
if (ret) {
- dev_err(smi->dev, "error reading MIB counter %s\n",
+ dev_err(priv->dev, "error reading MIB counter %s\n",
mib->name);
}
data[i] = mibvalue;
diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c
index ecc19bd5115f..fb6565e68401 100644
--- a/drivers/net/dsa/rtl8366rb.c
+++ b/drivers/net/dsa/realtek/rtl8366rb.c
@@ -21,7 +21,7 @@
#include <linux/of_irq.h>
#include <linux/regmap.h>
-#include "realtek-smi-core.h"
+#include "realtek.h"
#define RTL8366RB_PORT_NUM_CPU 5
#define RTL8366RB_NUM_PORTS 6
@@ -396,7 +396,7 @@ static struct rtl8366_mib_counter rtl8366rb_mib_counters[] = {
{ 0, 70, 2, "IfOutBroadcastPkts" },
};
-static int rtl8366rb_get_mib_counter(struct realtek_smi *smi,
+static int rtl8366rb_get_mib_counter(struct realtek_priv *priv,
int port,
struct rtl8366_mib_counter *mib,
u64 *mibvalue)
@@ -412,12 +412,12 @@ static int rtl8366rb_get_mib_counter(struct realtek_smi *smi,
/* Writing access counter address first
* then ASIC will prepare 64bits counter wait for being retrived
*/
- ret = regmap_write(smi->map, addr, 0); /* Write whatever */
+ ret = regmap_write(priv->map, addr, 0); /* Write whatever */
if (ret)
return ret;
/* Read MIB control register */
- ret = regmap_read(smi->map, RTL8366RB_MIB_CTRL_REG, &val);
+ ret = regmap_read(priv->map, RTL8366RB_MIB_CTRL_REG, &val);
if (ret)
return -EIO;
@@ -430,7 +430,7 @@ static int rtl8366rb_get_mib_counter(struct realtek_smi *smi,
/* Read each individual MIB 16 bits at the time */
*mibvalue = 0;
for (i = mib->length; i > 0; i--) {
- ret = regmap_read(smi->map, addr + (i - 1), &val);
+ ret = regmap_read(priv->map, addr + (i - 1), &val);
if (ret)
return ret;
*mibvalue = (*mibvalue << 16) | (val & 0xFFFF);
@@ -455,38 +455,38 @@ static u32 rtl8366rb_get_irqmask(struct irq_data *d)
static void rtl8366rb_mask_irq(struct irq_data *d)
{
- struct realtek_smi *smi = irq_data_get_irq_chip_data(d);
+ struct realtek_priv *priv = irq_data_get_irq_chip_data(d);
int ret;
- ret = regmap_update_bits(smi->map, RTL8366RB_INTERRUPT_MASK_REG,
+ ret = regmap_update_bits(priv->map, RTL8366RB_INTERRUPT_MASK_REG,
rtl8366rb_get_irqmask(d), 0);
if (ret)
- dev_err(smi->dev, "could not mask IRQ\n");
+ dev_err(priv->dev, "could not mask IRQ\n");
}
static void rtl8366rb_unmask_irq(struct irq_data *d)
{
- struct realtek_smi *smi = irq_data_get_irq_chip_data(d);
+ struct realtek_priv *priv = irq_data_get_irq_chip_data(d);
int ret;
- ret = regmap_update_bits(smi->map, RTL8366RB_INTERRUPT_MASK_REG,
+ ret = regmap_update_bits(priv->map, RTL8366RB_INTERRUPT_MASK_REG,
rtl8366rb_get_irqmask(d),
rtl8366rb_get_irqmask(d));
if (ret)
- dev_err(smi->dev, "could not unmask IRQ\n");
+ dev_err(priv->dev, "could not unmask IRQ\n");
}
static irqreturn_t rtl8366rb_irq(int irq, void *data)
{
- struct realtek_smi *smi = data;
+ struct realtek_priv *priv = data;
u32 stat;
int ret;
/* This clears the IRQ status register */
- ret = regmap_read(smi->map, RTL8366RB_INTERRUPT_STATUS_REG,
+ ret = regmap_read(priv->map, RTL8366RB_INTERRUPT_STATUS_REG,
&stat);
if (ret) {
- dev_err(smi->dev, "can't read interrupt status\n");
+ dev_err(priv->dev, "can't read interrupt status\n");
return IRQ_NONE;
}
stat &= RTL8366RB_INTERRUPT_VALID;
@@ -502,7 +502,7 @@ static irqreturn_t rtl8366rb_irq(int irq, void *data)
*/
if (line < 12 && line > 5)
line -= 5;
- child_irq = irq_find_mapping(smi->irqdomain, line);
+ child_irq = irq_find_mapping(priv->irqdomain, line);
handle_nested_irq(child_irq);
}
return IRQ_HANDLED;
@@ -538,7 +538,7 @@ static const struct irq_domain_ops rtl8366rb_irqdomain_ops = {
.xlate = irq_domain_xlate_onecell,
};
-static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi)
+static int rtl8366rb_setup_cascaded_irq(struct realtek_priv *priv)
{
struct device_node *intc;
unsigned long irq_trig;
@@ -547,24 +547,24 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi)
u32 val;
int i;
- intc = of_get_child_by_name(smi->dev->of_node, "interrupt-controller");
+ intc = of_get_child_by_name(priv->dev->of_node, "interrupt-controller");
if (!intc) {
- dev_err(smi->dev, "missing child interrupt-controller node\n");
+ dev_err(priv->dev, "missing child interrupt-controller node\n");
return -EINVAL;
}
/* RB8366RB IRQs cascade off this one */
irq = of_irq_get(intc, 0);
if (irq <= 0) {
- dev_err(smi->dev, "failed to get parent IRQ\n");
+ dev_err(priv->dev, "failed to get parent IRQ\n");
ret = irq ? irq : -EINVAL;
goto out_put_node;
}
/* This clears the IRQ status register */
- ret = regmap_read(smi->map, RTL8366RB_INTERRUPT_STATUS_REG,
+ ret = regmap_read(priv->map, RTL8366RB_INTERRUPT_STATUS_REG,
&val);
if (ret) {
- dev_err(smi->dev, "can't read interrupt status\n");
+ dev_err(priv->dev, "can't read interrupt status\n");
goto out_put_node;
}
@@ -573,48 +573,48 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi)
switch (irq_trig) {
case IRQF_TRIGGER_RISING:
case IRQF_TRIGGER_HIGH:
- dev_info(smi->dev, "active high/rising IRQ\n");
+ dev_info(priv->dev, "active high/rising IRQ\n");
val = 0;
break;
case IRQF_TRIGGER_FALLING:
case IRQF_TRIGGER_LOW:
- dev_info(smi->dev, "active low/falling IRQ\n");
+ dev_info(priv->dev, "active low/falling IRQ\n");
val = RTL8366RB_INTERRUPT_POLARITY;
break;
}
- ret = regmap_update_bits(smi->map, RTL8366RB_INTERRUPT_CONTROL_REG,
+ ret = regmap_update_bits(priv->map, RTL8366RB_INTERRUPT_CONTROL_REG,
RTL8366RB_INTERRUPT_POLARITY,
val);
if (ret) {
- dev_err(smi->dev, "could not configure IRQ polarity\n");
+ dev_err(priv->dev, "could not configure IRQ polarity\n");
goto out_put_node;
}
- ret = devm_request_threaded_irq(smi->dev, irq, NULL,
+ ret = devm_request_threaded_irq(priv->dev, irq, NULL,
rtl8366rb_irq, IRQF_ONESHOT,
- "RTL8366RB", smi);
+ "RTL8366RB", priv);
if (ret) {
- dev_err(smi->dev, "unable to request irq: %d\n", ret);
+ dev_err(priv->dev, "unable to request irq: %d\n", ret);
goto out_put_node;
}
- smi->irqdomain = irq_domain_add_linear(intc,
- RTL8366RB_NUM_INTERRUPT,
- &rtl8366rb_irqdomain_ops,
- smi);
- if (!smi->irqdomain) {
- dev_err(smi->dev, "failed to create IRQ domain\n");
+ priv->irqdomain = irq_domain_add_linear(intc,
+ RTL8366RB_NUM_INTERRUPT,
+ &rtl8366rb_irqdomain_ops,
+ priv);
+ if (!priv->irqdomain) {
+ dev_err(priv->dev, "failed to create IRQ domain\n");
ret = -EINVAL;
goto out_put_node;
}
- for (i = 0; i < smi->num_ports; i++)
- irq_set_parent(irq_create_mapping(smi->irqdomain, i), irq);
+ for (i = 0; i < priv->num_ports; i++)
+ irq_set_parent(irq_create_mapping(priv->irqdomain, i), irq);
out_put_node:
of_node_put(intc);
return ret;
}
-static int rtl8366rb_set_addr(struct realtek_smi *smi)
+static int rtl8366rb_set_addr(struct realtek_priv *priv)
{
u8 addr[ETH_ALEN];
u16 val;
@@ -622,18 +622,18 @@ static int rtl8366rb_set_addr(struct realtek_smi *smi)
eth_random_addr(addr);
- dev_info(smi->dev, "set MAC: %02X:%02X:%02X:%02X:%02X:%02X\n",
+ dev_info(priv->dev, "set MAC: %02X:%02X:%02X:%02X:%02X:%02X\n",
addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
val = addr[0] << 8 | addr[1];
- ret = regmap_write(smi->map, RTL8366RB_SMAR0, val);
+ ret = regmap_write(priv->map, RTL8366RB_SMAR0, val);
if (ret)
return ret;
val = addr[2] << 8 | addr[3];
- ret = regmap_write(smi->map, RTL8366RB_SMAR1, val);
+ ret = regmap_write(priv->map, RTL8366RB_SMAR1, val);
if (ret)
return ret;
val = addr[4] << 8 | addr[5];
- ret = regmap_write(smi->map, RTL8366RB_SMAR2, val);
+ ret = regmap_write(priv->map, RTL8366RB_SMAR2, val);
if (ret)
return ret;
@@ -765,7 +765,7 @@ static const struct rtl8366rb_jam_tbl_entry rtl8366rb_green_jam[] = {
/* Function that jams the tables in the proper registers */
static int rtl8366rb_jam_table(const struct rtl8366rb_jam_tbl_entry *jam_table,
- int jam_size, struct realtek_smi *smi,
+ int jam_size, struct realtek_priv *priv,
bool write_dbg)
{
u32 val;
@@ -774,24 +774,24 @@ static int rtl8366rb_jam_table(const struct rtl8366rb_jam_tbl_entry *jam_table,
for (i = 0; i < jam_size; i++) {
if ((jam_table[i].reg & 0xBE00) == 0xBE00) {
- ret = regmap_read(smi->map,
+ ret = regmap_read(priv->map,
RTL8366RB_PHY_ACCESS_BUSY_REG,
&val);
if (ret)
return ret;
if (!(val & RTL8366RB_PHY_INT_BUSY)) {
- ret = regmap_write(smi->map,
- RTL8366RB_PHY_ACCESS_CTRL_REG,
- RTL8366RB_PHY_CTRL_WRITE);
+ ret = regmap_write(priv->map,
+ RTL8366RB_PHY_ACCESS_CTRL_REG,
+ RTL8366RB_PHY_CTRL_WRITE);
if (ret)
return ret;
}
}
if (write_dbg)
- dev_dbg(smi->dev, "jam %04x into register %04x\n",
+ dev_dbg(priv->dev, "jam %04x into register %04x\n",
jam_table[i].val,
jam_table[i].reg);
- ret = regmap_write(smi->map,
+ ret = regmap_write(priv->map,
jam_table[i].reg,
jam_table[i].val);
if (ret)
@@ -802,7 +802,7 @@ static int rtl8366rb_jam_table(const struct rtl8366rb_jam_tbl_entry *jam_table,
static int rtl8366rb_setup(struct dsa_switch *ds)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
const struct rtl8366rb_jam_tbl_entry *jam_table;
struct rtl8366rb *rb;
u32 chip_ver = 0;
@@ -812,11 +812,11 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
int ret;
int i;
- rb = smi->chip_data;
+ rb = priv->chip_data;
- ret = regmap_read(smi->map, RTL8366RB_CHIP_ID_REG, &chip_id);
+ ret = regmap_read(priv->map, RTL8366RB_CHIP_ID_REG, &chip_id);
if (ret) {
- dev_err(smi->dev, "unable to read chip id\n");
+ dev_err(priv->dev, "unable to read chip id\n");
return ret;
}
@@ -824,18 +824,18 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
case RTL8366RB_CHIP_ID_8366:
break;
default:
- dev_err(smi->dev, "unknown chip id (%04x)\n", chip_id);
+ dev_err(priv->dev, "unknown chip id (%04x)\n", chip_id);
return -ENODEV;
}
- ret = regmap_read(smi->map, RTL8366RB_CHIP_VERSION_CTRL_REG,
+ ret = regmap_read(priv->map, RTL8366RB_CHIP_VERSION_CTRL_REG,
&chip_ver);
if (ret) {
- dev_err(smi->dev, "unable to read chip version\n");
+ dev_err(priv->dev, "unable to read chip version\n");
return ret;
}
- dev_info(smi->dev, "RTL%04x ver %u chip found\n",
+ dev_info(priv->dev, "RTL%04x ver %u chip found\n",
chip_id, chip_ver & RTL8366RB_CHIP_VERSION_MASK);
/* Do the init dance using the right jam table */
@@ -872,20 +872,20 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
jam_size = ARRAY_SIZE(rtl8366rb_init_jam_dgn3500);
}
- ret = rtl8366rb_jam_table(jam_table, jam_size, smi, true);
+ ret = rtl8366rb_jam_table(jam_table, jam_size, priv, true);
if (ret)
return ret;
/* Isolate all user ports so they can only send packets to itself and the CPU port */
for (i = 0; i < RTL8366RB_PORT_NUM_CPU; i++) {
- ret = regmap_write(smi->map, RTL8366RB_PORT_ISO(i),
+ ret = regmap_write(priv->map, RTL8366RB_PORT_ISO(i),
RTL8366RB_PORT_ISO_PORTS(BIT(RTL8366RB_PORT_NUM_CPU)) |
RTL8366RB_PORT_ISO_EN);
if (ret)
return ret;
}
/* CPU port can send packets to all ports */
- ret = regmap_write(smi->map, RTL8366RB_PORT_ISO(RTL8366RB_PORT_NUM_CPU),
+ ret = regmap_write(priv->map, RTL8366RB_PORT_ISO(RTL8366RB_PORT_NUM_CPU),
RTL8366RB_PORT_ISO_PORTS(dsa_user_ports(ds)) |
RTL8366RB_PORT_ISO_EN);
if (ret)
@@ -893,26 +893,26 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
/* Set up the "green ethernet" feature */
ret = rtl8366rb_jam_table(rtl8366rb_green_jam,
- ARRAY_SIZE(rtl8366rb_green_jam), smi, false);
+ ARRAY_SIZE(rtl8366rb_green_jam), priv, false);
if (ret)
return ret;
- ret = regmap_write(smi->map,
+ ret = regmap_write(priv->map,
RTL8366RB_GREEN_FEATURE_REG,
(chip_ver == 1) ? 0x0007 : 0x0003);
if (ret)
return ret;
/* Vendor driver sets 0x240 in registers 0xc and 0xd (undocumented) */
- ret = regmap_write(smi->map, 0x0c, 0x240);
+ ret = regmap_write(priv->map, 0x0c, 0x240);
if (ret)
return ret;
- ret = regmap_write(smi->map, 0x0d, 0x240);
+ ret = regmap_write(priv->map, 0x0d, 0x240);
if (ret)
return ret;
/* Set some random MAC address */
- ret = rtl8366rb_set_addr(smi);
+ ret = rtl8366rb_set_addr(priv);
if (ret)
return ret;
@@ -921,21 +921,21 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
* If you set RTL8368RB_CPU_NO_TAG (bit 15) in this registers
* the custom tag is turned off.
*/
- ret = regmap_update_bits(smi->map, RTL8368RB_CPU_CTRL_REG,
+ ret = regmap_update_bits(priv->map, RTL8368RB_CPU_CTRL_REG,
0xFFFF,
- BIT(smi->cpu_port));
+ BIT(priv->cpu_port));
if (ret)
return ret;
/* Make sure we default-enable the fixed CPU port */
- ret = regmap_update_bits(smi->map, RTL8366RB_PECR,
- BIT(smi->cpu_port),
+ ret = regmap_update_bits(priv->map, RTL8366RB_PECR,
+ BIT(priv->cpu_port),
0);
if (ret)
return ret;
/* Set maximum packet length to 1536 bytes */
- ret = regmap_update_bits(smi->map, RTL8366RB_SGCR,
+ ret = regmap_update_bits(priv->map, RTL8366RB_SGCR,
RTL8366RB_SGCR_MAX_LENGTH_MASK,
RTL8366RB_SGCR_MAX_LENGTH_1536);
if (ret)
@@ -945,13 +945,13 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
rb->max_mtu[i] = 1532;
/* Disable learning for all ports */
- ret = regmap_write(smi->map, RTL8366RB_PORT_LEARNDIS_CTRL,
+ ret = regmap_write(priv->map, RTL8366RB_PORT_LEARNDIS_CTRL,
RTL8366RB_PORT_ALL);
if (ret)
return ret;
/* Enable auto ageing for all ports */
- ret = regmap_write(smi->map, RTL8366RB_SECURITY_CTRL, 0);
+ ret = regmap_write(priv->map, RTL8366RB_SECURITY_CTRL, 0);
if (ret)
return ret;
@@ -962,30 +962,30 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
* connected to something exotic such as fiber, then this might
* be worth experimenting with.
*/
- ret = regmap_update_bits(smi->map, RTL8366RB_PMC0,
+ ret = regmap_update_bits(priv->map, RTL8366RB_PMC0,
RTL8366RB_PMC0_P4_IOMODE_MASK,
0 << RTL8366RB_PMC0_P4_IOMODE_SHIFT);
if (ret)
return ret;
/* Accept all packets by default, we enable filtering on-demand */
- ret = regmap_write(smi->map, RTL8366RB_VLAN_INGRESS_CTRL1_REG,
+ ret = regmap_write(priv->map, RTL8366RB_VLAN_INGRESS_CTRL1_REG,
0);
if (ret)
return ret;
- ret = regmap_write(smi->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG,
+ ret = regmap_write(priv->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG,
0);
if (ret)
return ret;
/* Don't drop packets whose DA has not been learned */
- ret = regmap_update_bits(smi->map, RTL8366RB_SSCR2,
+ ret = regmap_update_bits(priv->map, RTL8366RB_SSCR2,
RTL8366RB_SSCR2_DROP_UNKNOWN_DA, 0);
if (ret)
return ret;
/* Set blinking, TODO: make this configurable */
- ret = regmap_update_bits(smi->map, RTL8366RB_LED_BLINKRATE_REG,
+ ret = regmap_update_bits(priv->map, RTL8366RB_LED_BLINKRATE_REG,
RTL8366RB_LED_BLINKRATE_MASK,
RTL8366RB_LED_BLINKRATE_56MS);
if (ret)
@@ -996,15 +996,15 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
* behaviour (no individual config) but we can set up each
* LED separately.
*/
- if (smi->leds_disabled) {
+ if (priv->leds_disabled) {
/* Turn everything off */
- regmap_update_bits(smi->map,
+ regmap_update_bits(priv->map,
RTL8366RB_LED_0_1_CTRL_REG,
0x0FFF, 0);
- regmap_update_bits(smi->map,
+ regmap_update_bits(priv->map,
RTL8366RB_LED_2_3_CTRL_REG,
0x0FFF, 0);
- regmap_update_bits(smi->map,
+ regmap_update_bits(priv->map,
RTL8366RB_INTERRUPT_CONTROL_REG,
RTL8366RB_P4_RGMII_LED,
0);
@@ -1014,7 +1014,7 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
val = RTL8366RB_LED_FORCE;
}
for (i = 0; i < 4; i++) {
- ret = regmap_update_bits(smi->map,
+ ret = regmap_update_bits(priv->map,
RTL8366RB_LED_CTRL_REG,
0xf << (i * 4),
val << (i * 4));
@@ -1022,18 +1022,20 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
return ret;
}
- ret = rtl8366_reset_vlan(smi);
+ ret = rtl8366_reset_vlan(priv);
if (ret)
return ret;
- ret = rtl8366rb_setup_cascaded_irq(smi);
+ ret = rtl8366rb_setup_cascaded_irq(priv);
if (ret)
- dev_info(smi->dev, "no interrupt support\n");
+ dev_info(priv->dev, "no interrupt support\n");
- ret = realtek_smi_setup_mdio(smi);
- if (ret) {
- dev_info(smi->dev, "could not set up MDIO bus\n");
- return -ENODEV;
+ if (priv->setup_interface) {
+ ret = priv->setup_interface(ds);
+ if (ret) {
+ dev_err(priv->dev, "could not set up MDIO bus\n");
+ return -ENODEV;
+ }
}
return 0;
@@ -1052,35 +1054,35 @@ rtl8366rb_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode,
phy_interface_t interface, struct phy_device *phydev,
int speed, int duplex, bool tx_pause, bool rx_pause)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
int ret;
- if (port != smi->cpu_port)
+ if (port != priv->cpu_port)
return;
- dev_dbg(smi->dev, "MAC link up on CPU port (%d)\n", port);
+ dev_dbg(priv->dev, "MAC link up on CPU port (%d)\n", port);
/* Force the fixed CPU port into 1Gbit mode, no autonegotiation */
- ret = regmap_update_bits(smi->map, RTL8366RB_MAC_FORCE_CTRL_REG,
+ ret = regmap_update_bits(priv->map, RTL8366RB_MAC_FORCE_CTRL_REG,
BIT(port), BIT(port));
if (ret) {
- dev_err(smi->dev, "failed to force 1Gbit on CPU port\n");
+ dev_err(priv->dev, "failed to force 1Gbit on CPU port\n");
return;
}
- ret = regmap_update_bits(smi->map, RTL8366RB_PAACR2,
+ ret = regmap_update_bits(priv->map, RTL8366RB_PAACR2,
0xFF00U,
RTL8366RB_PAACR_CPU_PORT << 8);
if (ret) {
- dev_err(smi->dev, "failed to set PAACR on CPU port\n");
+ dev_err(priv->dev, "failed to set PAACR on CPU port\n");
return;
}
/* Enable the CPU port */
- ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port),
+ ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port),
0);
if (ret) {
- dev_err(smi->dev, "failed to enable the CPU port\n");
+ dev_err(priv->dev, "failed to enable the CPU port\n");
return;
}
}
@@ -1089,99 +1091,99 @@ static void
rtl8366rb_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode,
phy_interface_t interface)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
int ret;
- if (port != smi->cpu_port)
+ if (port != priv->cpu_port)
return;
- dev_dbg(smi->dev, "MAC link down on CPU port (%d)\n", port);
+ dev_dbg(priv->dev, "MAC link down on CPU port (%d)\n", port);
/* Disable the CPU port */
- ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port),
+ ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port),
BIT(port));
if (ret) {
- dev_err(smi->dev, "failed to disable the CPU port\n");
+ dev_err(priv->dev, "failed to disable the CPU port\n");
return;
}
}
-static void rb8366rb_set_port_led(struct realtek_smi *smi,
+static void rb8366rb_set_port_led(struct realtek_priv *priv,
int port, bool enable)
{
u16 val = enable ? 0x3f : 0;
int ret;
- if (smi->leds_disabled)
+ if (priv->leds_disabled)
return;
switch (port) {
case 0:
- ret = regmap_update_bits(smi->map,
+ ret = regmap_update_bits(priv->map,
RTL8366RB_LED_0_1_CTRL_REG,
0x3F, val);
break;
case 1:
- ret = regmap_update_bits(smi->map,
+ ret = regmap_update_bits(priv->map,
RTL8366RB_LED_0_1_CTRL_REG,
0x3F << RTL8366RB_LED_1_OFFSET,
val << RTL8366RB_LED_1_OFFSET);
break;
case 2:
- ret = regmap_update_bits(smi->map,
+ ret = regmap_update_bits(priv->map,
RTL8366RB_LED_2_3_CTRL_REG,
0x3F, val);
break;
case 3:
- ret = regmap_update_bits(smi->map,
+ ret = regmap_update_bits(priv->map,
RTL8366RB_LED_2_3_CTRL_REG,
0x3F << RTL8366RB_LED_3_OFFSET,
val << RTL8366RB_LED_3_OFFSET);
break;
case 4:
- ret = regmap_update_bits(smi->map,
+ ret = regmap_update_bits(priv->map,
RTL8366RB_INTERRUPT_CONTROL_REG,
RTL8366RB_P4_RGMII_LED,
enable ? RTL8366RB_P4_RGMII_LED : 0);
break;
default:
- dev_err(smi->dev, "no LED for port %d\n", port);
+ dev_err(priv->dev, "no LED for port %d\n", port);
return;
}
if (ret)
- dev_err(smi->dev, "error updating LED on port %d\n", port);
+ dev_err(priv->dev, "error updating LED on port %d\n", port);
}
static int
rtl8366rb_port_enable(struct dsa_switch *ds, int port,
struct phy_device *phy)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
int ret;
- dev_dbg(smi->dev, "enable port %d\n", port);
- ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port),
+ dev_dbg(priv->dev, "enable port %d\n", port);
+ ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port),
0);
if (ret)
return ret;
- rb8366rb_set_port_led(smi, port, true);
+ rb8366rb_set_port_led(priv, port, true);
return 0;
}
static void
rtl8366rb_port_disable(struct dsa_switch *ds, int port)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
int ret;
- dev_dbg(smi->dev, "disable port %d\n", port);
- ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port),
+ dev_dbg(priv->dev, "disable port %d\n", port);
+ ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port),
BIT(port));
if (ret)
return;
- rb8366rb_set_port_led(smi, port, false);
+ rb8366rb_set_port_led(priv, port, false);
}
static int
@@ -1189,7 +1191,7 @@ rtl8366rb_port_bridge_join(struct dsa_switch *ds, int port,
struct dsa_bridge bridge,
bool *tx_fwd_offload)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
unsigned int port_bitmap = 0;
int ret, i;
@@ -1202,17 +1204,17 @@ rtl8366rb_port_bridge_join(struct dsa_switch *ds, int port,
if (!dsa_port_offloads_bridge(dsa_to_port(ds, i), &bridge))
continue;
/* Join this port to each other port on the bridge */
- ret = regmap_update_bits(smi->map, RTL8366RB_PORT_ISO(i),
+ ret = regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(i),
RTL8366RB_PORT_ISO_PORTS(BIT(port)),
RTL8366RB_PORT_ISO_PORTS(BIT(port)));
if (ret)
- dev_err(smi->dev, "failed to join port %d\n", port);
+ dev_err(priv->dev, "failed to join port %d\n", port);
port_bitmap |= BIT(i);
}
/* Set the bits for the ports we can access */
- return regmap_update_bits(smi->map, RTL8366RB_PORT_ISO(port),
+ return regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(port),
RTL8366RB_PORT_ISO_PORTS(port_bitmap),
RTL8366RB_PORT_ISO_PORTS(port_bitmap));
}
@@ -1221,7 +1223,7 @@ static void
rtl8366rb_port_bridge_leave(struct dsa_switch *ds, int port,
struct dsa_bridge bridge)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
unsigned int port_bitmap = 0;
int ret, i;
@@ -1234,28 +1236,30 @@ rtl8366rb_port_bridge_leave(struct dsa_switch *ds, int port,
if (!dsa_port_offloads_bridge(dsa_to_port(ds, i), &bridge))
continue;
/* Remove this port from any other port on the bridge */
- ret = regmap_update_bits(smi->map, RTL8366RB_PORT_ISO(i),
+ ret = regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(i),
RTL8366RB_PORT_ISO_PORTS(BIT(port)), 0);
if (ret)
- dev_err(smi->dev, "failed to leave port %d\n", port);
+ dev_err(priv->dev, "failed to leave port %d\n", port);
port_bitmap |= BIT(i);
}
/* Clear the bits for the ports we can not access, leave ourselves */
- regmap_update_bits(smi->map, RTL8366RB_PORT_ISO(port),
+ regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(port),
RTL8366RB_PORT_ISO_PORTS(port_bitmap), 0);
}
/**
* rtl8366rb_drop_untagged() - make the switch drop untagged and C-tagged frames
- * @smi: SMI state container
+ * @priv: SMI state container
* @port: the port to drop untagged and C-tagged frames on
* @drop: whether to drop or pass untagged and C-tagged frames
+ *
+ * Return: zero for success, a negative number on error.
*/
-static int rtl8366rb_drop_untagged(struct realtek_smi *smi, int port, bool drop)
+static int rtl8366rb_drop_untagged(struct realtek_priv *priv, int port, bool drop)
{
- return regmap_update_bits(smi->map, RTL8366RB_VLAN_INGRESS_CTRL1_REG,
+ return regmap_update_bits(priv->map, RTL8366RB_VLAN_INGRESS_CTRL1_REG,
RTL8366RB_VLAN_INGRESS_CTRL1_DROP(port),
drop ? RTL8366RB_VLAN_INGRESS_CTRL1_DROP(port) : 0);
}
@@ -1264,17 +1268,17 @@ static int rtl8366rb_vlan_filtering(struct dsa_switch *ds, int port,
bool vlan_filtering,
struct netlink_ext_ack *extack)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
struct rtl8366rb *rb;
int ret;
- rb = smi->chip_data;
+ rb = priv->chip_data;
- dev_dbg(smi->dev, "port %d: %s VLAN filtering\n", port,
+ dev_dbg(priv->dev, "port %d: %s VLAN filtering\n", port,
vlan_filtering ? "enable" : "disable");
/* If the port is not in the member set, the frame will be dropped */
- ret = regmap_update_bits(smi->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG,
+ ret = regmap_update_bits(priv->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG,
BIT(port), vlan_filtering ? BIT(port) : 0);
if (ret)
return ret;
@@ -1284,9 +1288,9 @@ static int rtl8366rb_vlan_filtering(struct dsa_switch *ds, int port,
* filtering on a port, we need to accept any frames.
*/
if (vlan_filtering)
- ret = rtl8366rb_drop_untagged(smi, port, !rb->pvid_enabled[port]);
+ ret = rtl8366rb_drop_untagged(priv, port, !rb->pvid_enabled[port]);
else
- ret = rtl8366rb_drop_untagged(smi, port, false);
+ ret = rtl8366rb_drop_untagged(priv, port, false);
return ret;
}
@@ -1308,11 +1312,11 @@ rtl8366rb_port_bridge_flags(struct dsa_switch *ds, int port,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
int ret;
if (flags.mask & BR_LEARNING) {
- ret = regmap_update_bits(smi->map, RTL8366RB_PORT_LEARNDIS_CTRL,
+ ret = regmap_update_bits(priv->map, RTL8366RB_PORT_LEARNDIS_CTRL,
BIT(port),
(flags.val & BR_LEARNING) ? 0 : BIT(port));
if (ret)
@@ -1325,7 +1329,7 @@ rtl8366rb_port_bridge_flags(struct dsa_switch *ds, int port,
static void
rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
u32 val;
int i;
@@ -1344,13 +1348,13 @@ rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
val = RTL8366RB_STP_STATE_FORWARDING;
break;
default:
- dev_err(smi->dev, "unknown bridge state requested\n");
+ dev_err(priv->dev, "unknown bridge state requested\n");
return;
}
/* Set the same status for the port on all the FIDs */
for (i = 0; i < RTL8366RB_NUM_FIDS; i++) {
- regmap_update_bits(smi->map, RTL8366RB_STP_STATE_BASE + i,
+ regmap_update_bits(priv->map, RTL8366RB_STP_STATE_BASE + i,
RTL8366RB_STP_STATE_MASK(port),
RTL8366RB_STP_STATE(port, val));
}
@@ -1359,26 +1363,26 @@ rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
static void
rtl8366rb_port_fast_age(struct dsa_switch *ds, int port)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
/* This will age out any learned L2 entries */
- regmap_update_bits(smi->map, RTL8366RB_SECURITY_CTRL,
+ regmap_update_bits(priv->map, RTL8366RB_SECURITY_CTRL,
BIT(port), BIT(port));
/* Restore the normal state of things */
- regmap_update_bits(smi->map, RTL8366RB_SECURITY_CTRL,
+ regmap_update_bits(priv->map, RTL8366RB_SECURITY_CTRL,
BIT(port), 0);
}
static int rtl8366rb_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
{
- struct realtek_smi *smi = ds->priv;
+ struct realtek_priv *priv = ds->priv;
struct rtl8366rb *rb;
unsigned int max_mtu;
u32 len;
int i;
/* Cache the per-port MTU setting */
- rb = smi->chip_data;
+ rb = priv->chip_data;
rb->max_mtu[port] = new_mtu;
/* Roof out the MTU for the entire switch to the greatest
@@ -1406,7 +1410,7 @@ static int rtl8366rb_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
else
len = RTL8366RB_SGCR_MAX_LENGTH_16000;
- return regmap_update_bits(smi->map, RTL8366RB_SGCR,
+ return regmap_update_bits(priv->map, RTL8366RB_SGCR,
RTL8366RB_SGCR_MAX_LENGTH_MASK,
len);
}
@@ -1419,7 +1423,7 @@ static int rtl8366rb_max_mtu(struct dsa_switch *ds, int port)
return 15996;
}
-static int rtl8366rb_get_vlan_4k(struct realtek_smi *smi, u32 vid,
+static int rtl8366rb_get_vlan_4k(struct realtek_priv *priv, u32 vid,
struct rtl8366_vlan_4k *vlan4k)
{
u32 data[3];
@@ -1432,19 +1436,19 @@ static int rtl8366rb_get_vlan_4k(struct realtek_smi *smi, u32 vid,
return -EINVAL;
/* write VID */
- ret = regmap_write(smi->map, RTL8366RB_VLAN_TABLE_WRITE_BASE,
+ ret = regmap_write(priv->map, RTL8366RB_VLAN_TABLE_WRITE_BASE,
vid & RTL8366RB_VLAN_VID_MASK);
if (ret)
return ret;
/* write table access control word */
- ret = regmap_write(smi->map, RTL8366RB_TABLE_ACCESS_CTRL_REG,
+ ret = regmap_write(priv->map, RTL8366RB_TABLE_ACCESS_CTRL_REG,
RTL8366RB_TABLE_VLAN_READ_CTRL);
if (ret)
return ret;
for (i = 0; i < 3; i++) {
- ret = regmap_read(smi->map,
+ ret = regmap_read(priv->map,
RTL8366RB_VLAN_TABLE_READ_BASE + i,
&data[i]);
if (ret)
@@ -1460,7 +1464,7 @@ static int rtl8366rb_get_vlan_4k(struct realtek_smi *smi, u32 vid,
return 0;
}
-static int rtl8366rb_set_vlan_4k(struct realtek_smi *smi,
+static int rtl8366rb_set_vlan_4k(struct realtek_priv *priv,
const struct rtl8366_vlan_4k *vlan4k)
{
u32 data[3];
@@ -1480,7 +1484,7 @@ static int rtl8366rb_set_vlan_4k(struct realtek_smi *smi,
data[2] = vlan4k->fid & RTL8366RB_VLAN_FID_MASK;
for (i = 0; i < 3; i++) {
- ret = regmap_write(smi->map,
+ ret = regmap_write(priv->map,
RTL8366RB_VLAN_TABLE_WRITE_BASE + i,
data[i]);
if (ret)
@@ -1488,13 +1492,13 @@ static int rtl8366rb_set_vlan_4k(struct realtek_smi *smi,
}
/* write table access control word */
- ret = regmap_write(smi->map, RTL8366RB_TABLE_ACCESS_CTRL_REG,
+ ret = regmap_write(priv->map, RTL8366RB_TABLE_ACCESS_CTRL_REG,
RTL8366RB_TABLE_VLAN_WRITE_CTRL);
return ret;
}
-static int rtl8366rb_get_vlan_mc(struct realtek_smi *smi, u32 index,
+static int rtl8366rb_get_vlan_mc(struct realtek_priv *priv, u32 index,
struct rtl8366_vlan_mc *vlanmc)
{
u32 data[3];
@@ -1507,7 +1511,7 @@ static int rtl8366rb_get_vlan_mc(struct realtek_smi *smi, u32 index,
return -EINVAL;
for (i = 0; i < 3; i++) {
- ret = regmap_read(smi->map,
+ ret = regmap_read(priv->map,
RTL8366RB_VLAN_MC_BASE(index) + i,
&data[i]);
if (ret)
@@ -1525,7 +1529,7 @@ static int rtl8366rb_get_vlan_mc(struct realtek_smi *smi, u32 index,
return 0;
}
-static int rtl8366rb_set_vlan_mc(struct realtek_smi *smi, u32 index,
+static int rtl8366rb_set_vlan_mc(struct realtek_priv *priv, u32 index,
const struct rtl8366_vlan_mc *vlanmc)
{
u32 data[3];
@@ -1549,7 +1553,7 @@ static int rtl8366rb_set_vlan_mc(struct realtek_smi *smi, u32 index,
data[2] = vlanmc->fid & RTL8366RB_VLAN_FID_MASK;
for (i = 0; i < 3; i++) {
- ret = regmap_write(smi->map,
+ ret = regmap_write(priv->map,
RTL8366RB_VLAN_MC_BASE(index) + i,
data[i]);
if (ret)
@@ -1559,15 +1563,15 @@ static int rtl8366rb_set_vlan_mc(struct realtek_smi *smi, u32 index,
return 0;
}
-static int rtl8366rb_get_mc_index(struct realtek_smi *smi, int port, int *val)
+static int rtl8366rb_get_mc_index(struct realtek_priv *priv, int port, int *val)
{
u32 data;
int ret;
- if (port >= smi->num_ports)
+ if (port >= priv->num_ports)
return -EINVAL;
- ret = regmap_read(smi->map, RTL8366RB_PORT_VLAN_CTRL_REG(port),
+ ret = regmap_read(priv->map, RTL8366RB_PORT_VLAN_CTRL_REG(port),
&data);
if (ret)
return ret;
@@ -1578,22 +1582,22 @@ static int rtl8366rb_get_mc_index(struct realtek_smi *smi, int port, int *val)
return 0;
}
-static int rtl8366rb_set_mc_index(struct realtek_smi *smi, int port, int index)
+static int rtl8366rb_set_mc_index(struct realtek_priv *priv, int port, int index)
{
struct rtl8366rb *rb;
bool pvid_enabled;
int ret;
- rb = smi->chip_data;
+ rb = priv->chip_data;
pvid_enabled = !!index;
- if (port >= smi->num_ports || index >= RTL8366RB_NUM_VLANS)
+ if (port >= priv->num_ports || index >= RTL8366RB_NUM_VLANS)
return -EINVAL;
- ret = regmap_update_bits(smi->map, RTL8366RB_PORT_VLAN_CTRL_REG(port),
- RTL8366RB_PORT_VLAN_CTRL_MASK <<
+ ret = regmap_update_bits(priv->map, RTL8366RB_PORT_VLAN_CTRL_REG(port),
+ RTL8366RB_PORT_VLAN_CTRL_MASK <<
RTL8366RB_PORT_VLAN_CTRL_SHIFT(port),
- (index & RTL8366RB_PORT_VLAN_CTRL_MASK) <<
+ (index & RTL8366RB_PORT_VLAN_CTRL_MASK) <<
RTL8366RB_PORT_VLAN_CTRL_SHIFT(port));
if (ret)
return ret;
@@ -1604,17 +1608,17 @@ static int rtl8366rb_set_mc_index(struct realtek_smi *smi, int port, int index)
* not drop any untagged or C-tagged frames. Make sure to update the
* filtering setting.
*/
- if (dsa_port_is_vlan_filtering(dsa_to_port(smi->ds, port)))
- ret = rtl8366rb_drop_untagged(smi, port, !pvid_enabled);
+ if (dsa_port_is_vlan_filtering(dsa_to_port(priv->ds, port)))
+ ret = rtl8366rb_drop_untagged(priv, port, !pvid_enabled);
return ret;
}
-static bool rtl8366rb_is_vlan_valid(struct realtek_smi *smi, unsigned int vlan)
+static bool rtl8366rb_is_vlan_valid(struct realtek_priv *priv, unsigned int vlan)
{
unsigned int max = RTL8366RB_NUM_VLANS - 1;
- if (smi->vlan4k_enabled)
+ if (priv->vlan4k_enabled)
max = RTL8366RB_NUM_VIDS - 1;
if (vlan > max)
@@ -1623,23 +1627,23 @@ static bool rtl8366rb_is_vlan_valid(struct realtek_smi *smi, unsigned int vlan)
return true;
}
-static int rtl8366rb_enable_vlan(struct realtek_smi *smi, bool enable)
+static int rtl8366rb_enable_vlan(struct realtek_priv *priv, bool enable)
{
- dev_dbg(smi->dev, "%s VLAN\n", enable ? "enable" : "disable");
- return regmap_update_bits(smi->map,
+ dev_dbg(priv->dev, "%s VLAN\n", enable ? "enable" : "disable");
+ return regmap_update_bits(priv->map,
RTL8366RB_SGCR, RTL8366RB_SGCR_EN_VLAN,
enable ? RTL8366RB_SGCR_EN_VLAN : 0);
}
-static int rtl8366rb_enable_vlan4k(struct realtek_smi *smi, bool enable)
+static int rtl8366rb_enable_vlan4k(struct realtek_priv *priv, bool enable)
{
- dev_dbg(smi->dev, "%s VLAN 4k\n", enable ? "enable" : "disable");
- return regmap_update_bits(smi->map, RTL8366RB_SGCR,
+ dev_dbg(priv->dev, "%s VLAN 4k\n", enable ? "enable" : "disable");
+ return regmap_update_bits(priv->map, RTL8366RB_SGCR,
RTL8366RB_SGCR_EN_VLAN_4KTB,
enable ? RTL8366RB_SGCR_EN_VLAN_4KTB : 0);
}
-static int rtl8366rb_phy_read(struct realtek_smi *smi, int phy, int regnum)
+static int rtl8366rb_phy_read(struct realtek_priv *priv, int phy, int regnum)
{
u32 val;
u32 reg;
@@ -1648,32 +1652,32 @@ static int rtl8366rb_phy_read(struct realtek_smi *smi, int phy, int regnum)
if (phy > RTL8366RB_PHY_NO_MAX)
return -EINVAL;
- ret = regmap_write(smi->map, RTL8366RB_PHY_ACCESS_CTRL_REG,
+ ret = regmap_write(priv->map, RTL8366RB_PHY_ACCESS_CTRL_REG,
RTL8366RB_PHY_CTRL_READ);
if (ret)
return ret;
reg = 0x8000 | (1 << (phy + RTL8366RB_PHY_NO_OFFSET)) | regnum;
- ret = regmap_write(smi->map, reg, 0);
+ ret = regmap_write(priv->map, reg, 0);
if (ret) {
- dev_err(smi->dev,
+ dev_err(priv->dev,
"failed to write PHY%d reg %04x @ %04x, ret %d\n",
phy, regnum, reg, ret);
return ret;
}
- ret = regmap_read(smi->map, RTL8366RB_PHY_ACCESS_DATA_REG, &val);
+ ret = regmap_read(priv->map, RTL8366RB_PHY_ACCESS_DATA_REG, &val);
if (ret)
return ret;
- dev_dbg(smi->dev, "read PHY%d register 0x%04x @ %08x, val <- %04x\n",
+ dev_dbg(priv->dev, "read PHY%d register 0x%04x @ %08x, val <- %04x\n",
phy, regnum, reg, val);
return val;
}
-static int rtl8366rb_phy_write(struct realtek_smi *smi, int phy, int regnum,
+static int rtl8366rb_phy_write(struct realtek_priv *priv, int phy, int regnum,
u16 val)
{
u32 reg;
@@ -1682,34 +1686,45 @@ static int rtl8366rb_phy_write(struct realtek_smi *smi, int phy, int regnum,
if (phy > RTL8366RB_PHY_NO_MAX)
return -EINVAL;
- ret = regmap_write(smi->map, RTL8366RB_PHY_ACCESS_CTRL_REG,
+ ret = regmap_write(priv->map, RTL8366RB_PHY_ACCESS_CTRL_REG,
RTL8366RB_PHY_CTRL_WRITE);
if (ret)
return ret;
reg = 0x8000 | (1 << (phy + RTL8366RB_PHY_NO_OFFSET)) | regnum;
- dev_dbg(smi->dev, "write PHY%d register 0x%04x @ %04x, val -> %04x\n",
+ dev_dbg(priv->dev, "write PHY%d register 0x%04x @ %04x, val -> %04x\n",
phy, regnum, reg, val);
- ret = regmap_write(smi->map, reg, val);
+ ret = regmap_write(priv->map, reg, val);
if (ret)
return ret;
return 0;
}
-static int rtl8366rb_reset_chip(struct realtek_smi *smi)
+static int rtl8366rb_dsa_phy_read(struct dsa_switch *ds, int phy, int regnum)
+{
+ return rtl8366rb_phy_read(ds->priv, phy, regnum);
+}
+
+static int rtl8366rb_dsa_phy_write(struct dsa_switch *ds, int phy, int regnum,
+ u16 val)
+{
+ return rtl8366rb_phy_write(ds->priv, phy, regnum, val);
+}
+
+static int rtl8366rb_reset_chip(struct realtek_priv *priv)
{
int timeout = 10;
u32 val;
int ret;
- realtek_smi_write_reg_noack(smi, RTL8366RB_RESET_CTRL_REG,
- RTL8366RB_CHIP_CTRL_RESET_HW);
+ priv->write_reg_noack(priv, RTL8366RB_RESET_CTRL_REG,
+ RTL8366RB_CHIP_CTRL_RESET_HW);
do {
usleep_range(20000, 25000);
- ret = regmap_read(smi->map, RTL8366RB_RESET_CTRL_REG, &val);
+ ret = regmap_read(priv->map, RTL8366RB_RESET_CTRL_REG, &val);
if (ret)
return ret;
@@ -1718,21 +1733,21 @@ static int rtl8366rb_reset_chip(struct realtek_smi *smi)
} while (--timeout);
if (!timeout) {
- dev_err(smi->dev, "timeout waiting for the switch to reset\n");
+ dev_err(priv->dev, "timeout waiting for the switch to reset\n");
return -EIO;
}
return 0;
}
-static int rtl8366rb_detect(struct realtek_smi *smi)
+static int rtl8366rb_detect(struct realtek_priv *priv)
{
- struct device *dev = smi->dev;
+ struct device *dev = priv->dev;
int ret;
u32 val;
/* Detect device */
- ret = regmap_read(smi->map, 0x5c, &val);
+ ret = regmap_read(priv->map, 0x5c, &val);
if (ret) {
dev_err(dev, "can't get chip ID (%d)\n", ret);
return ret;
@@ -1745,11 +1760,11 @@ static int rtl8366rb_detect(struct realtek_smi *smi)
return -ENODEV;
case 0x5937:
dev_info(dev, "found an RTL8366RB switch\n");
- smi->cpu_port = RTL8366RB_PORT_NUM_CPU;
- smi->num_ports = RTL8366RB_NUM_PORTS;
- smi->num_vlan_mc = RTL8366RB_NUM_VLANS;
- smi->mib_counters = rtl8366rb_mib_counters;
- smi->num_mib_counters = ARRAY_SIZE(rtl8366rb_mib_counters);
+ priv->cpu_port = RTL8366RB_PORT_NUM_CPU;
+ priv->num_ports = RTL8366RB_NUM_PORTS;
+ priv->num_vlan_mc = RTL8366RB_NUM_VLANS;
+ priv->mib_counters = rtl8366rb_mib_counters;
+ priv->num_mib_counters = ARRAY_SIZE(rtl8366rb_mib_counters);
break;
default:
dev_info(dev, "found an Unknown Realtek switch (id=0x%04x)\n",
@@ -1757,14 +1772,14 @@ static int rtl8366rb_detect(struct realtek_smi *smi)
break;
}
- ret = rtl8366rb_reset_chip(smi);
+ ret = rtl8366rb_reset_chip(priv);
if (ret)
return ret;
return 0;
}
-static const struct dsa_switch_ops rtl8366rb_switch_ops = {
+static const struct dsa_switch_ops rtl8366rb_switch_ops_smi = {
.get_tag_protocol = rtl8366_get_tag_protocol,
.setup = rtl8366rb_setup,
.phylink_mac_link_up = rtl8366rb_mac_link_up,
@@ -1787,7 +1802,32 @@ static const struct dsa_switch_ops rtl8366rb_switch_ops = {
.port_max_mtu = rtl8366rb_max_mtu,
};
-static const struct realtek_smi_ops rtl8366rb_smi_ops = {
+static const struct dsa_switch_ops rtl8366rb_switch_ops_mdio = {
+ .get_tag_protocol = rtl8366_get_tag_protocol,
+ .setup = rtl8366rb_setup,
+ .phy_read = rtl8366rb_dsa_phy_read,
+ .phy_write = rtl8366rb_dsa_phy_write,
+ .phylink_mac_link_up = rtl8366rb_mac_link_up,
+ .phylink_mac_link_down = rtl8366rb_mac_link_down,
+ .get_strings = rtl8366_get_strings,
+ .get_ethtool_stats = rtl8366_get_ethtool_stats,
+ .get_sset_count = rtl8366_get_sset_count,
+ .port_bridge_join = rtl8366rb_port_bridge_join,
+ .port_bridge_leave = rtl8366rb_port_bridge_leave,
+ .port_vlan_filtering = rtl8366rb_vlan_filtering,
+ .port_vlan_add = rtl8366_vlan_add,
+ .port_vlan_del = rtl8366_vlan_del,
+ .port_enable = rtl8366rb_port_enable,
+ .port_disable = rtl8366rb_port_disable,
+ .port_pre_bridge_flags = rtl8366rb_port_pre_bridge_flags,
+ .port_bridge_flags = rtl8366rb_port_bridge_flags,
+ .port_stp_state_set = rtl8366rb_port_stp_state_set,
+ .port_fast_age = rtl8366rb_port_fast_age,
+ .port_change_mtu = rtl8366rb_change_mtu,
+ .port_max_mtu = rtl8366rb_max_mtu,
+};
+
+static const struct realtek_ops rtl8366rb_ops = {
.detect = rtl8366rb_detect,
.get_vlan_mc = rtl8366rb_get_vlan_mc,
.set_vlan_mc = rtl8366rb_set_vlan_mc,
@@ -1803,12 +1843,17 @@ static const struct realtek_smi_ops rtl8366rb_smi_ops = {
.phy_write = rtl8366rb_phy_write,
};
-const struct realtek_smi_variant rtl8366rb_variant = {
- .ds_ops = &rtl8366rb_switch_ops,
- .ops = &rtl8366rb_smi_ops,
+const struct realtek_variant rtl8366rb_variant = {
+ .ds_ops_smi = &rtl8366rb_switch_ops_smi,
+ .ds_ops_mdio = &rtl8366rb_switch_ops_mdio,
+ .ops = &rtl8366rb_ops,
.clk_delay = 10,
.cmd_read = 0xa9,
.cmd_write = 0xa8,
.chip_data_sz = sizeof(struct rtl8366rb),
};
EXPORT_SYMBOL_GPL(rtl8366rb_variant);
+
+MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+MODULE_DESCRIPTION("Driver for RTL8366RB ethernet switch");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c
index 0730352cdd57..bc06fe6bac6b 100644
--- a/drivers/net/dsa/xrs700x/xrs700x.c
+++ b/drivers/net/dsa/xrs700x/xrs700x.c
@@ -442,34 +442,27 @@ static void xrs700x_teardown(struct dsa_switch *ds)
cancel_delayed_work_sync(&priv->mib_work);
}
-static void xrs700x_phylink_validate(struct dsa_switch *ds, int port,
- unsigned long *supported,
- struct phylink_link_state *state)
+static void xrs700x_phylink_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
-
switch (port) {
case 0:
+ __set_bit(PHY_INTERFACE_MODE_RMII,
+ config->supported_interfaces);
+ config->mac_capabilities = MAC_10FD | MAC_100FD;
break;
+
case 1:
case 2:
case 3:
- phylink_set(mask, 1000baseT_Full);
+ phy_interface_set_rgmii(config->supported_interfaces);
+ config->mac_capabilities = MAC_10FD | MAC_100FD | MAC_1000FD;
break;
+
default:
- linkmode_zero(supported);
dev_err(ds->dev, "Unsupported port: %i\n", port);
- return;
+ break;
}
-
- phylink_set_port_modes(mask);
-
- /* The switch only supports full duplex. */
- phylink_set(mask, 10baseT_Full);
- phylink_set(mask, 100baseT_Full);
-
- linkmode_and(supported, supported, mask);
- linkmode_and(state->advertising, state->advertising, mask);
}
static void xrs700x_mac_link_up(struct dsa_switch *ds, int port,
@@ -703,7 +696,7 @@ static const struct dsa_switch_ops xrs700x_ops = {
.setup = xrs700x_setup,
.teardown = xrs700x_teardown,
.port_stp_state_set = xrs700x_port_stp_state_set,
- .phylink_validate = xrs700x_phylink_validate,
+ .phylink_get_caps = xrs700x_phylink_get_caps,
.phylink_mac_link_up = xrs700x_mac_link_up,
.get_strings = xrs700x_get_strings,
.get_sset_count = xrs700x_get_sset_count,
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c
index 8aec5d9fbfef..ad57209007e1 100644
--- a/drivers/net/ethernet/3com/typhoon.c
+++ b/drivers/net/ethernet/3com/typhoon.c
@@ -138,11 +138,6 @@ MODULE_PARM_DESC(use_mmio, "Use MMIO (1) or PIO(0) to access the NIC. "
module_param(rx_copybreak, int, 0);
module_param(use_mmio, int, 0);
-#if defined(NETIF_F_TSO) && MAX_SKB_FRAGS > 32
-#warning Typhoon only supports 32 entries in its SG list for TSO, disabling TSO
-#undef NETIF_F_TSO
-#endif
-
#if TXLO_ENTRIES <= (2 * MAX_SKB_FRAGS)
#error TX ring too small!
#endif
@@ -2261,9 +2256,28 @@ out:
return mode;
}
+#if MAX_SKB_FRAGS > 32
+
+#include <net/vxlan.h>
+
+static netdev_features_t typhoon_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ if (skb_shinfo(skb)->nr_frags > 32 && skb_is_gso(skb))
+ features &= ~NETIF_F_GSO_MASK;
+
+ features = vlan_features_check(skb, features);
+ return vxlan_features_check(skb, features);
+}
+#endif
+
static const struct net_device_ops typhoon_netdev_ops = {
.ndo_open = typhoon_open,
.ndo_stop = typhoon_close,
+#if MAX_SKB_FRAGS > 32
+ .ndo_features_check = typhoon_features_check,
+#endif
.ndo_start_xmit = typhoon_start_tx,
.ndo_set_rx_mode = typhoon_set_rx_mode,
.ndo_tx_timeout = typhoon_tx_timeout,
diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 537e6a85e18d..fbf4588994ac 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -2413,11 +2413,13 @@ static void et131x_tx_dma_memory_free(struct et131x_adapter *adapter)
kfree(tx_ring->tcb_ring);
}
+#define MAX_TX_DESC_PER_PKT 24
+
/* nic_send_packet - NIC specific send handler for version B silicon. */
static int nic_send_packet(struct et131x_adapter *adapter, struct tcb *tcb)
{
u32 i;
- struct tx_desc desc[24];
+ struct tx_desc desc[MAX_TX_DESC_PER_PKT];
u32 frag = 0;
u32 thiscopy, remainder;
struct sk_buff *skb = tcb->skb;
@@ -2432,9 +2434,6 @@ static int nic_send_packet(struct et131x_adapter *adapter, struct tcb *tcb)
* more than 5 fragments.
*/
- /* nr_frags should be no more than 18. */
- BUILD_BUG_ON(MAX_SKB_FRAGS + 1 > 23);
-
memset(desc, 0, sizeof(struct tx_desc) * (nr_frags + 1));
for (i = 0; i < nr_frags; i++) {
@@ -3762,6 +3761,13 @@ static netdev_tx_t et131x_tx(struct sk_buff *skb, struct net_device *netdev)
struct et131x_adapter *adapter = netdev_priv(netdev);
struct tx_ring *tx_ring = &adapter->tx_ring;
+ /* This driver does not support TSO, it is very unlikely
+ * this condition is true.
+ */
+ if (unlikely(skb_shinfo(skb)->nr_frags > MAX_TX_DESC_PER_PKT - 2)) {
+ if (skb_linearize(skb))
+ goto drop_err;
+ }
/* stop the queue if it's getting full */
if (tx_ring->used >= NUM_TCB - 1 && !netif_queue_stopped(netdev))
netif_stop_queue(netdev);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 53080fd143dc..07444aead3fd 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1400,10 +1400,9 @@ static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag)
struct sk_buff *skb;
if (!first_frag)
- skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
- rx_ring->rx_copybreak);
+ skb = napi_alloc_skb(rx_ring->napi, rx_ring->rx_copybreak);
else
- skb = build_skb(first_frag, ENA_PAGE_SIZE);
+ skb = napi_build_skb(first_frag, ENA_PAGE_SIZE);
if (unlikely(!skb)) {
ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index a19dd6797070..447a75ea0cc1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1271,7 +1271,7 @@ struct bnx2x_fw_stats_data {
struct per_port_stats port;
struct per_pf_stats pf;
struct fcoe_statistics_params fcoe;
- struct per_queue_stats queue_stats[1];
+ struct per_queue_stats queue_stats[];
};
/* Public slow path states */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4f94136a011a..c313221348c5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -233,6 +233,7 @@ static const u16 bnxt_async_events_arr[] = {
ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST,
ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP,
ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT,
+ ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE,
};
static struct workqueue_struct *bnxt_pf_wq;
@@ -2079,6 +2080,16 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
(BNXT_EVENT_RING_TYPE(data2) == \
ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_RX)
+#define BNXT_EVENT_PHC_EVENT_TYPE(data1) \
+ (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK) >>\
+ ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT)
+
+#define BNXT_EVENT_PHC_RTC_UPDATE(data1) \
+ (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK) >>\
+ ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT)
+
+#define BNXT_PHC_BITS 48
+
static int bnxt_async_event_process(struct bnxt *bp,
struct hwrm_async_event_cmpl *cmpl)
{
@@ -2258,6 +2269,24 @@ static int bnxt_async_event_process(struct bnxt *bp,
bnxt_event_error_report(bp, data1, data2);
goto async_event_process_exit;
}
+ case ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE: {
+ switch (BNXT_EVENT_PHC_EVENT_TYPE(data1)) {
+ case ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE:
+ if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+ u64 ns;
+
+ spin_lock_bh(&ptp->ptp_lock);
+ bnxt_ptp_update_current_time(bp);
+ ns = (((u64)BNXT_EVENT_PHC_RTC_UPDATE(data1) <<
+ BNXT_PHC_BITS) | ptp->current_time);
+ bnxt_ptp_rtc_timecounter_init(ptp, ns);
+ spin_unlock_bh(&ptp->ptp_lock);
+ }
+ break;
+ }
+ goto async_event_process_exit;
+ }
case ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE: {
u16 seq_id = le32_to_cpu(cmpl->event_data2) & 0xffff;
@@ -7414,6 +7443,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
struct hwrm_port_mac_ptp_qcfg_output *resp;
struct hwrm_port_mac_ptp_qcfg_input *req;
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+ bool phc_cfg;
u8 flags;
int rc;
@@ -7456,7 +7486,8 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
rc = -ENODEV;
goto exit;
}
- rc = bnxt_ptp_init(bp);
+ phc_cfg = (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED) != 0;
+ rc = bnxt_ptp_init(bp, phc_cfg);
if (rc)
netdev_warn(bp->dev, "PTP initialization failed.\n");
exit:
@@ -7514,6 +7545,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
bp->fw_cap |= BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED;
if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED))
bp->fw_cap |= BNXT_FW_CAP_PTP_PPS;
+ if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED)
+ bp->fw_cap |= BNXT_FW_CAP_PTP_RTC;
if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_HOT_RESET_IF_SUPPORT))
bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF;
if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED))
@@ -10288,6 +10321,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
/* VF-reps may need to be re-opened after the PF is re-opened */
if (BNXT_PF(bp))
bnxt_vf_reps_open(bp);
+ bnxt_ptp_init_rtc(bp, true);
return 0;
open_err_irq:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 440dfeb4948b..4b023e35c765 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1957,6 +1957,7 @@ struct bnxt {
#define BNXT_FW_CAP_EXT_STATS_SUPPORTED 0x00040000
#define BNXT_FW_CAP_ERR_RECOVER_RELOAD 0x00100000
#define BNXT_FW_CAP_HOT_RESET 0x00200000
+ #define BNXT_FW_CAP_PTP_RTC 0x00400000
#define BNXT_FW_CAP_VLAN_RX_STRIP 0x01000000
#define BNXT_FW_CAP_VLAN_TX_INSERT 0x02000000
#define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED 0x04000000
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 003330e8cd58..5edbee92f5c4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -11,6 +11,7 @@
#include <linux/ctype.h>
#include <linux/stringify.h>
#include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
#include <linux/linkmode.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
@@ -802,9 +803,11 @@ static void bnxt_get_ringparam(struct net_device *dev,
if (bp->flags & BNXT_FLAG_AGG_RINGS) {
ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT_JUM_ENA;
ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT;
+ kernel_ering->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED;
} else {
ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT;
ering->rx_jumbo_max_pending = 0;
+ kernel_ering->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED;
}
ering->tx_max_pending = BNXT_MAX_TX_DESC_CNT;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index ea86c54247c7..b7100edbd6dd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -369,6 +369,12 @@ struct cmd_nums {
#define HWRM_FUNC_PTP_EXT_CFG 0x1a0UL
#define HWRM_FUNC_PTP_EXT_QCFG 0x1a1UL
#define HWRM_FUNC_KEY_CTX_ALLOC 0x1a2UL
+ #define HWRM_FUNC_BACKING_STORE_CFG_V2 0x1a3UL
+ #define HWRM_FUNC_BACKING_STORE_QCFG_V2 0x1a4UL
+ #define HWRM_FUNC_DBR_PACING_CFG 0x1a5UL
+ #define HWRM_FUNC_DBR_PACING_QCFG 0x1a6UL
+ #define HWRM_FUNC_DBR_PACING_BROADCAST_EVENT 0x1a7UL
+ #define HWRM_FUNC_BACKING_STORE_QCAPS_V2 0x1a8UL
#define HWRM_SELFTEST_QLIST 0x200UL
#define HWRM_SELFTEST_EXEC 0x201UL
#define HWRM_SELFTEST_IRQ 0x202UL
@@ -390,6 +396,9 @@ struct cmd_nums {
#define HWRM_MFG_PRVSN_IMPORT_CERT 0x212UL
#define HWRM_MFG_PRVSN_GET_STATE 0x213UL
#define HWRM_MFG_GET_NVM_MEASUREMENT 0x214UL
+ #define HWRM_MFG_PSOC_QSTATUS 0x215UL
+ #define HWRM_MFG_SELFTEST_QLIST 0x216UL
+ #define HWRM_MFG_SELFTEST_EXEC 0x217UL
#define HWRM_TF 0x2bcUL
#define HWRM_TF_VERSION_GET 0x2bdUL
#define HWRM_TF_SESSION_OPEN 0x2c6UL
@@ -532,8 +541,8 @@ struct hwrm_err_output {
#define HWRM_VERSION_MAJOR 1
#define HWRM_VERSION_MINOR 10
#define HWRM_VERSION_UPDATE 2
-#define HWRM_VERSION_RSVD 63
-#define HWRM_VERSION_STR "1.10.2.63"
+#define HWRM_VERSION_RSVD 73
+#define HWRM_VERSION_STR "1.10.2.73"
/* hwrm_ver_get_input (size:192b/24B) */
struct hwrm_ver_get_input {
@@ -757,10 +766,11 @@ struct hwrm_async_event_cmpl {
#define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE 0x40UL
#define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE 0x41UL
#define ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST 0x42UL
- #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_MASTER 0x43UL
+ #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE 0x43UL
#define ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP 0x44UL
#define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT 0x45UL
- #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x46UL
+ #define ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD 0x46UL
+ #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x47UL
#define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL
#define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL
#define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR
@@ -1112,34 +1122,37 @@ struct hwrm_async_event_cmpl_echo_request {
__le32 event_data1;
};
-/* hwrm_async_event_cmpl_phc_master (size:128b/16B) */
-struct hwrm_async_event_cmpl_phc_master {
+/* hwrm_async_event_cmpl_phc_update (size:128b/16B) */
+struct hwrm_async_event_cmpl_phc_update {
__le16 type;
- #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_MASK 0x3fUL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_SFT 0
- #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT 0x2eUL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_LAST ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_MASK 0x3fUL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_SFT 0
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT
__le16 event_id;
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER 0x43UL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_LAST ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE 0x43UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE
__le32 event_data2;
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_SFT 0
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_MASK 0xffff0000UL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_SFT 16
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_SFT 0
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_MASK 0xffff0000UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_SFT 16
u8 opaque_v;
- #define ASYNC_EVENT_CMPL_PHC_MASTER_V 0x1UL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_MASK 0xfeUL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_SFT 1
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_V 0x1UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_MASK 0xfeUL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_SFT 1
u8 timestamp_lo;
__le16 timestamp_hi;
__le32 event_data1;
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_MASK 0xfUL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_SFT 0
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_MASTER 0x1UL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_SECONDARY 0x2UL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER 0x3UL
- #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_LAST ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK 0xfUL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT 0
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_MASTER 0x1UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_SECONDARY 0x2UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_FAILOVER 0x3UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE 0x4UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK 0xffff0UL
+ #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT 4
};
/* hwrm_async_event_cmpl_pps_timestamp (size:128b/16B) */
@@ -1330,6 +1343,30 @@ struct hwrm_async_event_cmpl_error_report_nvm {
#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE
};
+/* hwrm_async_event_cmpl_error_report_doorbell_drop_threshold (size:128b/16B) */
+struct hwrm_async_event_cmpl_error_report_doorbell_drop_threshold {
+ __le16 type;
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_MASK 0x3fUL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_SFT 0
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT
+ __le16 event_id;
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT 0x45UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT
+ __le32 event_data2;
+ u8 opaque_v;
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_V 0x1UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_MASK 0xfeUL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_SFT 1
+ u8 timestamp_lo;
+ __le16 timestamp_hi;
+ __le32 event_data1;
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_SFT 0
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL
+ #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD
+};
+
/* hwrm_func_reset_input (size:192b/24B) */
struct hwrm_func_reset_input {
__le16 req_type;
@@ -1589,6 +1626,10 @@ struct hwrm_func_qcaps_output {
#define FUNC_QCAPS_RESP_FLAGS_EXT_EP_RATE_CONTROL 0x800000UL
#define FUNC_QCAPS_RESP_FLAGS_EXT_MIN_BW_SUPPORTED 0x1000000UL
#define FUNC_QCAPS_RESP_FLAGS_EXT_TX_COAL_CMPL_CAP 0x2000000UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED 0x4000000UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_REQUIRED 0x8000000UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED 0x10000000UL
+ #define FUNC_QCAPS_RESP_FLAGS_EXT_DBR_PACING_SUPPORTED 0x20000000UL
u8 max_schqs;
u8 mpc_chnls_cap;
#define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE 0x1UL
@@ -2455,7 +2496,7 @@ struct hwrm_func_backing_store_qcaps_output {
__le16 rkc_entry_size;
__le32 tkc_max_entries;
__le32 rkc_max_entries;
- u8 rsvd[7];
+ u8 rsvd1[7];
u8 valid;
};
@@ -3164,7 +3205,7 @@ struct hwrm_func_ptp_pin_cfg_output {
u8 valid;
};
-/* hwrm_func_ptp_cfg_input (size:320b/40B) */
+/* hwrm_func_ptp_cfg_input (size:384b/48B) */
struct hwrm_func_ptp_cfg_input {
__le16 req_type;
__le16 cmpl_ring;
@@ -3178,6 +3219,7 @@ struct hwrm_func_ptp_cfg_input {
#define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PERIOD 0x8UL
#define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_UP 0x10UL
#define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PHASE 0x20UL
+ #define FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME 0x40UL
u8 ptp_pps_event;
#define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_INTERNAL 0x1UL
#define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_EXTERNAL 0x2UL
@@ -3204,6 +3246,7 @@ struct hwrm_func_ptp_cfg_input {
__le32 ptp_freq_adj_ext_up;
__le32 ptp_freq_adj_ext_phase_lower;
__le32 ptp_freq_adj_ext_phase_upper;
+ __le64 ptp_set_time;
};
/* hwrm_func_ptp_cfg_output (size:128b/16B) */
@@ -3243,6 +3286,308 @@ struct hwrm_func_ptp_ts_query_output {
u8 valid;
};
+/* hwrm_func_ptp_ext_cfg_input (size:256b/32B) */
+struct hwrm_func_ptp_ext_cfg_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 enables;
+ #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_MASTER_FID 0x1UL
+ #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_FID 0x2UL
+ #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_MODE 0x4UL
+ #define FUNC_PTP_EXT_CFG_REQ_ENABLES_FAILOVER_TIMER 0x8UL
+ __le16 phc_master_fid;
+ __le16 phc_sec_fid;
+ u8 phc_sec_mode;
+ #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_SWITCH 0x0UL
+ #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_ALL 0x1UL
+ #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY 0x2UL
+ #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_LAST FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY
+ u8 unused_0;
+ __le32 failover_timer;
+ u8 unused_1[4];
+};
+
+/* hwrm_func_ptp_ext_cfg_output (size:128b/16B) */
+struct hwrm_func_ptp_ext_cfg_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ u8 unused_0[7];
+ u8 valid;
+};
+
+/* hwrm_func_ptp_ext_qcfg_input (size:192b/24B) */
+struct hwrm_func_ptp_ext_qcfg_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ u8 unused_0[8];
+};
+
+/* hwrm_func_ptp_ext_qcfg_output (size:256b/32B) */
+struct hwrm_func_ptp_ext_qcfg_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le16 phc_master_fid;
+ __le16 phc_sec_fid;
+ __le16 phc_active_fid0;
+ __le16 phc_active_fid1;
+ __le32 last_failover_event;
+ __le16 from_fid;
+ __le16 to_fid;
+ u8 unused_0[7];
+ u8 valid;
+};
+
+/* hwrm_func_backing_store_cfg_v2_input (size:448b/56B) */
+struct hwrm_func_backing_store_cfg_v2_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 type;
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TKC 0x13UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RKC 0x14UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID
+ __le16 instance;
+ __le32 flags;
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_PREBOOT_MODE 0x1UL
+ __le64 page_dir;
+ __le32 num_entries;
+ __le16 entry_size;
+ u8 page_size_pbl_level;
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_MASK 0xfUL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_SFT 0
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_0 0x0UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_1 0x1UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2 0x2UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LAST FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_MASK 0xf0UL
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_SFT 4
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_4K (0x0UL << 4)
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8K (0x1UL << 4)
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_64K (0x2UL << 4)
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_2M (0x3UL << 4)
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8M (0x4UL << 4)
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G (0x5UL << 4)
+ #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G
+ u8 subtype_valid_cnt;
+ __le32 split_entry_0;
+ __le32 split_entry_1;
+ __le32 split_entry_2;
+ __le32 split_entry_3;
+};
+
+/* hwrm_func_backing_store_cfg_v2_output (size:128b/16B) */
+struct hwrm_func_backing_store_cfg_v2_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ u8 rsvd0[7];
+ u8 valid;
+};
+
+/* hwrm_func_backing_store_qcfg_v2_input (size:192b/24B) */
+struct hwrm_func_backing_store_qcfg_v2_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 type;
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TKC 0x13UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RKC 0x14UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID
+ __le16 instance;
+ u8 rsvd[4];
+};
+
+/* hwrm_func_backing_store_qcfg_v2_output (size:448b/56B) */
+struct hwrm_func_backing_store_qcfg_v2_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le16 type;
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TKC 0x13UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RKC 0x14UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID
+ __le16 instance;
+ __le32 flags;
+ __le64 page_dir;
+ __le32 num_entries;
+ u8 page_size_pbl_level;
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_MASK 0xfUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_SFT 0
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_0 0x0UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_1 0x1UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2 0x2UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_MASK 0xf0UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_SFT 4
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_4K (0x0UL << 4)
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8K (0x1UL << 4)
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_64K (0x2UL << 4)
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_2M (0x3UL << 4)
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8M (0x4UL << 4)
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G (0x5UL << 4)
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G
+ u8 subtype_valid_cnt;
+ u8 rsvd[2];
+ __le32 split_entry_0;
+ __le32 split_entry_1;
+ __le32 split_entry_2;
+ __le32 split_entry_3;
+ u8 rsvd2[7];
+ u8 valid;
+};
+
+/* qpc_split_entries (size:128b/16B) */
+struct qpc_split_entries {
+ __le32 qp_num_l2_entries;
+ __le32 qp_num_qp1_entries;
+ __le32 rsvd[2];
+};
+
+/* srq_split_entries (size:128b/16B) */
+struct srq_split_entries {
+ __le32 srq_num_l2_entries;
+ __le32 rsvd;
+ __le32 rsvd2[2];
+};
+
+/* cq_split_entries (size:128b/16B) */
+struct cq_split_entries {
+ __le32 cq_num_l2_entries;
+ __le32 rsvd;
+ __le32 rsvd2[2];
+};
+
+/* vnic_split_entries (size:128b/16B) */
+struct vnic_split_entries {
+ __le32 vnic_num_vnic_entries;
+ __le32 rsvd;
+ __le32 rsvd2[2];
+};
+
+/* mrav_split_entries (size:128b/16B) */
+struct mrav_split_entries {
+ __le32 mrav_num_av_entries;
+ __le32 rsvd;
+ __le32 rsvd2[2];
+};
+
+/* hwrm_func_backing_store_qcaps_v2_input (size:192b/24B) */
+struct hwrm_func_backing_store_qcaps_v2_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 type;
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TKC 0x13UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RKC 0x14UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID
+ u8 rsvd[6];
+};
+
+/* hwrm_func_backing_store_qcaps_v2_output (size:448b/56B) */
+struct hwrm_func_backing_store_qcaps_v2_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le16 type;
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TKC 0x13UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RKC 0x14UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID
+ __le16 entry_size;
+ __le32 flags;
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT 0x1UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID 0x2UL
+ __le32 instance_bit_map;
+ u8 ctx_init_value;
+ u8 ctx_init_offset;
+ u8 entry_multiple;
+ u8 rsvd;
+ __le32 max_num_entries;
+ __le32 min_num_entries;
+ __le16 next_valid_type;
+ u8 subtype_valid_cnt;
+ u8 rsvd2;
+ __le32 split_entry_0;
+ __le32 split_entry_1;
+ __le32 split_entry_2;
+ __le32 split_entry_3;
+ u8 rsvd3[3];
+ u8 valid;
+};
+
/* hwrm_func_drv_if_change_input (size:192b/24B) */
struct hwrm_func_drv_if_change_input {
__le16 req_type;
@@ -3741,7 +4086,7 @@ struct hwrm_port_phy_qcfg_output {
u8 valid;
};
-/* hwrm_port_mac_cfg_input (size:384b/48B) */
+/* hwrm_port_mac_cfg_input (size:448b/56B) */
struct hwrm_port_mac_cfg_input {
__le16 req_type;
__le16 cmpl_ring;
@@ -3807,7 +4152,8 @@ struct hwrm_port_mac_cfg_input {
#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT 5
u8 unused_0[3];
__le32 ptp_freq_adj_ppb;
- __le32 ptp_adj_phase;
+ u8 unused_1[4];
+ __le64 ptp_adj_phase;
};
/* hwrm_port_mac_cfg_output (size:128b/16B) */
@@ -3850,6 +4196,7 @@ struct hwrm_port_mac_ptp_qcfg_output {
#define PORT_MAC_PTP_QCFG_RESP_FLAGS_ONE_STEP_TX_TS 0x4UL
#define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS 0x8UL
#define PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK 0x10UL
+ #define PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED 0x20UL
u8 unused_0[3];
__le32 rx_ts_reg_off_lower;
__le32 rx_ts_reg_off_upper;
@@ -4339,7 +4686,8 @@ struct hwrm_port_phy_qcaps_output {
#define PORT_PHY_QCAPS_RESP_PORT_CNT_2 0x2UL
#define PORT_PHY_QCAPS_RESP_PORT_CNT_3 0x3UL
#define PORT_PHY_QCAPS_RESP_PORT_CNT_4 0x4UL
- #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST PORT_PHY_QCAPS_RESP_PORT_CNT_4
+ #define PORT_PHY_QCAPS_RESP_PORT_CNT_12 0xcUL
+ #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST PORT_PHY_QCAPS_RESP_PORT_CNT_12
__le16 supported_speeds_force_mode;
#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD 0x1UL
#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MB 0x2UL
@@ -4399,7 +4747,7 @@ struct hwrm_port_phy_qcaps_output {
__le16 flags2;
#define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED 0x1UL
#define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED 0x2UL
- u8 unused_0[1];
+ u8 internal_port_cnt;
u8 valid;
};
@@ -6221,12 +6569,13 @@ struct hwrm_vnic_rss_cfg_input {
__le16 target_id;
__le64 resp_addr;
__le32 hash_type;
- #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 0x1UL
- #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 0x2UL
- #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 0x4UL
- #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 0x8UL
- #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6 0x10UL
- #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6 0x20UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 0x1UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 0x2UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 0x4UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 0x8UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6 0x10UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6 0x20UL
+ #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL 0x40UL
__le16 vnic_id;
u8 ring_table_pair_index;
u8 hash_mode_flags;
@@ -7898,6 +8247,7 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output {
u8 valid;
};
+/* hwrm_tunnel_dst_port_query_input (size:192b/24B) */
struct hwrm_tunnel_dst_port_query_input {
__le16 req_type;
__le16 cmpl_ring;
@@ -8909,6 +9259,50 @@ struct hwrm_dbg_qcfg_output {
u8 valid;
};
+/* hwrm_dbg_crashdump_medium_cfg_input (size:320b/40B) */
+struct hwrm_dbg_crashdump_medium_cfg_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 output_dest_flags;
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_TYPE_DDR 0x1UL
+ __le16 pg_size_lvl;
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_MASK 0x3UL
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_SFT 0
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_0 0x0UL
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_1 0x1UL
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2 0x2UL
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LAST DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_MASK 0x1cUL
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_SFT 2
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G (0x5UL << 2)
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_LAST DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_MASK 0xffe0UL
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_SFT 5
+ __le32 size;
+ __le32 coredump_component_disable_flags;
+ #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_NVRAM 0x1UL
+ __le32 unused_0;
+ __le64 pbl;
+};
+
+/* hwrm_dbg_crashdump_medium_cfg_output (size:128b/16B) */
+struct hwrm_dbg_crashdump_medium_cfg_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ u8 unused_1[7];
+ u8 valid;
+};
+
/* coredump_segment_record (size:128b/16B) */
struct coredump_segment_record {
__le16 component_id;
@@ -9372,8 +9766,35 @@ struct hwrm_nvm_install_update_output {
__le16 resp_len;
__le64 installed_items;
u8 result;
- #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL
- #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_FAILURE 0xffUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_MALLOC_FAILURE 0xfdUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_INDEX_PARAMETER 0xfbUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TYPE_PARAMETER 0xf3UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PREREQUISITE 0xf2UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_FILE_HEADER 0xecUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_SIGNATURE 0xebUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_STREAM 0xeaUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_LENGTH 0xe9UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_MANIFEST 0xe8UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TRAILER 0xe7UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_CHECKSUM 0xe6UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_ITEM_CHECKSUM 0xe5UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DATA_LENGTH 0xe4UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DIRECTIVE 0xe1UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_CHIP_REV 0xceUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_DEVICE_ID 0xcdUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_VENDOR 0xccUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_ID 0xcbUL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_PLATFORM 0xc5UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_DUPLICATE_ITEM 0xc4UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_ZERO_LENGTH_ITEM 0xc3UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_CHECKSUM_ERROR 0xb9UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_DATA_ERROR 0xb8UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_AUTHENTICATION_ERROR 0xb7UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_NOT_FOUND 0xb0UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED 0xa7UL
+ #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED
u8 problem_item;
#define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE 0x0UL
#define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE 0xffUL
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index 48520967746f..a0b321a19361 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -19,6 +19,20 @@
#include "bnxt_hwrm.h"
#include "bnxt_ptp.h"
+static int bnxt_ptp_cfg_settime(struct bnxt *bp, u64 time)
+{
+ struct hwrm_func_ptp_cfg_input *req;
+ int rc;
+
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG);
+ if (rc)
+ return rc;
+
+ req->enables = cpu_to_le16(FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME);
+ req->ptp_set_time = cpu_to_le64(time);
+ return hwrm_req_send(bp, req);
+}
+
int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off)
{
unsigned int ptp_class;
@@ -48,6 +62,9 @@ static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info,
ptp_info);
u64 ns = timespec64_to_ns(ts);
+ if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
+ return bnxt_ptp_cfg_settime(ptp->bp, ns);
+
spin_lock_bh(&ptp->ptp_lock);
timecounter_init(&ptp->tc, &ptp->cc, ns);
spin_unlock_bh(&ptp->ptp_lock);
@@ -131,11 +148,47 @@ static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info,
return 0;
}
+/* Caller holds ptp_lock */
+void bnxt_ptp_update_current_time(struct bnxt *bp)
+{
+ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+ bnxt_refclk_read(ptp->bp, NULL, &ptp->current_time);
+ WRITE_ONCE(ptp->old_time, ptp->current_time);
+}
+
+static int bnxt_ptp_adjphc(struct bnxt_ptp_cfg *ptp, s64 delta)
+{
+ struct hwrm_port_mac_cfg_input *req;
+ int rc;
+
+ rc = hwrm_req_init(ptp->bp, req, HWRM_PORT_MAC_CFG);
+ if (rc)
+ return rc;
+
+ req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_ADJ_PHASE);
+ req->ptp_adj_phase = cpu_to_le64(delta);
+
+ rc = hwrm_req_send(ptp->bp, req);
+ if (rc) {
+ netdev_err(ptp->bp->dev, "ptp adjphc failed. rc = %x\n", rc);
+ } else {
+ spin_lock_bh(&ptp->ptp_lock);
+ bnxt_ptp_update_current_time(ptp->bp);
+ spin_unlock_bh(&ptp->ptp_lock);
+ }
+
+ return rc;
+}
+
static int bnxt_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta)
{
struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
ptp_info);
+ if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
+ return bnxt_ptp_adjphc(ptp, delta);
+
spin_lock_bh(&ptp->ptp_lock);
timecounter_adjtime(&ptp->tc, delta);
spin_unlock_bh(&ptp->ptp_lock);
@@ -714,7 +767,70 @@ static bool bnxt_pps_config_ok(struct bnxt *bp)
return !(bp->fw_cap & BNXT_FW_CAP_PTP_PPS) == !ptp->ptp_info.pin_config;
}
-int bnxt_ptp_init(struct bnxt *bp)
+static void bnxt_ptp_timecounter_init(struct bnxt *bp, bool init_tc)
+{
+ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+ if (!ptp->ptp_clock) {
+ memset(&ptp->cc, 0, sizeof(ptp->cc));
+ ptp->cc.read = bnxt_cc_read;
+ ptp->cc.mask = CYCLECOUNTER_MASK(48);
+ ptp->cc.shift = 0;
+ ptp->cc.mult = 1;
+ ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
+ }
+ if (init_tc)
+ timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
+}
+
+/* Caller holds ptp_lock */
+void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns)
+{
+ timecounter_init(&ptp->tc, &ptp->cc, ns);
+ /* For RTC, cycle_last must be in sync with the timecounter value. */
+ ptp->tc.cycle_last = ns & ptp->cc.mask;
+}
+
+int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg)
+{
+ struct timespec64 tsp;
+ u64 ns;
+ int rc;
+
+ if (!bp->ptp_cfg || !(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+ return -ENODEV;
+
+ if (!phc_cfg) {
+ ktime_get_real_ts64(&tsp);
+ ns = timespec64_to_ns(&tsp);
+ rc = bnxt_ptp_cfg_settime(bp, ns);
+ if (rc)
+ return rc;
+ } else {
+ rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, &ns);
+ if (rc)
+ return rc;
+ }
+ spin_lock_bh(&bp->ptp_cfg->ptp_lock);
+ bnxt_ptp_rtc_timecounter_init(bp->ptp_cfg, ns);
+ spin_unlock_bh(&bp->ptp_cfg->ptp_lock);
+
+ return 0;
+}
+
+static void bnxt_ptp_free(struct bnxt *bp)
+{
+ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+ if (ptp->ptp_clock) {
+ ptp_clock_unregister(ptp->ptp_clock);
+ ptp->ptp_clock = NULL;
+ kfree(ptp->ptp_info.pin_config);
+ ptp->ptp_info.pin_config = NULL;
+ }
+}
+
+int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
{
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
int rc;
@@ -726,26 +842,23 @@ int bnxt_ptp_init(struct bnxt *bp)
if (rc)
return rc;
+ if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+ bnxt_ptp_timecounter_init(bp, false);
+ rc = bnxt_ptp_init_rtc(bp, phc_cfg);
+ if (rc)
+ goto out;
+ }
+
if (ptp->ptp_clock && bnxt_pps_config_ok(bp))
return 0;
- if (ptp->ptp_clock) {
- ptp_clock_unregister(ptp->ptp_clock);
- ptp->ptp_clock = NULL;
- kfree(ptp->ptp_info.pin_config);
- ptp->ptp_info.pin_config = NULL;
- }
+ bnxt_ptp_free(bp);
+
atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS);
spin_lock_init(&ptp->ptp_lock);
- memset(&ptp->cc, 0, sizeof(ptp->cc));
- ptp->cc.read = bnxt_cc_read;
- ptp->cc.mask = CYCLECOUNTER_MASK(48);
- ptp->cc.shift = 0;
- ptp->cc.mult = 1;
-
- ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
- timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
+ if (!(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+ bnxt_ptp_timecounter_init(bp, true);
ptp->ptp_info = bnxt_ptp_caps;
if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) {
@@ -757,8 +870,8 @@ int bnxt_ptp_init(struct bnxt *bp)
int err = PTR_ERR(ptp->ptp_clock);
ptp->ptp_clock = NULL;
- bnxt_unmap_ptp_regs(bp);
- return err;
+ rc = err;
+ goto out;
}
if (bp->flags & BNXT_FLAG_CHIP_P5) {
spin_lock_bh(&ptp->ptp_lock);
@@ -768,6 +881,11 @@ int bnxt_ptp_init(struct bnxt *bp)
ptp_schedule_worker(ptp->ptp_clock, 0);
}
return 0;
+
+out:
+ bnxt_ptp_free(bp);
+ bnxt_unmap_ptp_regs(bp);
+ return rc;
}
void bnxt_ptp_clear(struct bnxt *bp)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
index 7c528e1f8713..373baf45884b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
@@ -131,12 +131,15 @@ do { \
#endif
int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off);
+void bnxt_ptp_update_current_time(struct bnxt *bp);
void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2);
void bnxt_ptp_reapply_pps(struct bnxt *bp);
int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb);
int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts);
-int bnxt_ptp_init(struct bnxt *bp);
+void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns);
+int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg);
+int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg);
void bnxt_ptp_clear(struct bnxt *bp);
#endif
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 87f1056e29ff..cfe09117fe6c 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1368,7 +1368,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
if (!p->eee_enabled) {
bcmgenet_eee_enable_set(dev, false);
} else {
- ret = phy_init_eee(dev->phydev, 0);
+ ret = phy_init_eee(dev->phydev, false);
if (ret) {
netif_err(priv, hw, dev, "EEE initialization failed\n");
return ret;
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 9ddbee7de72b..f0a7d8396a4a 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -12,6 +12,7 @@
#include <linux/ptp_clock_kernel.h>
#include <linux/net_tstamp.h>
#include <linux/interrupt.h>
+#include <linux/phy/phy.h>
#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) || defined(CONFIG_MACB_USE_HWSTAMP)
#define MACB_EXT_DESC
@@ -1291,6 +1292,9 @@ struct macb {
u32 wol;
struct macb_ptp_info *ptp_info; /* macb-ptp interface */
+
+ struct phy *sgmii_phy; /* for ZynqMP SGMII mode */
+
#ifdef MACB_EXT_DESC
uint8_t hw_dma_cap;
#endif
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 98498a76ae16..4c231159b562 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -34,7 +34,9 @@
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/iopoll.h>
+#include <linux/phy/phy.h>
#include <linux/pm_runtime.h>
+#include <linux/reset.h>
#include "macb.h"
/* This structure is only used for MACB on SiFive FU540 devices */
@@ -2739,10 +2741,14 @@ static int macb_open(struct net_device *dev)
macb_init_hw(bp);
- err = macb_phylink_connect(bp);
+ err = phy_power_on(bp->sgmii_phy);
if (err)
goto reset_hw;
+ err = macb_phylink_connect(bp);
+ if (err)
+ goto phy_off;
+
netif_tx_start_all_queues(dev);
if (bp->ptp_info)
@@ -2750,6 +2756,9 @@ static int macb_open(struct net_device *dev)
return 0;
+phy_off:
+ phy_power_off(bp->sgmii_phy);
+
reset_hw:
macb_reset_hw(bp);
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
@@ -2775,6 +2784,8 @@ static int macb_close(struct net_device *dev)
phylink_stop(bp->phylink);
phylink_disconnect_phy(bp->phylink);
+ phy_power_off(bp->sgmii_phy);
+
spin_lock_irqsave(&bp->lock, flags);
macb_reset_hw(bp);
netif_carrier_off(dev);
@@ -4544,13 +4555,55 @@ static const struct macb_config np4_config = {
.usrio = &macb_default_usrio,
};
+static int zynqmp_init(struct platform_device *pdev)
+{
+ struct net_device *dev = platform_get_drvdata(pdev);
+ struct macb *bp = netdev_priv(dev);
+ int ret;
+
+ if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+ /* Ensure PS-GTR PHY device used in SGMII mode is ready */
+ bp->sgmii_phy = devm_phy_get(&pdev->dev, "sgmii-phy");
+
+ if (IS_ERR(bp->sgmii_phy)) {
+ ret = PTR_ERR(bp->sgmii_phy);
+ dev_err_probe(&pdev->dev, ret,
+ "failed to get PS-GTR PHY\n");
+ return ret;
+ }
+
+ ret = phy_init(bp->sgmii_phy);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to init PS-GTR PHY: %d\n",
+ ret);
+ return ret;
+ }
+ }
+
+ /* Fully reset GEM controller at hardware level using zynqmp-reset driver,
+ * if mapped in device tree.
+ */
+ ret = device_reset_optional(&pdev->dev);
+ if (ret) {
+ dev_err_probe(&pdev->dev, ret, "failed to reset controller");
+ phy_exit(bp->sgmii_phy);
+ return ret;
+ }
+
+ ret = macb_init(pdev);
+ if (ret)
+ phy_exit(bp->sgmii_phy);
+
+ return ret;
+}
+
static const struct macb_config zynqmp_config = {
.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE |
MACB_CAPS_JUMBO |
MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH,
.dma_burst_length = 16,
.clk_init = macb_clk_init,
- .init = macb_init,
+ .init = zynqmp_init,
.jumbo_max_len = 10240,
.usrio = &macb_default_usrio,
};
@@ -4767,7 +4820,7 @@ static int macb_probe(struct platform_device *pdev)
err = macb_mii_init(bp);
if (err)
- goto err_out_free_netdev;
+ goto err_out_phy_exit;
netif_carrier_off(dev);
@@ -4792,6 +4845,9 @@ err_out_unregister_mdio:
mdiobus_unregister(bp->mii_bus);
mdiobus_free(bp->mii_bus);
+err_out_phy_exit:
+ phy_exit(bp->sgmii_phy);
+
err_out_free_netdev:
free_netdev(dev);
@@ -4813,6 +4869,7 @@ static int macb_remove(struct platform_device *pdev)
if (dev) {
bp = netdev_priv(dev);
+ phy_exit(bp->sgmii_phy);
mdiobus_unregister(bp->mii_bus);
mdiobus_free(bp->mii_bus);
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index ba28aa444e5a..8e07192e409f 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1539,7 +1539,7 @@ static int liquidio_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
* compute the delta in terms of coprocessor clocks.
*/
delta = (u64)ppb << 32;
- do_div(delta, oct->coproc_clock_rate);
+ div64_u64(delta, oct->coproc_clock_rate);
spin_lock_irqsave(&lio->ptp_lock, flags);
comp = lio_pci_readq(oct, CN6XXX_MIO_PTP_CLOCK_COMP);
@@ -1672,7 +1672,7 @@ static void liquidio_ptp_init(struct octeon_device *oct)
u64 clock_comp, cfg;
clock_comp = (u64)NSEC_PER_SEC << 32;
- do_div(clock_comp, oct->coproc_clock_rate);
+ div64_u64(clock_comp, oct->coproc_clock_rate);
lio_pci_writeq(oct, clock_comp, CN6XXX_MIO_PTP_CLOCK_COMP);
/* Enable */
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 574a32f23f96..2f6484dc186a 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1409,7 +1409,8 @@ static acpi_status bgx_acpi_register_phy(acpi_handle handle,
struct device *dev = &bgx->pdev->dev;
struct acpi_device *adev;
- if (acpi_bus_get_device(handle, &adev))
+ adev = acpi_fetch_acpi_dev(handle);
+ if (!adev)
goto out;
acpi_get_mac_address(dev, adev, bgx->lmac[bgx->acpi_lmac_idx].mac);
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index c78b99a497df..8014eb33937c 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -2363,11 +2363,13 @@ static void gemini_port_save_mac_addr(struct gemini_ethernet_port *port)
static int gemini_ethernet_port_probe(struct platform_device *pdev)
{
char *port_names[2] = { "ethernet0", "ethernet1" };
+ struct device_node *np = pdev->dev.of_node;
struct gemini_ethernet_port *port;
struct device *dev = &pdev->dev;
struct gemini_ethernet *geth;
struct net_device *netdev;
struct device *parent;
+ u8 mac[ETH_ALEN];
unsigned int id;
int irq;
int ret;
@@ -2473,6 +2475,12 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
netif_napi_add(netdev, &port->napi, gmac_napi_poll,
DEFAULT_NAPI_WEIGHT);
+ ret = of_get_mac_address(np, mac);
+ if (!ret) {
+ dev_info(dev, "Setting macaddr from DT %pM\n", mac);
+ memcpy(port->mac_addr, mac, ETH_ALEN);
+ }
+
if (is_valid_ether_addr((void *)port->mac_addr)) {
eth_hw_addr_set(netdev, (u8 *)port->mac_addr);
} else {
diff --git a/drivers/net/ethernet/dec/tulip/pnic.c b/drivers/net/ethernet/dec/tulip/pnic.c
index 3fb39e32e1b4..653bde48ef44 100644
--- a/drivers/net/ethernet/dec/tulip/pnic.c
+++ b/drivers/net/ethernet/dec/tulip/pnic.c
@@ -21,7 +21,7 @@ void pnic_do_nway(struct net_device *dev)
struct tulip_private *tp = netdev_priv(dev);
void __iomem *ioaddr = tp->base_addr;
u32 phy_reg = ioread32(ioaddr + 0xB8);
- u32 new_csr6 = tp->csr6 & ~0x40C40200;
+ u32 new_csr6;
if (phy_reg & 0x78000000) { /* Ignore baseT4 */
if (phy_reg & 0x20000000) dev->if_port = 5;
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index c710dc17be90..8dd7bf9014ec 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -340,7 +340,7 @@ enum wake_event_bits {
struct netdev_desc {
__le32 next_desc;
__le32 status;
- struct desc_frag { __le32 addr, length; } frag[1];
+ struct desc_frag { __le32 addr, length; } frag;
};
/* Bits in netdev_desc.status */
@@ -980,8 +980,8 @@ static void tx_timeout(struct net_device *dev, unsigned int txqueue)
le32_to_cpu(np->tx_ring[i].next_desc),
le32_to_cpu(np->tx_ring[i].status),
(le32_to_cpu(np->tx_ring[i].status) >> 2) & 0xff,
- le32_to_cpu(np->tx_ring[i].frag[0].addr),
- le32_to_cpu(np->tx_ring[i].frag[0].length));
+ le32_to_cpu(np->tx_ring[i].frag.addr),
+ le32_to_cpu(np->tx_ring[i].frag.length));
}
printk(KERN_DEBUG "TxListPtr=%08x netif_queue_stopped=%d\n",
ioread32(np->base + TxListPtr),
@@ -1027,7 +1027,7 @@ static void init_ring(struct net_device *dev)
np->rx_ring[i].next_desc = cpu_to_le32(np->rx_ring_dma +
((i+1)%RX_RING_SIZE)*sizeof(*np->rx_ring));
np->rx_ring[i].status = 0;
- np->rx_ring[i].frag[0].length = 0;
+ np->rx_ring[i].frag.length = 0;
np->rx_skbuff[i] = NULL;
}
@@ -1039,16 +1039,16 @@ static void init_ring(struct net_device *dev)
if (skb == NULL)
break;
skb_reserve(skb, 2); /* 16 byte align the IP header. */
- np->rx_ring[i].frag[0].addr = cpu_to_le32(
+ np->rx_ring[i].frag.addr = cpu_to_le32(
dma_map_single(&np->pci_dev->dev, skb->data,
np->rx_buf_sz, DMA_FROM_DEVICE));
if (dma_mapping_error(&np->pci_dev->dev,
- np->rx_ring[i].frag[0].addr)) {
+ np->rx_ring[i].frag.addr)) {
dev_kfree_skb(skb);
np->rx_skbuff[i] = NULL;
break;
}
- np->rx_ring[i].frag[0].length = cpu_to_le32(np->rx_buf_sz | LastFrag);
+ np->rx_ring[i].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag);
}
np->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
@@ -1097,12 +1097,12 @@ start_tx (struct sk_buff *skb, struct net_device *dev)
txdesc->next_desc = 0;
txdesc->status = cpu_to_le32 ((entry << 2) | DisableAlign);
- txdesc->frag[0].addr = cpu_to_le32(dma_map_single(&np->pci_dev->dev,
+ txdesc->frag.addr = cpu_to_le32(dma_map_single(&np->pci_dev->dev,
skb->data, skb->len, DMA_TO_DEVICE));
if (dma_mapping_error(&np->pci_dev->dev,
- txdesc->frag[0].addr))
+ txdesc->frag.addr))
goto drop_frame;
- txdesc->frag[0].length = cpu_to_le32 (skb->len | LastFrag);
+ txdesc->frag.length = cpu_to_le32 (skb->len | LastFrag);
/* Increment cur_tx before tasklet_schedule() */
np->cur_tx++;
@@ -1151,7 +1151,7 @@ reset_tx (struct net_device *dev)
skb = np->tx_skbuff[i];
if (skb) {
dma_unmap_single(&np->pci_dev->dev,
- le32_to_cpu(np->tx_ring[i].frag[0].addr),
+ le32_to_cpu(np->tx_ring[i].frag.addr),
skb->len, DMA_TO_DEVICE);
dev_kfree_skb_any(skb);
np->tx_skbuff[i] = NULL;
@@ -1271,12 +1271,12 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
skb = np->tx_skbuff[entry];
/* Free the original skb. */
dma_unmap_single(&np->pci_dev->dev,
- le32_to_cpu(np->tx_ring[entry].frag[0].addr),
+ le32_to_cpu(np->tx_ring[entry].frag.addr),
skb->len, DMA_TO_DEVICE);
dev_consume_skb_irq(np->tx_skbuff[entry]);
np->tx_skbuff[entry] = NULL;
- np->tx_ring[entry].frag[0].addr = 0;
- np->tx_ring[entry].frag[0].length = 0;
+ np->tx_ring[entry].frag.addr = 0;
+ np->tx_ring[entry].frag.length = 0;
}
spin_unlock(&np->lock);
} else {
@@ -1290,12 +1290,12 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
skb = np->tx_skbuff[entry];
/* Free the original skb. */
dma_unmap_single(&np->pci_dev->dev,
- le32_to_cpu(np->tx_ring[entry].frag[0].addr),
+ le32_to_cpu(np->tx_ring[entry].frag.addr),
skb->len, DMA_TO_DEVICE);
dev_consume_skb_irq(np->tx_skbuff[entry]);
np->tx_skbuff[entry] = NULL;
- np->tx_ring[entry].frag[0].addr = 0;
- np->tx_ring[entry].frag[0].length = 0;
+ np->tx_ring[entry].frag.addr = 0;
+ np->tx_ring[entry].frag.length = 0;
}
spin_unlock(&np->lock);
}
@@ -1372,16 +1372,16 @@ static void rx_poll(struct tasklet_struct *t)
(skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) {
skb_reserve(skb, 2); /* 16 byte align the IP header */
dma_sync_single_for_cpu(&np->pci_dev->dev,
- le32_to_cpu(desc->frag[0].addr),
+ le32_to_cpu(desc->frag.addr),
np->rx_buf_sz, DMA_FROM_DEVICE);
skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len);
dma_sync_single_for_device(&np->pci_dev->dev,
- le32_to_cpu(desc->frag[0].addr),
+ le32_to_cpu(desc->frag.addr),
np->rx_buf_sz, DMA_FROM_DEVICE);
skb_put(skb, pkt_len);
} else {
dma_unmap_single(&np->pci_dev->dev,
- le32_to_cpu(desc->frag[0].addr),
+ le32_to_cpu(desc->frag.addr),
np->rx_buf_sz, DMA_FROM_DEVICE);
skb_put(skb = np->rx_skbuff[entry], pkt_len);
np->rx_skbuff[entry] = NULL;
@@ -1427,18 +1427,18 @@ static void refill_rx (struct net_device *dev)
if (skb == NULL)
break; /* Better luck next round. */
skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
- np->rx_ring[entry].frag[0].addr = cpu_to_le32(
+ np->rx_ring[entry].frag.addr = cpu_to_le32(
dma_map_single(&np->pci_dev->dev, skb->data,
np->rx_buf_sz, DMA_FROM_DEVICE));
if (dma_mapping_error(&np->pci_dev->dev,
- np->rx_ring[entry].frag[0].addr)) {
+ np->rx_ring[entry].frag.addr)) {
dev_kfree_skb_irq(skb);
np->rx_skbuff[entry] = NULL;
break;
}
}
/* Perhaps we need not reset this field. */
- np->rx_ring[entry].frag[0].length =
+ np->rx_ring[entry].frag.length =
cpu_to_le32(np->rx_buf_sz | LastFrag);
np->rx_ring[entry].status = 0;
cnt++;
@@ -1870,14 +1870,14 @@ static int netdev_close(struct net_device *dev)
(int)(np->tx_ring_dma));
for (i = 0; i < TX_RING_SIZE; i++)
printk(KERN_DEBUG " #%d desc. %4.4x %8.8x %8.8x.\n",
- i, np->tx_ring[i].status, np->tx_ring[i].frag[0].addr,
- np->tx_ring[i].frag[0].length);
+ i, np->tx_ring[i].status, np->tx_ring[i].frag.addr,
+ np->tx_ring[i].frag.length);
printk(KERN_DEBUG " Rx ring %8.8x:\n",
(int)(np->rx_ring_dma));
for (i = 0; i < /*RX_RING_SIZE*/4 ; i++) {
printk(KERN_DEBUG " #%d desc. %4.4x %4.4x %8.8x\n",
- i, np->rx_ring[i].status, np->rx_ring[i].frag[0].addr,
- np->rx_ring[i].frag[0].length);
+ i, np->rx_ring[i].status, np->rx_ring[i].frag.addr,
+ np->rx_ring[i].frag.length);
}
}
#endif /* __i386__ debugging only */
@@ -1892,19 +1892,19 @@ static int netdev_close(struct net_device *dev)
skb = np->rx_skbuff[i];
if (skb) {
dma_unmap_single(&np->pci_dev->dev,
- le32_to_cpu(np->rx_ring[i].frag[0].addr),
+ le32_to_cpu(np->rx_ring[i].frag.addr),
np->rx_buf_sz, DMA_FROM_DEVICE);
dev_kfree_skb(skb);
np->rx_skbuff[i] = NULL;
}
- np->rx_ring[i].frag[0].addr = cpu_to_le32(0xBADF00D0); /* poison */
+ np->rx_ring[i].frag.addr = cpu_to_le32(0xBADF00D0); /* poison */
}
for (i = 0; i < TX_RING_SIZE; i++) {
np->tx_ring[i].next_desc = 0;
skb = np->tx_skbuff[i];
if (skb) {
dma_unmap_single(&np->pci_dev->dev,
- le32_to_cpu(np->tx_ring[i].frag[0].addr),
+ le32_to_cpu(np->tx_ring[i].frag.addr),
skb->len, DMA_TO_DEVICE);
dev_kfree_skb(skb);
np->tx_skbuff[i] = NULL;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index dd9385d15f6b..c4a48e6f1758 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -18,6 +18,7 @@
#include <linux/ptp_classify.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
+#include <net/tso.h>
#include "dpaa2-eth.h"
@@ -760,6 +761,39 @@ static void dpaa2_eth_enable_tx_tstamp(struct dpaa2_eth_priv *priv,
}
}
+static void *dpaa2_eth_sgt_get(struct dpaa2_eth_priv *priv)
+{
+ struct dpaa2_eth_sgt_cache *sgt_cache;
+ void *sgt_buf = NULL;
+ int sgt_buf_size;
+
+ sgt_cache = this_cpu_ptr(priv->sgt_cache);
+ sgt_buf_size = priv->tx_data_offset +
+ DPAA2_ETH_SG_ENTRIES_MAX * sizeof(struct dpaa2_sg_entry);
+
+ if (sgt_cache->count == 0)
+ sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN);
+ else
+ sgt_buf = sgt_cache->buf[--sgt_cache->count];
+ if (!sgt_buf)
+ return NULL;
+
+ memset(sgt_buf, 0, sgt_buf_size);
+
+ return sgt_buf;
+}
+
+static void dpaa2_eth_sgt_recycle(struct dpaa2_eth_priv *priv, void *sgt_buf)
+{
+ struct dpaa2_eth_sgt_cache *sgt_cache;
+
+ sgt_cache = this_cpu_ptr(priv->sgt_cache);
+ if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE)
+ skb_free_frag(sgt_buf);
+ else
+ sgt_cache->buf[sgt_cache->count++] = sgt_buf;
+}
+
/* Create a frame descriptor based on a fragmented skb */
static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv,
struct sk_buff *skb,
@@ -805,12 +839,11 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv,
/* Prepare the HW SGT structure */
sgt_buf_size = priv->tx_data_offset +
sizeof(struct dpaa2_sg_entry) * num_dma_bufs;
- sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN);
+ sgt_buf = dpaa2_eth_sgt_get(priv);
if (unlikely(!sgt_buf)) {
err = -ENOMEM;
goto sgt_buf_alloc_failed;
}
- memset(sgt_buf, 0, sgt_buf_size);
sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
@@ -846,6 +879,7 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv,
err = -ENOMEM;
goto dma_map_single_failed;
}
+ memset(fd, 0, sizeof(struct dpaa2_fd));
dpaa2_fd_set_offset(fd, priv->tx_data_offset);
dpaa2_fd_set_format(fd, dpaa2_fd_sg);
dpaa2_fd_set_addr(fd, addr);
@@ -855,7 +889,7 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv,
return 0;
dma_map_single_failed:
- skb_free_frag(sgt_buf);
+ dpaa2_eth_sgt_recycle(priv, sgt_buf);
sgt_buf_alloc_failed:
dma_unmap_sg(dev, scl, num_sg, DMA_BIDIRECTIONAL);
dma_map_sg_failed:
@@ -875,7 +909,6 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
void **swa_addr)
{
struct device *dev = priv->net_dev->dev.parent;
- struct dpaa2_eth_sgt_cache *sgt_cache;
struct dpaa2_sg_entry *sgt;
struct dpaa2_eth_swa *swa;
dma_addr_t addr, sgt_addr;
@@ -884,18 +917,10 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
int err;
/* Prepare the HW SGT structure */
- sgt_cache = this_cpu_ptr(priv->sgt_cache);
sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry);
-
- if (sgt_cache->count == 0)
- sgt_buf = kzalloc(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN,
- GFP_ATOMIC);
- else
- sgt_buf = sgt_cache->buf[--sgt_cache->count];
+ sgt_buf = dpaa2_eth_sgt_get(priv);
if (unlikely(!sgt_buf))
return -ENOMEM;
-
- sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN);
sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
addr = dma_map_single(dev, skb->data, skb->len, DMA_BIDIRECTIONAL);
@@ -923,6 +948,7 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
goto sgt_map_failed;
}
+ memset(fd, 0, sizeof(struct dpaa2_fd));
dpaa2_fd_set_offset(fd, priv->tx_data_offset);
dpaa2_fd_set_format(fd, dpaa2_fd_sg);
dpaa2_fd_set_addr(fd, sgt_addr);
@@ -934,10 +960,7 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
sgt_map_failed:
dma_unmap_single(dev, addr, skb->len, DMA_BIDIRECTIONAL);
data_map_failed:
- if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE)
- kfree(sgt_buf);
- else
- sgt_cache->buf[sgt_cache->count++] = sgt_buf;
+ dpaa2_eth_sgt_recycle(priv, sgt_buf);
return err;
}
@@ -978,6 +1001,7 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv,
if (unlikely(dma_mapping_error(dev, addr)))
return -ENOMEM;
+ memset(fd, 0, sizeof(struct dpaa2_fd));
dpaa2_fd_set_addr(fd, addr);
dpaa2_fd_set_offset(fd, (u16)(skb->data - buffer_start));
dpaa2_fd_set_len(fd, skb->len);
@@ -1005,9 +1029,9 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
struct dpaa2_eth_swa *swa;
u8 fd_format = dpaa2_fd_get_format(fd);
u32 fd_len = dpaa2_fd_get_len(fd);
-
- struct dpaa2_eth_sgt_cache *sgt_cache;
struct dpaa2_sg_entry *sgt;
+ int should_free_skb = 1;
+ int i;
fd_addr = dpaa2_fd_get_addr(fd);
buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr);
@@ -1039,6 +1063,28 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
/* Unmap the SGT buffer */
dma_unmap_single(dev, fd_addr, swa->sg.sgt_size,
DMA_BIDIRECTIONAL);
+ } else if (swa->type == DPAA2_ETH_SWA_SW_TSO) {
+ skb = swa->tso.skb;
+
+ sgt = (struct dpaa2_sg_entry *)(buffer_start +
+ priv->tx_data_offset);
+
+ /* Unmap and free the header */
+ dma_unmap_single(dev, dpaa2_sg_get_addr(sgt), TSO_HEADER_SIZE,
+ DMA_TO_DEVICE);
+ kfree(dpaa2_iova_to_virt(priv->iommu_domain, dpaa2_sg_get_addr(sgt)));
+
+ /* Unmap the other SG entries for the data */
+ for (i = 1; i < swa->tso.num_sg; i++)
+ dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]),
+ dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE);
+
+ /* Unmap the SGT buffer */
+ dma_unmap_single(dev, fd_addr, swa->sg.sgt_size,
+ DMA_BIDIRECTIONAL);
+
+ if (!swa->tso.is_last_fd)
+ should_free_skb = 0;
} else {
skb = swa->single.skb;
@@ -1067,55 +1113,195 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
}
/* Get the timestamp value */
- if (skb->cb[0] == TX_TSTAMP) {
- struct skb_shared_hwtstamps shhwtstamps;
- __le64 *ts = dpaa2_get_ts(buffer_start, true);
- u64 ns;
-
- memset(&shhwtstamps, 0, sizeof(shhwtstamps));
-
- ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts);
- shhwtstamps.hwtstamp = ns_to_ktime(ns);
- skb_tstamp_tx(skb, &shhwtstamps);
- } else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) {
- mutex_unlock(&priv->onestep_tstamp_lock);
+ if (swa->type != DPAA2_ETH_SWA_SW_TSO) {
+ if (skb->cb[0] == TX_TSTAMP) {
+ struct skb_shared_hwtstamps shhwtstamps;
+ __le64 *ts = dpaa2_get_ts(buffer_start, true);
+ u64 ns;
+
+ memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+
+ ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts);
+ shhwtstamps.hwtstamp = ns_to_ktime(ns);
+ skb_tstamp_tx(skb, &shhwtstamps);
+ } else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) {
+ mutex_unlock(&priv->onestep_tstamp_lock);
+ }
}
/* Free SGT buffer allocated on tx */
- if (fd_format != dpaa2_fd_single) {
- sgt_cache = this_cpu_ptr(priv->sgt_cache);
- if (swa->type == DPAA2_ETH_SWA_SG) {
- skb_free_frag(buffer_start);
- } else {
- if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE)
- kfree(buffer_start);
- else
- sgt_cache->buf[sgt_cache->count++] = buffer_start;
+ if (fd_format != dpaa2_fd_single)
+ dpaa2_eth_sgt_recycle(priv, buffer_start);
+
+ /* Move on with skb release. If we are just confirming multiple FDs
+ * from the same TSO skb then only the last one will need to free the
+ * skb.
+ */
+ if (should_free_skb)
+ napi_consume_skb(skb, in_napi);
+}
+
+static int dpaa2_eth_build_gso_fd(struct dpaa2_eth_priv *priv,
+ struct sk_buff *skb, struct dpaa2_fd *fd,
+ int *num_fds, u32 *total_fds_len)
+{
+ struct device *dev = priv->net_dev->dev.parent;
+ int hdr_len, total_len, data_left, fd_len;
+ int num_sge, err, i, sgt_buf_size;
+ struct dpaa2_fd *fd_start = fd;
+ struct dpaa2_sg_entry *sgt;
+ struct dpaa2_eth_swa *swa;
+ dma_addr_t sgt_addr, addr;
+ dma_addr_t tso_hdr_dma;
+ unsigned int index = 0;
+ struct tso_t tso;
+ char *tso_hdr;
+ void *sgt_buf;
+
+ /* Initialize the TSO handler, and prepare the first payload */
+ hdr_len = tso_start(skb, &tso);
+ *total_fds_len = 0;
+
+ total_len = skb->len - hdr_len;
+ while (total_len > 0) {
+ /* Prepare the HW SGT structure for this frame */
+ sgt_buf = dpaa2_eth_sgt_get(priv);
+ if (unlikely(!sgt_buf)) {
+ netdev_err(priv->net_dev, "dpaa2_eth_sgt_get() failed\n");
+ err = -ENOMEM;
+ goto err_sgt_get;
}
+ sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
+
+ /* Determine the data length of this frame */
+ data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+ total_len -= data_left;
+ fd_len = data_left + hdr_len;
+
+ /* Prepare packet headers: MAC + IP + TCP */
+ tso_hdr = kmalloc(TSO_HEADER_SIZE, GFP_ATOMIC);
+ if (!tso_hdr) {
+ err = -ENOMEM;
+ goto err_alloc_tso_hdr;
+ }
+
+ tso_build_hdr(skb, tso_hdr, &tso, data_left, total_len == 0);
+ tso_hdr_dma = dma_map_single(dev, tso_hdr, TSO_HEADER_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, tso_hdr_dma)) {
+ netdev_err(priv->net_dev, "dma_map_single(tso_hdr) failed\n");
+ err = -ENOMEM;
+ goto err_map_tso_hdr;
+ }
+
+ /* Setup the SG entry for the header */
+ dpaa2_sg_set_addr(sgt, tso_hdr_dma);
+ dpaa2_sg_set_len(sgt, hdr_len);
+ dpaa2_sg_set_final(sgt, data_left > 0 ? false : true);
+
+ /* Compose the SG entries for each fragment of data */
+ num_sge = 1;
+ while (data_left > 0) {
+ int size;
+
+ /* Move to the next SG entry */
+ sgt++;
+ size = min_t(int, tso.size, data_left);
+
+ addr = dma_map_single(dev, tso.data, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, addr)) {
+ netdev_err(priv->net_dev, "dma_map_single(tso.data) failed\n");
+ err = -ENOMEM;
+ goto err_map_data;
+ }
+ dpaa2_sg_set_addr(sgt, addr);
+ dpaa2_sg_set_len(sgt, size);
+ dpaa2_sg_set_final(sgt, size == data_left ? true : false);
+
+ num_sge++;
+
+ /* Build the data for the __next__ fragment */
+ data_left -= size;
+ tso_build_data(skb, &tso, size);
+ }
+
+ /* Store the skb backpointer in the SGT buffer */
+ sgt_buf_size = priv->tx_data_offset + num_sge * sizeof(struct dpaa2_sg_entry);
+ swa = (struct dpaa2_eth_swa *)sgt_buf;
+ swa->type = DPAA2_ETH_SWA_SW_TSO;
+ swa->tso.skb = skb;
+ swa->tso.num_sg = num_sge;
+ swa->tso.sgt_size = sgt_buf_size;
+ swa->tso.is_last_fd = total_len == 0 ? 1 : 0;
+
+ /* Separately map the SGT buffer */
+ sgt_addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL);
+ if (unlikely(dma_mapping_error(dev, sgt_addr))) {
+ netdev_err(priv->net_dev, "dma_map_single(sgt_buf) failed\n");
+ err = -ENOMEM;
+ goto err_map_sgt;
+ }
+
+ /* Setup the frame descriptor */
+ memset(fd, 0, sizeof(struct dpaa2_fd));
+ dpaa2_fd_set_offset(fd, priv->tx_data_offset);
+ dpaa2_fd_set_format(fd, dpaa2_fd_sg);
+ dpaa2_fd_set_addr(fd, sgt_addr);
+ dpaa2_fd_set_len(fd, fd_len);
+ dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);
+
+ *total_fds_len += fd_len;
+ /* Advance to the next frame descriptor */
+ fd++;
+ index++;
}
- /* Move on with skb release */
- napi_consume_skb(skb, in_napi);
+ *num_fds = index;
+
+ return 0;
+
+err_map_sgt:
+err_map_data:
+ /* Unmap all the data S/G entries for the current FD */
+ sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
+ for (i = 1; i < num_sge; i++)
+ dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]),
+ dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE);
+
+ /* Unmap the header entry */
+ dma_unmap_single(dev, tso_hdr_dma, TSO_HEADER_SIZE, DMA_TO_DEVICE);
+err_map_tso_hdr:
+ kfree(tso_hdr);
+err_alloc_tso_hdr:
+ dpaa2_eth_sgt_recycle(priv, sgt_buf);
+err_sgt_get:
+ /* Free all the other FDs that were already fully created */
+ for (i = 0; i < index; i++)
+ dpaa2_eth_free_tx_fd(priv, NULL, &fd_start[i], false);
+
+ return err;
}
static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
struct net_device *net_dev)
{
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
- struct dpaa2_fd fd;
- struct rtnl_link_stats64 *percpu_stats;
+ int total_enqueued = 0, retries = 0, enqueued;
struct dpaa2_eth_drv_stats *percpu_extras;
+ struct rtnl_link_stats64 *percpu_stats;
+ unsigned int needed_headroom;
+ int num_fds = 1, max_retries;
struct dpaa2_eth_fq *fq;
struct netdev_queue *nq;
+ struct dpaa2_fd *fd;
u16 queue_mapping;
- unsigned int needed_headroom;
- u32 fd_len;
+ void *swa = NULL;
u8 prio = 0;
int err, i;
- void *swa;
+ u32 fd_len;
percpu_stats = this_cpu_ptr(priv->percpu_stats);
percpu_extras = this_cpu_ptr(priv->percpu_extras);
+ fd = (this_cpu_ptr(priv->fd))->array;
needed_headroom = dpaa2_eth_needed_headroom(skb);
@@ -1130,20 +1316,28 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
}
/* Setup the FD fields */
- memset(&fd, 0, sizeof(fd));
- if (skb_is_nonlinear(skb)) {
- err = dpaa2_eth_build_sg_fd(priv, skb, &fd, &swa);
+ if (skb_is_gso(skb)) {
+ err = dpaa2_eth_build_gso_fd(priv, skb, fd, &num_fds, &fd_len);
+ percpu_extras->tx_sg_frames += num_fds;
+ percpu_extras->tx_sg_bytes += fd_len;
+ percpu_extras->tx_tso_frames += num_fds;
+ percpu_extras->tx_tso_bytes += fd_len;
+ } else if (skb_is_nonlinear(skb)) {
+ err = dpaa2_eth_build_sg_fd(priv, skb, fd, &swa);
percpu_extras->tx_sg_frames++;
percpu_extras->tx_sg_bytes += skb->len;
+ fd_len = dpaa2_fd_get_len(fd);
} else if (skb_headroom(skb) < needed_headroom) {
- err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, &fd, &swa);
+ err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, fd, &swa);
percpu_extras->tx_sg_frames++;
percpu_extras->tx_sg_bytes += skb->len;
percpu_extras->tx_converted_sg_frames++;
percpu_extras->tx_converted_sg_bytes += skb->len;
+ fd_len = dpaa2_fd_get_len(fd);
} else {
- err = dpaa2_eth_build_single_fd(priv, skb, &fd, &swa);
+ err = dpaa2_eth_build_single_fd(priv, skb, fd, &swa);
+ fd_len = dpaa2_fd_get_len(fd);
}
if (unlikely(err)) {
@@ -1151,11 +1345,12 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
goto err_build_fd;
}
- if (skb->cb[0])
- dpaa2_eth_enable_tx_tstamp(priv, &fd, swa, skb);
+ if (swa && skb->cb[0])
+ dpaa2_eth_enable_tx_tstamp(priv, fd, swa, skb);
/* Tracing point */
- trace_dpaa2_tx_fd(net_dev, &fd);
+ for (i = 0; i < num_fds; i++)
+ trace_dpaa2_tx_fd(net_dev, &fd[i]);
/* TxConf FQ selection relies on queue id from the stack.
* In case of a forwarded frame from another DPNI interface, we choose
@@ -1175,27 +1370,32 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
queue_mapping %= dpaa2_eth_queue_count(priv);
}
fq = &priv->fq[queue_mapping];
-
- fd_len = dpaa2_fd_get_len(&fd);
nq = netdev_get_tx_queue(net_dev, queue_mapping);
netdev_tx_sent_queue(nq, fd_len);
/* Everything that happens after this enqueues might race with
* the Tx confirmation callback for this frame
*/
- for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
- err = priv->enqueue(priv, fq, &fd, prio, 1, NULL);
- if (err != -EBUSY)
- break;
+ max_retries = num_fds * DPAA2_ETH_ENQUEUE_RETRIES;
+ while (total_enqueued < num_fds && retries < max_retries) {
+ err = priv->enqueue(priv, fq, &fd[total_enqueued],
+ prio, num_fds - total_enqueued, &enqueued);
+ if (err == -EBUSY) {
+ retries++;
+ continue;
+ }
+
+ total_enqueued += enqueued;
}
- percpu_extras->tx_portal_busy += i;
+ percpu_extras->tx_portal_busy += retries;
+
if (unlikely(err < 0)) {
percpu_stats->tx_errors++;
/* Clean up everything, including freeing the skb */
- dpaa2_eth_free_tx_fd(priv, fq, &fd, false);
+ dpaa2_eth_free_tx_fd(priv, fq, fd, false);
netdev_tx_completed_queue(nq, 1, fd_len);
} else {
- percpu_stats->tx_packets++;
+ percpu_stats->tx_packets += total_enqueued;
percpu_stats->tx_bytes += fd_len;
}
@@ -1523,7 +1723,7 @@ static void dpaa2_eth_sgt_cache_drain(struct dpaa2_eth_priv *priv)
count = sgt_cache->count;
for (i = 0; i < count; i++)
- kfree(sgt_cache->buf[i]);
+ skb_free_frag(sgt_cache->buf[i]);
sgt_cache->count = 0;
}
}
@@ -4115,7 +4315,8 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev)
net_dev->features = NETIF_F_RXCSUM |
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_SG | NETIF_F_HIGHDMA |
- NETIF_F_LLTX | NETIF_F_HW_TC;
+ NETIF_F_LLTX | NETIF_F_HW_TC | NETIF_F_TSO;
+ net_dev->gso_max_segs = DPAA2_ETH_ENQUEUE_MAX_FDS;
net_dev->hw_features = net_dev->features;
if (priv->dpni_attrs.vlan_filter_entries)
@@ -4397,6 +4598,13 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
goto err_alloc_sgt_cache;
}
+ priv->fd = alloc_percpu(*priv->fd);
+ if (!priv->fd) {
+ dev_err(dev, "alloc_percpu(fds) failed\n");
+ err = -ENOMEM;
+ goto err_alloc_fds;
+ }
+
err = dpaa2_eth_netdev_init(net_dev);
if (err)
goto err_netdev_init;
@@ -4484,6 +4692,8 @@ err_poll_thread:
err_alloc_rings:
err_csum:
err_netdev_init:
+ free_percpu(priv->fd);
+err_alloc_fds:
free_percpu(priv->sgt_cache);
err_alloc_sgt_cache:
free_percpu(priv->percpu_extras);
@@ -4539,6 +4749,7 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
fsl_mc_free_irqs(ls_dev);
dpaa2_eth_free_rings(priv);
+ free_percpu(priv->fd);
free_percpu(priv->sgt_cache);
free_percpu(priv->percpu_stats);
free_percpu(priv->percpu_extras);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index e54e70ebdd05..b79831cd1a94 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -122,6 +122,7 @@ enum dpaa2_eth_swa_type {
DPAA2_ETH_SWA_SINGLE,
DPAA2_ETH_SWA_SG,
DPAA2_ETH_SWA_XDP,
+ DPAA2_ETH_SWA_SW_TSO,
};
/* Must keep this struct smaller than DPAA2_ETH_SWA_SIZE */
@@ -142,6 +143,12 @@ struct dpaa2_eth_swa {
int dma_size;
struct xdp_frame *xdpf;
} xdp;
+ struct {
+ struct sk_buff *skb;
+ int num_sg;
+ int sgt_size;
+ int is_last_fd;
+ } tso;
};
};
@@ -354,6 +361,8 @@ struct dpaa2_eth_drv_stats {
__u64 tx_conf_bytes;
__u64 tx_sg_frames;
__u64 tx_sg_bytes;
+ __u64 tx_tso_frames;
+ __u64 tx_tso_bytes;
__u64 rx_sg_frames;
__u64 rx_sg_bytes;
/* Linear skbs sent as a S/G FD due to insufficient headroom */
@@ -493,8 +502,15 @@ struct dpaa2_eth_trap_data {
struct dpaa2_eth_priv *priv;
};
+#define DPAA2_ETH_SG_ENTRIES_MAX (PAGE_SIZE / sizeof(struct scatterlist))
+
#define DPAA2_ETH_DEFAULT_COPYBREAK 512
+#define DPAA2_ETH_ENQUEUE_MAX_FDS 200
+struct dpaa2_eth_fds {
+ struct dpaa2_fd array[DPAA2_ETH_ENQUEUE_MAX_FDS];
+};
+
/* Driver private data */
struct dpaa2_eth_priv {
struct net_device *net_dev;
@@ -577,6 +593,8 @@ struct dpaa2_eth_priv {
struct devlink_port devlink_port;
u32 rx_copybreak;
+
+ struct dpaa2_eth_fds __percpu *fd;
};
struct dpaa2_eth_devlink_priv {
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 3fdbf87dccb1..eea7d7a07c00 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -44,6 +44,8 @@ static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = {
"[drv] tx conf bytes",
"[drv] tx sg frames",
"[drv] tx sg bytes",
+ "[drv] tx tso frames",
+ "[drv] tx tso bytes",
"[drv] rx sg frames",
"[drv] rx sg bytes",
"[drv] tx converted sg frames",
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
index 623d113b6581..521f036d1c00 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
@@ -100,6 +100,14 @@ static int dpaa2_mac_get_if_mode(struct fwnode_handle *dpmac_node,
return err;
}
+static struct phylink_pcs *dpaa2_mac_select_pcs(struct phylink_config *config,
+ phy_interface_t interface)
+{
+ struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config);
+
+ return mac->pcs;
+}
+
static void dpaa2_mac_config(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state)
{
@@ -172,6 +180,7 @@ static void dpaa2_mac_link_down(struct phylink_config *config,
static const struct phylink_mac_ops dpaa2_mac_phylink_ops = {
.validate = phylink_generic_validate,
+ .mac_select_pcs = dpaa2_mac_select_pcs,
.mac_config = dpaa2_mac_config,
.mac_link_up = dpaa2_mac_link_up,
.mac_link_down = dpaa2_mac_link_down,
@@ -303,9 +312,6 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
}
mac->phylink = phylink;
- if (mac->pcs)
- phylink_set_pcs(mac->phylink, mac->pcs);
-
err = phylink_fwnode_phy_connect(mac->phylink, dpmac_node, 0);
if (err) {
netdev_err(net_dev, "phylink_fwnode_phy_connect() = %d\n", err);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index fb39e406b7fc..68d806dc3701 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -18,6 +18,8 @@
#define ENETC_MAX_MTU (ENETC_MAC_MAXFRM_SIZE - \
(ETH_FCS_LEN + ETH_HLEN + VLAN_HLEN))
+#define ENETC_CBD_DATA_MEM_ALIGN 64
+
struct enetc_tx_swbd {
union {
struct sk_buff *skb;
@@ -415,6 +417,42 @@ int enetc_get_rss_table(struct enetc_si *si, u32 *table, int count);
int enetc_set_rss_table(struct enetc_si *si, const u32 *table, int count);
int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd);
+static inline void *enetc_cbd_alloc_data_mem(struct enetc_si *si,
+ struct enetc_cbd *cbd,
+ int size, dma_addr_t *dma,
+ void **data_align)
+{
+ struct enetc_cbdr *ring = &si->cbd_ring;
+ dma_addr_t dma_align;
+ void *data;
+
+ data = dma_alloc_coherent(ring->dma_dev,
+ size + ENETC_CBD_DATA_MEM_ALIGN,
+ dma, GFP_KERNEL);
+ if (!data) {
+ dev_err(ring->dma_dev, "CBD alloc data memory failed!\n");
+ return NULL;
+ }
+
+ dma_align = ALIGN(*dma, ENETC_CBD_DATA_MEM_ALIGN);
+ *data_align = PTR_ALIGN(data, ENETC_CBD_DATA_MEM_ALIGN);
+
+ cbd->addr[0] = cpu_to_le32(lower_32_bits(dma_align));
+ cbd->addr[1] = cpu_to_le32(upper_32_bits(dma_align));
+ cbd->length = cpu_to_le16(size);
+
+ return data;
+}
+
+static inline void enetc_cbd_free_data_mem(struct enetc_si *si, int size,
+ void *data, dma_addr_t *dma)
+{
+ struct enetc_cbdr *ring = &si->cbd_ring;
+
+ dma_free_coherent(ring->dma_dev, size + ENETC_CBD_DATA_MEM_ALIGN,
+ data, *dma);
+}
+
#ifdef CONFIG_FSL_ENETC_QOS
int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data);
void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
index 073e56dcca4e..af68dc46a795 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
@@ -166,70 +166,55 @@ int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
return enetc_send_cmd(si, &cbd);
}
-#define RFSE_ALIGN 64
/* Set entry in RFS table */
int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse,
int index)
{
struct enetc_cbdr *ring = &si->cbd_ring;
struct enetc_cbd cbd = {.cmd = 0};
- dma_addr_t dma, dma_align;
void *tmp, *tmp_align;
+ dma_addr_t dma;
int err;
/* fill up the "set" descriptor */
cbd.cmd = 0;
cbd.cls = 4;
cbd.index = cpu_to_le16(index);
- cbd.length = cpu_to_le16(sizeof(*rfse));
cbd.opt[3] = cpu_to_le32(0); /* SI */
- tmp = dma_alloc_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN,
- &dma, GFP_KERNEL);
- if (!tmp) {
- dev_err(ring->dma_dev, "DMA mapping of RFS entry failed!\n");
+ tmp = enetc_cbd_alloc_data_mem(si, &cbd, sizeof(*rfse),
+ &dma, &tmp_align);
+ if (!tmp)
return -ENOMEM;
- }
- dma_align = ALIGN(dma, RFSE_ALIGN);
- tmp_align = PTR_ALIGN(tmp, RFSE_ALIGN);
memcpy(tmp_align, rfse, sizeof(*rfse));
- cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align));
- cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align));
-
err = enetc_send_cmd(si, &cbd);
if (err)
dev_err(ring->dma_dev, "FS entry add failed (%d)!", err);
- dma_free_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN,
- tmp, dma);
+ enetc_cbd_free_data_mem(si, sizeof(*rfse), tmp, &dma);
return err;
}
-#define RSSE_ALIGN 64
static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count,
bool read)
{
struct enetc_cbdr *ring = &si->cbd_ring;
struct enetc_cbd cbd = {.cmd = 0};
- dma_addr_t dma, dma_align;
u8 *tmp, *tmp_align;
+ dma_addr_t dma;
int err, i;
- if (count < RSSE_ALIGN)
+ if (count < ENETC_CBD_DATA_MEM_ALIGN)
/* HW only takes in a full 64 entry table */
return -EINVAL;
- tmp = dma_alloc_coherent(ring->dma_dev, count + RSSE_ALIGN,
- &dma, GFP_KERNEL);
- if (!tmp) {
- dev_err(ring->dma_dev, "DMA mapping of RSS table failed!\n");
+ tmp = enetc_cbd_alloc_data_mem(si, &cbd, count,
+ &dma, (void *)&tmp_align);
+ if (!tmp)
return -ENOMEM;
- }
- dma_align = ALIGN(dma, RSSE_ALIGN);
- tmp_align = PTR_ALIGN(tmp, RSSE_ALIGN);
if (!read)
for (i = 0; i < count; i++)
@@ -238,10 +223,6 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count,
/* fill up the descriptor */
cbd.cmd = read ? 2 : 1;
cbd.cls = 3;
- cbd.length = cpu_to_le16(count);
-
- cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align));
- cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align));
err = enetc_send_cmd(si, &cbd);
if (err)
@@ -251,7 +232,7 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count,
for (i = 0; i < count; i++)
table[i] = tmp_align[i];
- dma_free_coherent(ring->dma_dev, count + RSSE_ALIGN, tmp, dma);
+ enetc_cbd_free_data_mem(si, count, tmp, &dma);
return err;
}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index ed16a5ac9ad0..a0c75c717073 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -934,18 +934,21 @@ static void enetc_mdiobus_destroy(struct enetc_pf *pf)
enetc_imdio_remove(pf);
}
+static struct phylink_pcs *
+enetc_pl_mac_select_pcs(struct phylink_config *config, phy_interface_t iface)
+{
+ struct enetc_pf *pf = phylink_to_enetc_pf(config);
+
+ return pf->pcs;
+}
+
static void enetc_pl_mac_config(struct phylink_config *config,
unsigned int mode,
const struct phylink_link_state *state)
{
struct enetc_pf *pf = phylink_to_enetc_pf(config);
- struct enetc_ndev_priv *priv;
enetc_mac_config(&pf->si->hw, state->interface);
-
- priv = netdev_priv(pf->si->ndev);
- if (pf->pcs)
- phylink_set_pcs(priv->phylink, pf->pcs);
}
static void enetc_force_rgmii_mac(struct enetc_hw *hw, int speed, int duplex)
@@ -1062,6 +1065,7 @@ static void enetc_pl_mac_link_down(struct phylink_config *config,
static const struct phylink_mac_ops enetc_mac_phylink_ops = {
.validate = phylink_generic_validate,
+ .mac_select_pcs = enetc_pl_mac_select_pcs,
.mac_config = enetc_pl_mac_config,
.mac_link_up = enetc_pl_mac_link_up,
.mac_link_down = enetc_pl_mac_link_down,
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
index 3555c12edb45..5a3eea1a718b 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
@@ -52,10 +52,11 @@ static int enetc_setup_taprio(struct net_device *ndev,
struct enetc_cbd cbd = {.cmd = 0};
struct tgs_gcl_conf *gcl_config;
struct tgs_gcl_data *gcl_data;
- struct gce *gce;
dma_addr_t dma;
+ struct gce *gce;
u16 data_size;
u16 gcl_len;
+ void *tmp;
u32 tge;
int err;
int i;
@@ -82,8 +83,9 @@ static int enetc_setup_taprio(struct net_device *ndev,
gcl_config = &cbd.gcl_conf;
data_size = struct_size(gcl_data, entry, gcl_len);
- gcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL);
- if (!gcl_data)
+ tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size,
+ &dma, (void *)&gcl_data);
+ if (!tmp)
return -ENOMEM;
gce = (struct gce *)(gcl_data + 1);
@@ -107,19 +109,8 @@ static int enetc_setup_taprio(struct net_device *ndev,
temp_gce->period = cpu_to_le32(temp_entry->interval);
}
- cbd.length = cpu_to_le16(data_size);
cbd.status_flags = 0;
- dma = dma_map_single(&priv->si->pdev->dev, gcl_data,
- data_size, DMA_TO_DEVICE);
- if (dma_mapping_error(&priv->si->pdev->dev, dma)) {
- netdev_err(priv->si->ndev, "DMA mapping failed!\n");
- kfree(gcl_data);
- return -ENOMEM;
- }
-
- cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
- cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
cbd.cls = BDCR_CMD_PORT_GCL;
cbd.status_flags = 0;
@@ -132,8 +123,7 @@ static int enetc_setup_taprio(struct net_device *ndev,
ENETC_QBV_PTGCR_OFFSET,
tge & (~ENETC_QBV_TGE));
- dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_TO_DEVICE);
- kfree(gcl_data);
+ enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma);
return err;
}
@@ -450,6 +440,7 @@ static struct actions_fwd enetc_act_fwd[] = {
};
static struct enetc_psfp epsfp = {
+ .dev_bitmap = 0,
.psfp_sfi_bitmap = NULL,
};
@@ -463,8 +454,9 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
struct enetc_cbd cbd = {.cmd = 0};
struct streamid_data *si_data;
struct streamid_conf *si_conf;
- u16 data_size;
dma_addr_t dma;
+ u16 data_size;
+ void *tmp;
int port;
int err;
@@ -485,21 +477,11 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
cbd.status_flags = 0;
data_size = sizeof(struct streamid_data);
- si_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL);
- if (!si_data)
+ tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size,
+ &dma, (void *)&si_data);
+ if (!tmp)
return -ENOMEM;
- cbd.length = cpu_to_le16(data_size);
- dma = dma_map_single(&priv->si->pdev->dev, si_data,
- data_size, DMA_FROM_DEVICE);
- if (dma_mapping_error(&priv->si->pdev->dev, dma)) {
- netdev_err(priv->si->ndev, "DMA mapping failed!\n");
- err = -ENOMEM;
- goto out;
- }
-
- cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
- cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
eth_broadcast_addr(si_data->dmac);
si_data->vid_vidm_tg = (ENETC_CBDR_SID_VID_MASK
+ ((0x3 << 14) | ENETC_CBDR_SID_VIDM));
@@ -520,11 +502,6 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
goto out;
/* Enable the entry overwrite again incase space flushed by hardware */
- memset(&cbd, 0, sizeof(cbd));
-
- cbd.index = cpu_to_le16((u16)sid->index);
- cbd.cmd = 0;
- cbd.cls = BDCR_CMD_STREAM_IDENTIFY;
cbd.status_flags = 0;
si_conf->en = 0x80;
@@ -537,11 +514,6 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
memset(si_data, 0, data_size);
- cbd.length = cpu_to_le16(data_size);
-
- cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
- cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
-
/* VIDM default to be 1.
* VID Match. If set (b1) then the VID must match, otherwise
* any VID is considered a match. VIDM setting is only used
@@ -561,10 +533,7 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
err = enetc_send_cmd(priv->si, &cbd);
out:
- if (!dma_mapping_error(&priv->si->pdev->dev, dma))
- dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_FROM_DEVICE);
-
- kfree(si_data);
+ enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma);
return err;
}
@@ -635,6 +604,7 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv,
struct sfi_counter_data *data_buf;
dma_addr_t dma;
u16 data_size;
+ void *tmp;
int err;
cbd.index = cpu_to_le16((u16)index);
@@ -643,21 +613,11 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv,
cbd.status_flags = 0;
data_size = sizeof(struct sfi_counter_data);
- data_buf = kzalloc(data_size, __GFP_DMA | GFP_KERNEL);
- if (!data_buf)
- return -ENOMEM;
-
- dma = dma_map_single(&priv->si->pdev->dev, data_buf,
- data_size, DMA_FROM_DEVICE);
- if (dma_mapping_error(&priv->si->pdev->dev, dma)) {
- netdev_err(priv->si->ndev, "DMA mapping failed!\n");
- err = -ENOMEM;
- goto exit;
- }
- cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
- cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
- cbd.length = cpu_to_le16(data_size);
+ tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size,
+ &dma, (void *)&data_buf);
+ if (!tmp)
+ return -ENOMEM;
err = enetc_send_cmd(priv->si, &cbd);
if (err)
@@ -684,7 +644,8 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv,
data_buf->flow_meter_dropl;
exit:
- kfree(data_buf);
+ enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma);
+
return err;
}
@@ -726,6 +687,7 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv,
dma_addr_t dma;
u16 data_size;
int err, i;
+ void *tmp;
u64 now;
cbd.index = cpu_to_le16(sgi->index);
@@ -772,24 +734,10 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv,
sgcl_config->acl_len = (sgi->num_entries - 1) & 0x3;
data_size = struct_size(sgcl_data, sgcl, sgi->num_entries);
-
- sgcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL);
- if (!sgcl_data)
- return -ENOMEM;
-
- cbd.length = cpu_to_le16(data_size);
-
- dma = dma_map_single(&priv->si->pdev->dev,
- sgcl_data, data_size,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(&priv->si->pdev->dev, dma)) {
- netdev_err(priv->si->ndev, "DMA mapping failed!\n");
- kfree(sgcl_data);
+ tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size,
+ &dma, (void *)&sgcl_data);
+ if (!tmp)
return -ENOMEM;
- }
-
- cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
- cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
sgce = &sgcl_data->sgcl[0];
@@ -844,8 +792,7 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv,
err = enetc_send_cmd(priv->si, &cbd);
exit:
- kfree(sgcl_data);
-
+ enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma);
return err;
}
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 796133de527e..11227f51404c 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2797,7 +2797,7 @@ static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable)
int ret = 0;
if (enable) {
- ret = phy_init_eee(ndev->phydev, 0);
+ ret = phy_init_eee(ndev->phydev, false);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index af99017a5453..7d49c28215f3 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -101,7 +101,6 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable)
u32 val, tempval;
struct timespec64 ts;
u64 ns;
- val = 0;
if (fep->pps_enable == enable)
return 0;
diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index 266e562bd67a..ef8058a17188 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -14,6 +14,7 @@
#include <linux/acpi.h>
#include <linux/acpi_mdio.h>
+#include <linux/clk.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/mdio.h>
@@ -36,9 +37,10 @@ struct tgec_mdio_controller {
} __packed;
#define MDIO_STAT_ENC BIT(6)
-#define MDIO_STAT_CLKDIV(x) (((x>>1) & 0xff) << 8)
+#define MDIO_STAT_CLKDIV(x) (((x) & 0x1ff) << 7)
#define MDIO_STAT_BSY BIT(0)
#define MDIO_STAT_RD_ER BIT(1)
+#define MDIO_STAT_PRE_DIS BIT(5)
#define MDIO_CTL_DEV_ADDR(x) (x & 0x1f)
#define MDIO_CTL_PORT_ADDR(x) ((x & 0x1f) << 5)
#define MDIO_CTL_PRE_DIS BIT(10)
@@ -50,6 +52,8 @@ struct tgec_mdio_controller {
struct mdio_fsl_priv {
struct tgec_mdio_controller __iomem *mdio_base;
+ struct clk *enet_clk;
+ u32 mdc_freq;
bool is_little_endian;
bool has_a009885;
bool has_a011043;
@@ -254,6 +258,50 @@ irq_restore:
return ret;
}
+static int xgmac_mdio_set_mdc_freq(struct mii_bus *bus)
+{
+ struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+ struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+ struct device *dev = bus->parent;
+ u32 mdio_stat, div;
+
+ if (device_property_read_u32(dev, "clock-frequency", &priv->mdc_freq))
+ return 0;
+
+ priv->enet_clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(priv->enet_clk)) {
+ dev_err(dev, "Input clock unknown, not changing MDC frequency");
+ return PTR_ERR(priv->enet_clk);
+ }
+
+ div = ((clk_get_rate(priv->enet_clk) / priv->mdc_freq) - 1) / 2;
+ if (div < 5 || div > 0x1ff) {
+ dev_err(dev, "Requested MDC frequency is out of range, ignoring");
+ return -EINVAL;
+ }
+
+ mdio_stat = xgmac_read32(&regs->mdio_stat, priv->is_little_endian);
+ mdio_stat &= ~MDIO_STAT_CLKDIV(0x1ff);
+ mdio_stat |= MDIO_STAT_CLKDIV(div);
+ xgmac_write32(mdio_stat, &regs->mdio_stat, priv->is_little_endian);
+ return 0;
+}
+
+static void xgmac_mdio_set_suppress_preamble(struct mii_bus *bus)
+{
+ struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+ struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+ struct device *dev = bus->parent;
+ u32 mdio_stat;
+
+ if (!device_property_read_bool(dev, "suppress-preamble"))
+ return;
+
+ mdio_stat = xgmac_read32(&regs->mdio_stat, priv->is_little_endian);
+ mdio_stat |= MDIO_STAT_PRE_DIS;
+ xgmac_write32(mdio_stat, &regs->mdio_stat, priv->is_little_endian);
+}
+
static int xgmac_mdio_probe(struct platform_device *pdev)
{
struct fwnode_handle *fwnode;
@@ -273,7 +321,7 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
return -EINVAL;
}
- bus = mdiobus_alloc_size(sizeof(struct mdio_fsl_priv));
+ bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(struct mdio_fsl_priv));
if (!bus)
return -ENOMEM;
@@ -284,13 +332,11 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
bus->probe_capabilities = MDIOBUS_C22_C45;
snprintf(bus->id, MII_BUS_ID_SIZE, "%pa", &res->start);
- /* Set the PHY base address */
priv = bus->priv;
- priv->mdio_base = ioremap(res->start, resource_size(res));
- if (!priv->mdio_base) {
- ret = -ENOMEM;
- goto err_ioremap;
- }
+ priv->mdio_base = devm_ioremap(&pdev->dev, res->start,
+ resource_size(res));
+ if (!priv->mdio_base)
+ return -ENOMEM;
/* For both ACPI and DT cases, endianness of MDIO controller
* needs to be specified using "little-endian" property.
@@ -303,6 +349,12 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
priv->has_a011043 = device_property_read_bool(&pdev->dev,
"fsl,erratum-a011043");
+ xgmac_mdio_set_suppress_preamble(bus);
+
+ ret = xgmac_mdio_set_mdc_freq(bus);
+ if (ret)
+ return ret;
+
fwnode = pdev->dev.fwnode;
if (is_of_node(fwnode))
ret = of_mdiobus_register(bus, to_of_node(fwnode));
@@ -312,32 +364,12 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
ret = -EINVAL;
if (ret) {
dev_err(&pdev->dev, "cannot register MDIO bus\n");
- goto err_registration;
+ return ret;
}
platform_set_drvdata(pdev, bus);
return 0;
-
-err_registration:
- iounmap(priv->mdio_base);
-
-err_ioremap:
- mdiobus_free(bus);
-
- return ret;
-}
-
-static int xgmac_mdio_remove(struct platform_device *pdev)
-{
- struct mii_bus *bus = platform_get_drvdata(pdev);
- struct mdio_fsl_priv *priv = bus->priv;
-
- mdiobus_unregister(bus);
- iounmap(priv->mdio_base);
- mdiobus_free(bus);
-
- return 0;
}
static const struct of_device_id xgmac_mdio_match[] = {
@@ -364,7 +396,6 @@ static struct platform_driver xgmac_mdio_driver = {
.acpi_match_table = xgmac_acpi_match,
},
.probe = xgmac_mdio_probe,
- .remove = xgmac_mdio_remove,
};
module_platform_driver(xgmac_mdio_driver);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 9298fbecb31a..6f18c9a03231 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -167,6 +167,7 @@ struct hnae3_handle;
struct hnae3_queue {
void __iomem *io_base;
+ void __iomem *mem_base;
struct hnae3_ae_algo *ae_algo;
struct hnae3_handle *handle;
int tqp_index; /* index in a handle */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index babc5d7a3b52..0b8a73c40b12 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2028,9 +2028,73 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
return bd_num;
}
+static void hns3_tx_push_bd(struct hns3_enet_ring *ring, int num)
+{
+#define HNS3_BYTES_PER_64BIT 8
+
+ struct hns3_desc desc[HNS3_MAX_PUSH_BD_NUM] = {};
+ int offset = 0;
+
+ /* make sure everything is visible to device before
+ * excuting tx push or updating doorbell
+ */
+ dma_wmb();
+
+ do {
+ int idx = (ring->next_to_use - num + ring->desc_num) %
+ ring->desc_num;
+
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.tx_push++;
+ u64_stats_update_end(&ring->syncp);
+ memcpy(&desc[offset], &ring->desc[idx],
+ sizeof(struct hns3_desc));
+ offset++;
+ } while (--num);
+
+ __iowrite64_copy(ring->tqp->mem_base, desc,
+ (sizeof(struct hns3_desc) * HNS3_MAX_PUSH_BD_NUM) /
+ HNS3_BYTES_PER_64BIT);
+
+ io_stop_wc();
+}
+
+static void hns3_tx_mem_doorbell(struct hns3_enet_ring *ring)
+{
+#define HNS3_MEM_DOORBELL_OFFSET 64
+
+ __le64 bd_num = cpu_to_le64((u64)ring->pending_buf);
+
+ /* make sure everything is visible to device before
+ * excuting tx push or updating doorbell
+ */
+ dma_wmb();
+
+ __iowrite64_copy(ring->tqp->mem_base + HNS3_MEM_DOORBELL_OFFSET,
+ &bd_num, 1);
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.tx_mem_doorbell += ring->pending_buf;
+ u64_stats_update_end(&ring->syncp);
+
+ io_stop_wc();
+}
+
static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
bool doorbell)
{
+ struct net_device *netdev = ring_to_netdev(ring);
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+
+ /* when tx push is enabled, the packet whose number of BD below
+ * HNS3_MAX_PUSH_BD_NUM can be pushed directly.
+ */
+ if (test_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state) && num &&
+ !ring->pending_buf && num <= HNS3_MAX_PUSH_BD_NUM && doorbell) {
+ hns3_tx_push_bd(ring, num);
+ WRITE_ONCE(ring->last_to_use, ring->next_to_use);
+ return;
+ }
+
ring->pending_buf += num;
if (!doorbell) {
@@ -2038,11 +2102,12 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
return;
}
- if (!ring->pending_buf)
- return;
+ if (ring->tqp->mem_base)
+ hns3_tx_mem_doorbell(ring);
+ else
+ writel(ring->pending_buf,
+ ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
- writel(ring->pending_buf,
- ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
ring->pending_buf = 0;
WRITE_ONCE(ring->last_to_use, ring->next_to_use);
}
@@ -2732,6 +2797,9 @@ static void hns3_dump_queue_stats(struct net_device *ndev,
"seg_pkt_cnt: %llu, tx_more: %llu, restart_queue: %llu, tx_busy: %llu\n",
tx_ring->stats.seg_pkt_cnt, tx_ring->stats.tx_more,
tx_ring->stats.restart_queue, tx_ring->stats.tx_busy);
+
+ netdev_info(ndev, "tx_push: %llu, tx_mem_doorbell: %llu\n",
+ tx_ring->stats.tx_push, tx_ring->stats.tx_mem_doorbell);
}
static void hns3_dump_queue_reg(struct net_device *ndev,
@@ -5094,6 +5162,9 @@ static void hns3_state_init(struct hnae3_handle *handle)
set_bit(HNS3_NIC_STATE_INITED, &priv->state);
+ if (test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, ae_dev->caps))
+ set_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state);
+
if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->supported_pflags);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index a05a0c7423ce..4a3253692dcc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -7,6 +7,7 @@
#include <linux/dim.h>
#include <linux/if_vlan.h>
#include <net/page_pool.h>
+#include <asm/barrier.h>
#include "hnae3.h"
@@ -25,9 +26,12 @@ enum hns3_nic_state {
HNS3_NIC_STATE2_RESET_REQUESTED,
HNS3_NIC_STATE_HW_TX_CSUM_ENABLE,
HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE,
+ HNS3_NIC_STATE_TX_PUSH_ENABLE,
HNS3_NIC_STATE_MAX
};
+#define HNS3_MAX_PUSH_BD_NUM 2
+
#define HNS3_RING_RX_RING_BASEADDR_L_REG 0x00000
#define HNS3_RING_RX_RING_BASEADDR_H_REG 0x00004
#define HNS3_RING_RX_RING_BD_NUM_REG 0x00008
@@ -410,6 +414,8 @@ struct ring_stats {
u64 tx_pkts;
u64 tx_bytes;
u64 tx_more;
+ u64 tx_push;
+ u64 tx_mem_doorbell;
u64 restart_queue;
u64 tx_busy;
u64 tx_copy;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index c06c39ece80d..6469238ae090 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -23,6 +23,8 @@ static const struct hns3_stats hns3_txq_stats[] = {
HNS3_TQP_STAT("packets", tx_pkts),
HNS3_TQP_STAT("bytes", tx_bytes),
HNS3_TQP_STAT("more", tx_more),
+ HNS3_TQP_STAT("push", tx_push),
+ HNS3_TQP_STAT("mem_doorbell", tx_mem_doorbell),
HNS3_TQP_STAT("wake", restart_queue),
HNS3_TQP_STAT("busy", tx_busy),
HNS3_TQP_STAT("copy", tx_copy),
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 24f7afacae02..78d0498bdabc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1643,6 +1643,7 @@ static int hclge_config_gro(struct hclge_dev *hdev)
static int hclge_alloc_tqps(struct hclge_dev *hdev)
{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
struct hclge_comm_tqp *tqp;
int i;
@@ -1676,6 +1677,14 @@ static int hclge_alloc_tqps(struct hclge_dev *hdev)
(i - HCLGE_TQP_MAX_SIZE_DEV_V2) *
HCLGE_TQP_REG_SIZE;
+ /* when device supports tx push and has device memory,
+ * the queue can execute push mode or doorbell mode on
+ * device memory.
+ */
+ if (test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, ae_dev->caps))
+ tqp->q.mem_base = hdev->hw.hw.mem_base +
+ HCLGE_TQP_MEM_OFFSET(hdev, i);
+
tqp++;
}
@@ -11008,8 +11017,6 @@ static void hclge_uninit_client_instance(struct hnae3_client *client,
static int hclge_dev_mem_map(struct hclge_dev *hdev)
{
-#define HCLGE_MEM_BAR 4
-
struct pci_dev *pdev = hdev->pdev;
struct hclge_hw *hw = &hdev->hw;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index adfb26e79262..f7f5a4b09068 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -169,6 +169,14 @@ enum HLCGE_PORT_TYPE {
#define HCLGE_VECTOR0_ALL_MSIX_ERR_B 6U
#define HCLGE_TRIGGER_IMP_RESET_B 7U
+#define HCLGE_TQP_MEM_SIZE 0x10000
+#define HCLGE_MEM_BAR 4
+/* in the bar4, the first half is for roce, and the second half is for nic */
+#define HCLGE_NIC_MEM_OFFSET(hdev) \
+ (pci_resource_len((hdev)->pdev, HCLGE_MEM_BAR) >> 1)
+#define HCLGE_TQP_MEM_OFFSET(hdev, i) \
+ (HCLGE_NIC_MEM_OFFSET(hdev) + HCLGE_TQP_MEM_SIZE * (i))
+
#define HCLGE_MAC_DEFAULT_FRAME \
(ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN + ETH_DATA_LEN)
#define HCLGE_MAC_MIN_FRAME 64
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 21442a9bb996..93389bec8d89 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -321,6 +321,7 @@ static int hclgevf_get_pf_media_type(struct hclgevf_dev *hdev)
static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev)
{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
struct hclge_comm_tqp *tqp;
int i;
@@ -354,6 +355,14 @@ static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev)
(i - HCLGEVF_TQP_MAX_SIZE_DEV_V2) *
HCLGEVF_TQP_REG_SIZE;
+ /* when device supports tx push and has device memory,
+ * the queue can execute push mode or doorbell mode on
+ * device memory.
+ */
+ if (test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, ae_dev->caps))
+ tqp->q.mem_base = hdev->hw.hw.mem_base +
+ HCLGEVF_TQP_MEM_OFFSET(hdev, i);
+
tqp++;
}
@@ -2546,8 +2555,6 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client,
static int hclgevf_dev_mem_map(struct hclgevf_dev *hdev)
{
-#define HCLGEVF_MEM_BAR 4
-
struct pci_dev *pdev = hdev->pdev;
struct hclgevf_hw *hw = &hdev->hw;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 502ca1ce1a90..4b00fd44f118 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -96,6 +96,14 @@
#define HCLGEVF_RSS_IND_TBL_SIZE 512
+#define HCLGEVF_TQP_MEM_SIZE 0x10000
+#define HCLGEVF_MEM_BAR 4
+/* in the bar4, the first half is for roce, and the second half is for nic */
+#define HCLGEVF_NIC_MEM_OFFSET(hdev) \
+ (pci_resource_len((hdev)->pdev, HCLGEVF_MEM_BAR) >> 1)
+#define HCLGEVF_TQP_MEM_OFFSET(hdev, i) \
+ (HCLGEVF_NIC_MEM_OFFSET(hdev) + HCLGEVF_TQP_MEM_SIZE * (i))
+
#define HCLGEVF_MAC_MAX_FRAME 9728
#define HCLGEVF_STATS_TIMER_INTERVAL 36U
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index a42aeb555f34..6fb3437f68e0 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -7388,9 +7388,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
resource_size_t flash_start, flash_len;
static int cards_found;
u16 aspm_disable_flag = 0;
- int bars, i, err, pci_using_dac;
u16 eeprom_data = 0;
u16 eeprom_apme_mask = E1000_EEPROM_APME;
+ int bars, i, err;
s32 ret_val = 0;
if (ei->flags2 & FLAG2_DISABLE_ASPM_L0S)
@@ -7404,17 +7404,11 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
- pci_using_dac = 0;
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (!err) {
- pci_using_dac = 1;
- } else {
- err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev,
- "No usable DMA configuration, aborting\n");
- goto err_dma;
- }
+ if (err) {
+ dev_err(&pdev->dev,
+ "No usable DMA configuration, aborting\n");
+ goto err_dma;
}
bars = pci_select_bars(pdev, IORESOURCE_MEM);
@@ -7550,10 +7544,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->priv_flags |= IFF_UNICAST_FLT;
- if (pci_using_dac) {
- netdev->features |= NETIF_F_HIGHDMA;
- netdev->vlan_features |= NETIF_F_HIGHDMA;
- }
+ netdev->features |= NETIF_F_HIGHDMA;
+ netdev->vlan_features |= NETIF_F_HIGHDMA;
/* MTU range: 68 - max_hw_frame_size */
netdev->min_mtu = ETH_MIN_MTU;
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 80c5cecaf2b5..55c6bce5da61 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -854,6 +854,10 @@ struct i40e_vsi {
u64 tx_force_wb;
u64 rx_buf_failed;
u64 rx_page_failed;
+ u64 rx_page_reuse;
+ u64 rx_page_alloc;
+ u64 rx_page_waive;
+ u64 rx_page_busy;
/* These are containers of ring pointers, allocated at run-time */
struct i40e_ring **rx_rings;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 7abef88801fb..42439f725aa4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -769,7 +769,7 @@ static bool i40e_asq_done(struct i40e_hw *hw)
}
/**
- * i40e_asq_send_command_atomic - send command to Admin Queue
+ * i40e_asq_send_command_atomic_exec - send command to Admin Queue
* @hw: pointer to the hw struct
* @desc: prefilled descriptor describing the command (non DMA mem)
* @buff: buffer to use for indirect commands
@@ -780,11 +780,13 @@ static bool i40e_asq_done(struct i40e_hw *hw)
* This is the main send command driver routine for the Admin Queue send
* queue. It runs the queue, cleans the queue, etc
**/
-i40e_status
-i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc,
- void *buff, /* can be NULL */ u16 buff_size,
- struct i40e_asq_cmd_details *cmd_details,
- bool is_atomic_context)
+static i40e_status
+i40e_asq_send_command_atomic_exec(struct i40e_hw *hw,
+ struct i40e_aq_desc *desc,
+ void *buff, /* can be NULL */
+ u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details,
+ bool is_atomic_context)
{
i40e_status status = 0;
struct i40e_dma_mem *dma_buff = NULL;
@@ -794,8 +796,6 @@ i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc,
u16 retval = 0;
u32 val = 0;
- mutex_lock(&hw->aq.asq_mutex);
-
if (hw->aq.asq.count == 0) {
i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
"AQTX: Admin queue not initialized.\n");
@@ -969,6 +969,36 @@ i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc,
}
asq_send_command_error:
+ return status;
+}
+
+/**
+ * i40e_asq_send_command_atomic - send command to Admin Queue
+ * @hw: pointer to the hw struct
+ * @desc: prefilled descriptor describing the command (non DMA mem)
+ * @buff: buffer to use for indirect commands
+ * @buff_size: size of buffer for indirect commands
+ * @cmd_details: pointer to command details structure
+ * @is_atomic_context: is the function called in an atomic context?
+ *
+ * Acquires the lock and calls the main send command execution
+ * routine.
+ **/
+i40e_status
+i40e_asq_send_command_atomic(struct i40e_hw *hw,
+ struct i40e_aq_desc *desc,
+ void *buff, /* can be NULL */
+ u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details,
+ bool is_atomic_context)
+{
+ i40e_status status;
+
+ mutex_lock(&hw->aq.asq_mutex);
+ status = i40e_asq_send_command_atomic_exec(hw, desc, buff, buff_size,
+ cmd_details,
+ is_atomic_context);
+
mutex_unlock(&hw->aq.asq_mutex);
return status;
}
@@ -983,6 +1013,52 @@ i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc,
}
/**
+ * i40e_asq_send_command_atomic_v2 - send command to Admin Queue
+ * @hw: pointer to the hw struct
+ * @desc: prefilled descriptor describing the command (non DMA mem)
+ * @buff: buffer to use for indirect commands
+ * @buff_size: size of buffer for indirect commands
+ * @cmd_details: pointer to command details structure
+ * @is_atomic_context: is the function called in an atomic context?
+ * @aq_status: pointer to Admin Queue status return value
+ *
+ * Acquires the lock and calls the main send command execution
+ * routine. Returns the last Admin Queue status in aq_status
+ * to avoid race conditions in access to hw->aq.asq_last_status.
+ **/
+i40e_status
+i40e_asq_send_command_atomic_v2(struct i40e_hw *hw,
+ struct i40e_aq_desc *desc,
+ void *buff, /* can be NULL */
+ u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details,
+ bool is_atomic_context,
+ enum i40e_admin_queue_err *aq_status)
+{
+ i40e_status status;
+
+ mutex_lock(&hw->aq.asq_mutex);
+ status = i40e_asq_send_command_atomic_exec(hw, desc, buff,
+ buff_size,
+ cmd_details,
+ is_atomic_context);
+ if (aq_status)
+ *aq_status = hw->aq.asq_last_status;
+ mutex_unlock(&hw->aq.asq_mutex);
+ return status;
+}
+
+i40e_status
+i40e_asq_send_command_v2(struct i40e_hw *hw, struct i40e_aq_desc *desc,
+ void *buff, /* can be NULL */ u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details,
+ enum i40e_admin_queue_err *aq_status)
+{
+ return i40e_asq_send_command_atomic_v2(hw, desc, buff, buff_size,
+ cmd_details, true, aq_status);
+}
+
+/**
* i40e_fill_default_direct_cmd_desc - AQ descriptor helper function
* @desc: pointer to the temp descriptor (non DMA mem)
* @opcode: the opcode can be used to decide which flags to turn off or on
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 9ddeb015eb7e..6aefffd83615 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1899,8 +1899,9 @@ i40e_status i40e_aq_add_vsi(struct i40e_hw *hw,
desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
- status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info,
- sizeof(vsi_ctx->info), cmd_details);
+ status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info,
+ sizeof(vsi_ctx->info),
+ cmd_details, true);
if (status)
goto aq_add_vsi_exit;
@@ -2287,8 +2288,9 @@ i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw,
desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
- status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info,
- sizeof(vsi_ctx->info), cmd_details);
+ status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info,
+ sizeof(vsi_ctx->info),
+ cmd_details, true);
vsi_ctx->vsis_allocated = le16_to_cpu(resp->vsi_used);
vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
@@ -2632,33 +2634,28 @@ get_veb_exit:
}
/**
- * i40e_aq_add_macvlan
- * @hw: pointer to the hw struct
- * @seid: VSI for the mac address
+ * i40e_prepare_add_macvlan
* @mv_list: list of macvlans to be added
+ * @desc: pointer to AQ descriptor structure
* @count: length of the list
- * @cmd_details: pointer to command details structure or NULL
+ * @seid: VSI for the mac address
*
- * Add MAC/VLAN addresses to the HW filtering
+ * Internal helper function that prepares the add macvlan request
+ * and returns the buffer size.
**/
-i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid,
- struct i40e_aqc_add_macvlan_element_data *mv_list,
- u16 count, struct i40e_asq_cmd_details *cmd_details)
+static u16
+i40e_prepare_add_macvlan(struct i40e_aqc_add_macvlan_element_data *mv_list,
+ struct i40e_aq_desc *desc, u16 count, u16 seid)
{
- struct i40e_aq_desc desc;
struct i40e_aqc_macvlan *cmd =
- (struct i40e_aqc_macvlan *)&desc.params.raw;
- i40e_status status;
+ (struct i40e_aqc_macvlan *)&desc->params.raw;
u16 buf_size;
int i;
- if (count == 0 || !mv_list || !hw)
- return I40E_ERR_PARAM;
-
buf_size = count * sizeof(*mv_list);
/* prep the rest of the request */
- i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_macvlan);
+ i40e_fill_default_direct_cmd_desc(desc, i40e_aqc_opc_add_macvlan);
cmd->num_addresses = cpu_to_le16(count);
cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid);
cmd->seid[1] = 0;
@@ -2669,14 +2666,71 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid,
mv_list[i].flags |=
cpu_to_le16(I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC);
- desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ desc->flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
if (buf_size > I40E_AQ_LARGE_BUF)
- desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+ desc->flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
- status = i40e_asq_send_command(hw, &desc, mv_list, buf_size,
- cmd_details);
+ return buf_size;
+}
- return status;
+/**
+ * i40e_aq_add_macvlan
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the mac address
+ * @mv_list: list of macvlans to be added
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Add MAC/VLAN addresses to the HW filtering
+ **/
+i40e_status
+i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_add_macvlan_element_data *mv_list,
+ u16 count, struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ u16 buf_size;
+
+ if (count == 0 || !mv_list || !hw)
+ return I40E_ERR_PARAM;
+
+ buf_size = i40e_prepare_add_macvlan(mv_list, &desc, count, seid);
+
+ return i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size,
+ cmd_details, true);
+}
+
+/**
+ * i40e_aq_add_macvlan_v2
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the mac address
+ * @mv_list: list of macvlans to be added
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ * @aq_status: pointer to Admin Queue status return value
+ *
+ * Add MAC/VLAN addresses to the HW filtering.
+ * The _v2 version returns the last Admin Queue status in aq_status
+ * to avoid race conditions in access to hw->aq.asq_last_status.
+ * It also calls _v2 versions of asq_send_command functions to
+ * get the aq_status on the stack.
+ **/
+i40e_status
+i40e_aq_add_macvlan_v2(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_add_macvlan_element_data *mv_list,
+ u16 count, struct i40e_asq_cmd_details *cmd_details,
+ enum i40e_admin_queue_err *aq_status)
+{
+ struct i40e_aq_desc desc;
+ u16 buf_size;
+
+ if (count == 0 || !mv_list || !hw)
+ return I40E_ERR_PARAM;
+
+ buf_size = i40e_prepare_add_macvlan(mv_list, &desc, count, seid);
+
+ return i40e_asq_send_command_atomic_v2(hw, &desc, mv_list, buf_size,
+ cmd_details, true, aq_status);
}
/**
@@ -2715,13 +2769,59 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid,
if (buf_size > I40E_AQ_LARGE_BUF)
desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
- status = i40e_asq_send_command(hw, &desc, mv_list, buf_size,
- cmd_details);
+ status = i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size,
+ cmd_details, true);
return status;
}
/**
+ * i40e_aq_remove_macvlan_v2
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the mac address
+ * @mv_list: list of macvlans to be removed
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ * @aq_status: pointer to Admin Queue status return value
+ *
+ * Remove MAC/VLAN addresses from the HW filtering.
+ * The _v2 version returns the last Admin Queue status in aq_status
+ * to avoid race conditions in access to hw->aq.asq_last_status.
+ * It also calls _v2 versions of asq_send_command functions to
+ * get the aq_status on the stack.
+ **/
+i40e_status
+i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_remove_macvlan_element_data *mv_list,
+ u16 count, struct i40e_asq_cmd_details *cmd_details,
+ enum i40e_admin_queue_err *aq_status)
+{
+ struct i40e_aqc_macvlan *cmd;
+ struct i40e_aq_desc desc;
+ u16 buf_size;
+
+ if (count == 0 || !mv_list || !hw)
+ return I40E_ERR_PARAM;
+
+ buf_size = count * sizeof(*mv_list);
+
+ /* prep the rest of the request */
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_remove_macvlan);
+ cmd = (struct i40e_aqc_macvlan *)&desc.params.raw;
+ cmd->num_addresses = cpu_to_le16(count);
+ cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid);
+ cmd->seid[1] = 0;
+ cmd->seid[2] = 0;
+
+ desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ if (buf_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+ return i40e_asq_send_command_atomic_v2(hw, &desc, mv_list, buf_size,
+ cmd_details, true, aq_status);
+}
+
+/**
* i40e_mirrorrule_op - Internal helper function to add/delete mirror rule
* @hw: pointer to the hw struct
* @opcode: AQ opcode for add or delete mirror rule
@@ -3868,7 +3968,8 @@ i40e_status i40e_aq_delete_element(struct i40e_hw *hw, u16 seid,
cmd->seid = cpu_to_le16(seid);
- status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+ status = i40e_asq_send_command_atomic(hw, &desc, NULL, 0,
+ cmd_details, true);
return status;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 1e57cc8c47d7..90fff05fbd2b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -275,9 +275,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
rx_ring->rx_stats.alloc_page_failed,
rx_ring->rx_stats.alloc_buff_failed);
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: rx_stats: realloc_count = %lld, page_reuse_count = %lld\n",
+ " rx_rings[%i]: rx_stats: realloc_count = 0, page_reuse_count = %lld\n",
i,
- rx_ring->rx_stats.realloc_count,
rx_ring->rx_stats.page_reuse_count);
dev_info(&pf->pdev->dev,
" rx_rings[%i]: size = %i\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 091f36adbbe1..e48499624d22 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -295,6 +295,10 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = {
I40E_VSI_STAT("tx_busy", tx_busy),
I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed),
I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
+ I40E_VSI_STAT("rx_cache_reuse", rx_page_reuse),
+ I40E_VSI_STAT("rx_cache_alloc", rx_page_alloc),
+ I40E_VSI_STAT("rx_cache_waive", rx_page_waive),
+ I40E_VSI_STAT("rx_cache_busy", rx_page_busy),
};
/* These PF_STATs might look like duplicates of some NETDEV_STATs,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 0c4b7dfb3b35..9b7ce6d9a92b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -773,6 +773,7 @@ void i40e_update_veb_stats(struct i40e_veb *veb)
**/
static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
{
+ u64 rx_page, rx_buf, rx_reuse, rx_alloc, rx_waive, rx_busy;
struct i40e_pf *pf = vsi->back;
struct rtnl_link_stats64 *ons;
struct rtnl_link_stats64 *ns; /* netdev stats */
@@ -780,7 +781,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
struct i40e_eth_stats *es; /* device's eth stats */
u64 tx_restart, tx_busy;
struct i40e_ring *p;
- u64 rx_page, rx_buf;
u64 bytes, packets;
unsigned int start;
u64 tx_linearize;
@@ -806,6 +806,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
tx_restart = tx_busy = tx_linearize = tx_force_wb = 0;
rx_page = 0;
rx_buf = 0;
+ rx_reuse = 0;
+ rx_alloc = 0;
+ rx_waive = 0;
+ rx_busy = 0;
rcu_read_lock();
for (q = 0; q < vsi->num_queue_pairs; q++) {
/* locate Tx ring */
@@ -839,6 +843,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
rx_p += packets;
rx_buf += p->rx_stats.alloc_buff_failed;
rx_page += p->rx_stats.alloc_page_failed;
+ rx_reuse += p->rx_stats.page_reuse_count;
+ rx_alloc += p->rx_stats.page_alloc_count;
+ rx_waive += p->rx_stats.page_waive_count;
+ rx_busy += p->rx_stats.page_busy_count;
if (i40e_enabled_xdp_vsi(vsi)) {
/* locate XDP ring */
@@ -866,6 +874,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
vsi->tx_force_wb = tx_force_wb;
vsi->rx_page_failed = rx_page;
vsi->rx_buf_failed = rx_buf;
+ vsi->rx_page_reuse = rx_reuse;
+ vsi->rx_page_alloc = rx_alloc;
+ vsi->rx_page_waive = rx_waive;
+ vsi->rx_page_busy = rx_busy;
ns->rx_packets = rx_p;
ns->rx_bytes = rx_b;
@@ -2143,19 +2155,19 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
int num_del, int *retval)
{
struct i40e_hw *hw = &vsi->back->hw;
+ enum i40e_admin_queue_err aq_status;
i40e_status aq_ret;
- int aq_err;
- aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, list, num_del, NULL);
- aq_err = hw->aq.asq_last_status;
+ aq_ret = i40e_aq_remove_macvlan_v2(hw, vsi->seid, list, num_del, NULL,
+ &aq_status);
/* Explicitly ignore and do not report when firmware returns ENOENT */
- if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) {
+ if (aq_ret && !(aq_status == I40E_AQ_RC_ENOENT)) {
*retval = -EIO;
dev_info(&vsi->back->pdev->dev,
"ignoring delete macvlan error on %s, err %s, aq_err %s\n",
vsi_name, i40e_stat_str(hw, aq_ret),
- i40e_aq_str(hw, aq_err));
+ i40e_aq_str(hw, aq_status));
}
}
@@ -2178,10 +2190,10 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
int num_add)
{
struct i40e_hw *hw = &vsi->back->hw;
- int aq_err, fcnt;
+ enum i40e_admin_queue_err aq_status;
+ int fcnt;
- i40e_aq_add_macvlan(hw, vsi->seid, list, num_add, NULL);
- aq_err = hw->aq.asq_last_status;
+ i40e_aq_add_macvlan_v2(hw, vsi->seid, list, num_add, NULL, &aq_status);
fcnt = i40e_update_filter_state(num_add, list, add_head);
if (fcnt != num_add) {
@@ -2189,17 +2201,19 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
dev_warn(&vsi->back->pdev->dev,
"Error %s adding RX filters on %s, promiscuous mode forced on\n",
- i40e_aq_str(hw, aq_err), vsi_name);
+ i40e_aq_str(hw, aq_status), vsi_name);
} else if (vsi->type == I40E_VSI_SRIOV ||
vsi->type == I40E_VSI_VMDQ1 ||
vsi->type == I40E_VSI_VMDQ2) {
dev_warn(&vsi->back->pdev->dev,
"Error %s adding RX filters on %s, please set promiscuous on manually for %s\n",
- i40e_aq_str(hw, aq_err), vsi_name, vsi_name);
+ i40e_aq_str(hw, aq_status), vsi_name,
+ vsi_name);
} else {
dev_warn(&vsi->back->pdev->dev,
"Error %s adding RX filters on %s, incorrect VSI type: %i.\n",
- i40e_aq_str(hw, aq_err), vsi_name, vsi->type);
+ i40e_aq_str(hw, aq_status), vsi_name,
+ vsi->type);
}
}
}
@@ -12722,7 +12736,8 @@ static int i40e_set_features(struct net_device *netdev,
else
i40e_vlan_stripping_disable(vsi);
- if (!(features & NETIF_F_HW_TC) && pf->num_cloud_filters) {
+ if (!(features & NETIF_F_HW_TC) &&
+ (netdev->features & NETIF_F_HW_TC) && pf->num_cloud_filters) {
dev_err(&pf->pdev->dev,
"Offloaded tc filters active, can't turn hw_tc_offload off");
return -EINVAL;
@@ -13478,6 +13493,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
+ netdev->features &= ~NETIF_F_HW_TC;
+
if (vsi->type == I40E_VSI_MAIN) {
SET_NETDEV_DEV(netdev, &pf->pdev->dev);
ether_addr_copy(mac_addr, hw->mac.perm_addr);
@@ -15341,12 +15358,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* set up for high or low dma */
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (err) {
- err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev,
- "DMA configuration failed: 0x%x\n", err);
- goto err_dma;
- }
+ dev_err(&pdev->dev,
+ "DMA configuration failed: 0x%x\n", err);
+ goto err_dma;
}
/* set up pci connections */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 9241b6005ad3..ebdcde6f1aeb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -27,10 +27,25 @@ i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc,
void *buff, /* can be NULL */ u16 buff_size,
struct i40e_asq_cmd_details *cmd_details);
i40e_status
+i40e_asq_send_command_v2(struct i40e_hw *hw,
+ struct i40e_aq_desc *desc,
+ void *buff, /* can be NULL */
+ u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details,
+ enum i40e_admin_queue_err *aq_status);
+i40e_status
i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc,
void *buff, /* can be NULL */ u16 buff_size,
struct i40e_asq_cmd_details *cmd_details,
bool is_atomic_context);
+i40e_status
+i40e_asq_send_command_atomic_v2(struct i40e_hw *hw,
+ struct i40e_aq_desc *desc,
+ void *buff, /* can be NULL */
+ u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details,
+ bool is_atomic_context,
+ enum i40e_admin_queue_err *aq_status);
/* debug function for adminq */
void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask,
@@ -150,9 +165,19 @@ i40e_status i40e_aq_get_veb_parameters(struct i40e_hw *hw,
i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id,
struct i40e_aqc_add_macvlan_element_data *mv_list,
u16 count, struct i40e_asq_cmd_details *cmd_details);
+i40e_status
+i40e_aq_add_macvlan_v2(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_add_macvlan_element_data *mv_list,
+ u16 count, struct i40e_asq_cmd_details *cmd_details,
+ enum i40e_admin_queue_err *aq_status);
i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id,
struct i40e_aqc_remove_macvlan_element_data *mv_list,
u16 count, struct i40e_asq_cmd_details *cmd_details);
+i40e_status
+i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_remove_macvlan_element_data *mv_list,
+ u16 count, struct i40e_asq_cmd_details *cmd_details,
+ enum i40e_admin_queue_err *aq_status);
i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list,
struct i40e_asq_cmd_details *cmd_details,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 66cc79500c10..0eae5858f2fe 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -830,8 +830,6 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
i40e_clean_tx_ring(tx_ring);
kfree(tx_ring->tx_bi);
tx_ring->tx_bi = NULL;
- kfree(tx_ring->xsk_descs);
- tx_ring->xsk_descs = NULL;
if (tx_ring->desc) {
dma_free_coherent(tx_ring->dev, tx_ring->size,
@@ -1382,8 +1380,6 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
new_buff->page_offset = old_buff->page_offset;
new_buff->pagecnt_bias = old_buff->pagecnt_bias;
- rx_ring->rx_stats.page_reuse_count++;
-
/* clear contents of buffer_info */
old_buff->page = NULL;
}
@@ -1433,13 +1429,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
if (!tx_ring->tx_bi)
goto err;
- if (ring_is_xdp(tx_ring)) {
- tx_ring->xsk_descs = kcalloc(I40E_MAX_NUM_DESCRIPTORS, sizeof(*tx_ring->xsk_descs),
- GFP_KERNEL);
- if (!tx_ring->xsk_descs)
- goto err;
- }
-
u64_stats_init(&tx_ring->syncp);
/* round up to nearest 4K */
@@ -1463,8 +1452,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
return 0;
err:
- kfree(tx_ring->xsk_descs);
- tx_ring->xsk_descs = NULL;
kfree(tx_ring->tx_bi);
tx_ring->tx_bi = NULL;
return -ENOMEM;
@@ -1675,6 +1662,8 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
return false;
}
+ rx_ring->rx_stats.page_alloc_count++;
+
/* map page for use */
dma = dma_map_page_attrs(rx_ring->dev, page, 0,
i40e_rx_pg_size(rx_ring),
@@ -1982,32 +1971,43 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
/**
* i40e_can_reuse_rx_page - Determine if page can be reused for another Rx
* @rx_buffer: buffer containing the page
+ * @rx_stats: rx stats structure for the rx ring
* @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call
*
* If page is reusable, we have a green light for calling i40e_reuse_rx_page,
* which will assign the current buffer to the buffer that next_to_alloc is
* pointing to; otherwise, the DMA mapping needs to be destroyed and
- * page freed
+ * page freed.
+ *
+ * rx_stats will be updated to indicate whether the page was waived
+ * or busy if it could not be reused.
*/
static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
+ struct i40e_rx_queue_stats *rx_stats,
int rx_buffer_pgcnt)
{
unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
struct page *page = rx_buffer->page;
/* Is any reuse possible? */
- if (!dev_page_is_reusable(page))
+ if (!dev_page_is_reusable(page)) {
+ rx_stats->page_waive_count++;
return false;
+ }
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
+ if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) {
+ rx_stats->page_busy_count++;
return false;
+ }
#else
#define I40E_LAST_OFFSET \
(SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048)
- if (rx_buffer->page_offset > I40E_LAST_OFFSET)
+ if (rx_buffer->page_offset > I40E_LAST_OFFSET) {
+ rx_stats->page_busy_count++;
return false;
+ }
#endif
/* If we have drained the page fragment pool we need to update
@@ -2237,7 +2237,7 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
int rx_buffer_pgcnt)
{
- if (i40e_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
+ if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats, rx_buffer_pgcnt)) {
/* hand second half of page back to the ring */
i40e_reuse_rx_page(rx_ring, rx_buffer);
} else {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index bfc2845c99d1..c471c2da313c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -298,7 +298,9 @@ struct i40e_rx_queue_stats {
u64 alloc_page_failed;
u64 alloc_buff_failed;
u64 page_reuse_count;
- u64 realloc_count;
+ u64 page_alloc_count;
+ u64 page_waive_count;
+ u64 page_busy_count;
};
enum i40e_ring_state_t {
@@ -390,7 +392,6 @@ struct i40e_ring {
u16 rx_offset;
struct xdp_rxq_info xdp_rxq;
struct xsk_buff_pool *xsk_pool;
- struct xdp_desc *xsk_descs; /* For storing descriptors in the AF_XDP ZC path */
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 945b1bb9c6f4..5a997b0d07d8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -241,21 +241,25 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
struct xdp_buff *xdp)
{
+ unsigned int totalsize = xdp->data_end - xdp->data_meta;
unsigned int metasize = xdp->data - xdp->data_meta;
- unsigned int datasize = xdp->data_end - xdp->data;
struct sk_buff *skb;
+ net_prefetch(xdp->data_meta);
+
/* allocate a skb to store the frags */
- skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
- xdp->data_end - xdp->data_hard_start,
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
goto out;
- skb_reserve(skb, xdp->data - xdp->data_hard_start);
- memcpy(__skb_put(skb, datasize), xdp->data, datasize);
- if (metasize)
+ memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+ ALIGN(totalsize, sizeof(long)));
+
+ if (metasize) {
skb_metadata_set(skb, metasize);
+ __skb_pull(skb, metasize);
+ }
out:
xsk_buff_free(xdp);
@@ -467,11 +471,11 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
**/
static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
{
- struct xdp_desc *descs = xdp_ring->xsk_descs;
+ struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
u32 nb_pkts, nb_processed = 0;
unsigned int total_bytes = 0;
- nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget);
+ nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
if (!nb_pkts)
return true;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 8125b9120615..b0bd95c85480 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -4368,12 +4368,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (err) {
- err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev,
- "DMA configuration failed: 0x%x\n", err);
- goto err_dma;
- }
+ dev_err(&pdev->dev,
+ "DMA configuration failed: 0x%x\n", err);
+ goto err_dma;
}
err = pci_request_regions(pdev, iavf_driver_name);
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index c36faa7d1471..389fff70d22e 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -18,8 +18,12 @@ ice-y := ice_main.o \
ice_txrx_lib.o \
ice_txrx.o \
ice_fltr.o \
+ ice_pf_vsi_vlan_ops.o \
+ ice_vsi_vlan_ops.o \
+ ice_vsi_vlan_lib.o \
ice_fdir.o \
ice_ethtool_fdir.o \
+ ice_vlan_mode.o \
ice_flex_pipe.o \
ice_flow.o \
ice_idc.o \
@@ -29,8 +33,12 @@ ice-y := ice_main.o \
ice_ethtool.o \
ice_repr.o \
ice_tc_lib.o
-ice-$(CONFIG_PCI_IOV) += ice_virtchnl_allowlist.o
-ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice_virtchnl_fdir.o
+ice-$(CONFIG_PCI_IOV) += \
+ ice_virtchnl_allowlist.o \
+ ice_virtchnl_fdir.o \
+ ice_sriov.o \
+ ice_vf_vsi_vlan_ops.o \
+ ice_virtchnl_pf.o
ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o
ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index a9fa701aaa95..827fcb5e0d4c 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -72,6 +72,7 @@
#include "ice_repr.h"
#include "ice_eswitch.h"
#include "ice_lag.h"
+#include "ice_vsi_vlan_ops.h"
#define ICE_BAR0 0
#define ICE_REQ_DESC_MULTIPLE 32
@@ -368,6 +369,8 @@ struct ice_vsi {
u8 irqs_ready:1;
u8 current_isup:1; /* Sync 'link up' logging */
u8 stat_offsets_loaded:1;
+ struct ice_vsi_vlan_ops inner_vlan_ops;
+ struct ice_vsi_vlan_ops outer_vlan_ops;
u16 num_vlan;
/* queue information */
@@ -482,6 +485,7 @@ enum ice_pf_flags {
ICE_FLAG_LEGACY_RX,
ICE_FLAG_VF_TRUE_PROMISC_ENA,
ICE_FLAG_MDD_AUTO_RESET_VF,
+ ICE_FLAG_VF_VLAN_PRUNING,
ICE_FLAG_LINK_LENIENT_MODE_ENA,
ICE_FLAG_PLUG_AUX_DEV,
ICE_PF_FLAGS_NBITS /* must be last */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index ad1dcfa5ff65..fd8ee5b7f596 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -226,6 +226,15 @@ struct ice_aqc_get_sw_cfg_resp_elem {
#define ICE_AQC_GET_SW_CONF_RESP_IS_VF BIT(15)
};
+/* Set Port parameters, (direct, 0x0203) */
+struct ice_aqc_set_port_params {
+ __le16 cmd_flags;
+#define ICE_AQC_SET_P_PARAMS_DOUBLE_VLAN_ENA BIT(2)
+ __le16 bad_frame_vsi;
+ __le16 swid;
+ u8 reserved[10];
+};
+
/* These resource type defines are used for all switch resource
* commands where a resource type is required, such as:
* Get Resource Allocation command (indirect 0x0204)
@@ -283,6 +292,40 @@ struct ice_aqc_alloc_free_res_elem {
struct ice_aqc_res_elem elem[];
};
+/* Request buffer for Set VLAN Mode AQ command (indirect 0x020C) */
+struct ice_aqc_set_vlan_mode {
+ u8 reserved;
+ u8 l2tag_prio_tagging;
+#define ICE_AQ_VLAN_PRIO_TAG_S 0
+#define ICE_AQ_VLAN_PRIO_TAG_M (0x7 << ICE_AQ_VLAN_PRIO_TAG_S)
+#define ICE_AQ_VLAN_PRIO_TAG_NOT_SUPPORTED 0x0
+#define ICE_AQ_VLAN_PRIO_TAG_STAG 0x1
+#define ICE_AQ_VLAN_PRIO_TAG_OUTER_CTAG 0x2
+#define ICE_AQ_VLAN_PRIO_TAG_OUTER_VLAN 0x3
+#define ICE_AQ_VLAN_PRIO_TAG_INNER_CTAG 0x4
+#define ICE_AQ_VLAN_PRIO_TAG_MAX 0x4
+#define ICE_AQ_VLAN_PRIO_TAG_ERROR 0x7
+ u8 l2tag_reserved[64];
+ u8 rdma_packet;
+#define ICE_AQ_VLAN_RDMA_TAG_S 0
+#define ICE_AQ_VLAN_RDMA_TAG_M (0x3F << ICE_AQ_VLAN_RDMA_TAG_S)
+#define ICE_AQ_SVM_VLAN_RDMA_PKT_FLAG_SETTING 0x10
+#define ICE_AQ_DVM_VLAN_RDMA_PKT_FLAG_SETTING 0x1A
+ u8 rdma_reserved[2];
+ u8 mng_vlan_prot_id;
+#define ICE_AQ_VLAN_MNG_PROTOCOL_ID_OUTER 0x10
+#define ICE_AQ_VLAN_MNG_PROTOCOL_ID_INNER 0x11
+ u8 prot_id_reserved[30];
+};
+
+/* Response buffer for Get VLAN Mode AQ command (indirect 0x020D) */
+struct ice_aqc_get_vlan_mode {
+ u8 vlan_mode;
+#define ICE_AQ_VLAN_MODE_DVM_ENA BIT(0)
+ u8 l2tag_prio_tagging;
+ u8 reserved[98];
+};
+
/* Add VSI (indirect 0x0210)
* Update VSI (indirect 0x0211)
* Get VSI (indirect 0x0212)
@@ -343,108 +386,113 @@ struct ice_aqc_vsi_props {
#define ICE_AQ_VSI_SW_FLAG_SRC_PRUNE BIT(7)
u8 sw_flags2;
#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S 0
-#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M \
- (0xF << ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S)
+#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M (0xF << ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S)
#define ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA BIT(0)
#define ICE_AQ_VSI_SW_FLAG_LAN_ENA BIT(4)
u8 veb_stat_id;
#define ICE_AQ_VSI_SW_VEB_STAT_ID_S 0
-#define ICE_AQ_VSI_SW_VEB_STAT_ID_M (0x1F << ICE_AQ_VSI_SW_VEB_STAT_ID_S)
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_M (0x1F << ICE_AQ_VSI_SW_VEB_STAT_ID_S)
#define ICE_AQ_VSI_SW_VEB_STAT_ID_VALID BIT(5)
/* security section */
u8 sec_flags;
#define ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD BIT(0)
#define ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF BIT(2)
-#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S 4
-#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_M (0xF << ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)
+#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S 4
+#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_M (0xF << ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)
#define ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA BIT(0)
u8 sec_reserved;
/* VLAN section */
- __le16 pvid; /* VLANS include priority bits */
- u8 pvlan_reserved[2];
- u8 vlan_flags;
-#define ICE_AQ_VSI_VLAN_MODE_S 0
-#define ICE_AQ_VSI_VLAN_MODE_M (0x3 << ICE_AQ_VSI_VLAN_MODE_S)
-#define ICE_AQ_VSI_VLAN_MODE_UNTAGGED 0x1
-#define ICE_AQ_VSI_VLAN_MODE_TAGGED 0x2
-#define ICE_AQ_VSI_VLAN_MODE_ALL 0x3
-#define ICE_AQ_VSI_PVLAN_INSERT_PVID BIT(2)
-#define ICE_AQ_VSI_VLAN_EMOD_S 3
-#define ICE_AQ_VSI_VLAN_EMOD_M (0x3 << ICE_AQ_VSI_VLAN_EMOD_S)
-#define ICE_AQ_VSI_VLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_VLAN_EMOD_S)
-#define ICE_AQ_VSI_VLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_VLAN_EMOD_S)
-#define ICE_AQ_VSI_VLAN_EMOD_STR (0x2 << ICE_AQ_VSI_VLAN_EMOD_S)
-#define ICE_AQ_VSI_VLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_VLAN_EMOD_S)
- u8 pvlan_reserved2[3];
+ __le16 port_based_inner_vlan; /* VLANS include priority bits */
+ u8 inner_vlan_reserved[2];
+ u8 inner_vlan_flags;
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_S 0
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_M (0x3 << ICE_AQ_VSI_INNER_VLAN_TX_MODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTUNTAGGED 0x1
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTTAGGED 0x2
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL 0x3
+#define ICE_AQ_VSI_INNER_VLAN_INSERT_PVID BIT(2)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_S 3
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_M (0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH (0x0 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_UP (0x1 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR (0x2 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING (0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+ u8 inner_vlan_reserved2[3];
/* ingress egress up sections */
__le32 ingress_table; /* bitmap, 3 bits per up */
-#define ICE_AQ_VSI_UP_TABLE_UP0_S 0
-#define ICE_AQ_VSI_UP_TABLE_UP0_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP0_S)
-#define ICE_AQ_VSI_UP_TABLE_UP1_S 3
-#define ICE_AQ_VSI_UP_TABLE_UP1_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP1_S)
-#define ICE_AQ_VSI_UP_TABLE_UP2_S 6
-#define ICE_AQ_VSI_UP_TABLE_UP2_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP2_S)
-#define ICE_AQ_VSI_UP_TABLE_UP3_S 9
-#define ICE_AQ_VSI_UP_TABLE_UP3_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP3_S)
-#define ICE_AQ_VSI_UP_TABLE_UP4_S 12
-#define ICE_AQ_VSI_UP_TABLE_UP4_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP4_S)
-#define ICE_AQ_VSI_UP_TABLE_UP5_S 15
-#define ICE_AQ_VSI_UP_TABLE_UP5_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP5_S)
-#define ICE_AQ_VSI_UP_TABLE_UP6_S 18
-#define ICE_AQ_VSI_UP_TABLE_UP6_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP6_S)
-#define ICE_AQ_VSI_UP_TABLE_UP7_S 21
-#define ICE_AQ_VSI_UP_TABLE_UP7_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP7_S)
+#define ICE_AQ_VSI_UP_TABLE_UP0_S 0
+#define ICE_AQ_VSI_UP_TABLE_UP0_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP0_S)
+#define ICE_AQ_VSI_UP_TABLE_UP1_S 3
+#define ICE_AQ_VSI_UP_TABLE_UP1_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP1_S)
+#define ICE_AQ_VSI_UP_TABLE_UP2_S 6
+#define ICE_AQ_VSI_UP_TABLE_UP2_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP2_S)
+#define ICE_AQ_VSI_UP_TABLE_UP3_S 9
+#define ICE_AQ_VSI_UP_TABLE_UP3_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP3_S)
+#define ICE_AQ_VSI_UP_TABLE_UP4_S 12
+#define ICE_AQ_VSI_UP_TABLE_UP4_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP4_S)
+#define ICE_AQ_VSI_UP_TABLE_UP5_S 15
+#define ICE_AQ_VSI_UP_TABLE_UP5_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP5_S)
+#define ICE_AQ_VSI_UP_TABLE_UP6_S 18
+#define ICE_AQ_VSI_UP_TABLE_UP6_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP6_S)
+#define ICE_AQ_VSI_UP_TABLE_UP7_S 21
+#define ICE_AQ_VSI_UP_TABLE_UP7_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP7_S)
__le32 egress_table; /* same defines as for ingress table */
/* outer tags section */
- __le16 outer_tag;
- u8 outer_tag_flags;
-#define ICE_AQ_VSI_OUTER_TAG_MODE_S 0
-#define ICE_AQ_VSI_OUTER_TAG_MODE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_MODE_S)
-#define ICE_AQ_VSI_OUTER_TAG_NOTHING 0x0
-#define ICE_AQ_VSI_OUTER_TAG_REMOVE 0x1
-#define ICE_AQ_VSI_OUTER_TAG_COPY 0x2
-#define ICE_AQ_VSI_OUTER_TAG_TYPE_S 2
-#define ICE_AQ_VSI_OUTER_TAG_TYPE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_TYPE_S)
-#define ICE_AQ_VSI_OUTER_TAG_NONE 0x0
-#define ICE_AQ_VSI_OUTER_TAG_STAG 0x1
-#define ICE_AQ_VSI_OUTER_TAG_VLAN_8100 0x2
-#define ICE_AQ_VSI_OUTER_TAG_VLAN_9100 0x3
-#define ICE_AQ_VSI_OUTER_TAG_INSERT BIT(4)
-#define ICE_AQ_VSI_OUTER_TAG_ACCEPT_HOST BIT(6)
- u8 outer_tag_reserved;
+ __le16 port_based_outer_vlan;
+ u8 outer_vlan_flags;
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_S 0
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_M (0x3 << ICE_AQ_VSI_OUTER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_BOTH 0x0
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_UP 0x1
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW 0x2
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_NOTHING 0x3
+#define ICE_AQ_VSI_OUTER_TAG_TYPE_S 2
+#define ICE_AQ_VSI_OUTER_TAG_TYPE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_TYPE_S)
+#define ICE_AQ_VSI_OUTER_TAG_NONE 0x0
+#define ICE_AQ_VSI_OUTER_TAG_STAG 0x1
+#define ICE_AQ_VSI_OUTER_TAG_VLAN_8100 0x2
+#define ICE_AQ_VSI_OUTER_TAG_VLAN_9100 0x3
+#define ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT BIT(4)
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S 5
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M (0x3 << ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S)
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTUNTAGGED 0x1
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTTAGGED 0x2
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL 0x3
+#define ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC BIT(7)
+ u8 outer_vlan_reserved;
/* queue mapping section */
__le16 mapping_flags;
-#define ICE_AQ_VSI_Q_MAP_CONTIG 0x0
-#define ICE_AQ_VSI_Q_MAP_NONCONTIG BIT(0)
+#define ICE_AQ_VSI_Q_MAP_CONTIG 0x0
+#define ICE_AQ_VSI_Q_MAP_NONCONTIG BIT(0)
__le16 q_mapping[16];
-#define ICE_AQ_VSI_Q_S 0
-#define ICE_AQ_VSI_Q_M (0x7FF << ICE_AQ_VSI_Q_S)
+#define ICE_AQ_VSI_Q_S 0
+#define ICE_AQ_VSI_Q_M (0x7FF << ICE_AQ_VSI_Q_S)
__le16 tc_mapping[8];
-#define ICE_AQ_VSI_TC_Q_OFFSET_S 0
-#define ICE_AQ_VSI_TC_Q_OFFSET_M (0x7FF << ICE_AQ_VSI_TC_Q_OFFSET_S)
-#define ICE_AQ_VSI_TC_Q_NUM_S 11
-#define ICE_AQ_VSI_TC_Q_NUM_M (0xF << ICE_AQ_VSI_TC_Q_NUM_S)
+#define ICE_AQ_VSI_TC_Q_OFFSET_S 0
+#define ICE_AQ_VSI_TC_Q_OFFSET_M (0x7FF << ICE_AQ_VSI_TC_Q_OFFSET_S)
+#define ICE_AQ_VSI_TC_Q_NUM_S 11
+#define ICE_AQ_VSI_TC_Q_NUM_M (0xF << ICE_AQ_VSI_TC_Q_NUM_S)
/* queueing option section */
u8 q_opt_rss;
-#define ICE_AQ_VSI_Q_OPT_RSS_LUT_S 0
-#define ICE_AQ_VSI_Q_OPT_RSS_LUT_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_LUT_S)
-#define ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI 0x0
-#define ICE_AQ_VSI_Q_OPT_RSS_LUT_PF 0x2
-#define ICE_AQ_VSI_Q_OPT_RSS_LUT_GBL 0x3
-#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S 2
-#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M (0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S)
-#define ICE_AQ_VSI_Q_OPT_RSS_HASH_S 6
-#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
-#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ (0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
-#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ (0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
-#define ICE_AQ_VSI_Q_OPT_RSS_XOR (0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
-#define ICE_AQ_VSI_Q_OPT_RSS_JHASH (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_S 0
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_LUT_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI 0x0
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_PF 0x2
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_GBL 0x3
+#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S 2
+#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M (0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_HASH_S 6
+#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ (0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ (0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_XOR (0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_JHASH (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
u8 q_opt_tc;
-#define ICE_AQ_VSI_Q_OPT_TC_OVR_S 0
-#define ICE_AQ_VSI_Q_OPT_TC_OVR_M (0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S)
-#define ICE_AQ_VSI_Q_OPT_PROF_TC_OVR BIT(7)
+#define ICE_AQ_VSI_Q_OPT_TC_OVR_S 0
+#define ICE_AQ_VSI_Q_OPT_TC_OVR_M (0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S)
+#define ICE_AQ_VSI_Q_OPT_PROF_TC_OVR BIT(7)
u8 q_opt_flags;
-#define ICE_AQ_VSI_Q_OPT_PE_FLTR_EN BIT(0)
+#define ICE_AQ_VSI_Q_OPT_PE_FLTR_EN BIT(0)
u8 q_opt_reserved[3];
/* outer up section */
__le32 outer_up_table; /* same structure and defines as ingress tbl */
@@ -452,27 +500,27 @@ struct ice_aqc_vsi_props {
__le16 sect_10_reserved;
/* flow director section */
__le16 fd_options;
-#define ICE_AQ_VSI_FD_ENABLE BIT(0)
-#define ICE_AQ_VSI_FD_TX_AUTO_ENABLE BIT(1)
-#define ICE_AQ_VSI_FD_PROG_ENABLE BIT(3)
+#define ICE_AQ_VSI_FD_ENABLE BIT(0)
+#define ICE_AQ_VSI_FD_TX_AUTO_ENABLE BIT(1)
+#define ICE_AQ_VSI_FD_PROG_ENABLE BIT(3)
__le16 max_fd_fltr_dedicated;
__le16 max_fd_fltr_shared;
__le16 fd_def_q;
-#define ICE_AQ_VSI_FD_DEF_Q_S 0
-#define ICE_AQ_VSI_FD_DEF_Q_M (0x7FF << ICE_AQ_VSI_FD_DEF_Q_S)
-#define ICE_AQ_VSI_FD_DEF_GRP_S 12
-#define ICE_AQ_VSI_FD_DEF_GRP_M (0x7 << ICE_AQ_VSI_FD_DEF_GRP_S)
+#define ICE_AQ_VSI_FD_DEF_Q_S 0
+#define ICE_AQ_VSI_FD_DEF_Q_M (0x7FF << ICE_AQ_VSI_FD_DEF_Q_S)
+#define ICE_AQ_VSI_FD_DEF_GRP_S 12
+#define ICE_AQ_VSI_FD_DEF_GRP_M (0x7 << ICE_AQ_VSI_FD_DEF_GRP_S)
__le16 fd_report_opt;
-#define ICE_AQ_VSI_FD_REPORT_Q_S 0
-#define ICE_AQ_VSI_FD_REPORT_Q_M (0x7FF << ICE_AQ_VSI_FD_REPORT_Q_S)
-#define ICE_AQ_VSI_FD_DEF_PRIORITY_S 12
-#define ICE_AQ_VSI_FD_DEF_PRIORITY_M (0x7 << ICE_AQ_VSI_FD_DEF_PRIORITY_S)
-#define ICE_AQ_VSI_FD_DEF_DROP BIT(15)
+#define ICE_AQ_VSI_FD_REPORT_Q_S 0
+#define ICE_AQ_VSI_FD_REPORT_Q_M (0x7FF << ICE_AQ_VSI_FD_REPORT_Q_S)
+#define ICE_AQ_VSI_FD_DEF_PRIORITY_S 12
+#define ICE_AQ_VSI_FD_DEF_PRIORITY_M (0x7 << ICE_AQ_VSI_FD_DEF_PRIORITY_S)
+#define ICE_AQ_VSI_FD_DEF_DROP BIT(15)
/* PASID section */
__le32 pasid_id;
-#define ICE_AQ_VSI_PASID_ID_S 0
-#define ICE_AQ_VSI_PASID_ID_M (0xFFFFF << ICE_AQ_VSI_PASID_ID_S)
-#define ICE_AQ_VSI_PASID_ID_VALID BIT(31)
+#define ICE_AQ_VSI_PASID_ID_S 0
+#define ICE_AQ_VSI_PASID_ID_M (0xFFFFF << ICE_AQ_VSI_PASID_ID_S)
+#define ICE_AQ_VSI_PASID_ID_VALID BIT(31)
u8 reserved[24];
};
@@ -489,9 +537,13 @@ struct ice_aqc_add_get_recipe {
struct ice_aqc_recipe_content {
u8 rid;
+#define ICE_AQ_RECIPE_ID_S 0
+#define ICE_AQ_RECIPE_ID_M (0x3F << ICE_AQ_RECIPE_ID_S)
#define ICE_AQ_RECIPE_ID_IS_ROOT BIT(7)
#define ICE_AQ_SW_ID_LKUP_IDX 0
u8 lkup_indx[5];
+#define ICE_AQ_RECIPE_LKUP_DATA_S 0
+#define ICE_AQ_RECIPE_LKUP_DATA_M (0x3F << ICE_AQ_RECIPE_LKUP_DATA_S)
#define ICE_AQ_RECIPE_LKUP_IGNORE BIT(7)
#define ICE_AQ_SW_ID_LKUP_MASK 0x00FF
__le16 mask[5];
@@ -502,15 +554,25 @@ struct ice_aqc_recipe_content {
u8 rsvd0[3];
u8 act_ctrl_join_priority;
u8 act_ctrl_fwd_priority;
+#define ICE_AQ_RECIPE_FWD_PRIORITY_S 0
+#define ICE_AQ_RECIPE_FWD_PRIORITY_M (0xF << ICE_AQ_RECIPE_FWD_PRIORITY_S)
u8 act_ctrl;
+#define ICE_AQ_RECIPE_ACT_NEED_PASS_L2 BIT(0)
+#define ICE_AQ_RECIPE_ACT_ALLOW_PASS_L2 BIT(1)
#define ICE_AQ_RECIPE_ACT_INV_ACT BIT(2)
+#define ICE_AQ_RECIPE_ACT_PRUNE_INDX_S 4
+#define ICE_AQ_RECIPE_ACT_PRUNE_INDX_M (0x3 << ICE_AQ_RECIPE_ACT_PRUNE_INDX_S)
u8 rsvd1;
__le32 dflt_act;
+#define ICE_AQ_RECIPE_DFLT_ACT_S 0
+#define ICE_AQ_RECIPE_DFLT_ACT_M (0x7FFFF << ICE_AQ_RECIPE_DFLT_ACT_S)
+#define ICE_AQ_RECIPE_DFLT_ACT_VALID BIT(31)
};
struct ice_aqc_recipe_data_elem {
u8 recipe_indx;
u8 resp_bits;
+#define ICE_AQ_RECIPE_WAS_UPDATED BIT(0)
u8 rsvd0[2];
u8 recipe_bitmap[8];
u8 rsvd1[4];
@@ -1883,7 +1945,7 @@ struct ice_aqc_get_clear_fw_log {
};
/* Download Package (indirect 0x0C40) */
-/* Also used for Update Package (indirect 0x0C42) */
+/* Also used for Update Package (indirect 0x0C41 and 0x0C42) */
struct ice_aqc_download_pkg {
u8 flags;
#define ICE_AQC_DOWNLOAD_PKG_LAST_BUF 0x01
@@ -2009,6 +2071,7 @@ struct ice_aq_desc {
struct ice_aqc_sff_eeprom read_write_sff_param;
struct ice_aqc_set_port_id_led set_port_id_led;
struct ice_aqc_get_sw_cfg get_sw_conf;
+ struct ice_aqc_set_port_params set_port_params;
struct ice_aqc_sw_rules sw_rules;
struct ice_aqc_add_get_recipe add_get_recipe;
struct ice_aqc_recipe_to_profile recipe_to_profile;
@@ -2110,10 +2173,13 @@ enum ice_adminq_opc {
/* internal switch commands */
ice_aqc_opc_get_sw_cfg = 0x0200,
+ ice_aqc_opc_set_port_params = 0x0203,
/* Alloc/Free/Get Resources */
ice_aqc_opc_alloc_res = 0x0208,
ice_aqc_opc_free_res = 0x0209,
+ ice_aqc_opc_set_vlan_mode_parameters = 0x020C,
+ ice_aqc_opc_get_vlan_mode_parameters = 0x020D,
/* VSI commands */
ice_aqc_opc_add_vsi = 0x0210,
@@ -2204,6 +2270,7 @@ enum ice_adminq_opc {
/* package commands */
ice_aqc_opc_download_pkg = 0x0C40,
+ ice_aqc_opc_upload_section = 0x0C41,
ice_aqc_opc_update_pkg = 0x0C42,
ice_aqc_opc_get_pkg_info_list = 0x0C43,
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 1a5ece3bce79..2360e6abdb1e 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -5,6 +5,7 @@
#include "ice_base.h"
#include "ice_lib.h"
#include "ice_dcb_lib.h"
+#include "ice_virtchnl_pf.h"
static bool ice_alloc_rx_buf_zc(struct ice_rx_ring *rx_ring)
{
@@ -418,8 +419,22 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
*/
rlan_ctx.crcstrip = 1;
- /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
- rlan_ctx.l2tsel = 1;
+ /* L2TSEL flag defines the reported L2 Tags in the receive descriptor
+ * and it needs to remain 1 for non-DVM capable configurations to not
+ * break backward compatibility for VF drivers. Setting this field to 0
+ * will cause the single/outer VLAN tag to be stripped to the L2TAG2_2ND
+ * field in the Rx descriptor. Setting it to 1 allows the VLAN tag to
+ * be stripped in L2TAG1 of the Rx descriptor, which is where VFs will
+ * check for the tag
+ */
+ if (ice_is_dvm_ena(hw))
+ if (vsi->type == ICE_VSI_VF &&
+ ice_vf_is_port_vlan_ena(&vsi->back->vf[vsi->vf_id]))
+ rlan_ctx.l2tsel = 1;
+ else
+ rlan_ctx.l2tsel = 0;
+ else
+ rlan_ctx.l2tsel = 1;
rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index a6d7d3eff186..c57e5fc41cf8 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1518,16 +1518,27 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf,
/* When a package download is in process (i.e. when the firmware's
* Global Configuration Lock resource is held), only the Download
- * Package, Get Version, Get Package Info List and Release Resource
- * (with resource ID set to Global Config Lock) AdminQ commands are
- * allowed; all others must block until the package download completes
- * and the Global Config Lock is released. See also
- * ice_acquire_global_cfg_lock().
+ * Package, Get Version, Get Package Info List, Upload Section,
+ * Update Package, Set Port Parameters, Get/Set VLAN Mode Parameters,
+ * Add Recipe, Set Recipes to Profile Association, Get Recipe, and Get
+ * Recipes to Profile Association, and Release Resource (with resource
+ * ID set to Global Config Lock) AdminQ commands are allowed; all others
+ * must block until the package download completes and the Global Config
+ * Lock is released. See also ice_acquire_global_cfg_lock().
*/
switch (le16_to_cpu(desc->opcode)) {
case ice_aqc_opc_download_pkg:
case ice_aqc_opc_get_pkg_info_list:
case ice_aqc_opc_get_ver:
+ case ice_aqc_opc_upload_section:
+ case ice_aqc_opc_update_pkg:
+ case ice_aqc_opc_set_port_params:
+ case ice_aqc_opc_get_vlan_mode_parameters:
+ case ice_aqc_opc_set_vlan_mode_parameters:
+ case ice_aqc_opc_add_recipe:
+ case ice_aqc_opc_recipe_to_profile:
+ case ice_aqc_opc_get_recipe:
+ case ice_aqc_opc_get_recipe_to_profile:
break;
case ice_aqc_opc_release_res:
if (le16_to_cpu(cmd->res_id) == ICE_AQC_RES_ID_GLBL_LOCK)
@@ -2737,6 +2748,34 @@ void ice_clear_pxe_mode(struct ice_hw *hw)
}
/**
+ * ice_aq_set_port_params - set physical port parameters.
+ * @pi: pointer to the port info struct
+ * @double_vlan: if set double VLAN is enabled
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set Physical port parameters (0x0203)
+ */
+int
+ice_aq_set_port_params(struct ice_port_info *pi, bool double_vlan,
+ struct ice_sq_cd *cd)
+
+{
+ struct ice_aqc_set_port_params *cmd;
+ struct ice_hw *hw = pi->hw;
+ struct ice_aq_desc desc;
+ u16 cmd_flags = 0;
+
+ cmd = &desc.params.set_port_params;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_params);
+ if (double_vlan)
+ cmd_flags |= ICE_AQC_SET_P_PARAMS_DOUBLE_VLAN_ENA;
+ cmd->cmd_flags = cpu_to_le16(cmd_flags);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
* ice_get_link_speed_based_on_phy_type - returns link speed
* @phy_type_low: lower part of phy_type
* @phy_type_high: higher part of phy_type
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 1c57097ddf0b..d28749edd92f 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -85,6 +85,9 @@ int
ice_aq_send_driver_ver(struct ice_hw *hw, struct ice_driver_ver *dv,
struct ice_sq_cd *cd);
int
+ice_aq_set_port_params(struct ice_port_info *pi, bool double_vlan,
+ struct ice_sq_cd *cd);
+int
ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
struct ice_aqc_get_phy_caps_data *caps,
struct ice_sq_cd *cd);
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index b94d8daeaa58..add90e75f05c 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -916,7 +916,8 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring,
return;
/* Insert 802.1p priority into VLAN header */
- if ((first->tx_flags & ICE_TX_FLAGS_HW_VLAN) ||
+ if ((first->tx_flags & ICE_TX_FLAGS_HW_VLAN ||
+ first->tx_flags & ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN) ||
skb->priority != TC_PRIO_CONTROL) {
first->tx_flags &= ~ICE_TX_FLAGS_VLAN_PR_M;
/* Mask the lower 3 bits to set the 802.1p priority */
@@ -925,7 +926,10 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring,
/* if this is not already set it means a VLAN 0 + priority needs
* to be offloaded
*/
- first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
+ if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2)
+ first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN;
+ else
+ first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
index 864692b157b6..e1cb6682eee2 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -115,9 +115,12 @@ static int ice_eswitch_setup_env(struct ice_pf *pf)
struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi;
struct net_device *uplink_netdev = uplink_vsi->netdev;
struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+ struct ice_vsi_vlan_ops *vlan_ops;
bool rule_added = false;
- ice_vsi_manage_vlan_stripping(ctrl_vsi, false);
+ vlan_ops = ice_get_compat_vsi_vlan_ops(ctrl_vsi);
+ if (vlan_ops->dis_stripping(ctrl_vsi))
+ return -ENODEV;
ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx);
@@ -126,7 +129,7 @@ static int ice_eswitch_setup_env(struct ice_pf *pf)
__dev_mc_unsync(uplink_netdev, NULL);
netif_addr_unlock_bh(uplink_netdev);
- if (ice_vsi_add_vlan(uplink_vsi, 0, ICE_FWD_TO_VSI))
+ if (ice_vsi_add_vlan_zero(uplink_vsi))
goto err_def_rx;
if (!ice_is_dflt_vsi_in_use(uplink_vsi->vsw)) {
@@ -230,7 +233,7 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf)
goto err;
}
- if (ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI)) {
+ if (ice_vsi_add_vlan_zero(vsi)) {
ice_fltr_add_mac_and_broadcast(vsi,
vf->hw_lan_addr.addr,
ICE_FWD_TO_VSI);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index e2e3ef7fba7f..a3492754d0d3 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -164,6 +164,7 @@ static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
ICE_PRIV_FLAG("vf-true-promisc-support",
ICE_FLAG_VF_TRUE_PROMISC_ENA),
ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF),
+ ICE_PRIV_FLAG("vf-vlan-pruning", ICE_FLAG_VF_VLAN_PRUNING),
ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
};
@@ -1295,6 +1296,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
change_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags);
ret = -EAGAIN;
}
+
+ if (test_bit(ICE_FLAG_VF_VLAN_PRUNING, change_flags) &&
+ pf->num_alloc_vfs) {
+ dev_err(dev, "vf-vlan-pruning: VLAN pruning cannot be changed while VFs are active.\n");
+ /* toggle bit back to previous state */
+ change_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags);
+ ret = -EOPNOTSUPP;
+ }
ethtool_exit:
clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
return ret;
@@ -2803,6 +2812,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
/* clone ring and setup updated count */
xdp_rings[i] = *vsi->xdp_rings[i];
xdp_rings[i].count = new_tx_cnt;
+ xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
+ xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
xdp_rings[i].desc = NULL;
xdp_rings[i].tx_buf = NULL;
err = ice_setup_tx_ring(&xdp_rings[i]);
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index 4deb2c9446ec..38fe0a7e6975 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -5,9 +5,17 @@
#include "ice_flex_pipe.h"
#include "ice_flow.h"
+/* For supporting double VLAN mode, it is necessary to enable or disable certain
+ * boost tcam entries. The metadata labels names that match the following
+ * prefixes will be saved to allow enabling double VLAN mode.
+ */
+#define ICE_DVM_PRE "BOOST_MAC_VLAN_DVM" /* enable these entries */
+#define ICE_SVM_PRE "BOOST_MAC_VLAN_SVM" /* disable these entries */
+
/* To support tunneling entries by PF, the package will append the PF number to
* the label; for example TNL_VXLAN_PF0, TNL_VXLAN_PF1, TNL_VXLAN_PF2, etc.
*/
+#define ICE_TNL_PRE "TNL_"
static const struct ice_tunnel_type_scan tnls[] = {
{ TNL_VXLAN, "TNL_VXLAN_PF" },
{ TNL_GENEVE, "TNL_GENEVE_PF" },
@@ -523,6 +531,55 @@ ice_enum_labels(struct ice_seg *ice_seg, u32 type, struct ice_pkg_enum *state,
}
/**
+ * ice_add_tunnel_hint
+ * @hw: pointer to the HW structure
+ * @label_name: label text
+ * @val: value of the tunnel port boost entry
+ */
+static void ice_add_tunnel_hint(struct ice_hw *hw, char *label_name, u16 val)
+{
+ if (hw->tnl.count < ICE_TUNNEL_MAX_ENTRIES) {
+ u16 i;
+
+ for (i = 0; tnls[i].type != TNL_LAST; i++) {
+ size_t len = strlen(tnls[i].label_prefix);
+
+ /* Look for matching label start, before continuing */
+ if (strncmp(label_name, tnls[i].label_prefix, len))
+ continue;
+
+ /* Make sure this label matches our PF. Note that the PF
+ * character ('0' - '7') will be located where our
+ * prefix string's null terminator is located.
+ */
+ if ((label_name[len] - '0') == hw->pf_id) {
+ hw->tnl.tbl[hw->tnl.count].type = tnls[i].type;
+ hw->tnl.tbl[hw->tnl.count].valid = false;
+ hw->tnl.tbl[hw->tnl.count].boost_addr = val;
+ hw->tnl.tbl[hw->tnl.count].port = 0;
+ hw->tnl.count++;
+ break;
+ }
+ }
+ }
+}
+
+/**
+ * ice_add_dvm_hint
+ * @hw: pointer to the HW structure
+ * @val: value of the boost entry
+ * @enable: true if entry needs to be enabled, or false if needs to be disabled
+ */
+static void ice_add_dvm_hint(struct ice_hw *hw, u16 val, bool enable)
+{
+ if (hw->dvm_upd.count < ICE_DVM_MAX_ENTRIES) {
+ hw->dvm_upd.tbl[hw->dvm_upd.count].boost_addr = val;
+ hw->dvm_upd.tbl[hw->dvm_upd.count].enable = enable;
+ hw->dvm_upd.count++;
+ }
+}
+
+/**
* ice_init_pkg_hints
* @hw: pointer to the HW structure
* @ice_seg: pointer to the segment of the package scan (non-NULL)
@@ -548,32 +605,23 @@ static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg)
label_name = ice_enum_labels(ice_seg, ICE_SID_LBL_RXPARSER_TMEM, &state,
&val);
- while (label_name && hw->tnl.count < ICE_TUNNEL_MAX_ENTRIES) {
- for (i = 0; tnls[i].type != TNL_LAST; i++) {
- size_t len = strlen(tnls[i].label_prefix);
+ while (label_name) {
+ if (!strncmp(label_name, ICE_TNL_PRE, strlen(ICE_TNL_PRE)))
+ /* check for a tunnel entry */
+ ice_add_tunnel_hint(hw, label_name, val);
- /* Look for matching label start, before continuing */
- if (strncmp(label_name, tnls[i].label_prefix, len))
- continue;
+ /* check for a dvm mode entry */
+ else if (!strncmp(label_name, ICE_DVM_PRE, strlen(ICE_DVM_PRE)))
+ ice_add_dvm_hint(hw, val, true);
- /* Make sure this label matches our PF. Note that the PF
- * character ('0' - '7') will be located where our
- * prefix string's null terminator is located.
- */
- if ((label_name[len] - '0') == hw->pf_id) {
- hw->tnl.tbl[hw->tnl.count].type = tnls[i].type;
- hw->tnl.tbl[hw->tnl.count].valid = false;
- hw->tnl.tbl[hw->tnl.count].boost_addr = val;
- hw->tnl.tbl[hw->tnl.count].port = 0;
- hw->tnl.count++;
- break;
- }
- }
+ /* check for a svm mode entry */
+ else if (!strncmp(label_name, ICE_SVM_PRE, strlen(ICE_SVM_PRE)))
+ ice_add_dvm_hint(hw, val, false);
label_name = ice_enum_labels(NULL, 0, &state, &val);
}
- /* Cache the appropriate boost TCAM entry pointers */
+ /* Cache the appropriate boost TCAM entry pointers for tunnels */
for (i = 0; i < hw->tnl.count; i++) {
ice_find_boost_entry(ice_seg, hw->tnl.tbl[i].boost_addr,
&hw->tnl.tbl[i].boost_entry);
@@ -583,6 +631,11 @@ static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg)
hw->tnl.valid_count[hw->tnl.tbl[i].type]++;
}
}
+
+ /* Cache the appropriate boost TCAM entry pointers for DVM and SVM */
+ for (i = 0; i < hw->dvm_upd.count; i++)
+ ice_find_boost_entry(ice_seg, hw->dvm_upd.tbl[i].boost_addr,
+ &hw->dvm_upd.tbl[i].boost_entry);
}
/* Key creation */
@@ -874,6 +927,27 @@ ice_aq_download_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
}
/**
+ * ice_aq_upload_section
+ * @hw: pointer to the hardware structure
+ * @pkg_buf: the package buffer which will receive the section
+ * @buf_size: the size of the package buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Upload Section (0x0C41)
+ */
+int
+ice_aq_upload_section(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
+ u16 buf_size, struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_upload_section);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ return ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd);
+}
+
+/**
* ice_aq_update_pkg
* @hw: pointer to the hardware structure
* @pkg_buf: the package cmd buffer
@@ -957,25 +1031,21 @@ ice_find_seg_in_pkg(struct ice_hw *hw, u32 seg_type,
}
/**
- * ice_update_pkg
+ * ice_update_pkg_no_lock
* @hw: pointer to the hardware structure
* @bufs: pointer to an array of buffers
* @count: the number of buffers in the array
- *
- * Obtains change lock and updates package.
*/
-static int ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+static int
+ice_update_pkg_no_lock(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
{
- u32 offset, info, i;
- int status;
-
- status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
- if (status)
- return status;
+ int status = 0;
+ u32 i;
for (i = 0; i < count; i++) {
struct ice_buf_hdr *bh = (struct ice_buf_hdr *)(bufs + i);
bool last = ((i + 1) == count);
+ u32 offset, info;
status = ice_aq_update_pkg(hw, bh, le16_to_cpu(bh->data_end),
last, &offset, &info, NULL);
@@ -987,6 +1057,27 @@ static int ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
}
}
+ return status;
+}
+
+/**
+ * ice_update_pkg
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ *
+ * Obtains change lock and updates package.
+ */
+static int ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+{
+ int status;
+
+ status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
+ if (status)
+ return status;
+
+ status = ice_update_pkg_no_lock(hw, bufs, count);
+
ice_release_change_lock(hw);
return status;
@@ -1080,6 +1171,13 @@ ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
break;
}
+ if (!status) {
+ status = ice_set_vlan_mode(hw);
+ if (status)
+ ice_debug(hw, ICE_DBG_PKG, "Failed to set VLAN mode: err %d\n",
+ status);
+ }
+
ice_release_global_cfg_lock(hw);
return state;
@@ -1117,6 +1215,7 @@ static enum ice_ddp_state
ice_download_pkg(struct ice_hw *hw, struct ice_seg *ice_seg)
{
struct ice_buf_table *ice_buf_tbl;
+ int status;
ice_debug(hw, ICE_DBG_PKG, "Segment format version: %d.%d.%d.%d\n",
ice_seg->hdr.seg_format_ver.major,
@@ -1133,8 +1232,12 @@ ice_download_pkg(struct ice_hw *hw, struct ice_seg *ice_seg)
ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n",
le32_to_cpu(ice_buf_tbl->buf_count));
- return ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array,
- le32_to_cpu(ice_buf_tbl->buf_count));
+ status = ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array,
+ le32_to_cpu(ice_buf_tbl->buf_count));
+
+ ice_post_pkg_dwnld_vlan_mode_cfg(hw);
+
+ return status;
}
/**
@@ -1897,7 +2000,7 @@ void ice_init_prof_result_bm(struct ice_hw *hw)
*
* Frees a package buffer
*/
-static void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld)
+void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld)
{
devm_kfree(ice_hw_to_dev(hw), bld);
}
@@ -1997,6 +2100,43 @@ ice_pkg_buf_alloc_section(struct ice_buf_build *bld, u32 type, u16 size)
}
/**
+ * ice_pkg_buf_alloc_single_section
+ * @hw: pointer to the HW structure
+ * @type: the section type value
+ * @size: the size of the section to reserve (in bytes)
+ * @section: returns pointer to the section
+ *
+ * Allocates a package buffer with a single section.
+ * Note: all package contents must be in Little Endian form.
+ */
+struct ice_buf_build *
+ice_pkg_buf_alloc_single_section(struct ice_hw *hw, u32 type, u16 size,
+ void **section)
+{
+ struct ice_buf_build *buf;
+
+ if (!section)
+ return NULL;
+
+ buf = ice_pkg_buf_alloc(hw);
+ if (!buf)
+ return NULL;
+
+ if (ice_pkg_buf_reserve_section(buf, 1))
+ goto ice_pkg_buf_alloc_single_section_err;
+
+ *section = ice_pkg_buf_alloc_section(buf, type, size);
+ if (!*section)
+ goto ice_pkg_buf_alloc_single_section_err;
+
+ return buf;
+
+ice_pkg_buf_alloc_single_section_err:
+ ice_pkg_buf_free(hw, buf);
+ return NULL;
+}
+
+/**
* ice_pkg_buf_get_active_sections
* @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
*
@@ -2023,7 +2163,7 @@ static u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld)
*
* Return a pointer to the buffer's header
*/
-static struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld)
+struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld)
{
if (!bld)
return NULL;
@@ -2060,6 +2200,89 @@ ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port,
}
/**
+ * ice_upd_dvm_boost_entry
+ * @hw: pointer to the HW structure
+ * @entry: pointer to double vlan boost entry info
+ */
+static int
+ice_upd_dvm_boost_entry(struct ice_hw *hw, struct ice_dvm_entry *entry)
+{
+ struct ice_boost_tcam_section *sect_rx, *sect_tx;
+ int status = -ENOSPC;
+ struct ice_buf_build *bld;
+ u8 val, dc, nm;
+
+ bld = ice_pkg_buf_alloc(hw);
+ if (!bld)
+ return -ENOMEM;
+
+ /* allocate 2 sections, one for Rx parser, one for Tx parser */
+ if (ice_pkg_buf_reserve_section(bld, 2))
+ goto ice_upd_dvm_boost_entry_err;
+
+ sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM,
+ struct_size(sect_rx, tcam, 1));
+ if (!sect_rx)
+ goto ice_upd_dvm_boost_entry_err;
+ sect_rx->count = cpu_to_le16(1);
+
+ sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM,
+ struct_size(sect_tx, tcam, 1));
+ if (!sect_tx)
+ goto ice_upd_dvm_boost_entry_err;
+ sect_tx->count = cpu_to_le16(1);
+
+ /* copy original boost entry to update package buffer */
+ memcpy(sect_rx->tcam, entry->boost_entry, sizeof(*sect_rx->tcam));
+
+ /* re-write the don't care and never match bits accordingly */
+ if (entry->enable) {
+ /* all bits are don't care */
+ val = 0x00;
+ dc = 0xFF;
+ nm = 0x00;
+ } else {
+ /* disable, one never match bit, the rest are don't care */
+ val = 0x00;
+ dc = 0xF7;
+ nm = 0x08;
+ }
+
+ ice_set_key((u8 *)&sect_rx->tcam[0].key, sizeof(sect_rx->tcam[0].key),
+ &val, NULL, &dc, &nm, 0, sizeof(u8));
+
+ /* exact copy of entry to Tx section entry */
+ memcpy(sect_tx->tcam, sect_rx->tcam, sizeof(*sect_tx->tcam));
+
+ status = ice_update_pkg_no_lock(hw, ice_pkg_buf(bld), 1);
+
+ice_upd_dvm_boost_entry_err:
+ ice_pkg_buf_free(hw, bld);
+
+ return status;
+}
+
+/**
+ * ice_set_dvm_boost_entries
+ * @hw: pointer to the HW structure
+ *
+ * Enable double vlan by updating the appropriate boost tcam entries.
+ */
+int ice_set_dvm_boost_entries(struct ice_hw *hw)
+{
+ int status;
+ u16 i;
+
+ for (i = 0; i < hw->dvm_upd.count; i++) {
+ status = ice_upd_dvm_boost_entry(hw, &hw->dvm_upd.tbl[i]);
+ if (status)
+ return status;
+ }
+
+ return 0;
+}
+
+/**
* ice_tunnel_idx_to_entry - convert linear index to the sparse one
* @hw: pointer to the HW structure
* @type: type of tunnel
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
index 6cbc29bcb02f..2fd5312494c7 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
@@ -89,6 +89,12 @@ ice_init_prof_result_bm(struct ice_hw *hw);
int
ice_get_sw_fv_list(struct ice_hw *hw, u8 *prot_ids, u16 ids_cnt,
unsigned long *bm, struct list_head *fv_list);
+int
+ice_pkg_buf_unreserve_section(struct ice_buf_build *bld, u16 count);
+u16 ice_pkg_buf_get_free_space(struct ice_buf_build *bld);
+int
+ice_aq_upload_section(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
+ u16 buf_size, struct ice_sq_cd *cd);
bool
ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port,
enum ice_tunnel_type type);
@@ -96,6 +102,7 @@ int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table,
unsigned int idx, struct udp_tunnel_info *ti);
int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
unsigned int idx, struct udp_tunnel_info *ti);
+int ice_set_dvm_boost_entries(struct ice_hw *hw);
/* Rx parser PTYPE functions */
bool ice_hw_ptype_ena(struct ice_hw *hw, u16 ptype);
@@ -119,4 +126,10 @@ void ice_fill_blk_tbls(struct ice_hw *hw);
void ice_clear_hw_tbls(struct ice_hw *hw);
void ice_free_hw_tbls(struct ice_hw *hw);
int ice_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 id);
+struct ice_buf_build *
+ice_pkg_buf_alloc_single_section(struct ice_hw *hw, u32 type, u16 size,
+ void **section);
+struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld);
+void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld);
+
#endif /* _ICE_FLEX_PIPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h
index fc087e0b5292..5735e9542a49 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h
@@ -162,6 +162,7 @@ struct ice_meta_sect {
#define ICE_SID_RXPARSER_MARKER_PTYPE 55
#define ICE_SID_RXPARSER_BOOST_TCAM 56
+#define ICE_SID_RXPARSER_METADATA_INIT 58
#define ICE_SID_TXPARSER_BOOST_TCAM 66
#define ICE_SID_XLT0_PE 80
@@ -442,6 +443,19 @@ struct ice_tunnel_table {
u16 valid_count[__TNL_TYPE_CNT];
};
+struct ice_dvm_entry {
+ u16 boost_addr;
+ u16 enable;
+ struct ice_boost_tcam_entry *boost_entry;
+};
+
+#define ICE_DVM_MAX_ENTRIES 48
+
+struct ice_dvm_table {
+ struct ice_dvm_entry tbl[ICE_DVM_MAX_ENTRIES];
+ u16 count;
+};
+
struct ice_pkg_es {
__le16 count;
__le16 offset;
@@ -662,4 +676,30 @@ enum ice_prof_type {
ICE_PROF_TUN_ALL = 0x6,
ICE_PROF_ALL = 0xFF,
};
+
+/* Number of bits/bytes contained in meta init entry. Note, this should be a
+ * multiple of 32 bits.
+ */
+#define ICE_META_INIT_BITS 192
+#define ICE_META_INIT_DW_CNT (ICE_META_INIT_BITS / (sizeof(__le32) * \
+ BITS_PER_BYTE))
+
+/* The meta init Flag field starts at this bit */
+#define ICE_META_FLAGS_ST 123
+
+/* The entry and bit to check for Double VLAN Mode (DVM) support */
+#define ICE_META_VLAN_MODE_ENTRY 0
+#define ICE_META_FLAG_VLAN_MODE 60
+#define ICE_META_VLAN_MODE_BIT (ICE_META_FLAGS_ST + \
+ ICE_META_FLAG_VLAN_MODE)
+
+struct ice_meta_init_entry {
+ __le32 bm[ICE_META_INIT_DW_CNT];
+};
+
+struct ice_meta_init_section {
+ __le16 count;
+ __le16 offset;
+ struct ice_meta_init_entry entry;
+};
#endif /* _ICE_FLEX_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c
index c29177c6bb9d..af57eb114966 100644
--- a/drivers/net/ethernet/intel/ice/ice_fltr.c
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.c
@@ -203,21 +203,22 @@ ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list,
* ice_fltr_add_vlan_to_list - add VLAN filter info to exsisting list
* @vsi: pointer to VSI struct
* @list: list to add filter info to
- * @vlan_id: VLAN ID to add
- * @action: filter action
+ * @vlan: VLAN filter details
*/
static int
ice_fltr_add_vlan_to_list(struct ice_vsi *vsi, struct list_head *list,
- u16 vlan_id, enum ice_sw_fwd_act_type action)
+ struct ice_vlan *vlan)
{
struct ice_fltr_info info = { 0 };
info.flag = ICE_FLTR_TX;
info.src_id = ICE_SRC_ID_VSI;
info.lkup_type = ICE_SW_LKUP_VLAN;
- info.fltr_act = action;
+ info.fltr_act = ICE_FWD_TO_VSI;
info.vsi_handle = vsi->idx;
- info.l_data.vlan.vlan_id = vlan_id;
+ info.l_data.vlan.vlan_id = vlan->vid;
+ info.l_data.vlan.tpid = vlan->tpid;
+ info.l_data.vlan.tpid_valid = true;
return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
list);
@@ -310,19 +311,17 @@ ice_fltr_prepare_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
/**
* ice_fltr_prepare_vlan - add or remove VLAN filter
* @vsi: pointer to VSI struct
- * @vlan_id: VLAN ID to add
- * @action: action to be performed on filter match
+ * @vlan: VLAN filter details
* @vlan_action: pointer to add or remove VLAN function
*/
static int
-ice_fltr_prepare_vlan(struct ice_vsi *vsi, u16 vlan_id,
- enum ice_sw_fwd_act_type action,
+ice_fltr_prepare_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan,
int (*vlan_action)(struct ice_vsi *, struct list_head *))
{
LIST_HEAD(tmp_list);
int result;
- if (ice_fltr_add_vlan_to_list(vsi, &tmp_list, vlan_id, action))
+ if (ice_fltr_add_vlan_to_list(vsi, &tmp_list, vlan))
return -ENOMEM;
result = vlan_action(vsi, &tmp_list);
@@ -395,27 +394,21 @@ int ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac,
/**
* ice_fltr_add_vlan - add single VLAN filter
* @vsi: pointer to VSI struct
- * @vlan_id: VLAN ID to add
- * @action: action to be performed on filter match
+ * @vlan: VLAN filter details
*/
-int ice_fltr_add_vlan(struct ice_vsi *vsi, u16 vlan_id,
- enum ice_sw_fwd_act_type action)
+int ice_fltr_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
{
- return ice_fltr_prepare_vlan(vsi, vlan_id, action,
- ice_fltr_add_vlan_list);
+ return ice_fltr_prepare_vlan(vsi, vlan, ice_fltr_add_vlan_list);
}
/**
* ice_fltr_remove_vlan - remove VLAN filter
* @vsi: pointer to VSI struct
- * @vlan_id: filter VLAN to remove
- * @action: action to remove
+ * @vlan: VLAN filter details
*/
-int ice_fltr_remove_vlan(struct ice_vsi *vsi, u16 vlan_id,
- enum ice_sw_fwd_act_type action)
+int ice_fltr_remove_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
{
- return ice_fltr_prepare_vlan(vsi, vlan_id, action,
- ice_fltr_remove_vlan_list);
+ return ice_fltr_prepare_vlan(vsi, vlan, ice_fltr_remove_vlan_list);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h
index 3eb42479175f..0f3dbc308eec 100644
--- a/drivers/net/ethernet/intel/ice/ice_fltr.h
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.h
@@ -4,6 +4,8 @@
#ifndef _ICE_FLTR_H_
#define _ICE_FLTR_H_
+#include "ice_vlan.h"
+
void ice_fltr_free_list(struct device *dev, struct list_head *h);
int
ice_fltr_set_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi,
@@ -32,12 +34,8 @@ ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac,
enum ice_sw_fwd_act_type action);
int ice_fltr_remove_mac_list(struct ice_vsi *vsi, struct list_head *list);
-int
-ice_fltr_add_vlan(struct ice_vsi *vsi, u16 vid,
- enum ice_sw_fwd_act_type action);
-int
-ice_fltr_remove_vlan(struct ice_vsi *vsi, u16 vid,
- enum ice_sw_fwd_act_type action);
+int ice_fltr_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+int ice_fltr_remove_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
int
ice_fltr_add_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
index fc3580167e7b..263a2e7577a2 100644
--- a/drivers/net/ethernet/intel/ice/ice_idc.c
+++ b/drivers/net/ethernet/intel/ice/ice_idc.c
@@ -227,6 +227,11 @@ void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos)
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
qos->tc_info[i].rel_bw = dcbx_cfg->etscfg.tcbwtable[i];
+
+ qos->pfc_mode = dcbx_cfg->pfc_mode;
+ if (qos->pfc_mode == IIDC_DSCP_PFC_MODE)
+ for (i = 0; i < IIDC_MAX_DSCP_MAPPING; i++)
+ qos->dscp_map[i] = dcbx_cfg->dscp_map[i];
}
EXPORT_SYMBOL_GPL(ice_get_qos_params);
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 85a612838a89..b3baf7c3f910 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -424,6 +424,8 @@ enum ice_rx_flex_desc_status_error_0_bits {
enum ice_rx_flex_desc_status_error_1_bits {
/* Note: These are predefined bit offsets */
ICE_RX_FLEX_DESC_STATUS1_NAT_S = 4,
+ /* [10:5] reserved */
+ ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S = 11,
ICE_RX_FLEX_DESC_STATUS1_LAST /* this entry must be last!!! */
};
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 0c187cf04fcf..f23917d6a495 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -8,6 +8,7 @@
#include "ice_fltr.h"
#include "ice_dcb_lib.h"
#include "ice_devlink.h"
+#include "ice_vsi_vlan_ops.h"
/**
* ice_vsi_type_str - maps VSI type enum to string equivalents
@@ -838,11 +839,12 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
/**
* ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI
+ * @hw: HW structure used to determine the VLAN mode of the device
* @ctxt: the VSI context being set
*
* This initializes a default VSI context for all sections except the Queues.
*/
-static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt)
+static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt)
{
u32 table = 0;
@@ -853,13 +855,27 @@ static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt)
ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
/* Traffic from VSI can be sent to LAN */
ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
- /* By default bits 3 and 4 in vlan_flags are 0's which results in legacy
- * behavior (show VLAN, DEI, and UP) in descriptor. Also, allow all
- * packets untagged/tagged.
+ /* allow all untagged/tagged packets by default on Tx */
+ ctxt->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL &
+ ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >>
+ ICE_AQ_VSI_INNER_VLAN_TX_MODE_S);
+ /* SVM - by default bits 3 and 4 in inner_vlan_flags are 0's which
+ * results in legacy behavior (show VLAN, DEI, and UP) in descriptor.
+ *
+ * DVM - leave inner VLAN in packet by default
*/
- ctxt->info.vlan_flags = ((ICE_AQ_VSI_VLAN_MODE_ALL &
- ICE_AQ_VSI_VLAN_MODE_M) >>
- ICE_AQ_VSI_VLAN_MODE_S);
+ if (ice_is_dvm_ena(hw)) {
+ ctxt->info.inner_vlan_flags |=
+ ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
+ ctxt->info.outer_vlan_flags =
+ (ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL <<
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) &
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M;
+ ctxt->info.outer_vlan_flags |=
+ (ICE_AQ_VSI_OUTER_TAG_VLAN_8100 <<
+ ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
+ ICE_AQ_VSI_OUTER_TAG_TYPE_M;
+ }
/* Have 1:1 UP mapping for both ingress/egress tables */
table |= ICE_UP_TABLE_TRANSLATE(0, 0);
table |= ICE_UP_TABLE_TRANSLATE(1, 1);
@@ -1136,7 +1152,7 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
}
- ice_set_dflt_vsi_ctx(ctxt);
+ ice_set_dflt_vsi_ctx(hw, ctxt);
if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
ice_set_fd_vsi_ctx(ctxt, vsi);
/* if the switch is in VEB mode, allow VSI loopback */
@@ -1168,25 +1184,6 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
}
- /* enable/disable MAC and VLAN anti-spoof when spoofchk is on/off
- * respectively
- */
- if (vsi->type == ICE_VSI_VF) {
- ctxt->info.valid_sections |=
- cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
- if (pf->vf[vsi->vf_id].spoofchk) {
- ctxt->info.sec_flags |=
- ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
- (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
- ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
- } else {
- ctxt->info.sec_flags &=
- ~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
- (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
- ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S));
- }
- }
-
/* Allow control frames out of main VSI */
if (vsi->type == ICE_VSI_PF) {
ctxt->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
@@ -1431,6 +1428,7 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi)
*/
static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
{
+ bool dvm_ena = ice_is_dvm_ena(&vsi->back->hw);
struct ice_pf *pf = vsi->back;
struct device *dev;
u16 i;
@@ -1452,6 +1450,10 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
ring->tx_tstamps = &pf->ptp.port.tx;
ring->dev = dev;
ring->count = vsi->num_tx_desc;
+ if (dvm_ena)
+ ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG2;
+ else
+ ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG1;
WRITE_ONCE(vsi->tx_rings[i], ring);
}
@@ -1757,62 +1759,6 @@ void ice_update_eth_stats(struct ice_vsi *vsi)
}
/**
- * ice_vsi_add_vlan - Add VSI membership for given VLAN
- * @vsi: the VSI being configured
- * @vid: VLAN ID to be added
- * @action: filter action to be performed on match
- */
-int
-ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid, enum ice_sw_fwd_act_type action)
-{
- struct ice_pf *pf = vsi->back;
- struct device *dev;
- int err = 0;
-
- dev = ice_pf_to_dev(pf);
-
- if (!ice_fltr_add_vlan(vsi, vid, action)) {
- vsi->num_vlan++;
- } else {
- err = -ENODEV;
- dev_err(dev, "Failure Adding VLAN %d on VSI %i\n", vid,
- vsi->vsi_num);
- }
-
- return err;
-}
-
-/**
- * ice_vsi_kill_vlan - Remove VSI membership for a given VLAN
- * @vsi: the VSI being configured
- * @vid: VLAN ID to be removed
- *
- * Returns 0 on success and negative on failure
- */
-int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
-{
- struct ice_pf *pf = vsi->back;
- struct device *dev;
- int err;
-
- dev = ice_pf_to_dev(pf);
-
- err = ice_fltr_remove_vlan(vsi, vid, ICE_FWD_TO_VSI);
- if (!err) {
- vsi->num_vlan--;
- } else if (err == -ENOENT) {
- dev_dbg(dev, "Failed to remove VLAN %d on VSI %i, it does not exist, error: %d\n",
- vid, vsi->vsi_num, err);
- err = 0;
- } else {
- dev_err(dev, "Error removing VLAN %d on vsi %i error: %d\n",
- vid, vsi->vsi_num, err);
- }
-
- return err;
-}
-
-/**
* ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
* @vsi: VSI
*/
@@ -2140,95 +2086,6 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi)
}
/**
- * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx
- * @vsi: the VSI being changed
- */
-int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
-{
- struct ice_hw *hw = &vsi->back->hw;
- struct ice_vsi_ctx *ctxt;
- int ret;
-
- ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
- if (!ctxt)
- return -ENOMEM;
-
- /* Here we are configuring the VSI to let the driver add VLAN tags by
- * setting vlan_flags to ICE_AQ_VSI_VLAN_MODE_ALL. The actual VLAN tag
- * insertion happens in the Tx hot path, in ice_tx_map.
- */
- ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL;
-
- /* Preserve existing VLAN strip setting */
- ctxt->info.vlan_flags |= (vsi->info.vlan_flags &
- ICE_AQ_VSI_VLAN_EMOD_M);
-
- ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
-
- ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
- if (ret) {
- dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %d aq_err %s\n",
- ret, ice_aq_str(hw->adminq.sq_last_status));
- goto out;
- }
-
- vsi->info.vlan_flags = ctxt->info.vlan_flags;
-out:
- kfree(ctxt);
- return ret;
-}
-
-/**
- * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx
- * @vsi: the VSI being changed
- * @ena: boolean value indicating if this is a enable or disable request
- */
-int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
-{
- struct ice_hw *hw = &vsi->back->hw;
- struct ice_vsi_ctx *ctxt;
- int ret;
-
- /* do not allow modifying VLAN stripping when a port VLAN is configured
- * on this VSI
- */
- if (vsi->info.pvid)
- return 0;
-
- ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
- if (!ctxt)
- return -ENOMEM;
-
- /* Here we are configuring what the VSI should do with the VLAN tag in
- * the Rx packet. We can either leave the tag in the packet or put it in
- * the Rx descriptor.
- */
- if (ena)
- /* Strip VLAN tag from Rx packet and put it in the desc */
- ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_STR_BOTH;
- else
- /* Disable stripping. Leave tag in packet */
- ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING;
-
- /* Allow all packets untagged/tagged */
- ctxt->info.vlan_flags |= ICE_AQ_VSI_VLAN_MODE_ALL;
-
- ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
-
- ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
- if (ret) {
- dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %d aq_err %s\n",
- ena, ret, ice_aq_str(hw->adminq.sq_last_status));
- goto out;
- }
-
- vsi->info.vlan_flags = ctxt->info.vlan_flags;
-out:
- kfree(ctxt);
- return ret;
-}
-
-/**
* ice_vsi_start_all_rx_rings - start/enable all of a VSI's Rx rings
* @vsi: the VSI whose rings are to be enabled
*
@@ -2321,61 +2178,6 @@ bool ice_vsi_is_vlan_pruning_ena(struct ice_vsi *vsi)
return (vsi->info.sw_flags2 & ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA);
}
-/**
- * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
- * @vsi: VSI to enable or disable VLAN pruning on
- * @ena: set to true to enable VLAN pruning and false to disable it
- *
- * returns 0 if VSI is updated, negative otherwise
- */
-int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena)
-{
- struct ice_vsi_ctx *ctxt;
- struct ice_pf *pf;
- int status;
-
- if (!vsi)
- return -EINVAL;
-
- /* Don't enable VLAN pruning if the netdev is currently in promiscuous
- * mode. VLAN pruning will be enabled when the interface exits
- * promiscuous mode if any VLAN filters are active.
- */
- if (vsi->netdev && vsi->netdev->flags & IFF_PROMISC && ena)
- return 0;
-
- pf = vsi->back;
- ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
- if (!ctxt)
- return -ENOMEM;
-
- ctxt->info = vsi->info;
-
- if (ena)
- ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
- else
- ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
-
- ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
-
- status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL);
- if (status) {
- netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %d, aq_err = %s\n",
- ena ? "En" : "Dis", vsi->idx, vsi->vsi_num,
- status, ice_aq_str(pf->hw.adminq.sq_last_status));
- goto err_out;
- }
-
- vsi->info.sw_flags2 = ctxt->info.sw_flags2;
-
- kfree(ctxt);
- return 0;
-
-err_out:
- kfree(ctxt);
- return -EIO;
-}
-
static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
{
if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) {
@@ -2655,6 +2457,8 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
if (ret)
goto unroll_get_qs;
+ ice_vsi_init_vlan_ops(vsi);
+
switch (vsi->type) {
case ICE_VSI_CTRL:
case ICE_VSI_SWITCHDEV_CTRL:
@@ -2675,17 +2479,6 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
if (ret)
goto unroll_vector_base;
- /* Always add VLAN ID 0 switch rule by default. This is needed
- * in order to allow all untagged and 0 tagged priority traffic
- * if Rx VLAN pruning is enabled. Also there are cases where we
- * don't get the call to add VLAN 0 via ice_vlan_rx_add_vid()
- * so this handles those cases (i.e. adding the PF to a bridge
- * without the 8021q module loaded).
- */
- ret = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI);
- if (ret)
- goto unroll_clear_rings;
-
ice_vsi_map_rings_to_vectors(vsi);
/* ICE_VSI_CTRL does not need RSS so skip RSS processing */
@@ -3318,6 +3111,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
if (vtype == ICE_VSI_VF)
vf = &pf->vf[vsi->vf_id];
+ ice_vsi_init_vlan_ops(vsi);
+
coalesce = kcalloc(vsi->num_q_vectors,
sizeof(struct ice_coalesce_stored), GFP_KERNEL);
if (!coalesce)
@@ -4131,6 +3926,115 @@ int ice_set_link(struct ice_vsi *vsi, bool ena)
}
/**
+ * ice_vsi_add_vlan_zero - add VLAN 0 filter(s) for this VSI
+ * @vsi: VSI used to add VLAN filters
+ *
+ * In Single VLAN Mode (SVM), single VLAN filters via ICE_SW_LKUP_VLAN are based
+ * on the inner VLAN ID, so the VLAN TPID (i.e. 0x8100 or 0x888a8) doesn't
+ * matter. In Double VLAN Mode (DVM), outer/single VLAN filters via
+ * ICE_SW_LKUP_VLAN are based on the outer/single VLAN ID + VLAN TPID.
+ *
+ * For both modes add a VLAN 0 + no VLAN TPID filter to handle untagged traffic
+ * when VLAN pruning is enabled. Also, this handles VLAN 0 priority tagged
+ * traffic in SVM, since the VLAN TPID isn't part of filtering.
+ *
+ * If DVM is enabled then an explicit VLAN 0 + VLAN TPID filter needs to be
+ * added to allow VLAN 0 priority tagged traffic in DVM, since the VLAN TPID is
+ * part of filtering.
+ */
+int ice_vsi_add_vlan_zero(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ struct ice_vlan vlan;
+ int err;
+
+ vlan = ICE_VLAN(0, 0, 0);
+ err = vlan_ops->add_vlan(vsi, &vlan);
+ if (err && err != -EEXIST)
+ return err;
+
+ /* in SVM both VLAN 0 filters are identical */
+ if (!ice_is_dvm_ena(&vsi->back->hw))
+ return 0;
+
+ vlan = ICE_VLAN(ETH_P_8021Q, 0, 0);
+ err = vlan_ops->add_vlan(vsi, &vlan);
+ if (err && err != -EEXIST)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_vsi_del_vlan_zero - delete VLAN 0 filter(s) for this VSI
+ * @vsi: VSI used to add VLAN filters
+ *
+ * Delete the VLAN 0 filters in the same manner that they were added in
+ * ice_vsi_add_vlan_zero.
+ */
+int ice_vsi_del_vlan_zero(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ struct ice_vlan vlan;
+ int err;
+
+ vlan = ICE_VLAN(0, 0, 0);
+ err = vlan_ops->del_vlan(vsi, &vlan);
+ if (err && err != -EEXIST)
+ return err;
+
+ /* in SVM both VLAN 0 filters are identical */
+ if (!ice_is_dvm_ena(&vsi->back->hw))
+ return 0;
+
+ vlan = ICE_VLAN(ETH_P_8021Q, 0, 0);
+ err = vlan_ops->del_vlan(vsi, &vlan);
+ if (err && err != -EEXIST)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_vsi_num_zero_vlans - get number of VLAN 0 filters based on VLAN mode
+ * @vsi: VSI used to get the VLAN mode
+ *
+ * If DVM is enabled then 2 VLAN 0 filters are added, else if SVM is enabled
+ * then 1 VLAN 0 filter is added. See ice_vsi_add_vlan_zero for more details.
+ */
+static u16 ice_vsi_num_zero_vlans(struct ice_vsi *vsi)
+{
+#define ICE_DVM_NUM_ZERO_VLAN_FLTRS 2
+#define ICE_SVM_NUM_ZERO_VLAN_FLTRS 1
+ /* no VLAN 0 filter is created when a port VLAN is active */
+ if (vsi->type == ICE_VSI_VF &&
+ ice_vf_is_port_vlan_ena(&vsi->back->vf[vsi->vf_id]))
+ return 0;
+ if (ice_is_dvm_ena(&vsi->back->hw))
+ return ICE_DVM_NUM_ZERO_VLAN_FLTRS;
+ else
+ return ICE_SVM_NUM_ZERO_VLAN_FLTRS;
+}
+
+/**
+ * ice_vsi_has_non_zero_vlans - check if VSI has any non-zero VLANs
+ * @vsi: VSI used to determine if any non-zero VLANs have been added
+ */
+bool ice_vsi_has_non_zero_vlans(struct ice_vsi *vsi)
+{
+ return (vsi->num_vlan > ice_vsi_num_zero_vlans(vsi));
+}
+
+/**
+ * ice_vsi_num_non_zero_vlans - get the number of non-zero VLANs for this VSI
+ * @vsi: VSI used to get the number of non-zero VLANs added
+ */
+u16 ice_vsi_num_non_zero_vlans(struct ice_vsi *vsi)
+{
+ return (vsi->num_vlan - ice_vsi_num_zero_vlans(vsi));
+}
+
+/**
* ice_is_feature_supported
* @pf: pointer to the struct ice_pf instance
* @f: feature enum to be checked
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index b2ed189527d6..133fc235141a 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -5,6 +5,7 @@
#define _ICE_LIB_H_
#include "ice.h"
+#include "ice_vlan.h"
const char *ice_vsi_type_str(enum ice_vsi_type vsi_type);
@@ -22,15 +23,6 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
void ice_vsi_cfg_msix(struct ice_vsi *vsi);
-int
-ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid, enum ice_sw_fwd_act_type action);
-
-int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid);
-
-int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi);
-
-int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena);
-
int ice_vsi_start_all_rx_rings(struct ice_vsi *vsi);
int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi);
@@ -45,8 +37,6 @@ int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
bool ice_vsi_is_vlan_pruning_ena(struct ice_vsi *vsi);
-int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena);
-
void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
int ice_set_link(struct ice_vsi *vsi, bool ena);
@@ -132,7 +122,10 @@ void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx);
void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx);
void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx);
-
+int ice_vsi_add_vlan_zero(struct ice_vsi *vsi);
+int ice_vsi_del_vlan_zero(struct ice_vsi *vsi);
+bool ice_vsi_has_non_zero_vlans(struct ice_vsi *vsi);
+u16 ice_vsi_num_non_zero_vlans(struct ice_vsi *vsi);
bool ice_is_feature_supported(struct ice_pf *pf, enum ice_feature f);
void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f);
void ice_init_feature_support(struct ice_pf *pf);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 17a9bb461dc3..ce90ebf4b853 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -21,6 +21,7 @@
#include "ice_trace.h"
#include "ice_eswitch.h"
#include "ice_tc_lib.h"
+#include "ice_vsi_vlan_ops.h"
#define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver"
static const char ice_driver_string[] = DRV_SUMMARY;
@@ -244,7 +245,7 @@ static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
if (vsi->type != ICE_VSI_PF)
return 0;
- if (vsi->num_vlan > 1)
+ if (ice_vsi_has_non_zero_vlans(vsi))
status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi, promisc_m);
else
status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0);
@@ -264,7 +265,7 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
if (vsi->type != ICE_VSI_PF)
return 0;
- if (vsi->num_vlan > 1)
+ if (ice_vsi_has_non_zero_vlans(vsi))
status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi, promisc_m);
else
status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0);
@@ -279,6 +280,7 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
*/
static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
{
+ struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
struct device *dev = ice_pf_to_dev(vsi->back);
struct net_device *netdev = vsi->netdev;
bool promisc_forced_on = false;
@@ -352,7 +354,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
/* check for changes in promiscuous modes */
if (changed_flags & IFF_ALLMULTI) {
if (vsi->current_netdev_flags & IFF_ALLMULTI) {
- if (vsi->num_vlan > 1)
+ if (ice_vsi_has_non_zero_vlans(vsi))
promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
else
promisc_m = ICE_MCAST_PROMISC_BITS;
@@ -366,7 +368,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
}
} else {
/* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
- if (vsi->num_vlan > 1)
+ if (ice_vsi_has_non_zero_vlans(vsi))
promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
else
promisc_m = ICE_MCAST_PROMISC_BITS;
@@ -396,7 +398,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
goto out_promisc;
}
err = 0;
- ice_cfg_vlan_pruning(vsi, false);
+ vlan_ops->dis_rx_filtering(vsi);
}
} else {
/* Clear Rx filter to remove traffic from wire */
@@ -409,8 +411,9 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
IFF_PROMISC;
goto out_promisc;
}
- if (vsi->num_vlan > 1)
- ice_cfg_vlan_pruning(vsi, true);
+ if (vsi->current_netdev_flags &
+ NETIF_F_HW_VLAN_CTAG_FILTER)
+ vlan_ops->ena_rx_filtering(vsi);
}
}
}
@@ -2498,10 +2501,10 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
xdp_ring->vsi = vsi;
xdp_ring->netdev = NULL;
- xdp_ring->next_dd = ICE_TX_THRESH - 1;
- xdp_ring->next_rs = ICE_TX_THRESH - 1;
xdp_ring->dev = dev;
xdp_ring->count = vsi->num_tx_desc;
+ xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1;
+ xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1;
WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
@@ -3233,6 +3236,7 @@ static void ice_set_ops(struct net_device *netdev)
static void ice_set_netdev_features(struct net_device *netdev)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ bool is_dvm_ena = ice_is_dvm_ena(&pf->hw);
netdev_features_t csumo_features;
netdev_features_t vlano_features;
netdev_features_t dflt_features;
@@ -3259,6 +3263,10 @@ static void ice_set_netdev_features(struct net_device *netdev)
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX;
+ /* Enable CTAG/STAG filtering by default in Double VLAN Mode (DVM) */
+ if (is_dvm_ena)
+ vlano_features |= NETIF_F_HW_VLAN_STAG_FILTER;
+
tso_features = NETIF_F_TSO |
NETIF_F_TSO_ECN |
NETIF_F_TSO6 |
@@ -3290,6 +3298,15 @@ static void ice_set_netdev_features(struct net_device *netdev)
tso_features;
netdev->vlan_features |= dflt_features | csumo_features |
tso_features;
+
+ /* advertise support but don't enable by default since only one type of
+ * VLAN offload can be enabled at a time (i.e. CTAG or STAG). When one
+ * type turns on the other has to be turned off. This is enforced by the
+ * ice_fix_features() ndo callback.
+ */
+ if (is_dvm_ena)
+ netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX |
+ NETIF_F_HW_VLAN_STAG_TX;
}
/**
@@ -3405,34 +3422,31 @@ ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
/**
* ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
* @netdev: network interface to be adjusted
- * @proto: unused protocol
+ * @proto: VLAN TPID
* @vid: VLAN ID to be added
*
* net_device_ops implementation for adding VLAN IDs
*/
static int
-ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto,
- u16 vid)
+ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi_vlan_ops *vlan_ops;
struct ice_vsi *vsi = np->vsi;
+ struct ice_vlan vlan;
int ret;
/* VLAN 0 is added by default during load/reset */
if (!vid)
return 0;
- /* Enable VLAN pruning when a VLAN other than 0 is added */
- if (!ice_vsi_is_vlan_pruning_ena(vsi)) {
- ret = ice_cfg_vlan_pruning(vsi, true);
- if (ret)
- return ret;
- }
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
/* Add a switch rule for this VLAN ID so its corresponding VLAN tagged
* packets aren't pruned by the device's internal switch on Rx
*/
- ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI);
+ vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
+ ret = vlan_ops->add_vlan(vsi, &vlan);
if (!ret)
set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
@@ -3442,36 +3456,36 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto,
/**
* ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
* @netdev: network interface to be adjusted
- * @proto: unused protocol
+ * @proto: VLAN TPID
* @vid: VLAN ID to be removed
*
* net_device_ops implementation for removing VLAN IDs
*/
static int
-ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto,
- u16 vid)
+ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi_vlan_ops *vlan_ops;
struct ice_vsi *vsi = np->vsi;
+ struct ice_vlan vlan;
int ret;
/* don't allow removal of VLAN 0 */
if (!vid)
return 0;
- /* Make sure ice_vsi_kill_vlan is successful before updating VLAN
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+ /* Make sure VLAN delete is successful before updating VLAN
* information
*/
- ret = ice_vsi_kill_vlan(vsi, vid);
+ vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
+ ret = vlan_ops->del_vlan(vsi, &vlan);
if (ret)
return ret;
- /* Disable pruning when VLAN 0 is the only VLAN rule */
- if (vsi->num_vlan == 1 && ice_vsi_is_vlan_pruning_ena(vsi))
- ret = ice_cfg_vlan_pruning(vsi, false);
-
set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
- return ret;
+ return 0;
}
/**
@@ -3540,12 +3554,17 @@ static int ice_tc_indir_block_register(struct ice_vsi *vsi)
static int ice_setup_pf_sw(struct ice_pf *pf)
{
struct device *dev = ice_pf_to_dev(pf);
+ bool dvm = ice_is_dvm_ena(&pf->hw);
struct ice_vsi *vsi;
int status;
if (ice_is_reset_in_progress(pf->state))
return -EBUSY;
+ status = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
+ if (status)
+ return -EIO;
+
vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
if (!vsi)
return -ENOMEM;
@@ -4067,8 +4086,8 @@ static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
/* allow all VLANs on Tx and don't strip on Rx */
- ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL |
- ICE_AQ_VSI_VLAN_EMOD_NOTHING;
+ ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL |
+ ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
if (status) {
@@ -4077,7 +4096,7 @@ static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
} else {
vsi->info.sec_flags = ctxt->info.sec_flags;
vsi->info.sw_flags2 = ctxt->info.sw_flags2;
- vsi->info.vlan_flags = ctxt->info.vlan_flags;
+ vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
}
kfree(ctxt);
@@ -4462,8 +4481,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
/* set up for high or low DMA */
err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
- if (err)
- err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
if (err) {
dev_err(dev, "DMA configuration failed: 0x%x\n", err);
return err;
@@ -5573,6 +5590,194 @@ ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
return err;
}
+#define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \
+ NETIF_F_HW_VLAN_CTAG_TX | \
+ NETIF_F_HW_VLAN_STAG_RX | \
+ NETIF_F_HW_VLAN_STAG_TX)
+
+#define NETIF_VLAN_FILTERING_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \
+ NETIF_F_HW_VLAN_STAG_FILTER)
+
+/**
+ * ice_fix_features - fix the netdev features flags based on device limitations
+ * @netdev: ptr to the netdev that flags are being fixed on
+ * @features: features that need to be checked and possibly fixed
+ *
+ * Make sure any fixups are made to features in this callback. This enables the
+ * driver to not have to check unsupported configurations throughout the driver
+ * because that's the responsiblity of this callback.
+ *
+ * Single VLAN Mode (SVM) Supported Features:
+ * NETIF_F_HW_VLAN_CTAG_FILTER
+ * NETIF_F_HW_VLAN_CTAG_RX
+ * NETIF_F_HW_VLAN_CTAG_TX
+ *
+ * Double VLAN Mode (DVM) Supported Features:
+ * NETIF_F_HW_VLAN_CTAG_FILTER
+ * NETIF_F_HW_VLAN_CTAG_RX
+ * NETIF_F_HW_VLAN_CTAG_TX
+ *
+ * NETIF_F_HW_VLAN_STAG_FILTER
+ * NETIF_HW_VLAN_STAG_RX
+ * NETIF_HW_VLAN_STAG_TX
+ *
+ * Features that need fixing:
+ * Cannot simultaneously enable CTAG and STAG stripping and/or insertion.
+ * These are mutually exlusive as the VSI context cannot support multiple
+ * VLAN ethertypes simultaneously for stripping and/or insertion. If this
+ * is not done, then default to clearing the requested STAG offload
+ * settings.
+ *
+ * All supported filtering has to be enabled or disabled together. For
+ * example, in DVM, CTAG and STAG filtering have to be enabled and disabled
+ * together. If this is not done, then default to VLAN filtering disabled.
+ * These are mutually exclusive as there is currently no way to
+ * enable/disable VLAN filtering based on VLAN ethertype when using VLAN
+ * prune rules.
+ */
+static netdev_features_t
+ice_fix_features(struct net_device *netdev, netdev_features_t features)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ netdev_features_t supported_vlan_filtering;
+ netdev_features_t requested_vlan_filtering;
+ struct ice_vsi *vsi = np->vsi;
+
+ requested_vlan_filtering = features & NETIF_VLAN_FILTERING_FEATURES;
+
+ /* make sure supported_vlan_filtering works for both SVM and DVM */
+ supported_vlan_filtering = NETIF_F_HW_VLAN_CTAG_FILTER;
+ if (ice_is_dvm_ena(&vsi->back->hw))
+ supported_vlan_filtering |= NETIF_F_HW_VLAN_STAG_FILTER;
+
+ if (requested_vlan_filtering &&
+ requested_vlan_filtering != supported_vlan_filtering) {
+ if (requested_vlan_filtering & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ netdev_warn(netdev, "cannot support requested VLAN filtering settings, enabling all supported VLAN filtering settings\n");
+ features |= supported_vlan_filtering;
+ } else {
+ netdev_warn(netdev, "cannot support requested VLAN filtering settings, clearing all supported VLAN filtering settings\n");
+ features &= ~supported_vlan_filtering;
+ }
+ }
+
+ if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX)) &&
+ (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))) {
+ netdev_warn(netdev, "cannot support CTAG and STAG VLAN stripping and/or insertion simultaneously since CTAG and STAG offloads are mutually exclusive, clearing STAG offload settings\n");
+ features &= ~(NETIF_F_HW_VLAN_STAG_RX |
+ NETIF_F_HW_VLAN_STAG_TX);
+ }
+
+ return features;
+}
+
+/**
+ * ice_set_vlan_offload_features - set VLAN offload features for the PF VSI
+ * @vsi: PF's VSI
+ * @features: features used to determine VLAN offload settings
+ *
+ * First, determine the vlan_ethertype based on the VLAN offload bits in
+ * features. Then determine if stripping and insertion should be enabled or
+ * disabled. Finally enable or disable VLAN stripping and insertion.
+ */
+static int
+ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features)
+{
+ bool enable_stripping = true, enable_insertion = true;
+ struct ice_vsi_vlan_ops *vlan_ops;
+ int strip_err = 0, insert_err = 0;
+ u16 vlan_ethertype = 0;
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+ if (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))
+ vlan_ethertype = ETH_P_8021AD;
+ else if (features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX))
+ vlan_ethertype = ETH_P_8021Q;
+
+ if (!(features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_CTAG_RX)))
+ enable_stripping = false;
+ if (!(features & (NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_CTAG_TX)))
+ enable_insertion = false;
+
+ if (enable_stripping)
+ strip_err = vlan_ops->ena_stripping(vsi, vlan_ethertype);
+ else
+ strip_err = vlan_ops->dis_stripping(vsi);
+
+ if (enable_insertion)
+ insert_err = vlan_ops->ena_insertion(vsi, vlan_ethertype);
+ else
+ insert_err = vlan_ops->dis_insertion(vsi);
+
+ if (strip_err || insert_err)
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * ice_set_vlan_filtering_features - set VLAN filtering features for the PF VSI
+ * @vsi: PF's VSI
+ * @features: features used to determine VLAN filtering settings
+ *
+ * Enable or disable Rx VLAN filtering based on the VLAN filtering bits in the
+ * features.
+ */
+static int
+ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features)
+{
+ struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ int err = 0;
+
+ /* support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking
+ * if either bit is set
+ */
+ if (features &
+ (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER))
+ err = vlan_ops->ena_rx_filtering(vsi);
+ else
+ err = vlan_ops->dis_rx_filtering(vsi);
+
+ return err;
+}
+
+/**
+ * ice_set_vlan_features - set VLAN settings based on suggested feature set
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ *
+ * Only update VLAN settings if the requested_vlan_features are different than
+ * the current_vlan_features.
+ */
+static int
+ice_set_vlan_features(struct net_device *netdev, netdev_features_t features)
+{
+ netdev_features_t current_vlan_features, requested_vlan_features;
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ int err;
+
+ current_vlan_features = netdev->features & NETIF_VLAN_OFFLOAD_FEATURES;
+ requested_vlan_features = features & NETIF_VLAN_OFFLOAD_FEATURES;
+ if (current_vlan_features ^ requested_vlan_features) {
+ err = ice_set_vlan_offload_features(vsi, features);
+ if (err)
+ return err;
+ }
+
+ current_vlan_features = netdev->features &
+ NETIF_VLAN_FILTERING_FEATURES;
+ requested_vlan_features = features & NETIF_VLAN_FILTERING_FEATURES;
+ if (current_vlan_features ^ requested_vlan_features) {
+ err = ice_set_vlan_filtering_features(vsi, features);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
/**
* ice_set_features - set the netdev feature flags
* @netdev: ptr to the netdev being adjusted
@@ -5607,26 +5812,9 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
netdev->features & NETIF_F_RXHASH)
ice_vsi_manage_rss_lut(vsi, false);
- if ((features & NETIF_F_HW_VLAN_CTAG_RX) &&
- !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
- ret = ice_vsi_manage_vlan_stripping(vsi, true);
- else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) &&
- (netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
- ret = ice_vsi_manage_vlan_stripping(vsi, false);
-
- if ((features & NETIF_F_HW_VLAN_CTAG_TX) &&
- !(netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
- ret = ice_vsi_manage_vlan_insertion(vsi);
- else if (!(features & NETIF_F_HW_VLAN_CTAG_TX) &&
- (netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
- ret = ice_vsi_manage_vlan_insertion(vsi);
-
- if ((features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
- !(netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
- ret = ice_cfg_vlan_pruning(vsi, true);
- else if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
- (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
- ret = ice_cfg_vlan_pruning(vsi, false);
+ ret = ice_set_vlan_features(netdev, features);
+ if (ret)
+ return ret;
if ((features & NETIF_F_NTUPLE) &&
!(netdev->features & NETIF_F_NTUPLE)) {
@@ -5650,23 +5838,26 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
else
clear_bit(ICE_FLAG_CLS_FLOWER, pf->flags);
- return ret;
+ return 0;
}
/**
- * ice_vsi_vlan_setup - Setup VLAN offload properties on a VSI
+ * ice_vsi_vlan_setup - Setup VLAN offload properties on a PF VSI
* @vsi: VSI to setup VLAN properties for
*/
static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
{
- int ret = 0;
+ int err;
- if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
- ret = ice_vsi_manage_vlan_stripping(vsi, true);
- if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)
- ret = ice_vsi_manage_vlan_insertion(vsi);
+ err = ice_set_vlan_offload_features(vsi, vsi->netdev->features);
+ if (err)
+ return err;
- return ret;
+ err = ice_set_vlan_filtering_features(vsi, vsi->netdev->features);
+ if (err)
+ return err;
+
+ return ice_vsi_add_vlan_zero(vsi);
}
/**
@@ -6267,11 +6458,12 @@ static void ice_napi_disable_all(struct ice_vsi *vsi)
*/
int ice_down(struct ice_vsi *vsi)
{
- int i, tx_err, rx_err, link_err = 0;
+ int i, tx_err, rx_err, link_err = 0, vlan_err = 0;
WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state));
if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+ vlan_err = ice_vsi_del_vlan_zero(vsi);
if (!ice_is_e810(&vsi->back->hw))
ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false);
netif_carrier_off(vsi->netdev);
@@ -6313,7 +6505,7 @@ int ice_down(struct ice_vsi *vsi)
ice_for_each_rxq(vsi, i)
ice_clean_rx_ring(vsi->rx_rings[i]);
- if (tx_err || rx_err || link_err) {
+ if (tx_err || rx_err || link_err || vlan_err) {
netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n",
vsi->vsi_num, vsi->vsw->sw_id);
return -EIO;
@@ -6623,6 +6815,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
{
struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
+ bool dvm;
int err;
if (test_bit(ICE_DOWN, pf->state))
@@ -6686,6 +6879,12 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
goto err_init_ctrlq;
}
+ dvm = ice_is_dvm_ena(hw);
+
+ err = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
+ if (err)
+ goto err_init_ctrlq;
+
err = ice_sched_init_port(hw->port_info);
if (err)
goto err_sched_init_port;
@@ -8594,6 +8793,7 @@ static const struct net_device_ops ice_netdev_ops = {
.ndo_start_xmit = ice_start_xmit,
.ndo_select_queue = ice_select_queue,
.ndo_features_check = ice_features_check,
+ .ndo_fix_features = ice_fix_features,
.ndo_set_rx_mode = ice_set_rx_mode,
.ndo_set_mac_address = ice_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h
index f57c414bc0a9..380e8ae94fc9 100644
--- a/drivers/net/ethernet/intel/ice/ice_osdep.h
+++ b/drivers/net/ethernet/intel/ice/ice_osdep.h
@@ -9,6 +9,7 @@
#ifndef CONFIG_64BIT
#include <linux/io-64-nonatomic-lo-hi.h>
#endif
+#include <net/udp_tunnel.h>
#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
#define rd32(a, reg) readl((a)->hw_addr + (reg))
diff --git a/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c
new file mode 100644
index 000000000000..976a03d3bdd5
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_vsi_vlan_ops.h"
+#include "ice_vsi_vlan_lib.h"
+#include "ice_vlan_mode.h"
+#include "ice.h"
+#include "ice_pf_vsi_vlan_ops.h"
+
+void ice_pf_vsi_init_vlan_ops(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops;
+
+ if (ice_is_dvm_ena(&vsi->back->hw)) {
+ vlan_ops = &vsi->outer_vlan_ops;
+
+ vlan_ops->add_vlan = ice_vsi_add_vlan;
+ vlan_ops->del_vlan = ice_vsi_del_vlan;
+ vlan_ops->ena_stripping = ice_vsi_ena_outer_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_outer_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
+ vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering;
+ vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+ } else {
+ vlan_ops = &vsi->inner_vlan_ops;
+
+ vlan_ops->add_vlan = ice_vsi_add_vlan;
+ vlan_ops->del_vlan = ice_vsi_del_vlan;
+ vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+ vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering;
+ vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+ }
+}
+
diff --git a/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h
new file mode 100644
index 000000000000..6741ec8c5f6b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_PF_VSI_VLAN_OPS_H_
+#define _ICE_PF_VSI_VLAN_OPS_H_
+
+#include "ice_vsi_vlan_ops.h"
+
+struct ice_vsi;
+
+void ice_pf_vsi_init_vlan_ops(struct ice_vsi *vsi);
+
+#endif /* _ICE_PF_VSI_VLAN_OPS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 11ae0bee3590..4143728a1919 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -1097,6 +1097,64 @@ ice_aq_get_recipe(struct ice_hw *hw,
}
/**
+ * ice_update_recipe_lkup_idx - update a default recipe based on the lkup_idx
+ * @hw: pointer to the HW struct
+ * @params: parameters used to update the default recipe
+ *
+ * This function only supports updating default recipes and it only supports
+ * updating a single recipe based on the lkup_idx at a time.
+ *
+ * This is done as a read-modify-write operation. First, get the current recipe
+ * contents based on the recipe's ID. Then modify the field vector index and
+ * mask if it's valid at the lkup_idx. Finally, use the add recipe AQ to update
+ * the pre-existing recipe with the modifications.
+ */
+int
+ice_update_recipe_lkup_idx(struct ice_hw *hw,
+ struct ice_update_recipe_lkup_idx_params *params)
+{
+ struct ice_aqc_recipe_data_elem *rcp_list;
+ u16 num_recps = ICE_MAX_NUM_RECIPES;
+ int status;
+
+ rcp_list = kcalloc(num_recps, sizeof(*rcp_list), GFP_KERNEL);
+ if (!rcp_list)
+ return -ENOMEM;
+
+ /* read current recipe list from firmware */
+ rcp_list->recipe_indx = params->rid;
+ status = ice_aq_get_recipe(hw, rcp_list, &num_recps, params->rid, NULL);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SW, "Failed to get recipe %d, status %d\n",
+ params->rid, status);
+ goto error_out;
+ }
+
+ /* only modify existing recipe's lkup_idx and mask if valid, while
+ * leaving all other fields the same, then update the recipe firmware
+ */
+ rcp_list->content.lkup_indx[params->lkup_idx] = params->fv_idx;
+ if (params->mask_valid)
+ rcp_list->content.mask[params->lkup_idx] =
+ cpu_to_le16(params->mask);
+
+ if (params->ignore_valid)
+ rcp_list->content.lkup_indx[params->lkup_idx] |=
+ ICE_AQ_RECIPE_LKUP_IGNORE;
+
+ status = ice_aq_add_recipe(hw, &rcp_list[0], 1, NULL);
+ if (status)
+ ice_debug(hw, ICE_DBG_SW, "Failed to update recipe %d lkup_idx %d fv_idx %d mask %d mask_valid %s, status %d\n",
+ params->rid, params->lkup_idx, params->fv_idx,
+ params->mask, params->mask_valid ? "true" : "false",
+ status);
+
+error_out:
+ kfree(rcp_list);
+ return status;
+}
+
+/**
* ice_aq_map_recipe_to_profile - Map recipe to packet profile
* @hw: pointer to the HW struct
* @profile_id: package profile ID to associate the recipe with
@@ -1539,6 +1597,7 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info,
struct ice_aqc_sw_rules_elem *s_rule, enum ice_adminq_opc opc)
{
u16 vlan_id = ICE_MAX_VLAN_ID + 1;
+ u16 vlan_tpid = ETH_P_8021Q;
void *daddr = NULL;
u16 eth_hdr_sz;
u8 *eth_hdr;
@@ -1611,6 +1670,8 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info,
break;
case ICE_SW_LKUP_VLAN:
vlan_id = f_info->l_data.vlan.vlan_id;
+ if (f_info->l_data.vlan.tpid_valid)
+ vlan_tpid = f_info->l_data.vlan.tpid;
if (f_info->fltr_act == ICE_FWD_TO_VSI ||
f_info->fltr_act == ICE_FWD_TO_VSI_LIST) {
act |= ICE_SINGLE_ACT_PRUNE;
@@ -1653,6 +1714,8 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info,
if (!(vlan_id > ICE_MAX_VLAN_ID)) {
off = (__force __be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET);
*off = cpu_to_be16(vlan_id);
+ off = (__force __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET);
+ *off = cpu_to_be16(vlan_tpid);
}
/* Create the switch rule with the final dummy Ethernet header */
@@ -3868,6 +3931,23 @@ ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts,
}
/**
+ * ice_change_proto_id_to_dvm - change proto id in prot_id_tbl
+ *
+ * As protocol id for outer vlan is different in dvm and svm, if dvm is
+ * supported protocol array record for outer vlan has to be modified to
+ * reflect the value proper for DVM.
+ */
+void ice_change_proto_id_to_dvm(void)
+{
+ u8 i;
+
+ for (i = 0; i < ARRAY_SIZE(ice_prot_id_tbl); i++)
+ if (ice_prot_id_tbl[i].type == ICE_VLAN_OFOS &&
+ ice_prot_id_tbl[i].protocol_id != ICE_VLAN_OF_HW)
+ ice_prot_id_tbl[i].protocol_id = ICE_VLAN_OF_HW;
+}
+
+/**
* ice_prot_type_to_id - get protocol ID from protocol type
* @type: protocol type
* @id: pointer to variable that will receive the ID
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index d8334beaaa8a..7b42c51a3eb0 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -33,15 +33,6 @@ struct ice_vsi_ctx {
struct ice_q_ctx *rdma_q_ctx[ICE_MAX_TRAFFIC_CLASS];
};
-enum ice_sw_fwd_act_type {
- ICE_FWD_TO_VSI = 0,
- ICE_FWD_TO_VSI_LIST, /* Do not use this when adding filter */
- ICE_FWD_TO_Q,
- ICE_FWD_TO_QGRP,
- ICE_DROP_PACKET,
- ICE_INVAL_ACT
-};
-
/* Switch recipe ID enum values are specific to hardware */
enum ice_sw_lkup_type {
ICE_SW_LKUP_ETHERTYPE = 0,
@@ -86,6 +77,8 @@ struct ice_fltr_info {
} mac_vlan;
struct {
u16 vlan_id;
+ u16 tpid;
+ u8 tpid_valid;
} vlan;
/* Set lkup_type as ICE_SW_LKUP_ETHERTYPE
* if just using ethertype as filter. Set lkup_type as
@@ -125,6 +118,15 @@ struct ice_fltr_info {
u8 lan_en; /* Indicate if packet can be forwarded to the uplink */
};
+struct ice_update_recipe_lkup_idx_params {
+ u16 rid;
+ u16 fv_idx;
+ bool ignore_valid;
+ u16 mask;
+ bool mask_valid;
+ u8 lkup_idx;
+};
+
struct ice_adv_lkup_elem {
enum ice_protocol_type type;
union ice_prot_hdr h_u; /* Header values */
@@ -367,4 +369,8 @@ void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw);
int
ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz,
u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd);
+int
+ice_update_recipe_lkup_idx(struct ice_hw *hw,
+ struct ice_update_recipe_lkup_idx_params *params);
+void ice_change_proto_id_to_dvm(void);
#endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 3e38695f1c9d..ff93ec71aed6 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -173,6 +173,8 @@ tx_skip_free:
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
+ tx_ring->next_dd = ICE_RING_QUARTER(tx_ring) - 1;
+ tx_ring->next_rs = ICE_RING_QUARTER(tx_ring) - 1;
if (!tx_ring->netdev)
return;
@@ -983,15 +985,17 @@ static struct sk_buff *
ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
struct xdp_buff *xdp)
{
+ unsigned int metasize = xdp->data - xdp->data_meta;
unsigned int size = xdp->data_end - xdp->data;
unsigned int headlen;
struct sk_buff *skb;
/* prefetch first cache line of first page */
- net_prefetch(xdp->data);
+ net_prefetch(xdp->data_meta);
/* allocate a skb to store the frags */
- skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+ ICE_RX_HDR_SIZE + metasize,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
@@ -1003,8 +1007,13 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
- sizeof(long)));
+ memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta,
+ ALIGN(headlen + metasize, sizeof(long)));
+
+ if (metasize) {
+ skb_metadata_set(skb, metasize);
+ __skb_pull(skb, metasize);
+ }
/* if we exhaust the linear part then add what is left as a frag */
size -= headlen;
@@ -1080,7 +1089,7 @@ ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc)
{
/* if we are the last buffer then there is nothing else to do */
#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
- if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
+ if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF)))
return false;
rx_ring->rx_stats.non_eop_descs++;
@@ -1142,7 +1151,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
* hardware wrote DD then it will be non-zero
*/
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
- if (!ice_test_staterr(rx_desc, stat_err_bits))
+ if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
break;
/* This memory barrier is needed to keep us from reading
@@ -1228,14 +1237,13 @@ construct_skb:
continue;
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
- if (unlikely(ice_test_staterr(rx_desc, stat_err_bits))) {
+ if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
+ stat_err_bits))) {
dev_kfree_skb_any(skb);
continue;
}
- stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
- if (ice_test_staterr(rx_desc, stat_err_bits))
- vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
+ vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);
/* pad the skb if needed, to make a valid ethernet frame */
if (eth_skb_pad(skb)) {
@@ -1460,7 +1468,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
bool wd;
if (tx_ring->xsk_pool)
- wd = ice_clean_tx_irq_zc(tx_ring, budget);
+ wd = ice_xmit_zc(tx_ring, ICE_DESC_UNUSED(tx_ring), budget);
else if (ice_ring_is_xdp(tx_ring))
wd = true;
else
@@ -1513,7 +1521,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
/* Exit the polling mode, but don't re-enable interrupts if stack might
* poll us due to busy-polling
*/
- if (likely(napi_complete_done(napi, work_done))) {
+ if (napi_complete_done(napi, work_done)) {
ice_net_dim(q_vector);
ice_enable_interrupt(q_vector);
} else {
@@ -1917,12 +1925,16 @@ ice_tx_prepare_vlan_flags(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first)
if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol))
return;
- /* currently, we always assume 802.1Q for VLAN insertion as VLAN
- * insertion for 802.1AD is not supported
+ /* the VLAN ethertype/tpid is determined by VSI configuration and netdev
+ * feature flags, which the driver only allows either 802.1Q or 802.1ad
+ * VLAN offloads exclusively so we only care about the VLAN ID here
*/
if (skb_vlan_tag_present(skb)) {
first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S;
- first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
+ if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2)
+ first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN;
+ else
+ first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
}
ice_tx_prepare_vlan_flags_dcb(tx_ring, first);
@@ -2295,6 +2307,13 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring)
/* prepare the VLAN tagging flags for Tx */
ice_tx_prepare_vlan_flags(tx_ring, first);
+ if (first->tx_flags & ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN) {
+ offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+ (ICE_TX_CTX_DESC_IL2TAG2 <<
+ ICE_TXD_CTX_QW1_CMD_S));
+ offload.cd_l2tag2 = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >>
+ ICE_TX_FLAGS_VLAN_S;
+ }
/* set up TSO offload */
tso = ice_tso(first, &offload);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index b7b3bd4816f0..cead3eb149bd 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -13,7 +13,6 @@
#define ICE_MAX_CHAINED_RX_BUFS 5
#define ICE_MAX_BUF_TXD 8
#define ICE_MIN_TX_LEN 17
-#define ICE_TX_THRESH 32
/* The size limit for a transmit buffer in a descriptor is (16K - 1).
* In order to align with the read requests we will align the value to
@@ -111,6 +110,8 @@ static inline int ice_skb_pad(void)
(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
(R)->next_to_clean - (R)->next_to_use - 1)
+#define ICE_RING_QUARTER(R) ((R)->count >> 2)
+
#define ICE_TX_FLAGS_TSO BIT(0)
#define ICE_TX_FLAGS_HW_VLAN BIT(1)
#define ICE_TX_FLAGS_SW_VLAN BIT(2)
@@ -122,6 +123,7 @@ static inline int ice_skb_pad(void)
#define ICE_TX_FLAGS_IPV4 BIT(5)
#define ICE_TX_FLAGS_IPV6 BIT(6)
#define ICE_TX_FLAGS_TUNNEL BIT(7)
+#define ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN BIT(8)
#define ICE_TX_FLAGS_VLAN_M 0xffff0000
#define ICE_TX_FLAGS_VLAN_PR_M 0xe0000000
#define ICE_TX_FLAGS_VLAN_PR_S 29
@@ -321,18 +323,21 @@ struct ice_tx_ring {
u16 count; /* Number of descriptors */
u16 q_index; /* Queue number of ring */
/* stats structs */
- struct ice_q_stats stats;
- struct u64_stats_sync syncp;
struct ice_txq_stats tx_stats;
-
/* CL3 - 3rd cacheline starts here */
+ struct ice_q_stats stats;
+ struct u64_stats_sync syncp;
struct rcu_head rcu; /* to avoid race on free */
DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */
struct ice_channel *ch;
struct ice_ptp_tx *tx_tstamps;
spinlock_t tx_lock;
u32 txq_teid; /* Added Tx queue TEID */
+ /* CL4 - 4th cacheline starts here */
+ u16 xdp_tx_active;
#define ICE_TX_FLAGS_RING_XDP BIT(0)
+#define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1)
+#define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2)
u8 flags;
u8 dcb_tc; /* Traffic class of ring */
u8 ptp_tx;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 0e87b98e0966..7ee38d02d1e5 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -209,9 +209,14 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring,
void
ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
{
- if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
- (vlan_tag & VLAN_VID_MASK))
+ netdev_features_t features = rx_ring->netdev->features;
+ bool non_zero_vlan = !!(vlan_tag & VLAN_VID_MASK);
+
+ if ((features & NETIF_F_HW_VLAN_CTAG_RX) && non_zero_vlan)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+ else if ((features & NETIF_F_HW_VLAN_STAG_RX) && non_zero_vlan)
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), vlan_tag);
+
napi_gro_receive(&rx_ring->q_vector->napi, skb);
}
@@ -222,6 +227,7 @@ ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
{
unsigned int total_bytes = 0, total_pkts = 0;
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
u16 ntc = xdp_ring->next_to_clean;
struct ice_tx_desc *next_dd_desc;
u16 next_dd = xdp_ring->next_dd;
@@ -233,7 +239,7 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
return;
- for (i = 0; i < ICE_TX_THRESH; i++) {
+ for (i = 0; i < tx_thresh; i++) {
tx_buf = &xdp_ring->tx_buf[ntc];
total_bytes += tx_buf->bytecount;
@@ -254,9 +260,9 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
}
next_dd_desc->cmd_type_offset_bsz = 0;
- xdp_ring->next_dd = xdp_ring->next_dd + ICE_TX_THRESH;
+ xdp_ring->next_dd = xdp_ring->next_dd + tx_thresh;
if (xdp_ring->next_dd > xdp_ring->count)
- xdp_ring->next_dd = ICE_TX_THRESH - 1;
+ xdp_ring->next_dd = tx_thresh - 1;
xdp_ring->next_to_clean = ntc;
ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes);
}
@@ -269,12 +275,13 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
*/
int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring)
{
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
u16 i = xdp_ring->next_to_use;
struct ice_tx_desc *tx_desc;
struct ice_tx_buf *tx_buf;
dma_addr_t dma;
- if (ICE_DESC_UNUSED(xdp_ring) < ICE_TX_THRESH)
+ if (ICE_DESC_UNUSED(xdp_ring) < tx_thresh)
ice_clean_xdp_irq(xdp_ring);
if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
@@ -300,13 +307,14 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring)
tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0,
size, 0);
+ xdp_ring->xdp_tx_active++;
i++;
if (i == xdp_ring->count) {
i = 0;
tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
- xdp_ring->next_rs = ICE_TX_THRESH - 1;
+ xdp_ring->next_rs = tx_thresh - 1;
}
xdp_ring->next_to_use = i;
@@ -314,7 +322,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring)
tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
- xdp_ring->next_rs += ICE_TX_THRESH;
+ xdp_ring->next_rs += tx_thresh;
}
return ICE_XDP_TX;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
index 11b6c1601986..c7d2954dc9ea 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -7,7 +7,7 @@
/**
* ice_test_staterr - tests bits in Rx descriptor status and error fields
- * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @status_err_n: Rx descriptor status_error0 or status_error1 bits
* @stat_err_bits: value to mask
*
* This function does some fast chicanery in order to return the
@@ -16,9 +16,9 @@
* at offset zero.
*/
static inline bool
-ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
+ice_test_staterr(__le16 status_err_n, const u16 stat_err_bits)
{
- return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits));
+ return !!(status_err_n & cpu_to_le16(stat_err_bits));
}
static inline __le64
@@ -32,6 +32,30 @@ ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
}
/**
+ * ice_get_vlan_tag_from_rx_desc - get VLAN from Rx flex descriptor
+ * @rx_desc: Rx 32b flex descriptor with RXDID=2
+ *
+ * The OS and current PF implementation only support stripping a single VLAN tag
+ * at a time, so there should only ever be 0 or 1 tags in the l2tag* fields. If
+ * one is found return the tag, else return 0 to mean no VLAN tag was found.
+ */
+static inline u16
+ice_get_vlan_tag_from_rx_desc(union ice_32b_rx_flex_desc *rx_desc)
+{
+ u16 stat_err_bits;
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+ if (ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
+ return le16_to_cpu(rx_desc->wb.l2tag1);
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S);
+ if (ice_test_staterr(rx_desc->wb.status_error1, stat_err_bits))
+ return le16_to_cpu(rx_desc->wb.l2tag2_2nd);
+
+ return 0;
+}
+
+/**
* ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
* @xdp_ring: XDP Tx ring
*
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 546145dd1f02..28fcab26b868 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -15,6 +15,7 @@
#include "ice_flex_type.h"
#include "ice_protocol_type.h"
#include "ice_sbq_cmd.h"
+#include "ice_vlan_mode.h"
static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
{
@@ -54,6 +55,11 @@ static inline u32 ice_round_to_num(u32 N, u32 R)
#define ICE_DBG_AQ_DESC BIT_ULL(25)
#define ICE_DBG_AQ_DESC_BUF BIT_ULL(26)
#define ICE_DBG_AQ_CMD BIT_ULL(27)
+#define ICE_DBG_AQ (ICE_DBG_AQ_MSG | \
+ ICE_DBG_AQ_DESC | \
+ ICE_DBG_AQ_DESC_BUF | \
+ ICE_DBG_AQ_CMD)
+
#define ICE_DBG_USER BIT_ULL(31)
enum ice_aq_res_ids {
@@ -920,6 +926,9 @@ struct ice_hw {
struct udp_tunnel_nic_shared udp_tunnel_shared;
struct udp_tunnel_nic_info udp_tunnel_nic;
+ /* dvm boost update information */
+ struct ice_dvm_table dvm_upd;
+
/* HW block tables */
struct ice_blk_info blk[ICE_BLK_COUNT];
struct mutex fl_profs_locks[ICE_BLK_COUNT]; /* lock fltr profiles */
@@ -943,6 +952,7 @@ struct ice_hw {
struct list_head rss_list_head;
struct ice_mbx_snapshot mbx_snapshot;
DECLARE_BITMAP(hw_ptype, ICE_FLOW_PTYPE_MAX);
+ u8 dvm_ena;
u16 io_expander_handle;
};
@@ -1008,6 +1018,15 @@ struct ice_hw_port_stats {
u64 fd_sb_match;
};
+enum ice_sw_fwd_act_type {
+ ICE_FWD_TO_VSI = 0,
+ ICE_FWD_TO_VSI_LIST, /* Do not use this when adding filter */
+ ICE_FWD_TO_Q,
+ ICE_FWD_TO_QGRP,
+ ICE_DROP_PACKET,
+ ICE_INVAL_ACT
+};
+
struct ice_aq_get_set_rss_lut_params {
u16 vsi_handle; /* software VSI handle */
u16 lut_size; /* size of the LUT buffer */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
new file mode 100644
index 000000000000..39f2d36cabba
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_vsi_vlan_ops.h"
+#include "ice_vsi_vlan_lib.h"
+#include "ice_vlan_mode.h"
+#include "ice.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_virtchnl_pf.h"
+
+static int
+noop_vlan_arg(struct ice_vsi __always_unused *vsi,
+ struct ice_vlan __always_unused *vlan)
+{
+ return 0;
+}
+
+static int
+noop_vlan(struct ice_vsi __always_unused *vsi)
+{
+ return 0;
+}
+
+/**
+ * ice_vf_vsi_init_vlan_ops - Initialize default VSI VLAN ops for VF VSI
+ * @vsi: VF's VSI being configured
+ *
+ * If Double VLAN Mode (DVM) is enabled, assume that the VF supports the new
+ * VIRTCHNL_VF_VLAN_OFFLOAD_V2 capability and set up the VLAN ops accordingly.
+ * If SVM is enabled maintain the same level of VLAN support previous to
+ * VIRTCHNL_VF_VLAN_OFFLOAD_V2.
+ */
+void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops;
+ struct ice_pf *pf = vsi->back;
+ struct ice_vf *vf;
+
+ vf = &pf->vf[vsi->vf_id];
+
+ if (ice_is_dvm_ena(&pf->hw)) {
+ vlan_ops = &vsi->outer_vlan_ops;
+
+ /* outer VLAN ops regardless of port VLAN config */
+ vlan_ops->add_vlan = ice_vsi_add_vlan;
+ vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+ vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering;
+ vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
+
+ if (ice_vf_is_port_vlan_ena(vf)) {
+ /* setup outer VLAN ops */
+ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
+ vlan_ops->ena_rx_filtering =
+ ice_vsi_ena_rx_vlan_filtering;
+
+ /* setup inner VLAN ops */
+ vlan_ops = &vsi->inner_vlan_ops;
+ vlan_ops->add_vlan = noop_vlan_arg;
+ vlan_ops->del_vlan = noop_vlan_arg;
+ vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+ } else {
+ if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags))
+ vlan_ops->ena_rx_filtering = noop_vlan;
+ else
+ vlan_ops->ena_rx_filtering =
+ ice_vsi_ena_rx_vlan_filtering;
+
+ vlan_ops->del_vlan = ice_vsi_del_vlan;
+ vlan_ops->ena_stripping = ice_vsi_ena_outer_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_outer_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
+
+ /* setup inner VLAN ops */
+ vlan_ops = &vsi->inner_vlan_ops;
+
+ vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+ }
+ } else {
+ vlan_ops = &vsi->inner_vlan_ops;
+
+ /* inner VLAN ops regardless of port VLAN config */
+ vlan_ops->add_vlan = ice_vsi_add_vlan;
+ vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+ vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering;
+ vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
+
+ if (ice_vf_is_port_vlan_ena(vf)) {
+ vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
+ vlan_ops->ena_rx_filtering =
+ ice_vsi_ena_rx_vlan_filtering;
+ } else {
+ if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags))
+ vlan_ops->ena_rx_filtering = noop_vlan;
+ else
+ vlan_ops->ena_rx_filtering =
+ ice_vsi_ena_rx_vlan_filtering;
+
+ vlan_ops->del_vlan = ice_vsi_del_vlan;
+ vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+ }
+ }
+}
+
+/**
+ * ice_vf_vsi_cfg_dvm_legacy_vlan_mode - Config VLAN mode for old VFs in DVM
+ * @vsi: VF's VSI being configured
+ *
+ * This should only be called when Double VLAN Mode (DVM) is enabled, there
+ * is not a port VLAN enabled on this VF, and the VF negotiates
+ * VIRTCHNL_VF_OFFLOAD_VLAN.
+ *
+ * This function sets up the VF VSI's inner and outer ice_vsi_vlan_ops and also
+ * initializes software only VLAN mode (i.e. allow all VLANs). Also, use no-op
+ * implementations for any functions that may be called during the lifetime of
+ * the VF so these methods do nothing and succeed.
+ */
+void ice_vf_vsi_cfg_dvm_legacy_vlan_mode(struct ice_vsi *vsi)
+{
+ struct ice_vf *vf = &vsi->back->vf[vsi->vf_id];
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct ice_vsi_vlan_ops *vlan_ops;
+
+ if (!ice_is_dvm_ena(&vsi->back->hw) || ice_vf_is_port_vlan_ena(vf))
+ return;
+
+ vlan_ops = &vsi->outer_vlan_ops;
+
+ /* Rx VLAN filtering always disabled to allow software offloaded VLANs
+ * for VFs that only support VIRTCHNL_VF_OFFLOAD_VLAN and don't have a
+ * port VLAN configured
+ */
+ vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+ /* Don't fail when attempting to enable Rx VLAN filtering */
+ vlan_ops->ena_rx_filtering = noop_vlan;
+
+ /* Tx VLAN filtering always disabled to allow software offloaded VLANs
+ * for VFs that only support VIRTCHNL_VF_OFFLOAD_VLAN and don't have a
+ * port VLAN configured
+ */
+ vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
+ /* Don't fail when attempting to enable Tx VLAN filtering */
+ vlan_ops->ena_tx_filtering = noop_vlan;
+
+ if (vlan_ops->dis_rx_filtering(vsi))
+ dev_dbg(dev, "Failed to disable Rx VLAN filtering for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+ if (vlan_ops->dis_tx_filtering(vsi))
+ dev_dbg(dev, "Failed to disable Tx VLAN filtering for old VF without VIRTHCNL_VF_OFFLOAD_VLAN_V2 support\n");
+
+ /* All outer VLAN offloads must be disabled */
+ vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
+ vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
+
+ if (vlan_ops->dis_stripping(vsi))
+ dev_dbg(dev, "Failed to disable outer VLAN stripping for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+
+ if (vlan_ops->dis_insertion(vsi))
+ dev_dbg(dev, "Failed to disable outer VLAN insertion for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+
+ /* All inner VLAN offloads must be disabled */
+ vlan_ops = &vsi->inner_vlan_ops;
+
+ vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
+ vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
+
+ if (vlan_ops->dis_stripping(vsi))
+ dev_dbg(dev, "Failed to disable inner VLAN stripping for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+
+ if (vlan_ops->dis_insertion(vsi))
+ dev_dbg(dev, "Failed to disable inner VLAN insertion for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+}
+
+/**
+ * ice_vf_vsi_cfg_svm_legacy_vlan_mode - Config VLAN mode for old VFs in SVM
+ * @vsi: VF's VSI being configured
+ *
+ * This should only be called when Single VLAN Mode (SVM) is enabled, there is
+ * not a port VLAN enabled on this VF, and the VF negotiates
+ * VIRTCHNL_VF_OFFLOAD_VLAN.
+ *
+ * All of the normal SVM VLAN ops are identical for this case. However, by
+ * default Rx VLAN filtering should be turned off by default in this case.
+ */
+void ice_vf_vsi_cfg_svm_legacy_vlan_mode(struct ice_vsi *vsi)
+{
+ struct ice_vf *vf = &vsi->back->vf[vsi->vf_id];
+
+ if (ice_is_dvm_ena(&vsi->back->hw) || ice_vf_is_port_vlan_ena(vf))
+ return;
+
+ if (vsi->inner_vlan_ops.dis_rx_filtering(vsi))
+ dev_dbg(ice_pf_to_dev(vf->pf), "Failed to disable Rx VLAN filtering for old VF with VIRTCHNL_VF_OFFLOAD_VLAN support\n");
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h
new file mode 100644
index 000000000000..875a4e615f39
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VF_VSI_VLAN_OPS_H_
+#define _ICE_VF_VSI_VLAN_OPS_H_
+
+#include "ice_vsi_vlan_ops.h"
+
+struct ice_vsi;
+
+void ice_vf_vsi_cfg_dvm_legacy_vlan_mode(struct ice_vsi *vsi);
+void ice_vf_vsi_cfg_svm_legacy_vlan_mode(struct ice_vsi *vsi);
+
+#ifdef CONFIG_PCI_IOV
+void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi);
+#else
+static inline void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) { }
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_PF_VSI_VLAN_OPS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
index 9feebe5f556c..5a82216e7d03 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
@@ -55,6 +55,15 @@ static const u32 vlan_allowlist_opcodes[] = {
VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
};
+/* VIRTCHNL_VF_OFFLOAD_VLAN_V2 */
+static const u32 vlan_v2_allowlist_opcodes[] = {
+ VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS, VIRTCHNL_OP_ADD_VLAN_V2,
+ VIRTCHNL_OP_DEL_VLAN_V2, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2,
+ VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2,
+ VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2,
+ VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2,
+};
+
/* VIRTCHNL_VF_OFFLOAD_RSS_PF */
static const u32 rss_pf_allowlist_opcodes[] = {
VIRTCHNL_OP_CONFIG_RSS_KEY, VIRTCHNL_OP_CONFIG_RSS_LUT,
@@ -89,6 +98,7 @@ static const struct allowlist_opcode_info allowlist_opcodes[] = {
ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_RSS_PF, rss_pf_allowlist_opcodes),
ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF, adv_rss_pf_allowlist_opcodes),
ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes),
+ ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN_V2, vlan_v2_allowlist_opcodes),
};
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 39b80124d282..02a8c15d2bf3 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -10,6 +10,8 @@
#include "ice_eswitch.h"
#include "ice_virtchnl_allowlist.h"
#include "ice_flex_pipe.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_vlan.h"
#define FIELD_SELECTOR(proto_hdr_field) \
BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK)
@@ -643,55 +645,6 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
}
/**
- * ice_vsi_manage_pvid - Enable or disable port VLAN for VSI
- * @vsi: the VSI to update
- * @pvid_info: VLAN ID and QoS used to set the PVID VSI context field
- * @enable: true for enable PVID false for disable
- */
-static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 pvid_info, bool enable)
-{
- struct ice_hw *hw = &vsi->back->hw;
- struct ice_aqc_vsi_props *info;
- struct ice_vsi_ctx *ctxt;
- int ret;
-
- ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
- if (!ctxt)
- return -ENOMEM;
-
- ctxt->info = vsi->info;
- info = &ctxt->info;
- if (enable) {
- info->vlan_flags = ICE_AQ_VSI_VLAN_MODE_UNTAGGED |
- ICE_AQ_VSI_PVLAN_INSERT_PVID |
- ICE_AQ_VSI_VLAN_EMOD_STR;
- info->sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
- } else {
- info->vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING |
- ICE_AQ_VSI_VLAN_MODE_ALL;
- info->sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
- }
-
- info->pvid = cpu_to_le16(pvid_info);
- info->valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
- ICE_AQ_VSI_PROP_SW_VALID);
-
- ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
- if (ret) {
- dev_info(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %d aq_err %s\n",
- ret, ice_aq_str(hw->adminq.sq_last_status));
- goto out;
- }
-
- vsi->info.vlan_flags = info->vlan_flags;
- vsi->info.sw_flags2 = info->sw_flags2;
- vsi->info.pvid = info->pvid;
-out:
- kfree(ctxt);
- return ret;
-}
-
-/**
* ice_vf_get_port_info - Get the VF's port info structure
* @vf: VF used to get the port info structure for
*/
@@ -800,43 +753,151 @@ static int ice_vf_rebuild_host_tx_rate_cfg(struct ice_vf *vf)
return 0;
}
+static u16 ice_vf_get_port_vlan_id(struct ice_vf *vf)
+{
+ return vf->port_vlan_info.vid;
+}
+
+static u8 ice_vf_get_port_vlan_prio(struct ice_vf *vf)
+{
+ return vf->port_vlan_info.prio;
+}
+
+bool ice_vf_is_port_vlan_ena(struct ice_vf *vf)
+{
+ return (ice_vf_get_port_vlan_id(vf) || ice_vf_get_port_vlan_prio(vf));
+}
+
+static u16 ice_vf_get_port_vlan_tpid(struct ice_vf *vf)
+{
+ return vf->port_vlan_info.tpid;
+}
+
/**
* ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN
* @vf: VF to add MAC filters for
+ * @vsi: Pointer to VSI
*
* Called after a VF VSI has been re-added/rebuilt during reset. The PF driver
* always re-adds either a VLAN 0 or port VLAN based filter after reset.
*/
-static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf)
+static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf, struct ice_vsi *vsi)
{
+ struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
struct device *dev = ice_pf_to_dev(vf->pf);
- struct ice_vsi *vsi = ice_get_vf_vsi(vf);
- u16 vlan_id = 0;
int err;
- if (vf->port_vlan_info) {
- err = ice_vsi_manage_pvid(vsi, vf->port_vlan_info, true);
+ if (ice_vf_is_port_vlan_ena(vf)) {
+ err = vlan_ops->set_port_vlan(vsi, &vf->port_vlan_info);
if (err) {
dev_err(dev, "failed to configure port VLAN via VSI parameters for VF %u, error %d\n",
vf->vf_id, err);
return err;
}
- vlan_id = vf->port_vlan_info & VLAN_VID_MASK;
+ err = vlan_ops->add_vlan(vsi, &vf->port_vlan_info);
+ } else {
+ err = ice_vsi_add_vlan_zero(vsi);
}
- /* vlan_id will either be 0 or the port VLAN number */
- err = ice_vsi_add_vlan(vsi, vlan_id, ICE_FWD_TO_VSI);
if (err) {
- dev_err(dev, "failed to add %s VLAN %u filter for VF %u, error %d\n",
- vf->port_vlan_info ? "port" : "", vlan_id, vf->vf_id,
- err);
+ dev_err(dev, "failed to add VLAN %u filter for VF %u during VF rebuild, error %d\n",
+ ice_vf_is_port_vlan_ena(vf) ?
+ ice_vf_get_port_vlan_id(vf) : 0, vf->vf_id, err);
return err;
}
+ err = vlan_ops->ena_rx_filtering(vsi);
+ if (err)
+ dev_warn(dev, "failed to enable Rx VLAN filtering for VF %d VSI %d during VF rebuild, error %d\n",
+ vf->vf_id, vsi->idx, err);
+
return 0;
}
+static int ice_cfg_mac_antispoof(struct ice_vsi *vsi, bool enable)
+{
+ struct ice_vsi_ctx *ctx;
+ int err;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->info.sec_flags = vsi->info.sec_flags;
+ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+
+ if (enable)
+ ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF;
+ else
+ ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF;
+
+ err = ice_update_vsi(&vsi->back->hw, vsi->idx, ctx, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to configure Tx MAC anti-spoof %s for VSI %d, error %d\n",
+ enable ? "ON" : "OFF", vsi->vsi_num, err);
+ else
+ vsi->info.sec_flags = ctx->info.sec_flags;
+
+ kfree(ctx);
+
+ return err;
+}
+
+/**
+ * ice_vsi_ena_spoofchk - enable Tx spoof checking for this VSI
+ * @vsi: VSI to enable Tx spoof checking for
+ */
+static int ice_vsi_ena_spoofchk(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops;
+ int err;
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+ err = vlan_ops->ena_tx_filtering(vsi);
+ if (err)
+ return err;
+
+ return ice_cfg_mac_antispoof(vsi, true);
+}
+
+/**
+ * ice_vsi_dis_spoofchk - disable Tx spoof checking for this VSI
+ * @vsi: VSI to disable Tx spoof checking for
+ */
+static int ice_vsi_dis_spoofchk(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops;
+ int err;
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+ err = vlan_ops->dis_tx_filtering(vsi);
+ if (err)
+ return err;
+
+ return ice_cfg_mac_antispoof(vsi, false);
+}
+
+/**
+ * ice_vf_set_spoofchk_cfg - apply Tx spoof checking setting
+ * @vf: VF set spoofchk for
+ * @vsi: VSI associated to the VF
+ */
+static int
+ice_vf_set_spoofchk_cfg(struct ice_vf *vf, struct ice_vsi *vsi)
+{
+ int err;
+
+ if (vf->spoofchk)
+ err = ice_vsi_ena_spoofchk(vsi);
+ else
+ err = ice_vsi_dis_spoofchk(vsi);
+
+ return err;
+}
+
/**
* ice_vf_rebuild_host_mac_cfg - add broadcast and the VF's perm_addr/LAA
* @vf: VF to add MAC filters for
@@ -1227,10 +1288,10 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m)
struct ice_hw *hw = &vsi->back->hw;
int status;
- if (vf->port_vlan_info)
+ if (ice_vf_is_port_vlan_ena(vf))
status = ice_fltr_set_vsi_promisc(hw, vsi->idx, promisc_m,
- vf->port_vlan_info & VLAN_VID_MASK);
- else if (vsi->num_vlan > 1)
+ ice_vf_get_port_vlan_id(vf));
+ else if (ice_vsi_has_non_zero_vlans(vsi))
status = ice_fltr_set_vlan_vsi_promisc(hw, vsi, promisc_m);
else
status = ice_fltr_set_vsi_promisc(hw, vsi->idx, promisc_m, 0);
@@ -1250,10 +1311,10 @@ ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m)
struct ice_hw *hw = &vsi->back->hw;
int status;
- if (vf->port_vlan_info)
+ if (ice_vf_is_port_vlan_ena(vf))
status = ice_fltr_clear_vsi_promisc(hw, vsi->idx, promisc_m,
- vf->port_vlan_info & VLAN_VID_MASK);
- else if (vsi->num_vlan > 1)
+ ice_vf_get_port_vlan_id(vf));
+ else if (ice_vsi_has_non_zero_vlans(vsi))
status = ice_fltr_clear_vlan_vsi_promisc(hw, vsi, promisc_m);
else
status = ice_fltr_clear_vsi_promisc(hw, vsi->idx, promisc_m, 0);
@@ -1338,7 +1399,7 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
dev_err(dev, "failed to rebuild default MAC configuration for VF %d\n",
vf->vf_id);
- if (ice_vf_rebuild_host_vlan_cfg(vf))
+ if (ice_vf_rebuild_host_vlan_cfg(vf, vsi))
dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n",
vf->vf_id);
@@ -1346,6 +1407,10 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
dev_err(dev, "failed to rebuild Tx rate limiting configuration for VF %u\n",
vf->vf_id);
+ if (ice_vf_set_spoofchk_cfg(vf, vsi))
+ dev_err(dev, "failed to rebuild spoofchk configuration for VF %d\n",
+ vf->vf_id);
+
/* rebuild aggregator node config for main VF VSI */
ice_vf_rebuild_aggregator_node_cfg(vsi);
}
@@ -1406,6 +1471,7 @@ static void ice_vf_set_initialized(struct ice_vf *vf)
clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
clear_bit(ICE_VF_STATE_DIS, vf->vf_states);
set_bit(ICE_VF_STATE_INIT, vf->vf_states);
+ memset(&vf->vlan_v2_caps, 0, sizeof(vf->vlan_v2_caps));
}
/**
@@ -1623,7 +1689,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
*/
if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) {
- if (vf->port_vlan_info || vsi->num_vlan)
+ if (ice_vf_is_port_vlan_ena(vf) || vsi->num_vlan)
promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
else
promisc_m = ICE_UCAST_PROMISC_BITS;
@@ -1732,6 +1798,7 @@ static void ice_vc_notify_vf_reset(struct ice_vf *vf)
*/
static int ice_init_vf_vsi_res(struct ice_vf *vf)
{
+ struct ice_vsi_vlan_ops *vlan_ops;
struct ice_pf *pf = vf->pf;
u8 broadcast[ETH_ALEN];
struct ice_vsi *vsi;
@@ -1745,13 +1812,21 @@ static int ice_init_vf_vsi_res(struct ice_vf *vf)
if (!vsi)
return -ENOMEM;
- err = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI);
+ err = ice_vsi_add_vlan_zero(vsi);
if (err) {
dev_warn(dev, "Failed to add VLAN 0 filter for VF %d\n",
vf->vf_id);
goto release_vsi;
}
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ err = vlan_ops->ena_rx_filtering(vsi);
+ if (err) {
+ dev_warn(dev, "Failed to enable Rx VLAN filtering for VF %d\n",
+ vf->vf_id);
+ goto release_vsi;
+ }
+
eth_broadcast_addr(broadcast);
err = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI);
if (err) {
@@ -1760,6 +1835,13 @@ static int ice_init_vf_vsi_res(struct ice_vf *vf)
goto release_vsi;
}
+ err = ice_vf_set_spoofchk_cfg(vf, vsi);
+ if (err) {
+ dev_warn(dev, "Failed to initialize spoofchk setting for VF %d\n",
+ vf->vf_id);
+ goto release_vsi;
+ }
+
vf->num_mac = 1;
return 0;
@@ -2239,7 +2321,7 @@ static u16 ice_vc_get_max_frame_size(struct ice_vf *vf)
max_frame_size = pi->phy.link_info.max_frame_size;
- if (vf->port_vlan_info)
+ if (ice_vf_is_port_vlan_ena(vf))
max_frame_size -= VLAN_HLEN;
return max_frame_size;
@@ -2288,8 +2370,33 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
goto err;
}
- if (!vsi->info.pvid)
- vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN;
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_VLAN_V2) {
+ /* VLAN offloads based on current device configuration */
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN_V2;
+ } else if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_VLAN) {
+ /* allow VF to negotiate VIRTCHNL_VF_OFFLOAD explicitly for
+ * these two conditions, which amounts to guest VLAN filtering
+ * and offloads being based on the inner VLAN or the
+ * inner/single VLAN respectively and don't allow VF to
+ * negotiate VIRTCHNL_VF_OFFLOAD in any other cases
+ */
+ if (ice_is_dvm_ena(&pf->hw) && ice_vf_is_port_vlan_ena(vf)) {
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN;
+ } else if (!ice_is_dvm_ena(&pf->hw) &&
+ !ice_vf_is_port_vlan_ena(vf)) {
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN;
+ /* configure backward compatible support for VFs that
+ * only support VIRTCHNL_VF_OFFLOAD_VLAN, the PF is
+ * configured in SVM, and no port VLAN is configured
+ */
+ ice_vf_vsi_cfg_svm_legacy_vlan_mode(vsi);
+ } else if (ice_is_dvm_ena(&pf->hw)) {
+ /* configure software offloaded VLAN support when DVM
+ * is enabled, but no port VLAN is enabled
+ */
+ ice_vf_vsi_cfg_dvm_legacy_vlan_mode(vsi);
+ }
+ }
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF;
@@ -2892,7 +2999,6 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_pf *pf = np->vsi->back;
- struct ice_vsi_ctx *ctx;
struct ice_vsi *vf_vsi;
struct device *dev;
struct ice_vf *vf;
@@ -2925,37 +3031,16 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
return 0;
}
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
-
- ctx->info.sec_flags = vf_vsi->info.sec_flags;
- ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
- if (ena) {
- ctx->info.sec_flags |=
- ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
- (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
- ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
- } else {
- ctx->info.sec_flags &=
- ~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
- (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
- ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S));
- }
-
- ret = ice_update_vsi(&pf->hw, vf_vsi->idx, ctx, NULL);
- if (ret) {
- dev_err(dev, "Failed to %sable spoofchk on VF %d VSI %d\n error %d\n",
- ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num, ret);
- goto out;
- }
-
- /* only update spoofchk state and VSI context on success */
- vf_vsi->info.sec_flags = ctx->info.sec_flags;
- vf->spoofchk = ena;
+ if (ena)
+ ret = ice_vsi_ena_spoofchk(vf_vsi);
+ else
+ ret = ice_vsi_dis_spoofchk(vf_vsi);
+ if (ret)
+ dev_err(dev, "Failed to set spoofchk %s for VF %d VSI %d\n error %d\n",
+ ena ? "ON" : "OFF", vf->vf_id, vf_vsi->vsi_num, ret);
+ else
+ vf->spoofchk = ena;
-out:
- kfree(ctx);
return ret;
}
@@ -2995,6 +3080,7 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
bool rm_promisc, alluni = false, allmulti = false;
struct virtchnl_promisc_info *info =
(struct virtchnl_promisc_info *)msg;
+ struct ice_vsi_vlan_ops *vlan_ops;
int mcast_err = 0, ucast_err = 0;
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
@@ -3033,16 +3119,15 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
rm_promisc = !allmulti && !alluni;
- if (vsi->num_vlan || vf->port_vlan_info) {
- if (rm_promisc)
- ret = ice_cfg_vlan_pruning(vsi, true);
- else
- ret = ice_cfg_vlan_pruning(vsi, false);
- if (ret) {
- dev_err(dev, "Failed to configure VLAN pruning in promiscuous mode\n");
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ if (rm_promisc)
+ ret = vlan_ops->ena_rx_filtering(vsi);
+ else
+ ret = vlan_ops->dis_rx_filtering(vsi);
+ if (ret) {
+ dev_err(dev, "Failed to configure VLAN pruning in promiscuous mode\n");
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
}
if (!test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) {
@@ -3069,7 +3154,8 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
} else {
u8 mcast_m, ucast_m;
- if (vf->port_vlan_info || vsi->num_vlan > 1) {
+ if (ice_vf_is_port_vlan_ena(vf) ||
+ ice_vsi_has_non_zero_vlans(vsi)) {
mcast_m = ICE_MCAST_VLAN_PROMISC_BITS;
ucast_m = ICE_UCAST_VLAN_PROMISC_BITS;
} else {
@@ -3652,7 +3738,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
/* add space for the port VLAN since the VF driver is not
* expected to account for it in the MTU calculation
*/
- if (vf->port_vlan_info)
+ if (ice_vf_is_port_vlan_ena(vf))
vsi->max_frame += VLAN_HLEN;
if (ice_vsi_cfg_single_rxq(vsi, q_idx)) {
@@ -4064,6 +4150,33 @@ error_param:
}
/**
+ * ice_is_supported_port_vlan_proto - make sure the vlan_proto is supported
+ * @hw: hardware structure used to check the VLAN mode
+ * @vlan_proto: VLAN TPID being checked
+ *
+ * If the device is configured in Double VLAN Mode (DVM), then both ETH_P_8021Q
+ * and ETH_P_8021AD are supported. If the device is configured in Single VLAN
+ * Mode (SVM), then only ETH_P_8021Q is supported.
+ */
+static bool
+ice_is_supported_port_vlan_proto(struct ice_hw *hw, u16 vlan_proto)
+{
+ bool is_supported = false;
+
+ switch (vlan_proto) {
+ case ETH_P_8021Q:
+ is_supported = true;
+ break;
+ case ETH_P_8021AD:
+ if (ice_is_dvm_ena(hw))
+ is_supported = true;
+ break;
+ }
+
+ return is_supported;
+}
+
+/**
* ice_set_vf_port_vlan
* @netdev: network interface device structure
* @vf_id: VF identifier
@@ -4078,9 +4191,9 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
__be16 vlan_proto)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ u16 local_vlan_proto = ntohs(vlan_proto);
struct device *dev;
struct ice_vf *vf;
- u16 vlanprio;
int ret;
dev = ice_pf_to_dev(pf);
@@ -4093,8 +4206,9 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
return -EINVAL;
}
- if (vlan_proto != htons(ETH_P_8021Q)) {
- dev_err(dev, "VF VLAN protocol is not supported\n");
+ if (!ice_is_supported_port_vlan_proto(&pf->hw, local_vlan_proto)) {
+ dev_err(dev, "VF VLAN protocol 0x%04x is not supported\n",
+ local_vlan_proto);
return -EPROTONOSUPPORT;
}
@@ -4103,21 +4217,21 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
if (ret)
return ret;
- vlanprio = vlan_id | (qos << VLAN_PRIO_SHIFT);
-
- if (vf->port_vlan_info == vlanprio) {
+ if (ice_vf_get_port_vlan_prio(vf) == qos &&
+ ice_vf_get_port_vlan_tpid(vf) == local_vlan_proto &&
+ ice_vf_get_port_vlan_id(vf) == vlan_id) {
/* duplicate request, so just return success */
- dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio);
+ dev_dbg(dev, "Duplicate port VLAN %u, QoS %u, TPID 0x%04x request\n",
+ vlan_id, qos, local_vlan_proto);
return 0;
}
mutex_lock(&vf->cfg_lock);
- vf->port_vlan_info = vlanprio;
-
- if (vf->port_vlan_info)
- dev_info(dev, "Setting VLAN %d, QoS 0x%x on VF %d\n",
- vlan_id, qos, vf_id);
+ vf->port_vlan_info = ICE_VLAN(local_vlan_proto, vlan_id, qos);
+ if (ice_vf_is_port_vlan_ena(vf))
+ dev_info(dev, "Setting VLAN %u, QoS %u, TPID 0x%04x on VF %d\n",
+ vlan_id, qos, local_vlan_proto, vf_id);
else
dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id);
@@ -4139,6 +4253,83 @@ static bool ice_vf_vlan_offload_ena(u32 caps)
}
/**
+ * ice_is_vlan_promisc_allowed - check if VLAN promiscuous config is allowed
+ * @vf: VF used to determine if VLAN promiscuous config is allowed
+ */
+static bool ice_is_vlan_promisc_allowed(struct ice_vf *vf)
+{
+ if ((test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
+ test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) &&
+ test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, vf->pf->flags))
+ return true;
+
+ return false;
+}
+
+/**
+ * ice_vf_ena_vlan_promisc - Enable Tx/Rx VLAN promiscuous for the VLAN
+ * @vsi: VF's VSI used to enable VLAN promiscuous mode
+ * @vlan: VLAN used to enable VLAN promiscuous
+ *
+ * This function should only be called if VLAN promiscuous mode is allowed,
+ * which can be determined via ice_is_vlan_promisc_allowed().
+ */
+static int ice_vf_ena_vlan_promisc(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ u8 promisc_m = ICE_PROMISC_VLAN_TX | ICE_PROMISC_VLAN_RX;
+ int status;
+
+ status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m,
+ vlan->vid);
+ if (status && status != -EEXIST)
+ return status;
+
+ return 0;
+}
+
+/**
+ * ice_vf_dis_vlan_promisc - Disable Tx/Rx VLAN promiscuous for the VLAN
+ * @vsi: VF's VSI used to disable VLAN promiscuous mode for
+ * @vlan: VLAN used to disable VLAN promiscuous
+ *
+ * This function should only be called if VLAN promiscuous mode is allowed,
+ * which can be determined via ice_is_vlan_promisc_allowed().
+ */
+static int ice_vf_dis_vlan_promisc(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ u8 promisc_m = ICE_PROMISC_VLAN_TX | ICE_PROMISC_VLAN_RX;
+ int status;
+
+ status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m,
+ vlan->vid);
+ if (status && status != -ENOENT)
+ return status;
+
+ return 0;
+}
+
+/**
+ * ice_vf_has_max_vlans - check if VF already has the max allowed VLAN filters
+ * @vf: VF to check against
+ * @vsi: VF's VSI
+ *
+ * If the VF is trusted then the VF is allowed to add as many VLANs as it
+ * wants to, so return false.
+ *
+ * When the VF is untrusted compare the number of non-zero VLANs + 1 to the max
+ * allowed VLANs for an untrusted VF. Return the result of this comparison.
+ */
+static bool ice_vf_has_max_vlans(struct ice_vf *vf, struct ice_vsi *vsi)
+{
+ if (ice_is_vf_trusted(vf))
+ return false;
+
+#define ICE_VF_ADDED_VLAN_ZERO_FLTRS 1
+ return ((ice_vsi_num_non_zero_vlans(vsi) +
+ ICE_VF_ADDED_VLAN_ZERO_FLTRS) >= ICE_MAX_VLAN_PER_VF);
+}
+
+/**
* ice_vc_process_vlan_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -4155,9 +4346,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
bool vlan_promisc = false;
struct ice_vsi *vsi;
struct device *dev;
- struct ice_hw *hw;
int status = 0;
- u8 promisc_m;
int i;
dev = ice_pf_to_dev(pf);
@@ -4185,15 +4374,13 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
}
}
- hw = &pf->hw;
vsi = ice_get_vf_vsi(vf);
if (!vsi) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
- if (add_v && !ice_is_vf_trusted(vf) &&
- vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) {
+ if (add_v && ice_vf_has_max_vlans(vf, vsi)) {
dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
vf->vf_id);
/* There is no need to let VF know about being not trusted,
@@ -4202,22 +4389,28 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
goto error_param;
}
- if (vsi->info.pvid) {
+ /* in DVM a VF can add/delete inner VLAN filters when
+ * VIRTCHNL_VF_OFFLOAD_VLAN is negotiated, so only reject in SVM
+ */
+ if (ice_vf_is_port_vlan_ena(vf) && !ice_is_dvm_ena(&pf->hw)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
- if ((test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
- test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) &&
- test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags))
- vlan_promisc = true;
+ /* in DVM VLAN promiscuous is based on the outer VLAN, which would be
+ * the port VLAN if VIRTCHNL_VF_OFFLOAD_VLAN was negotiated, so only
+ * allow vlan_promisc = true in SVM and if no port VLAN is configured
+ */
+ vlan_promisc = ice_is_vlan_promisc_allowed(vf) &&
+ !ice_is_dvm_ena(&pf->hw) &&
+ !ice_vf_is_port_vlan_ena(vf);
if (add_v) {
for (i = 0; i < vfl->num_elements; i++) {
u16 vid = vfl->vlan_id[i];
+ struct ice_vlan vlan;
- if (!ice_is_vf_trusted(vf) &&
- vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) {
+ if (ice_vf_has_max_vlans(vf, vsi)) {
dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
vf->vf_id);
/* There is no need to let VF know about being
@@ -4234,29 +4427,23 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
if (!vid)
continue;
- status = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI);
+ vlan = ICE_VLAN(ETH_P_8021Q, vid, 0);
+ status = vsi->inner_vlan_ops.add_vlan(vsi, &vlan);
if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
- /* Enable VLAN pruning when non-zero VLAN is added */
- if (!vlan_promisc && vid &&
- !ice_vsi_is_vlan_pruning_ena(vsi)) {
- status = ice_cfg_vlan_pruning(vsi, true);
- if (status) {
+ /* Enable VLAN filtering on first non-zero VLAN */
+ if (!vlan_promisc && vid && !ice_is_dvm_ena(&pf->hw)) {
+ if (vsi->inner_vlan_ops.ena_rx_filtering(vsi)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
dev_err(dev, "Enable VLAN pruning on VLAN ID: %d failed error-%d\n",
vid, status);
goto error_param;
}
} else if (vlan_promisc) {
- /* Enable Ucast/Mcast VLAN promiscuous mode */
- promisc_m = ICE_PROMISC_VLAN_TX |
- ICE_PROMISC_VLAN_RX;
-
- status = ice_set_vsi_promisc(hw, vsi->idx,
- promisc_m, vid);
+ status = ice_vf_ena_vlan_promisc(vsi, &vlan);
if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
dev_err(dev, "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n",
@@ -4277,6 +4464,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
num_vf_vlan = vsi->num_vlan;
for (i = 0; i < vfl->num_elements && i < num_vf_vlan; i++) {
u16 vid = vfl->vlan_id[i];
+ struct ice_vlan vlan;
/* we add VLAN 0 by default for each VF so we can enable
* Tx VLAN anti-spoof without triggering MDD events so
@@ -4285,28 +4473,19 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
if (!vid)
continue;
- /* Make sure ice_vsi_kill_vlan is successful before
- * updating VLAN information
- */
- status = ice_vsi_kill_vlan(vsi, vid);
+ vlan = ICE_VLAN(ETH_P_8021Q, vid, 0);
+ status = vsi->inner_vlan_ops.del_vlan(vsi, &vlan);
if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
- /* Disable VLAN pruning when only VLAN 0 is left */
- if (vsi->num_vlan == 1 &&
- ice_vsi_is_vlan_pruning_ena(vsi))
- ice_cfg_vlan_pruning(vsi, false);
+ /* Disable VLAN filtering when only VLAN 0 is left */
+ if (!ice_vsi_has_non_zero_vlans(vsi))
+ vsi->inner_vlan_ops.dis_rx_filtering(vsi);
- /* Disable Unicast/Multicast VLAN promiscuous mode */
- if (vlan_promisc) {
- promisc_m = ICE_PROMISC_VLAN_TX |
- ICE_PROMISC_VLAN_RX;
-
- ice_clear_vsi_promisc(hw, vsi->idx,
- promisc_m, vid);
- }
+ if (vlan_promisc)
+ ice_vf_dis_vlan_promisc(vsi, &vlan);
}
}
@@ -4366,7 +4545,7 @@ static int ice_vc_ena_vlan_stripping(struct ice_vf *vf)
}
vsi = ice_get_vf_vsi(vf);
- if (ice_vsi_manage_vlan_stripping(vsi, true))
+ if (vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q))
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
error_param:
@@ -4401,7 +4580,7 @@ static int ice_vc_dis_vlan_stripping(struct ice_vf *vf)
goto error_param;
}
- if (ice_vsi_manage_vlan_stripping(vsi, false))
+ if (vsi->inner_vlan_ops.dis_stripping(vsi))
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
error_param:
@@ -4413,11 +4592,8 @@ error_param:
* ice_vf_init_vlan_stripping - enable/disable VLAN stripping on initialization
* @vf: VF to enable/disable VLAN stripping for on initialization
*
- * If the VIRTCHNL_VF_OFFLOAD_VLAN flag is set enable VLAN stripping, else if
- * the flag is cleared then we want to disable stripping. For example, the flag
- * will be cleared when port VLANs are configured by the administrator before
- * passing the VF to the guest or if the AVF driver doesn't support VLAN
- * offloads.
+ * Set the default for VLAN stripping based on whether a port VLAN is configured
+ * and the current VLAN mode of the device.
*/
static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
{
@@ -4426,14 +4602,965 @@ static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
if (!vsi)
return -EINVAL;
- /* don't modify stripping if port VLAN is configured */
- if (vsi->info.pvid)
+ /* don't modify stripping if port VLAN is configured in SVM since the
+ * port VLAN is based on the inner/single VLAN in SVM
+ */
+ if (ice_vf_is_port_vlan_ena(vf) && !ice_is_dvm_ena(&vsi->back->hw))
return 0;
if (ice_vf_vlan_offload_ena(vf->driver_caps))
- return ice_vsi_manage_vlan_stripping(vsi, true);
+ return vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q);
+ else
+ return vsi->inner_vlan_ops.dis_stripping(vsi);
+}
+
+static u16 ice_vc_get_max_vlan_fltrs(struct ice_vf *vf)
+{
+ if (vf->trusted)
+ return VLAN_N_VID;
else
- return ice_vsi_manage_vlan_stripping(vsi, false);
+ return ICE_MAX_VLAN_PER_VF;
+}
+
+/**
+ * ice_vf_outer_vlan_not_allowed - check outer VLAN can be used when the device is in DVM
+ * @vf: VF that being checked for
+ */
+static bool ice_vf_outer_vlan_not_allowed(struct ice_vf *vf)
+{
+ if (ice_vf_is_port_vlan_ena(vf))
+ return true;
+
+ return false;
+}
+
+/**
+ * ice_vc_set_dvm_caps - set VLAN capabilities when the device is in DVM
+ * @vf: VF that capabilities are being set for
+ * @caps: VLAN capabilities to populate
+ *
+ * Determine VLAN capabilities support based on whether a port VLAN is
+ * configured. If a port VLAN is configured then the VF should use the inner
+ * filtering/offload capabilities since the port VLAN is using the outer VLAN
+ * capabilies.
+ */
+static void
+ice_vc_set_dvm_caps(struct ice_vf *vf, struct virtchnl_vlan_caps *caps)
+{
+ struct virtchnl_vlan_supported_caps *supported_caps;
+
+ if (ice_vf_outer_vlan_not_allowed(vf)) {
+ /* until support for inner VLAN filtering is added when a port
+ * VLAN is configured, only support software offloaded inner
+ * VLANs when a port VLAN is confgured in DVM
+ */
+ supported_caps = &caps->filtering.filtering_support;
+ supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ supported_caps = &caps->offloads.stripping_support;
+ supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ supported_caps = &caps->offloads.insertion_support;
+ supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ caps->offloads.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100;
+ caps->offloads.ethertype_match =
+ VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION;
+ } else {
+ supported_caps = &caps->filtering.filtering_support;
+ supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+ supported_caps->outer = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ VIRTCHNL_VLAN_ETHERTYPE_9100 |
+ VIRTCHNL_VLAN_ETHERTYPE_AND;
+ caps->filtering.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ VIRTCHNL_VLAN_ETHERTYPE_9100;
+
+ supported_caps = &caps->offloads.stripping_support;
+ supported_caps->inner = VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+ supported_caps->outer = VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ VIRTCHNL_VLAN_ETHERTYPE_9100 |
+ VIRTCHNL_VLAN_ETHERTYPE_XOR |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2;
+
+ supported_caps = &caps->offloads.insertion_support;
+ supported_caps->inner = VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+ supported_caps->outer = VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ VIRTCHNL_VLAN_ETHERTYPE_9100 |
+ VIRTCHNL_VLAN_ETHERTYPE_XOR |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2;
+
+ caps->offloads.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100;
+
+ caps->offloads.ethertype_match =
+ VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION;
+ }
+
+ caps->filtering.max_filters = ice_vc_get_max_vlan_fltrs(vf);
+}
+
+/**
+ * ice_vc_set_svm_caps - set VLAN capabilities when the device is in SVM
+ * @vf: VF that capabilities are being set for
+ * @caps: VLAN capabilities to populate
+ *
+ * Determine VLAN capabilities support based on whether a port VLAN is
+ * configured. If a port VLAN is configured then the VF does not have any VLAN
+ * filtering or offload capabilities since the port VLAN is using the inner VLAN
+ * capabilities in single VLAN mode (SVM). Otherwise allow the VF to use inner
+ * VLAN fitlering and offload capabilities.
+ */
+static void
+ice_vc_set_svm_caps(struct ice_vf *vf, struct virtchnl_vlan_caps *caps)
+{
+ struct virtchnl_vlan_supported_caps *supported_caps;
+
+ if (ice_vf_is_port_vlan_ena(vf)) {
+ supported_caps = &caps->filtering.filtering_support;
+ supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ supported_caps = &caps->offloads.stripping_support;
+ supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ supported_caps = &caps->offloads.insertion_support;
+ supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ caps->offloads.ethertype_init = VIRTCHNL_VLAN_UNSUPPORTED;
+ caps->offloads.ethertype_match = VIRTCHNL_VLAN_UNSUPPORTED;
+ caps->filtering.max_filters = 0;
+ } else {
+ supported_caps = &caps->filtering.filtering_support;
+ supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+ caps->filtering.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100;
+
+ supported_caps = &caps->offloads.stripping_support;
+ supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ supported_caps = &caps->offloads.insertion_support;
+ supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ caps->offloads.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100;
+ caps->offloads.ethertype_match =
+ VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION;
+ caps->filtering.max_filters = ice_vc_get_max_vlan_fltrs(vf);
+ }
+}
+
+/**
+ * ice_vc_get_offload_vlan_v2_caps - determine VF's VLAN capabilities
+ * @vf: VF to determine VLAN capabilities for
+ *
+ * This will only be called if the VF and PF successfully negotiated
+ * VIRTCHNL_VF_OFFLOAD_VLAN_V2.
+ *
+ * Set VLAN capabilities based on the current VLAN mode and whether a port VLAN
+ * is configured or not.
+ */
+static int ice_vc_get_offload_vlan_v2_caps(struct ice_vf *vf)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_caps *caps = NULL;
+ int err, len = 0;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+ if (!caps) {
+ v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+ goto out;
+ }
+ len = sizeof(*caps);
+
+ if (ice_is_dvm_ena(&vf->pf->hw))
+ ice_vc_set_dvm_caps(vf, caps);
+ else
+ ice_vc_set_svm_caps(vf, caps);
+
+ /* store negotiated caps to prevent invalid VF messages */
+ memcpy(&vf->vlan_v2_caps, caps, sizeof(*caps));
+
+out:
+ err = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS,
+ v_ret, (u8 *)caps, len);
+ kfree(caps);
+ return err;
+}
+
+/**
+ * ice_vc_validate_vlan_tpid - validate VLAN TPID
+ * @filtering_caps: negotiated/supported VLAN filtering capabilities
+ * @tpid: VLAN TPID used for validation
+ *
+ * Convert the VLAN TPID to a VIRTCHNL_VLAN_ETHERTYPE_* and then compare against
+ * the negotiated/supported filtering caps to see if the VLAN TPID is valid.
+ */
+static bool ice_vc_validate_vlan_tpid(u16 filtering_caps, u16 tpid)
+{
+ enum virtchnl_vlan_support vlan_ethertype = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ switch (tpid) {
+ case ETH_P_8021Q:
+ vlan_ethertype = VIRTCHNL_VLAN_ETHERTYPE_8100;
+ break;
+ case ETH_P_8021AD:
+ vlan_ethertype = VIRTCHNL_VLAN_ETHERTYPE_88A8;
+ break;
+ case ETH_P_QINQ1:
+ vlan_ethertype = VIRTCHNL_VLAN_ETHERTYPE_9100;
+ break;
+ }
+
+ if (!(filtering_caps & vlan_ethertype))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_vc_is_valid_vlan - validate the virtchnl_vlan
+ * @vc_vlan: virtchnl_vlan to validate
+ *
+ * If the VLAN TCI and VLAN TPID are 0, then this filter is invalid, so return
+ * false. Otherwise return true.
+ */
+static bool ice_vc_is_valid_vlan(struct virtchnl_vlan *vc_vlan)
+{
+ if (!vc_vlan->tci || !vc_vlan->tpid)
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_vc_validate_vlan_filter_list - validate the filter list from the VF
+ * @vfc: negotiated/supported VLAN filtering capabilities
+ * @vfl: VLAN filter list from VF to validate
+ *
+ * Validate all of the filters in the VLAN filter list from the VF. If any of
+ * the checks fail then return false. Otherwise return true.
+ */
+static bool
+ice_vc_validate_vlan_filter_list(struct virtchnl_vlan_filtering_caps *vfc,
+ struct virtchnl_vlan_filter_list_v2 *vfl)
+{
+ u16 i;
+
+ if (!vfl->num_elements)
+ return false;
+
+ for (i = 0; i < vfl->num_elements; i++) {
+ struct virtchnl_vlan_supported_caps *filtering_support =
+ &vfc->filtering_support;
+ struct virtchnl_vlan_filter *vlan_fltr = &vfl->filters[i];
+ struct virtchnl_vlan *outer = &vlan_fltr->outer;
+ struct virtchnl_vlan *inner = &vlan_fltr->inner;
+
+ if ((ice_vc_is_valid_vlan(outer) &&
+ filtering_support->outer == VIRTCHNL_VLAN_UNSUPPORTED) ||
+ (ice_vc_is_valid_vlan(inner) &&
+ filtering_support->inner == VIRTCHNL_VLAN_UNSUPPORTED))
+ return false;
+
+ if ((outer->tci_mask &&
+ !(filtering_support->outer & VIRTCHNL_VLAN_FILTER_MASK)) ||
+ (inner->tci_mask &&
+ !(filtering_support->inner & VIRTCHNL_VLAN_FILTER_MASK)))
+ return false;
+
+ if (((outer->tci & VLAN_PRIO_MASK) &&
+ !(filtering_support->outer & VIRTCHNL_VLAN_PRIO)) ||
+ ((inner->tci & VLAN_PRIO_MASK) &&
+ !(filtering_support->inner & VIRTCHNL_VLAN_PRIO)))
+ return false;
+
+ if ((ice_vc_is_valid_vlan(outer) &&
+ !ice_vc_validate_vlan_tpid(filtering_support->outer, outer->tpid)) ||
+ (ice_vc_is_valid_vlan(inner) &&
+ !ice_vc_validate_vlan_tpid(filtering_support->inner, inner->tpid)))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * ice_vc_to_vlan - transform from struct virtchnl_vlan to struct ice_vlan
+ * @vc_vlan: struct virtchnl_vlan to transform
+ */
+static struct ice_vlan ice_vc_to_vlan(struct virtchnl_vlan *vc_vlan)
+{
+ struct ice_vlan vlan = { 0 };
+
+ vlan.prio = (vc_vlan->tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ vlan.vid = vc_vlan->tci & VLAN_VID_MASK;
+ vlan.tpid = vc_vlan->tpid;
+
+ return vlan;
+}
+
+/**
+ * ice_vc_vlan_action - action to perform on the virthcnl_vlan
+ * @vsi: VF's VSI used to perform the action
+ * @vlan_action: function to perform the action with (i.e. add/del)
+ * @vlan: VLAN filter to perform the action with
+ */
+static int
+ice_vc_vlan_action(struct ice_vsi *vsi,
+ int (*vlan_action)(struct ice_vsi *, struct ice_vlan *),
+ struct ice_vlan *vlan)
+{
+ int err;
+
+ err = vlan_action(vsi, vlan);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_vc_del_vlans - delete VLAN(s) from the virtchnl filter list
+ * @vf: VF used to delete the VLAN(s)
+ * @vsi: VF's VSI used to delete the VLAN(s)
+ * @vfl: virthchnl filter list used to delete the filters
+ */
+static int
+ice_vc_del_vlans(struct ice_vf *vf, struct ice_vsi *vsi,
+ struct virtchnl_vlan_filter_list_v2 *vfl)
+{
+ bool vlan_promisc = ice_is_vlan_promisc_allowed(vf);
+ int err;
+ u16 i;
+
+ for (i = 0; i < vfl->num_elements; i++) {
+ struct virtchnl_vlan_filter *vlan_fltr = &vfl->filters[i];
+ struct virtchnl_vlan *vc_vlan;
+
+ vc_vlan = &vlan_fltr->outer;
+ if (ice_vc_is_valid_vlan(vc_vlan)) {
+ struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan);
+
+ err = ice_vc_vlan_action(vsi,
+ vsi->outer_vlan_ops.del_vlan,
+ &vlan);
+ if (err)
+ return err;
+
+ if (vlan_promisc)
+ ice_vf_dis_vlan_promisc(vsi, &vlan);
+ }
+
+ vc_vlan = &vlan_fltr->inner;
+ if (ice_vc_is_valid_vlan(vc_vlan)) {
+ struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan);
+
+ err = ice_vc_vlan_action(vsi,
+ vsi->inner_vlan_ops.del_vlan,
+ &vlan);
+ if (err)
+ return err;
+
+ /* no support for VLAN promiscuous on inner VLAN unless
+ * we are in Single VLAN Mode (SVM)
+ */
+ if (!ice_is_dvm_ena(&vsi->back->hw) && vlan_promisc)
+ ice_vf_dis_vlan_promisc(vsi, &vlan);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_remove_vlan_v2_msg - virtchnl handler for VIRTCHNL_OP_DEL_VLAN_V2
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ */
+static int ice_vc_remove_vlan_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+ struct virtchnl_vlan_filter_list_v2 *vfl =
+ (struct virtchnl_vlan_filter_list_v2 *)msg;
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct ice_vsi *vsi;
+
+ if (!ice_vc_validate_vlan_filter_list(&vf->vlan_v2_caps.filtering,
+ vfl)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vfl->vport_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (ice_vc_del_vlans(vf, vsi, vfl))
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+out:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN_V2, v_ret, NULL,
+ 0);
+}
+
+/**
+ * ice_vc_add_vlans - add VLAN(s) from the virtchnl filter list
+ * @vf: VF used to add the VLAN(s)
+ * @vsi: VF's VSI used to add the VLAN(s)
+ * @vfl: virthchnl filter list used to add the filters
+ */
+static int
+ice_vc_add_vlans(struct ice_vf *vf, struct ice_vsi *vsi,
+ struct virtchnl_vlan_filter_list_v2 *vfl)
+{
+ bool vlan_promisc = ice_is_vlan_promisc_allowed(vf);
+ int err;
+ u16 i;
+
+ for (i = 0; i < vfl->num_elements; i++) {
+ struct virtchnl_vlan_filter *vlan_fltr = &vfl->filters[i];
+ struct virtchnl_vlan *vc_vlan;
+
+ vc_vlan = &vlan_fltr->outer;
+ if (ice_vc_is_valid_vlan(vc_vlan)) {
+ struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan);
+
+ err = ice_vc_vlan_action(vsi,
+ vsi->outer_vlan_ops.add_vlan,
+ &vlan);
+ if (err)
+ return err;
+
+ if (vlan_promisc) {
+ err = ice_vf_ena_vlan_promisc(vsi, &vlan);
+ if (err)
+ return err;
+ }
+ }
+
+ vc_vlan = &vlan_fltr->inner;
+ if (ice_vc_is_valid_vlan(vc_vlan)) {
+ struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan);
+
+ err = ice_vc_vlan_action(vsi,
+ vsi->inner_vlan_ops.add_vlan,
+ &vlan);
+ if (err)
+ return err;
+
+ /* no support for VLAN promiscuous on inner VLAN unless
+ * we are in Single VLAN Mode (SVM)
+ */
+ if (!ice_is_dvm_ena(&vsi->back->hw) && vlan_promisc) {
+ err = ice_vf_ena_vlan_promisc(vsi, &vlan);
+ if (err)
+ return err;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_validate_add_vlan_filter_list - validate add filter list from the VF
+ * @vsi: VF VSI used to get number of existing VLAN filters
+ * @vfc: negotiated/supported VLAN filtering capabilities
+ * @vfl: VLAN filter list from VF to validate
+ *
+ * Validate all of the filters in the VLAN filter list from the VF during the
+ * VIRTCHNL_OP_ADD_VLAN_V2 opcode. If any of the checks fail then return false.
+ * Otherwise return true.
+ */
+static bool
+ice_vc_validate_add_vlan_filter_list(struct ice_vsi *vsi,
+ struct virtchnl_vlan_filtering_caps *vfc,
+ struct virtchnl_vlan_filter_list_v2 *vfl)
+{
+ u16 num_requested_filters = vsi->num_vlan + vfl->num_elements;
+
+ if (num_requested_filters > vfc->max_filters)
+ return false;
+
+ return ice_vc_validate_vlan_filter_list(vfc, vfl);
+}
+
+/**
+ * ice_vc_add_vlan_v2_msg - virtchnl handler for VIRTCHNL_OP_ADD_VLAN_V2
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ */
+static int ice_vc_add_vlan_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_filter_list_v2 *vfl =
+ (struct virtchnl_vlan_filter_list_v2 *)msg;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vfl->vport_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_validate_add_vlan_filter_list(vsi,
+ &vf->vlan_v2_caps.filtering,
+ vfl)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (ice_vc_add_vlans(vf, vsi, vfl))
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+out:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN_V2, v_ret, NULL,
+ 0);
+}
+
+/**
+ * ice_vc_valid_vlan_setting - validate VLAN setting
+ * @negotiated_settings: negotiated VLAN settings during VF init
+ * @ethertype_setting: ethertype(s) requested for the VLAN setting
+ */
+static bool
+ice_vc_valid_vlan_setting(u32 negotiated_settings, u32 ethertype_setting)
+{
+ if (ethertype_setting && !(negotiated_settings & ethertype_setting))
+ return false;
+
+ /* only allow a single VIRTCHNL_VLAN_ETHERTYPE if
+ * VIRTHCNL_VLAN_ETHERTYPE_AND is not negotiated/supported
+ */
+ if (!(negotiated_settings & VIRTCHNL_VLAN_ETHERTYPE_AND) &&
+ hweight32(ethertype_setting) > 1)
+ return false;
+
+ /* ability to modify the VLAN setting was not negotiated */
+ if (!(negotiated_settings & VIRTCHNL_VLAN_TOGGLE))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_vc_valid_vlan_setting_msg - validate the VLAN setting message
+ * @caps: negotiated VLAN settings during VF init
+ * @msg: message to validate
+ *
+ * Used to validate any VLAN virtchnl message sent as a
+ * virtchnl_vlan_setting structure. Validates the message against the
+ * negotiated/supported caps during VF driver init.
+ */
+static bool
+ice_vc_valid_vlan_setting_msg(struct virtchnl_vlan_supported_caps *caps,
+ struct virtchnl_vlan_setting *msg)
+{
+ if ((!msg->outer_ethertype_setting &&
+ !msg->inner_ethertype_setting) ||
+ (!caps->outer && !caps->inner))
+ return false;
+
+ if (msg->outer_ethertype_setting &&
+ !ice_vc_valid_vlan_setting(caps->outer,
+ msg->outer_ethertype_setting))
+ return false;
+
+ if (msg->inner_ethertype_setting &&
+ !ice_vc_valid_vlan_setting(caps->inner,
+ msg->inner_ethertype_setting))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_vc_get_tpid - transform from VIRTCHNL_VLAN_ETHERTYPE_* to VLAN TPID
+ * @ethertype_setting: VIRTCHNL_VLAN_ETHERTYPE_* used to get VLAN TPID
+ * @tpid: VLAN TPID to populate
+ */
+static int ice_vc_get_tpid(u32 ethertype_setting, u16 *tpid)
+{
+ switch (ethertype_setting) {
+ case VIRTCHNL_VLAN_ETHERTYPE_8100:
+ *tpid = ETH_P_8021Q;
+ break;
+ case VIRTCHNL_VLAN_ETHERTYPE_88A8:
+ *tpid = ETH_P_8021AD;
+ break;
+ case VIRTCHNL_VLAN_ETHERTYPE_9100:
+ *tpid = ETH_P_QINQ1;
+ break;
+ default:
+ *tpid = 0;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_ena_vlan_offload - enable VLAN offload based on the ethertype_setting
+ * @vsi: VF's VSI used to enable the VLAN offload
+ * @ena_offload: function used to enable the VLAN offload
+ * @ethertype_setting: VIRTCHNL_VLAN_ETHERTYPE_* to enable offloads for
+ */
+static int
+ice_vc_ena_vlan_offload(struct ice_vsi *vsi,
+ int (*ena_offload)(struct ice_vsi *vsi, u16 tpid),
+ u32 ethertype_setting)
+{
+ u16 tpid;
+ int err;
+
+ err = ice_vc_get_tpid(ethertype_setting, &tpid);
+ if (err)
+ return err;
+
+ err = ena_offload(vsi, tpid);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+#define ICE_L2TSEL_QRX_CONTEXT_REG_IDX 3
+#define ICE_L2TSEL_BIT_OFFSET 23
+enum ice_l2tsel {
+ ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND,
+ ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG1,
+};
+
+/**
+ * ice_vsi_update_l2tsel - update l2tsel field for all Rx rings on this VSI
+ * @vsi: VSI used to update l2tsel on
+ * @l2tsel: l2tsel setting requested
+ *
+ * Use the l2tsel setting to update all of the Rx queue context bits for l2tsel.
+ * This will modify which descriptor field the first offloaded VLAN will be
+ * stripped into.
+ */
+static void ice_vsi_update_l2tsel(struct ice_vsi *vsi, enum ice_l2tsel l2tsel)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ u32 l2tsel_bit;
+ int i;
+
+ if (l2tsel == ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND)
+ l2tsel_bit = 0;
+ else
+ l2tsel_bit = BIT(ICE_L2TSEL_BIT_OFFSET);
+
+ for (i = 0; i < vsi->alloc_rxq; i++) {
+ u16 pfq = vsi->rxq_map[i];
+ u32 qrx_context_offset;
+ u32 regval;
+
+ qrx_context_offset =
+ QRX_CONTEXT(ICE_L2TSEL_QRX_CONTEXT_REG_IDX, pfq);
+
+ regval = rd32(hw, qrx_context_offset);
+ regval &= ~BIT(ICE_L2TSEL_BIT_OFFSET);
+ regval |= l2tsel_bit;
+ wr32(hw, qrx_context_offset, regval);
+ }
+}
+
+/**
+ * ice_vc_ena_vlan_stripping_v2_msg
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ *
+ * virthcnl handler for VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2
+ */
+static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_supported_caps *stripping_support;
+ struct virtchnl_vlan_setting *strip_msg =
+ (struct virtchnl_vlan_setting *)msg;
+ u32 ethertype_setting;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, strip_msg->vport_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ stripping_support = &vf->vlan_v2_caps.offloads.stripping_support;
+ if (!ice_vc_valid_vlan_setting_msg(stripping_support, strip_msg)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ ethertype_setting = strip_msg->outer_ethertype_setting;
+ if (ethertype_setting) {
+ if (ice_vc_ena_vlan_offload(vsi,
+ vsi->outer_vlan_ops.ena_stripping,
+ ethertype_setting)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ } else {
+ enum ice_l2tsel l2tsel =
+ ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND;
+
+ /* PF tells the VF that the outer VLAN tag is always
+ * extracted to VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 and
+ * inner is always extracted to
+ * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1. This is needed to
+ * support outer stripping so the first tag always ends
+ * up in L2TAG2_2ND and the second/inner tag, if
+ * enabled, is extracted in L2TAG1.
+ */
+ ice_vsi_update_l2tsel(vsi, l2tsel);
+ }
+ }
+
+ ethertype_setting = strip_msg->inner_ethertype_setting;
+ if (ethertype_setting &&
+ ice_vc_ena_vlan_offload(vsi, vsi->inner_vlan_ops.ena_stripping,
+ ethertype_setting)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+out:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2, v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_dis_vlan_stripping_v2_msg
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ *
+ * virthcnl handler for VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2
+ */
+static int ice_vc_dis_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_supported_caps *stripping_support;
+ struct virtchnl_vlan_setting *strip_msg =
+ (struct virtchnl_vlan_setting *)msg;
+ u32 ethertype_setting;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, strip_msg->vport_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ stripping_support = &vf->vlan_v2_caps.offloads.stripping_support;
+ if (!ice_vc_valid_vlan_setting_msg(stripping_support, strip_msg)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ ethertype_setting = strip_msg->outer_ethertype_setting;
+ if (ethertype_setting) {
+ if (vsi->outer_vlan_ops.dis_stripping(vsi)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ } else {
+ enum ice_l2tsel l2tsel =
+ ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG1;
+
+ /* PF tells the VF that the outer VLAN tag is always
+ * extracted to VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 and
+ * inner is always extracted to
+ * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1. This is needed to
+ * support inner stripping while outer stripping is
+ * disabled so that the first and only tag is extracted
+ * in L2TAG1.
+ */
+ ice_vsi_update_l2tsel(vsi, l2tsel);
+ }
+ }
+
+ ethertype_setting = strip_msg->inner_ethertype_setting;
+ if (ethertype_setting && vsi->inner_vlan_ops.dis_stripping(vsi)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+out:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2, v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_ena_vlan_insertion_v2_msg
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ *
+ * virthcnl handler for VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2
+ */
+static int ice_vc_ena_vlan_insertion_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_supported_caps *insertion_support;
+ struct virtchnl_vlan_setting *insertion_msg =
+ (struct virtchnl_vlan_setting *)msg;
+ u32 ethertype_setting;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, insertion_msg->vport_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ insertion_support = &vf->vlan_v2_caps.offloads.insertion_support;
+ if (!ice_vc_valid_vlan_setting_msg(insertion_support, insertion_msg)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ ethertype_setting = insertion_msg->outer_ethertype_setting;
+ if (ethertype_setting &&
+ ice_vc_ena_vlan_offload(vsi, vsi->outer_vlan_ops.ena_insertion,
+ ethertype_setting)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ ethertype_setting = insertion_msg->inner_ethertype_setting;
+ if (ethertype_setting &&
+ ice_vc_ena_vlan_offload(vsi, vsi->inner_vlan_ops.ena_insertion,
+ ethertype_setting)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+out:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2, v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_dis_vlan_insertion_v2_msg
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ *
+ * virthcnl handler for VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2
+ */
+static int ice_vc_dis_vlan_insertion_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_supported_caps *insertion_support;
+ struct virtchnl_vlan_setting *insertion_msg =
+ (struct virtchnl_vlan_setting *)msg;
+ u32 ethertype_setting;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, insertion_msg->vport_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ insertion_support = &vf->vlan_v2_caps.offloads.insertion_support;
+ if (!ice_vc_valid_vlan_setting_msg(insertion_support, insertion_msg)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ ethertype_setting = insertion_msg->outer_ethertype_setting;
+ if (ethertype_setting && vsi->outer_vlan_ops.dis_insertion(vsi)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ ethertype_setting = insertion_msg->inner_ethertype_setting;
+ if (ethertype_setting && vsi->inner_vlan_ops.dis_insertion(vsi)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+out:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2, v_ret, NULL, 0);
}
static struct ice_vc_vf_ops ice_vc_vf_dflt_ops = {
@@ -4458,6 +5585,13 @@ static struct ice_vc_vf_ops ice_vc_vf_dflt_ops = {
.handle_rss_cfg_msg = ice_vc_handle_rss_cfg,
.add_fdir_fltr_msg = ice_vc_add_fdir_fltr,
.del_fdir_fltr_msg = ice_vc_del_fdir_fltr,
+ .get_offload_vlan_v2_caps = ice_vc_get_offload_vlan_v2_caps,
+ .add_vlan_v2_msg = ice_vc_add_vlan_v2_msg,
+ .remove_vlan_v2_msg = ice_vc_remove_vlan_v2_msg,
+ .ena_vlan_stripping_v2_msg = ice_vc_ena_vlan_stripping_v2_msg,
+ .dis_vlan_stripping_v2_msg = ice_vc_dis_vlan_stripping_v2_msg,
+ .ena_vlan_insertion_v2_msg = ice_vc_ena_vlan_insertion_v2_msg,
+ .dis_vlan_insertion_v2_msg = ice_vc_dis_vlan_insertion_v2_msg,
};
void ice_vc_set_dflt_vf_ops(struct ice_vc_vf_ops *ops)
@@ -4686,7 +5820,7 @@ error_handler:
case VIRTCHNL_OP_GET_VF_RESOURCES:
err = ops->get_vf_res_msg(vf, msg);
if (ice_vf_init_vlan_stripping(vf))
- dev_err(dev, "Failed to initialize VLAN stripping for VF %d\n",
+ dev_dbg(dev, "Failed to initialize VLAN stripping for VF %d\n",
vf->vf_id);
ice_vc_notify_vf_link_state(vf);
break;
@@ -4751,6 +5885,27 @@ error_handler:
case VIRTCHNL_OP_DEL_RSS_CFG:
err = ops->handle_rss_cfg_msg(vf, msg, false);
break;
+ case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
+ err = ops->get_offload_vlan_v2_caps(vf);
+ break;
+ case VIRTCHNL_OP_ADD_VLAN_V2:
+ err = ops->add_vlan_v2_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_DEL_VLAN_V2:
+ err = ops->remove_vlan_v2_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2:
+ err = ops->ena_vlan_stripping_v2_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2:
+ err = ops->dis_vlan_stripping_v2_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2:
+ err = ops->ena_vlan_insertion_v2_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2:
+ err = ops->dis_vlan_insertion_v2_msg(vf, msg);
+ break;
case VIRTCHNL_OP_UNKNOWN:
default:
dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode,
@@ -4797,8 +5952,10 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
ether_addr_copy(ivi->mac, vf->hw_lan_addr.addr);
/* VF configuration for VLAN and applicable QoS */
- ivi->vlan = vf->port_vlan_info & VLAN_VID_MASK;
- ivi->qos = (vf->port_vlan_info & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ ivi->vlan = ice_vf_get_port_vlan_id(vf);
+ ivi->qos = ice_vf_get_port_vlan_prio(vf);
+ if (ice_vf_is_port_vlan_ena(vf))
+ ivi->vlan_proto = cpu_to_be16(ice_vf_get_port_vlan_tpid(vf));
ivi->trusted = vf->trusted;
ivi->spoofchk = vf->spoofchk;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 752487a1bdd6..4f4961043638 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -5,6 +5,7 @@
#define _ICE_VIRTCHNL_PF_H_
#include "ice.h"
#include "ice_virtchnl_fdir.h"
+#include "ice_vsi_vlan_ops.h"
/* Restrict number of MAC Addr and VLAN that non-trusted VF can programmed */
#define ICE_MAX_VLAN_PER_VF 8
@@ -94,6 +95,13 @@ struct ice_vc_vf_ops {
int (*handle_rss_cfg_msg)(struct ice_vf *vf, u8 *msg, bool add);
int (*add_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg);
int (*del_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg);
+ int (*get_offload_vlan_v2_caps)(struct ice_vf *vf);
+ int (*add_vlan_v2_msg)(struct ice_vf *vf, u8 *msg);
+ int (*remove_vlan_v2_msg)(struct ice_vf *vf, u8 *msg);
+ int (*ena_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg);
+ int (*dis_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg);
+ int (*ena_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg);
+ int (*dis_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg);
};
/* VF information structure */
@@ -119,7 +127,8 @@ struct ice_vf {
struct ice_time_mac legacy_last_added_umac;
DECLARE_BITMAP(txq_ena, ICE_MAX_RSS_QS_PER_VF);
DECLARE_BITMAP(rxq_ena, ICE_MAX_RSS_QS_PER_VF);
- u16 port_vlan_info; /* Port VLAN ID and QoS */
+ struct ice_vlan port_vlan_info; /* Port VLAN ID, QoS, and TPID */
+ struct virtchnl_vlan_caps vlan_v2_caps;
u8 pf_set_mac:1; /* VF MAC address set by VMM admin */
u8 trusted:1;
u8 spoofchk:1;
@@ -210,6 +219,7 @@ int
ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
enum virtchnl_status_code v_retval, u8 *msg, u16 msglen);
bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id);
+bool ice_vf_is_port_vlan_ena(struct ice_vf *vf);
#else /* CONFIG_PCI_IOV */
static inline void ice_process_vflr_event(struct ice_pf *pf) { }
static inline void ice_free_vfs(struct ice_pf *pf) { }
@@ -342,5 +352,10 @@ static inline bool ice_is_any_vf_in_promisc(struct ice_pf __always_unused *pf)
{
return false;
}
+
+static inline bool ice_vf_is_port_vlan_ena(struct ice_vf __always_unused *vf)
+{
+ return false;
+}
#endif /* CONFIG_PCI_IOV */
#endif /* _ICE_VIRTCHNL_PF_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vlan.h b/drivers/net/ethernet/intel/ice/ice_vlan.h
new file mode 100644
index 000000000000..bc4550a03173
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vlan.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VLAN_H_
+#define _ICE_VLAN_H_
+
+#include <linux/types.h>
+#include "ice_type.h"
+
+struct ice_vlan {
+ u16 tpid;
+ u16 vid;
+ u8 prio;
+};
+
+#define ICE_VLAN(tpid, vid, prio) ((struct ice_vlan){ tpid, vid, prio })
+
+#endif /* _ICE_VLAN_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vlan_mode.c b/drivers/net/ethernet/intel/ice/ice_vlan_mode.c
new file mode 100644
index 000000000000..1b618de592b7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vlan_mode.c
@@ -0,0 +1,439 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_common.h"
+
+/**
+ * ice_pkg_get_supported_vlan_mode - determine if DDP supports Double VLAN mode
+ * @hw: pointer to the HW struct
+ * @dvm: output variable to determine if DDP supports DVM(true) or SVM(false)
+ */
+static int
+ice_pkg_get_supported_vlan_mode(struct ice_hw *hw, bool *dvm)
+{
+ u16 meta_init_size = sizeof(struct ice_meta_init_section);
+ struct ice_meta_init_section *sect;
+ struct ice_buf_build *bld;
+ int status;
+
+ /* if anything fails, we assume there is no DVM support */
+ *dvm = false;
+
+ bld = ice_pkg_buf_alloc_single_section(hw,
+ ICE_SID_RXPARSER_METADATA_INIT,
+ meta_init_size, (void **)&sect);
+ if (!bld)
+ return -ENOMEM;
+
+ /* only need to read a single section */
+ sect->count = cpu_to_le16(1);
+ sect->offset = cpu_to_le16(ICE_META_VLAN_MODE_ENTRY);
+
+ status = ice_aq_upload_section(hw,
+ (struct ice_buf_hdr *)ice_pkg_buf(bld),
+ ICE_PKG_BUF_SIZE, NULL);
+ if (!status) {
+ DECLARE_BITMAP(entry, ICE_META_INIT_BITS);
+ u32 arr[ICE_META_INIT_DW_CNT];
+ u16 i;
+
+ /* convert to host bitmap format */
+ for (i = 0; i < ICE_META_INIT_DW_CNT; i++)
+ arr[i] = le32_to_cpu(sect->entry.bm[i]);
+
+ bitmap_from_arr32(entry, arr, (u16)ICE_META_INIT_BITS);
+
+ /* check if DVM is supported */
+ *dvm = test_bit(ICE_META_VLAN_MODE_BIT, entry);
+ }
+
+ ice_pkg_buf_free(hw, bld);
+
+ return status;
+}
+
+/**
+ * ice_aq_get_vlan_mode - get the VLAN mode of the device
+ * @hw: pointer to the HW structure
+ * @get_params: structure FW fills in based on the current VLAN mode config
+ *
+ * Get VLAN Mode Parameters (0x020D)
+ */
+static int
+ice_aq_get_vlan_mode(struct ice_hw *hw,
+ struct ice_aqc_get_vlan_mode *get_params)
+{
+ struct ice_aq_desc desc;
+
+ if (!get_params)
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc,
+ ice_aqc_opc_get_vlan_mode_parameters);
+
+ return ice_aq_send_cmd(hw, &desc, get_params, sizeof(*get_params),
+ NULL);
+}
+
+/**
+ * ice_aq_is_dvm_ena - query FW to check if double VLAN mode is enabled
+ * @hw: pointer to the HW structure
+ *
+ * Returns true if the hardware/firmware is configured in double VLAN mode,
+ * else return false signaling that the hardware/firmware is configured in
+ * single VLAN mode.
+ *
+ * Also, return false if this call fails for any reason (i.e. firmware doesn't
+ * support this AQ call).
+ */
+static bool ice_aq_is_dvm_ena(struct ice_hw *hw)
+{
+ struct ice_aqc_get_vlan_mode get_params = { 0 };
+ int status;
+
+ status = ice_aq_get_vlan_mode(hw, &get_params);
+ if (status) {
+ ice_debug(hw, ICE_DBG_AQ, "Failed to get VLAN mode, status %d\n",
+ status);
+ return false;
+ }
+
+ return (get_params.vlan_mode & ICE_AQ_VLAN_MODE_DVM_ENA);
+}
+
+/**
+ * ice_is_dvm_ena - check if double VLAN mode is enabled
+ * @hw: pointer to the HW structure
+ *
+ * The device is configured in single or double VLAN mode on initialization and
+ * this cannot be dynamically changed during runtime. Based on this there is no
+ * need to make an AQ call every time the driver needs to know the VLAN mode.
+ * Instead, use the cached VLAN mode.
+ */
+bool ice_is_dvm_ena(struct ice_hw *hw)
+{
+ return hw->dvm_ena;
+}
+
+/**
+ * ice_cache_vlan_mode - cache VLAN mode after DDP is downloaded
+ * @hw: pointer to the HW structure
+ *
+ * This is only called after downloading the DDP and after the global
+ * configuration lock has been released because all ports on a device need to
+ * cache the VLAN mode.
+ */
+static void ice_cache_vlan_mode(struct ice_hw *hw)
+{
+ hw->dvm_ena = ice_aq_is_dvm_ena(hw) ? true : false;
+}
+
+/**
+ * ice_pkg_supports_dvm - find out if DDP supports DVM
+ * @hw: pointer to the HW structure
+ */
+static bool ice_pkg_supports_dvm(struct ice_hw *hw)
+{
+ bool pkg_supports_dvm;
+ int status;
+
+ status = ice_pkg_get_supported_vlan_mode(hw, &pkg_supports_dvm);
+ if (status) {
+ ice_debug(hw, ICE_DBG_PKG, "Failed to get supported VLAN mode, status %d\n",
+ status);
+ return false;
+ }
+
+ return pkg_supports_dvm;
+}
+
+/**
+ * ice_fw_supports_dvm - find out if FW supports DVM
+ * @hw: pointer to the HW structure
+ */
+static bool ice_fw_supports_dvm(struct ice_hw *hw)
+{
+ struct ice_aqc_get_vlan_mode get_vlan_mode = { 0 };
+ int status;
+
+ /* If firmware returns success, then it supports DVM, else it only
+ * supports SVM
+ */
+ status = ice_aq_get_vlan_mode(hw, &get_vlan_mode);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to get VLAN mode, status %d\n",
+ status);
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * ice_is_dvm_supported - check if Double VLAN Mode is supported
+ * @hw: pointer to the hardware structure
+ *
+ * Returns true if Double VLAN Mode (DVM) is supported and false if only Single
+ * VLAN Mode (SVM) is supported. In order for DVM to be supported the DDP and
+ * firmware must support it, otherwise only SVM is supported. This function
+ * should only be called while the global config lock is held and after the
+ * package has been successfully downloaded.
+ */
+static bool ice_is_dvm_supported(struct ice_hw *hw)
+{
+ if (!ice_pkg_supports_dvm(hw)) {
+ ice_debug(hw, ICE_DBG_PKG, "DDP doesn't support DVM\n");
+ return false;
+ }
+
+ if (!ice_fw_supports_dvm(hw)) {
+ ice_debug(hw, ICE_DBG_PKG, "FW doesn't support DVM\n");
+ return false;
+ }
+
+ return true;
+}
+
+#define ICE_EXTERNAL_VLAN_ID_FV_IDX 11
+#define ICE_SW_LKUP_VLAN_LOC_LKUP_IDX 1
+#define ICE_SW_LKUP_VLAN_PKT_FLAGS_LKUP_IDX 2
+#define ICE_SW_LKUP_PROMISC_VLAN_LOC_LKUP_IDX 2
+#define ICE_PKT_FLAGS_0_TO_15_FV_IDX 1
+#define ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK 0xD000
+static struct ice_update_recipe_lkup_idx_params ice_dvm_dflt_recipes[] = {
+ {
+ /* Update recipe ICE_SW_LKUP_VLAN to filter based on the
+ * outer/single VLAN in DVM
+ */
+ .rid = ICE_SW_LKUP_VLAN,
+ .fv_idx = ICE_EXTERNAL_VLAN_ID_FV_IDX,
+ .ignore_valid = true,
+ .mask = 0,
+ .mask_valid = false, /* use pre-existing mask */
+ .lkup_idx = ICE_SW_LKUP_VLAN_LOC_LKUP_IDX,
+ },
+ {
+ /* Update recipe ICE_SW_LKUP_VLAN to filter based on the VLAN
+ * packet flags to support VLAN filtering on multiple VLAN
+ * ethertypes (i.e. 0x8100 and 0x88a8) in DVM
+ */
+ .rid = ICE_SW_LKUP_VLAN,
+ .fv_idx = ICE_PKT_FLAGS_0_TO_15_FV_IDX,
+ .ignore_valid = false,
+ .mask = ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK,
+ .mask_valid = true,
+ .lkup_idx = ICE_SW_LKUP_VLAN_PKT_FLAGS_LKUP_IDX,
+ },
+ {
+ /* Update recipe ICE_SW_LKUP_PROMISC_VLAN to filter based on the
+ * outer/single VLAN in DVM
+ */
+ .rid = ICE_SW_LKUP_PROMISC_VLAN,
+ .fv_idx = ICE_EXTERNAL_VLAN_ID_FV_IDX,
+ .ignore_valid = true,
+ .mask = 0,
+ .mask_valid = false, /* use pre-existing mask */
+ .lkup_idx = ICE_SW_LKUP_PROMISC_VLAN_LOC_LKUP_IDX,
+ },
+};
+
+/**
+ * ice_dvm_update_dflt_recipes - update default switch recipes in DVM
+ * @hw: hardware structure used to update the recipes
+ */
+static int ice_dvm_update_dflt_recipes(struct ice_hw *hw)
+{
+ unsigned long i;
+
+ for (i = 0; i < ARRAY_SIZE(ice_dvm_dflt_recipes); i++) {
+ struct ice_update_recipe_lkup_idx_params *params;
+ int status;
+
+ params = &ice_dvm_dflt_recipes[i];
+
+ status = ice_update_recipe_lkup_idx(hw, params);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to update RID %d lkup_idx %d fv_idx %d mask_valid %s mask 0x%04x\n",
+ params->rid, params->lkup_idx, params->fv_idx,
+ params->mask_valid ? "true" : "false",
+ params->mask);
+ return status;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_aq_set_vlan_mode - set the VLAN mode of the device
+ * @hw: pointer to the HW structure
+ * @set_params: requested VLAN mode configuration
+ *
+ * Set VLAN Mode Parameters (0x020C)
+ */
+static int
+ice_aq_set_vlan_mode(struct ice_hw *hw,
+ struct ice_aqc_set_vlan_mode *set_params)
+{
+ u8 rdma_packet, mng_vlan_prot_id;
+ struct ice_aq_desc desc;
+
+ if (!set_params)
+ return -EINVAL;
+
+ if (set_params->l2tag_prio_tagging > ICE_AQ_VLAN_PRIO_TAG_MAX)
+ return -EINVAL;
+
+ rdma_packet = set_params->rdma_packet;
+ if (rdma_packet != ICE_AQ_SVM_VLAN_RDMA_PKT_FLAG_SETTING &&
+ rdma_packet != ICE_AQ_DVM_VLAN_RDMA_PKT_FLAG_SETTING)
+ return -EINVAL;
+
+ mng_vlan_prot_id = set_params->mng_vlan_prot_id;
+ if (mng_vlan_prot_id != ICE_AQ_VLAN_MNG_PROTOCOL_ID_OUTER &&
+ mng_vlan_prot_id != ICE_AQ_VLAN_MNG_PROTOCOL_ID_INNER)
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc,
+ ice_aqc_opc_set_vlan_mode_parameters);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ return ice_aq_send_cmd(hw, &desc, set_params, sizeof(*set_params),
+ NULL);
+}
+
+/**
+ * ice_set_dvm - sets up software and hardware for double VLAN mode
+ * @hw: pointer to the hardware structure
+ */
+static int ice_set_dvm(struct ice_hw *hw)
+{
+ struct ice_aqc_set_vlan_mode params = { 0 };
+ int status;
+
+ params.l2tag_prio_tagging = ICE_AQ_VLAN_PRIO_TAG_OUTER_CTAG;
+ params.rdma_packet = ICE_AQ_DVM_VLAN_RDMA_PKT_FLAG_SETTING;
+ params.mng_vlan_prot_id = ICE_AQ_VLAN_MNG_PROTOCOL_ID_OUTER;
+
+ status = ice_aq_set_vlan_mode(hw, &params);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to set double VLAN mode parameters, status %d\n",
+ status);
+ return status;
+ }
+
+ status = ice_dvm_update_dflt_recipes(hw);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to update default recipes for double VLAN mode, status %d\n",
+ status);
+ return status;
+ }
+
+ status = ice_aq_set_port_params(hw->port_info, true, NULL);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to set port in double VLAN mode, status %d\n",
+ status);
+ return status;
+ }
+
+ status = ice_set_dvm_boost_entries(hw);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to set boost TCAM entries for double VLAN mode, status %d\n",
+ status);
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_set_svm - set single VLAN mode
+ * @hw: pointer to the HW structure
+ */
+static int ice_set_svm(struct ice_hw *hw)
+{
+ struct ice_aqc_set_vlan_mode *set_params;
+ int status;
+
+ status = ice_aq_set_port_params(hw->port_info, false, NULL);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to set port parameters for single VLAN mode\n");
+ return status;
+ }
+
+ set_params = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*set_params),
+ GFP_KERNEL);
+ if (!set_params)
+ return -ENOMEM;
+
+ /* default configuration for SVM configurations */
+ set_params->l2tag_prio_tagging = ICE_AQ_VLAN_PRIO_TAG_INNER_CTAG;
+ set_params->rdma_packet = ICE_AQ_SVM_VLAN_RDMA_PKT_FLAG_SETTING;
+ set_params->mng_vlan_prot_id = ICE_AQ_VLAN_MNG_PROTOCOL_ID_INNER;
+
+ status = ice_aq_set_vlan_mode(hw, set_params);
+ if (status)
+ ice_debug(hw, ICE_DBG_INIT, "Failed to configure port in single VLAN mode\n");
+
+ devm_kfree(ice_hw_to_dev(hw), set_params);
+ return status;
+}
+
+/**
+ * ice_set_vlan_mode
+ * @hw: pointer to the HW structure
+ */
+int ice_set_vlan_mode(struct ice_hw *hw)
+{
+ if (!ice_is_dvm_supported(hw))
+ return 0;
+
+ if (!ice_set_dvm(hw))
+ return 0;
+
+ return ice_set_svm(hw);
+}
+
+/**
+ * ice_print_dvm_not_supported - print if DDP and/or FW doesn't support DVM
+ * @hw: pointer to the HW structure
+ *
+ * The purpose of this function is to print that QinQ is not supported due to
+ * incompatibilty from the DDP and/or FW. This will give a hint to the user to
+ * update one and/or both components if they expect QinQ functionality.
+ */
+static void ice_print_dvm_not_supported(struct ice_hw *hw)
+{
+ bool pkg_supports_dvm = ice_pkg_supports_dvm(hw);
+ bool fw_supports_dvm = ice_fw_supports_dvm(hw);
+
+ if (!fw_supports_dvm && !pkg_supports_dvm)
+ dev_info(ice_hw_to_dev(hw), "QinQ functionality cannot be enabled on this device. Update your DDP package and NVM to versions that support QinQ.\n");
+ else if (!pkg_supports_dvm)
+ dev_info(ice_hw_to_dev(hw), "QinQ functionality cannot be enabled on this device. Update your DDP package to a version that supports QinQ.\n");
+ else if (!fw_supports_dvm)
+ dev_info(ice_hw_to_dev(hw), "QinQ functionality cannot be enabled on this device. Update your NVM to a version that supports QinQ.\n");
+}
+
+/**
+ * ice_post_pkg_dwnld_vlan_mode_cfg - configure VLAN mode after DDP download
+ * @hw: pointer to the HW structure
+ *
+ * This function is meant to configure any VLAN mode specific functionality
+ * after the global configuration lock has been released and the DDP has been
+ * downloaded.
+ *
+ * Since only one PF downloads the DDP and configures the VLAN mode there needs
+ * to be a way to configure the other PFs after the DDP has been downloaded and
+ * the global configuration lock has been released. All such code should go in
+ * this function.
+ */
+void ice_post_pkg_dwnld_vlan_mode_cfg(struct ice_hw *hw)
+{
+ ice_cache_vlan_mode(hw);
+
+ if (ice_is_dvm_ena(hw))
+ ice_change_proto_id_to_dvm();
+ else
+ ice_print_dvm_not_supported(hw);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vlan_mode.h b/drivers/net/ethernet/intel/ice/ice_vlan_mode.h
new file mode 100644
index 000000000000..a0fb743d08e2
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vlan_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VLAN_MODE_H_
+#define _ICE_VLAN_MODE_H_
+
+struct ice_hw;
+
+bool ice_is_dvm_ena(struct ice_hw *hw);
+int ice_set_vlan_mode(struct ice_hw *hw);
+void ice_post_pkg_dwnld_vlan_mode_cfg(struct ice_hw *hw);
+
+#endif /* _ICE_VLAN_MODE_H */
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
new file mode 100644
index 000000000000..5b4a0abb4607
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
@@ -0,0 +1,707 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_vsi_vlan_lib.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice.h"
+
+static void print_invalid_tpid(struct ice_vsi *vsi, u16 tpid)
+{
+ dev_err(ice_pf_to_dev(vsi->back), "%s %d specified invalid VLAN tpid 0x%04x\n",
+ ice_vsi_type_str(vsi->type), vsi->idx, tpid);
+}
+
+/**
+ * validate_vlan - check if the ice_vlan passed in is valid
+ * @vsi: VSI used for printing error message
+ * @vlan: ice_vlan structure to validate
+ *
+ * Return true if the VLAN TPID is valid or if the VLAN TPID is 0 and the VLAN
+ * VID is 0, which allows for non-zero VLAN filters with the specified VLAN TPID
+ * and untagged VLAN 0 filters to be added to the prune list respectively.
+ */
+static bool validate_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ if (vlan->tpid != ETH_P_8021Q && vlan->tpid != ETH_P_8021AD &&
+ vlan->tpid != ETH_P_QINQ1 && (vlan->tpid || vlan->vid)) {
+ print_invalid_tpid(vsi, vlan->tpid);
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * ice_vsi_add_vlan - default add VLAN implementation for all VSI types
+ * @vsi: VSI being configured
+ * @vlan: VLAN filter to add
+ */
+int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ int err;
+
+ if (!validate_vlan(vsi, vlan))
+ return -EINVAL;
+
+ err = ice_fltr_add_vlan(vsi, vlan);
+ if (err && err != -EEXIST) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failure Adding VLAN %d on VSI %i, status %d\n",
+ vlan->vid, vsi->vsi_num, err);
+ return err;
+ }
+
+ vsi->num_vlan++;
+ return 0;
+}
+
+/**
+ * ice_vsi_del_vlan - default del VLAN implementation for all VSI types
+ * @vsi: VSI being configured
+ * @vlan: VLAN filter to delete
+ */
+int ice_vsi_del_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ int err;
+
+ if (!validate_vlan(vsi, vlan))
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(pf);
+
+ err = ice_fltr_remove_vlan(vsi, vlan);
+ if (!err)
+ vsi->num_vlan--;
+ else if (err == -ENOENT || err == -EBUSY)
+ err = 0;
+ else
+ dev_err(dev, "Error removing VLAN %d on VSI %i error: %d\n",
+ vlan->vid, vsi->vsi_num, err);
+
+ return err;
+}
+
+/**
+ * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx
+ * @vsi: the VSI being changed
+ */
+static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ int err;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ /* Here we are configuring the VSI to let the driver add VLAN tags by
+ * setting inner_vlan_flags to ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL. The actual VLAN tag
+ * insertion happens in the Tx hot path, in ice_tx_map.
+ */
+ ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL;
+
+ /* Preserve existing VLAN strip setting */
+ ctxt->info.inner_vlan_flags |= (vsi->info.inner_vlan_flags &
+ ICE_AQ_VSI_INNER_VLAN_EMODE_M);
+
+ ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err) {
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ goto out;
+ }
+
+ vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
+out:
+ kfree(ctxt);
+ return err;
+}
+
+/**
+ * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is a enable or disable request
+ */
+static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ int err;
+
+ /* do not allow modifying VLAN stripping when a port VLAN is configured
+ * on this VSI
+ */
+ if (vsi->info.port_based_inner_vlan)
+ return 0;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ /* Here we are configuring what the VSI should do with the VLAN tag in
+ * the Rx packet. We can either leave the tag in the packet or put it in
+ * the Rx descriptor.
+ */
+ if (ena)
+ /* Strip VLAN tag from Rx packet and put it in the desc */
+ ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH;
+ else
+ /* Disable stripping. Leave tag in packet */
+ ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
+
+ /* Allow all packets untagged/tagged */
+ ctxt->info.inner_vlan_flags |= ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL;
+
+ ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err) {
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %d aq_err %s\n",
+ ena, err, ice_aq_str(hw->adminq.sq_last_status));
+ goto out;
+ }
+
+ vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
+out:
+ kfree(ctxt);
+ return err;
+}
+
+int ice_vsi_ena_inner_stripping(struct ice_vsi *vsi, const u16 tpid)
+{
+ if (tpid != ETH_P_8021Q) {
+ print_invalid_tpid(vsi, tpid);
+ return -EINVAL;
+ }
+
+ return ice_vsi_manage_vlan_stripping(vsi, true);
+}
+
+int ice_vsi_dis_inner_stripping(struct ice_vsi *vsi)
+{
+ return ice_vsi_manage_vlan_stripping(vsi, false);
+}
+
+int ice_vsi_ena_inner_insertion(struct ice_vsi *vsi, const u16 tpid)
+{
+ if (tpid != ETH_P_8021Q) {
+ print_invalid_tpid(vsi, tpid);
+ return -EINVAL;
+ }
+
+ return ice_vsi_manage_vlan_insertion(vsi);
+}
+
+int ice_vsi_dis_inner_insertion(struct ice_vsi *vsi)
+{
+ return ice_vsi_manage_vlan_insertion(vsi);
+}
+
+/**
+ * __ice_vsi_set_inner_port_vlan - set port VLAN VSI context settings to enable a port VLAN
+ * @vsi: the VSI to update
+ * @pvid_info: VLAN ID and QoS used to set the PVID VSI context field
+ */
+static int __ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, u16 pvid_info)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_aqc_vsi_props *info;
+ struct ice_vsi_ctx *ctxt;
+ int ret;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info = vsi->info;
+ info = &ctxt->info;
+ info->inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTUNTAGGED |
+ ICE_AQ_VSI_INNER_VLAN_INSERT_PVID |
+ ICE_AQ_VSI_INNER_VLAN_EMODE_STR;
+ info->sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+ info->port_based_inner_vlan = cpu_to_le16(pvid_info);
+ info->valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
+ ICE_AQ_VSI_PROP_SW_VALID);
+
+ ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (ret) {
+ dev_info(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %d aq_err %s\n",
+ ret, ice_aq_str(hw->adminq.sq_last_status));
+ goto out;
+ }
+
+ vsi->info.inner_vlan_flags = info->inner_vlan_flags;
+ vsi->info.sw_flags2 = info->sw_flags2;
+ vsi->info.port_based_inner_vlan = info->port_based_inner_vlan;
+out:
+ kfree(ctxt);
+ return ret;
+}
+
+int ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ u16 port_vlan_info;
+
+ if (vlan->tpid != ETH_P_8021Q)
+ return -EINVAL;
+
+ if (vlan->prio > 7)
+ return -EINVAL;
+
+ port_vlan_info = vlan->vid | (vlan->prio << VLAN_PRIO_SHIFT);
+
+ return __ice_vsi_set_inner_port_vlan(vsi, port_vlan_info);
+}
+
+/**
+ * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
+ * @vsi: VSI to enable or disable VLAN pruning on
+ * @ena: set to true to enable VLAN pruning and false to disable it
+ *
+ * returns 0 if VSI is updated, negative otherwise
+ */
+static int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena)
+{
+ struct ice_vsi_ctx *ctxt;
+ struct ice_pf *pf;
+ int status;
+
+ if (!vsi)
+ return -EINVAL;
+
+ /* Don't enable VLAN pruning if the netdev is currently in promiscuous
+ * mode. VLAN pruning will be enabled when the interface exits
+ * promiscuous mode if any VLAN filters are active.
+ */
+ if (vsi->netdev && vsi->netdev->flags & IFF_PROMISC && ena)
+ return 0;
+
+ pf = vsi->back;
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info = vsi->info;
+
+ if (ena)
+ ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+ else
+ ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+ ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
+
+ status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL);
+ if (status) {
+ netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %d, aq_err = %s\n",
+ ena ? "En" : "Dis", vsi->idx, vsi->vsi_num, status,
+ ice_aq_str(pf->hw.adminq.sq_last_status));
+ goto err_out;
+ }
+
+ vsi->info.sw_flags2 = ctxt->info.sw_flags2;
+
+ kfree(ctxt);
+ return 0;
+
+err_out:
+ kfree(ctxt);
+ return status;
+}
+
+int ice_vsi_ena_rx_vlan_filtering(struct ice_vsi *vsi)
+{
+ return ice_cfg_vlan_pruning(vsi, true);
+}
+
+int ice_vsi_dis_rx_vlan_filtering(struct ice_vsi *vsi)
+{
+ return ice_cfg_vlan_pruning(vsi, false);
+}
+
+static int ice_cfg_vlan_antispoof(struct ice_vsi *vsi, bool enable)
+{
+ struct ice_vsi_ctx *ctx;
+ int err;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->info.sec_flags = vsi->info.sec_flags;
+ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+
+ if (enable)
+ ctx->info.sec_flags |= ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S;
+ else
+ ctx->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+
+ err = ice_update_vsi(&vsi->back->hw, vsi->idx, ctx, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to configure Tx VLAN anti-spoof %s for VSI %d, error %d\n",
+ enable ? "ON" : "OFF", vsi->vsi_num, err);
+ else
+ vsi->info.sec_flags = ctx->info.sec_flags;
+
+ kfree(ctx);
+
+ return err;
+}
+
+int ice_vsi_ena_tx_vlan_filtering(struct ice_vsi *vsi)
+{
+ return ice_cfg_vlan_antispoof(vsi, true);
+}
+
+int ice_vsi_dis_tx_vlan_filtering(struct ice_vsi *vsi)
+{
+ return ice_cfg_vlan_antispoof(vsi, false);
+}
+
+/**
+ * tpid_to_vsi_outer_vlan_type - convert from TPID to VSI context based tag_type
+ * @tpid: tpid used to translate into VSI context based tag_type
+ * @tag_type: output variable to hold the VSI context based tag type
+ */
+static int tpid_to_vsi_outer_vlan_type(u16 tpid, u8 *tag_type)
+{
+ switch (tpid) {
+ case ETH_P_8021Q:
+ *tag_type = ICE_AQ_VSI_OUTER_TAG_VLAN_8100;
+ break;
+ case ETH_P_8021AD:
+ *tag_type = ICE_AQ_VSI_OUTER_TAG_STAG;
+ break;
+ case ETH_P_QINQ1:
+ *tag_type = ICE_AQ_VSI_OUTER_TAG_VLAN_9100;
+ break;
+ default:
+ *tag_type = 0;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vsi_ena_outer_stripping - enable outer VLAN stripping
+ * @vsi: VSI to configure
+ * @tpid: TPID to enable outer VLAN stripping for
+ *
+ * Enable outer VLAN stripping via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Since the VSI context only supports a single TPID for insertion and
+ * stripping, setting the TPID for stripping will affect the TPID for insertion.
+ * Callers need to be aware of this limitation.
+ *
+ * Only modify outer VLAN stripping settings and the VLAN TPID. Outer VLAN
+ * insertion settings are unmodified.
+ *
+ * This enables hardware to strip a VLAN tag with the specified TPID to be
+ * stripped from the packet and placed in the receive descriptor.
+ */
+int ice_vsi_ena_outer_stripping(struct ice_vsi *vsi, u16 tpid)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ u8 tag_type;
+ int err;
+
+ /* do not allow modifying VLAN stripping when a port VLAN is configured
+ * on this VSI
+ */
+ if (vsi->info.port_based_outer_vlan)
+ return 0;
+
+ if (tpid_to_vsi_outer_vlan_type(tpid, &tag_type))
+ return -EINVAL;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID);
+ /* clear current outer VLAN strip settings */
+ ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags &
+ ~(ICE_AQ_VSI_OUTER_VLAN_EMODE_M | ICE_AQ_VSI_OUTER_TAG_TYPE_M);
+ ctxt->info.outer_vlan_flags |=
+ ((ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_BOTH <<
+ ICE_AQ_VSI_OUTER_VLAN_EMODE_S) |
+ ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
+ ICE_AQ_VSI_OUTER_TAG_TYPE_M));
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for enabling outer VLAN stripping failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ else
+ vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+
+ kfree(ctxt);
+ return err;
+}
+
+/**
+ * ice_vsi_dis_outer_stripping - disable outer VLAN stripping
+ * @vsi: VSI to configure
+ *
+ * Disable outer VLAN stripping via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Only modify the outer VLAN stripping settings. The VLAN TPID and outer VLAN
+ * insertion settings are unmodified.
+ *
+ * This tells the hardware to not strip any VLAN tagged packets, thus leaving
+ * them in the packet. This enables software offloaded VLAN stripping and
+ * disables hardware offloaded VLAN stripping.
+ */
+int ice_vsi_dis_outer_stripping(struct ice_vsi *vsi)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ int err;
+
+ if (vsi->info.port_based_outer_vlan)
+ return 0;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID);
+ /* clear current outer VLAN strip settings */
+ ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags &
+ ~ICE_AQ_VSI_OUTER_VLAN_EMODE_M;
+ ctxt->info.outer_vlan_flags |= ICE_AQ_VSI_OUTER_VLAN_EMODE_NOTHING <<
+ ICE_AQ_VSI_OUTER_VLAN_EMODE_S;
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for disabling outer VLAN stripping failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ else
+ vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+
+ kfree(ctxt);
+ return err;
+}
+
+/**
+ * ice_vsi_ena_outer_insertion - enable outer VLAN insertion
+ * @vsi: VSI to configure
+ * @tpid: TPID to enable outer VLAN insertion for
+ *
+ * Enable outer VLAN insertion via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Since the VSI context only supports a single TPID for insertion and
+ * stripping, setting the TPID for insertion will affect the TPID for stripping.
+ * Callers need to be aware of this limitation.
+ *
+ * Only modify outer VLAN insertion settings and the VLAN TPID. Outer VLAN
+ * stripping settings are unmodified.
+ *
+ * This allows a VLAN tag with the specified TPID to be inserted in the transmit
+ * descriptor.
+ */
+int ice_vsi_ena_outer_insertion(struct ice_vsi *vsi, u16 tpid)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ u8 tag_type;
+ int err;
+
+ if (vsi->info.port_based_outer_vlan)
+ return 0;
+
+ if (tpid_to_vsi_outer_vlan_type(tpid, &tag_type))
+ return -EINVAL;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID);
+ /* clear current outer VLAN insertion settings */
+ ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags &
+ ~(ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT |
+ ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC |
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M |
+ ICE_AQ_VSI_OUTER_TAG_TYPE_M);
+ ctxt->info.outer_vlan_flags |=
+ ((ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL <<
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) &
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M) |
+ ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
+ ICE_AQ_VSI_OUTER_TAG_TYPE_M);
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for enabling outer VLAN insertion failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ else
+ vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+
+ kfree(ctxt);
+ return err;
+}
+
+/**
+ * ice_vsi_dis_outer_insertion - disable outer VLAN insertion
+ * @vsi: VSI to configure
+ *
+ * Disable outer VLAN insertion via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Only modify the outer VLAN insertion settings. The VLAN TPID and outer VLAN
+ * settings are unmodified.
+ *
+ * This tells the hardware to not allow any VLAN tagged packets in the transmit
+ * descriptor. This enables software offloaded VLAN insertion and disables
+ * hardware offloaded VLAN insertion.
+ */
+int ice_vsi_dis_outer_insertion(struct ice_vsi *vsi)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ int err;
+
+ if (vsi->info.port_based_outer_vlan)
+ return 0;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID);
+ /* clear current outer VLAN insertion settings */
+ ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags &
+ ~(ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT |
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M);
+ ctxt->info.outer_vlan_flags |=
+ ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC |
+ ((ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL <<
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) &
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M);
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for disabling outer VLAN insertion failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ else
+ vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+
+ kfree(ctxt);
+ return err;
+}
+
+/**
+ * __ice_vsi_set_outer_port_vlan - set the outer port VLAN and related settings
+ * @vsi: VSI to configure
+ * @vlan_info: packed u16 that contains the VLAN prio and ID
+ * @tpid: TPID of the port VLAN
+ *
+ * Set the port VLAN prio, ID, and TPID.
+ *
+ * Enable VLAN pruning so the VSI doesn't receive any traffic that doesn't match
+ * a VLAN prune rule. The caller should take care to add a VLAN prune rule that
+ * matches the port VLAN ID and TPID.
+ *
+ * Tell hardware to strip outer VLAN tagged packets on receive and don't put
+ * them in the receive descriptor. VSI(s) in port VLANs should not be aware of
+ * the port VLAN ID or TPID they are assigned to.
+ *
+ * Tell hardware to prevent outer VLAN tag insertion on transmit and only allow
+ * untagged outer packets from the transmit descriptor.
+ *
+ * Also, tell the hardware to insert the port VLAN on transmit.
+ */
+static int
+__ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, u16 vlan_info, u16 tpid)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ u8 tag_type;
+ int err;
+
+ if (tpid_to_vsi_outer_vlan_type(tpid, &tag_type))
+ return -EINVAL;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info = vsi->info;
+
+ ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+ ctxt->info.port_based_outer_vlan = cpu_to_le16(vlan_info);
+ ctxt->info.outer_vlan_flags =
+ (ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW <<
+ ICE_AQ_VSI_OUTER_VLAN_EMODE_S) |
+ ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
+ ICE_AQ_VSI_OUTER_TAG_TYPE_M) |
+ ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC |
+ (ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTUNTAGGED <<
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) |
+ ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID |
+ ICE_AQ_VSI_PROP_SW_VALID);
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err) {
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for setting outer port based VLAN failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ } else {
+ vsi->info.port_based_outer_vlan = ctxt->info.port_based_outer_vlan;
+ vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+ vsi->info.sw_flags2 = ctxt->info.sw_flags2;
+ }
+
+ kfree(ctxt);
+ return err;
+}
+
+/**
+ * ice_vsi_set_outer_port_vlan - public version of __ice_vsi_set_outer_port_vlan
+ * @vsi: VSI to configure
+ * @vlan: ice_vlan structure used to set the port VLAN
+ *
+ * Set the outer port VLAN via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * This function does not support clearing the port VLAN as there is currently
+ * no use case for this.
+ *
+ * Use the ice_vlan structure passed in to set this VSI in a port VLAN.
+ */
+int ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ u16 port_vlan_info;
+
+ if (vlan->prio > (VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT))
+ return -EINVAL;
+
+ port_vlan_info = vlan->vid | (vlan->prio << VLAN_PRIO_SHIFT);
+
+ return __ice_vsi_set_outer_port_vlan(vsi, port_vlan_info, vlan->tpid);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h
new file mode 100644
index 000000000000..f459909490ec
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VSI_VLAN_LIB_H_
+#define _ICE_VSI_VLAN_LIB_H_
+
+#include <linux/types.h>
+#include "ice_vlan.h"
+
+struct ice_vsi;
+
+int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+int ice_vsi_del_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+
+int ice_vsi_ena_inner_stripping(struct ice_vsi *vsi, u16 tpid);
+int ice_vsi_dis_inner_stripping(struct ice_vsi *vsi);
+int ice_vsi_ena_inner_insertion(struct ice_vsi *vsi, u16 tpid);
+int ice_vsi_dis_inner_insertion(struct ice_vsi *vsi);
+int ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+
+int ice_vsi_ena_rx_vlan_filtering(struct ice_vsi *vsi);
+int ice_vsi_dis_rx_vlan_filtering(struct ice_vsi *vsi);
+int ice_vsi_ena_tx_vlan_filtering(struct ice_vsi *vsi);
+int ice_vsi_dis_tx_vlan_filtering(struct ice_vsi *vsi);
+
+int ice_vsi_ena_outer_stripping(struct ice_vsi *vsi, u16 tpid);
+int ice_vsi_dis_outer_stripping(struct ice_vsi *vsi);
+int ice_vsi_ena_outer_insertion(struct ice_vsi *vsi, u16 tpid);
+int ice_vsi_dis_outer_insertion(struct ice_vsi *vsi);
+int ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+
+#endif /* _ICE_VSI_VLAN_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c
new file mode 100644
index 000000000000..4a6c850d83ac
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_pf_vsi_vlan_ops.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_lib.h"
+#include "ice.h"
+
+static int
+op_unsupported_vlan_arg(struct ice_vsi * __always_unused vsi,
+ struct ice_vlan * __always_unused vlan)
+{
+ return -EOPNOTSUPP;
+}
+
+static int
+op_unsupported_tpid_arg(struct ice_vsi *__always_unused vsi,
+ u16 __always_unused tpid)
+{
+ return -EOPNOTSUPP;
+}
+
+static int op_unsupported(struct ice_vsi *__always_unused vsi)
+{
+ return -EOPNOTSUPP;
+}
+
+/* If any new ops are added to the VSI VLAN ops interface then an unsupported
+ * implementation should be set here.
+ */
+static struct ice_vsi_vlan_ops ops_unsupported = {
+ .add_vlan = op_unsupported_vlan_arg,
+ .del_vlan = op_unsupported_vlan_arg,
+ .ena_stripping = op_unsupported_tpid_arg,
+ .dis_stripping = op_unsupported,
+ .ena_insertion = op_unsupported_tpid_arg,
+ .dis_insertion = op_unsupported,
+ .ena_rx_filtering = op_unsupported,
+ .dis_rx_filtering = op_unsupported,
+ .ena_tx_filtering = op_unsupported,
+ .dis_tx_filtering = op_unsupported,
+ .set_port_vlan = op_unsupported_vlan_arg,
+};
+
+/**
+ * ice_vsi_init_unsupported_vlan_ops - init all VSI VLAN ops to unsupported
+ * @vsi: VSI to initialize VSI VLAN ops to unsupported for
+ *
+ * By default all inner and outer VSI VLAN ops return -EOPNOTSUPP. This was done
+ * as oppsed to leaving the ops null to prevent unexpected crashes. Instead if
+ * an unsupported VSI VLAN op is called it will just return -EOPNOTSUPP.
+ *
+ */
+static void ice_vsi_init_unsupported_vlan_ops(struct ice_vsi *vsi)
+{
+ vsi->outer_vlan_ops = ops_unsupported;
+ vsi->inner_vlan_ops = ops_unsupported;
+}
+
+/**
+ * ice_vsi_init_vlan_ops - initialize type specific VSI VLAN ops
+ * @vsi: VSI to initialize ops for
+ *
+ * If any VSI types are added and/or require different ops than the PF or VF VSI
+ * then they will have to add a case here to handle that. Also, VSI type
+ * specific files should be added in the same manner that was done for PF VSI.
+ */
+void ice_vsi_init_vlan_ops(struct ice_vsi *vsi)
+{
+ /* Initialize all VSI types to have unsupported VSI VLAN ops */
+ ice_vsi_init_unsupported_vlan_ops(vsi);
+
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ case ICE_VSI_SWITCHDEV_CTRL:
+ ice_pf_vsi_init_vlan_ops(vsi);
+ break;
+ case ICE_VSI_VF:
+ ice_vf_vsi_init_vlan_ops(vsi);
+ break;
+ default:
+ dev_dbg(ice_pf_to_dev(vsi->back), "%s does not support VLAN operations\n",
+ ice_vsi_type_str(vsi->type));
+ break;
+ }
+}
+
+/**
+ * ice_get_compat_vsi_vlan_ops - Get VSI VLAN ops based on VLAN mode
+ * @vsi: VSI used to get the VSI VLAN ops
+ *
+ * This function is meant to be used when the caller doesn't know which VLAN ops
+ * to use (i.e. inner or outer). This allows backward compatibility for VLANs
+ * since most of the Outer VSI VLAN functins are not supported when
+ * the device is configured in Single VLAN Mode (SVM).
+ */
+struct ice_vsi_vlan_ops *ice_get_compat_vsi_vlan_ops(struct ice_vsi *vsi)
+{
+ if (ice_is_dvm_ena(&vsi->back->hw))
+ return &vsi->outer_vlan_ops;
+ else
+ return &vsi->inner_vlan_ops;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h
new file mode 100644
index 000000000000..5b47568f6256
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VSI_VLAN_OPS_H_
+#define _ICE_VSI_VLAN_OPS_H_
+
+#include "ice_type.h"
+#include "ice_vsi_vlan_lib.h"
+
+struct ice_vsi;
+
+struct ice_vsi_vlan_ops {
+ int (*add_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan);
+ int (*del_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan);
+ int (*ena_stripping)(struct ice_vsi *vsi, const u16 tpid);
+ int (*dis_stripping)(struct ice_vsi *vsi);
+ int (*ena_insertion)(struct ice_vsi *vsi, const u16 tpid);
+ int (*dis_insertion)(struct ice_vsi *vsi);
+ int (*ena_rx_filtering)(struct ice_vsi *vsi);
+ int (*dis_rx_filtering)(struct ice_vsi *vsi);
+ int (*ena_tx_filtering)(struct ice_vsi *vsi);
+ int (*dis_tx_filtering)(struct ice_vsi *vsi);
+ int (*set_port_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan);
+};
+
+void ice_vsi_init_vlan_ops(struct ice_vsi *vsi);
+struct ice_vsi_vlan_ops *ice_get_compat_vsi_vlan_ops(struct ice_vsi *vsi);
+
+#endif /* _ICE_VSI_VLAN_OPS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 2388837d6d6c..88853a6ed931 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -327,6 +327,13 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
bool if_running, pool_present = !!pool;
int ret = 0, pool_failure = 0;
+ if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
+ !is_power_of_2(vsi->tx_rings[qid]->count)) {
+ netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
+ pool_failure = -EINVAL;
+ goto failure;
+ }
+
if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
if (if_running) {
@@ -349,6 +356,7 @@ xsk_pool_if_up:
netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
}
+failure:
if (pool_failure) {
netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n",
pool_present ? "en" : "dis", pool_failure);
@@ -359,33 +367,28 @@ xsk_pool_if_up:
}
/**
- * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
- * @rx_ring: Rx ring
+ * ice_fill_rx_descs - pick buffers from XSK buffer pool and use it
+ * @pool: XSK Buffer pool to pull the buffers from
+ * @xdp: SW ring of xdp_buff that will hold the buffers
+ * @rx_desc: Pointer to Rx descriptors that will be filled
* @count: The number of buffers to allocate
*
* This function allocates a number of Rx buffers from the fill ring
* or the internal recycle mechanism and places them on the Rx ring.
*
- * Returns true if all allocations were successful, false if any fail.
+ * Note that ring wrap should be handled by caller of this function.
+ *
+ * Returns the amount of allocated Rx descriptors
*/
-bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
+static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
+ union ice_32b_rx_flex_desc *rx_desc, u16 count)
{
- union ice_32b_rx_flex_desc *rx_desc;
- u16 ntu = rx_ring->next_to_use;
- struct xdp_buff **xdp;
- u32 nb_buffs, i;
dma_addr_t dma;
+ u16 buffs;
+ int i;
- rx_desc = ICE_RX_DESC(rx_ring, ntu);
- xdp = ice_xdp_buf(rx_ring, ntu);
-
- nb_buffs = min_t(u16, count, rx_ring->count - ntu);
- nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs);
- if (!nb_buffs)
- return false;
-
- i = nb_buffs;
- while (i--) {
+ buffs = xsk_buff_alloc_batch(pool, xdp, count);
+ for (i = 0; i < buffs; i++) {
dma = xsk_buff_xdp_get_dma(*xdp);
rx_desc->read.pkt_addr = cpu_to_le64(dma);
rx_desc->wb.status_error0 = 0;
@@ -394,13 +397,77 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
xdp++;
}
+ return buffs;
+}
+
+/**
+ * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * Place the @count of descriptors onto Rx ring. Handle the ring wrap
+ * for case where space from next_to_use up to the end of ring is less
+ * than @count. Finally do a tail bump.
+ *
+ * Returns true if all allocations were successful, false if any fail.
+ */
+static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
+{
+ union ice_32b_rx_flex_desc *rx_desc;
+ u32 nb_buffs_extra = 0, nb_buffs;
+ u16 ntu = rx_ring->next_to_use;
+ u16 total_count = count;
+ struct xdp_buff **xdp;
+
+ rx_desc = ICE_RX_DESC(rx_ring, ntu);
+ xdp = ice_xdp_buf(rx_ring, ntu);
+
+ if (ntu + count >= rx_ring->count) {
+ nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp,
+ rx_desc,
+ rx_ring->count - ntu);
+ rx_desc = ICE_RX_DESC(rx_ring, 0);
+ xdp = ice_xdp_buf(rx_ring, 0);
+ ntu = 0;
+ count -= nb_buffs_extra;
+ ice_release_rx_desc(rx_ring, 0);
+ }
+
+ nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count);
+
ntu += nb_buffs;
if (ntu == rx_ring->count)
ntu = 0;
- ice_release_rx_desc(rx_ring, ntu);
+ if (rx_ring->next_to_use != ntu)
+ ice_release_rx_desc(rx_ring, ntu);
+
+ return total_count == (nb_buffs_extra + nb_buffs);
+}
+
+/**
+ * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * Wrapper for internal allocation routine; figure out how many tail
+ * bumps should take place based on the given threshold
+ *
+ * Returns true if all calls to internal alloc routine succeeded
+ */
+bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
+{
+ u16 rx_thresh = ICE_RING_QUARTER(rx_ring);
+ u16 batched, leftover, i, tail_bumps;
+
+ batched = ALIGN_DOWN(count, rx_thresh);
+ tail_bumps = batched / rx_thresh;
+ leftover = count & (rx_thresh - 1);
- return count == nb_buffs;
+ for (i = 0; i < tail_bumps; i++)
+ if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh))
+ return false;
+ return __ice_alloc_rx_bufs_zc(rx_ring, leftover);
}
/**
@@ -428,20 +495,24 @@ static void ice_bump_ntc(struct ice_rx_ring *rx_ring)
static struct sk_buff *
ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
- unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start;
+ unsigned int totalsize = xdp->data_end - xdp->data_meta;
unsigned int metasize = xdp->data - xdp->data_meta;
- unsigned int datasize = xdp->data_end - xdp->data;
struct sk_buff *skb;
- skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
+ net_prefetch(xdp->data_meta);
+
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
- skb_reserve(skb, xdp->data - xdp->data_hard_start);
- memcpy(__skb_put(skb, datasize), xdp->data, datasize);
- if (metasize)
+ memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+ ALIGN(totalsize, sizeof(long)));
+
+ if (metasize) {
skb_metadata_set(skb, metasize);
+ __skb_pull(skb, metasize);
+ }
xsk_buff_free(xdp);
return skb;
@@ -528,7 +599,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
- if (!ice_test_staterr(rx_desc, stat_err_bits))
+ if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
break;
/* This memory barrier is needed to keep us from reading
@@ -583,9 +654,7 @@ construct_skb:
total_rx_bytes += skb->len;
total_rx_packets++;
- stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
- if (ice_test_staterr(rx_desc, stat_err_bits))
- vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
+ vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);
rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
ICE_RX_FLEX_DESC_PTYPE_M;
@@ -612,134 +681,221 @@ construct_skb:
}
/**
- * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
* @xdp_ring: XDP Tx ring
- * @budget: max number of frames to xmit
+ * @tx_buf: Tx buffer to clean
+ */
+static void
+ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+{
+ xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
+ xdp_ring->xdp_tx_active--;
+ dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring
+ * @xdp_ring: XDP ring to clean
+ * @napi_budget: amount of descriptors that NAPI allows us to clean
*
- * Returns true if cleanup/transmission is done.
+ * Returns count of cleaned descriptors
*/
-static bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, int budget)
+static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget)
{
- struct ice_tx_desc *tx_desc = NULL;
- bool work_done = true;
- struct xdp_desc desc;
- dma_addr_t dma;
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+ int budget = napi_budget / tx_thresh;
+ u16 next_dd = xdp_ring->next_dd;
+ u16 ntc, cleared_dds = 0;
- while (likely(budget-- > 0)) {
+ do {
+ struct ice_tx_desc *next_dd_desc;
+ u16 desc_cnt = xdp_ring->count;
struct ice_tx_buf *tx_buf;
+ u32 xsk_frames;
+ u16 i;
- if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) {
- xdp_ring->tx_stats.tx_busy++;
- work_done = false;
+ next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
+ if (!(next_dd_desc->cmd_type_offset_bsz &
+ cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
break;
- }
- tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
-
- if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc))
- break;
-
- dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr);
- xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma,
- desc.len);
-
- tx_buf->bytecount = desc.len;
+ cleared_dds++;
+ xsk_frames = 0;
+ if (likely(!xdp_ring->xdp_tx_active)) {
+ xsk_frames = tx_thresh;
+ goto skip;
+ }
- tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
- tx_desc->buf_addr = cpu_to_le64(dma);
- tx_desc->cmd_type_offset_bsz =
- ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, desc.len, 0);
+ ntc = xdp_ring->next_to_clean;
- xdp_ring->next_to_use++;
- if (xdp_ring->next_to_use == xdp_ring->count)
- xdp_ring->next_to_use = 0;
- }
+ for (i = 0; i < tx_thresh; i++) {
+ tx_buf = &xdp_ring->tx_buf[ntc];
- if (tx_desc) {
- ice_xdp_ring_update_tail(xdp_ring);
- xsk_tx_release(xdp_ring->xsk_pool);
- }
+ if (tx_buf->raw_buf) {
+ ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+ tx_buf->raw_buf = NULL;
+ } else {
+ xsk_frames++;
+ }
- return budget > 0 && work_done;
+ ntc++;
+ if (ntc >= xdp_ring->count)
+ ntc = 0;
+ }
+skip:
+ xdp_ring->next_to_clean += tx_thresh;
+ if (xdp_ring->next_to_clean >= desc_cnt)
+ xdp_ring->next_to_clean -= desc_cnt;
+ if (xsk_frames)
+ xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+ next_dd_desc->cmd_type_offset_bsz = 0;
+ next_dd = next_dd + tx_thresh;
+ if (next_dd >= desc_cnt)
+ next_dd = tx_thresh - 1;
+ } while (budget--);
+
+ xdp_ring->next_dd = next_dd;
+
+ return cleared_dds * tx_thresh;
}
/**
- * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
- * @xdp_ring: XDP Tx ring
- * @tx_buf: Tx buffer to clean
+ * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
+ * @xdp_ring: XDP ring to produce the HW Tx descriptor on
+ * @desc: AF_XDP descriptor to pull the DMA address and length from
+ * @total_bytes: bytes accumulator that will be used for stats update
*/
-static void
-ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
+ unsigned int *total_bytes)
{
- xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
- dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
- dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
- dma_unmap_len_set(tx_buf, len, 0);
+ struct ice_tx_desc *tx_desc;
+ dma_addr_t dma;
+
+ dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
+ xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);
+
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+ 0, desc->len, 0);
+
+ *total_bytes += desc->len;
}
/**
- * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries
- * @xdp_ring: XDP Tx ring
- * @budget: NAPI budget
- *
- * Returns true if cleanup/tranmission is done.
+ * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
+ * @total_bytes: bytes accumulator that will be used for stats update
*/
-bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget)
+static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
+ unsigned int *total_bytes)
{
- int total_packets = 0, total_bytes = 0;
- s16 ntc = xdp_ring->next_to_clean;
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+ u16 ntu = xdp_ring->next_to_use;
struct ice_tx_desc *tx_desc;
- struct ice_tx_buf *tx_buf;
- u32 xsk_frames = 0;
- bool xmit_done;
+ u32 i;
- tx_desc = ICE_TX_DESC(xdp_ring, ntc);
- tx_buf = &xdp_ring->tx_buf[ntc];
- ntc -= xdp_ring->count;
+ loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
+ dma_addr_t dma;
- do {
- if (!(tx_desc->cmd_type_offset_bsz &
- cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
- break;
+ dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr);
+ xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len);
- total_bytes += tx_buf->bytecount;
- total_packets++;
+ tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+ 0, descs[i].len, 0);
- if (tx_buf->raw_buf) {
- ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
- tx_buf->raw_buf = NULL;
- } else {
- xsk_frames++;
- }
+ *total_bytes += descs[i].len;
+ }
- tx_desc->cmd_type_offset_bsz = 0;
- tx_buf++;
- tx_desc++;
- ntc++;
+ xdp_ring->next_to_use = ntu;
- if (unlikely(!ntc)) {
- ntc -= xdp_ring->count;
- tx_buf = xdp_ring->tx_buf;
- tx_desc = ICE_TX_DESC(xdp_ring, 0);
- }
+ if (xdp_ring->next_to_use > xdp_ring->next_rs) {
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+ xdp_ring->next_rs += tx_thresh;
+ }
+}
- prefetch(tx_desc);
+/**
+ * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
+ * @nb_pkts: count of packets to be send
+ * @total_bytes: bytes accumulator that will be used for stats update
+ */
+static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
+ u32 nb_pkts, unsigned int *total_bytes)
+{
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+ u32 batched, leftover, i;
+
+ batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
+ leftover = nb_pkts & (PKTS_PER_BATCH - 1);
+ for (i = 0; i < batched; i += PKTS_PER_BATCH)
+ ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
+ for (; i < batched + leftover; i++)
+ ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
+
+ if (xdp_ring->next_to_use > xdp_ring->next_rs) {
+ struct ice_tx_desc *tx_desc;
+
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+ xdp_ring->next_rs += tx_thresh;
+ }
+}
- } while (likely(--budget));
+/**
+ * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @budget: number of free descriptors on HW Tx ring that can be used
+ * @napi_budget: amount of descriptors that NAPI allows us to clean
+ *
+ * Returns true if there is no more work that needs to be done, false otherwise
+ */
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget)
+{
+ struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+ u32 nb_pkts, nb_processed = 0;
+ unsigned int total_bytes = 0;
+
+ if (budget < tx_thresh)
+ budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget);
+
+ nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
+ if (!nb_pkts)
+ return true;
+
+ if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
+ struct ice_tx_desc *tx_desc;
+
+ nb_processed = xdp_ring->count - xdp_ring->next_to_use;
+ ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+ xdp_ring->next_rs = tx_thresh - 1;
+ xdp_ring->next_to_use = 0;
+ }
- ntc += xdp_ring->count;
- xdp_ring->next_to_clean = ntc;
+ ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
+ &total_bytes);
- if (xsk_frames)
- xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+ ice_xdp_ring_update_tail(xdp_ring);
+ ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);
if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);
- ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes);
- xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK);
-
- return budget > 0 && xmit_done;
+ return nb_pkts < budget;
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
index 4c7bd8e9dfc4..0cbb5793b5b8 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.h
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -6,19 +6,37 @@
#include "ice_txrx.h"
#include "ice.h"
+#define PKTS_PER_BATCH 8
+
+#ifdef __clang__
+#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for
+#elif __GNUC__ >= 4
+#define loop_unrolled_for _Pragma("GCC unroll 8") for
+#else
+#define loop_unrolled_for for
+#endif
+
struct ice_vsi;
#ifdef CONFIG_XDP_SOCKETS
int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool,
u16 qid);
int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget);
-bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget);
int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count);
bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring);
void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring);
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget);
#else
+static inline bool
+ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring,
+ u32 __always_unused budget,
+ int __always_unused napi_budget)
+{
+ return false;
+}
+
static inline int
ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi,
struct xsk_buff_pool __always_unused *pool,
@@ -35,13 +53,6 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring,
}
static inline bool
-ice_clean_tx_irq_zc(struct ice_tx_ring __always_unused *xdp_ring,
- int __always_unused budget)
-{
- return false;
-}
-
-static inline bool
ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring,
u16 __always_unused count)
{
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 51a2dcaf553d..2a5782063f4c 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -965,10 +965,6 @@ static int igb_set_ringparam(struct net_device *netdev,
memcpy(&temp_ring[i], adapter->rx_ring[i],
sizeof(struct igb_ring));
- /* Clear copied XDP RX-queue info */
- memset(&temp_ring[i].xdp_rxq, 0,
- sizeof(temp_ring[i].xdp_rxq));
-
temp_ring[i].count = new_rx_count;
err = igb_setup_rx_resources(&temp_ring[i]);
if (err) {
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 38ba92022cd4..34b33b21e0dc 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3164,8 +3164,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
s32 ret_val;
static int global_quad_port_a; /* global quad port a indication */
const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
- int err, pci_using_dac;
u8 part_str[E1000_PBANUM_LENGTH];
+ int err;
/* Catch broken hardware that put the wrong VF device ID in
* the PCIe SR-IOV capability.
@@ -3180,17 +3180,11 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
- pci_using_dac = 0;
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (!err) {
- pci_using_dac = 1;
- } else {
- err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev,
- "No usable DMA configuration, aborting\n");
- goto err_dma;
- }
+ if (err) {
+ dev_err(&pdev->dev,
+ "No usable DMA configuration, aborting\n");
+ goto err_dma;
}
err = pci_request_mem_regions(pdev, igb_driver_name);
@@ -3306,8 +3300,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (hw->mac.type >= e1000_i350)
netdev->hw_features |= NETIF_F_NTUPLE;
- if (pci_using_dac)
- netdev->features |= NETIF_F_HIGHDMA;
+ netdev->features |= NETIF_F_HIGHDMA;
netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
netdev->mpls_features |= NETIF_F_HW_CSUM;
@@ -4352,7 +4345,18 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
{
struct igb_adapter *adapter = netdev_priv(rx_ring->netdev);
struct device *dev = rx_ring->dev;
- int size;
+ int size, res;
+
+ /* XDP RX-queue info */
+ if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+ res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+ rx_ring->queue_index, 0);
+ if (res < 0) {
+ dev_err(dev, "Failed to register xdp_rxq index %u\n",
+ rx_ring->queue_index);
+ return res;
+ }
size = sizeof(struct igb_rx_buffer) * rx_ring->count;
@@ -4375,14 +4379,10 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
rx_ring->xdp_prog = adapter->xdp_prog;
- /* XDP RX-queue info */
- if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
- rx_ring->queue_index, 0) < 0)
- goto err;
-
return 0;
err:
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index b78407289741..43ced78c3a2e 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -2684,25 +2684,18 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct igbvf_adapter *adapter;
struct e1000_hw *hw;
const struct igbvf_info *ei = igbvf_info_tbl[ent->driver_data];
-
static int cards_found;
- int err, pci_using_dac;
+ int err;
err = pci_enable_device_mem(pdev);
if (err)
return err;
- pci_using_dac = 0;
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (!err) {
- pci_using_dac = 1;
- } else {
- err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev,
- "No usable DMA configuration, aborting\n");
- goto err_dma;
- }
+ if (err) {
+ dev_err(&pdev->dev,
+ "No usable DMA configuration, aborting\n");
+ goto err_dma;
}
err = pci_request_regions(pdev, igbvf_driver_name);
@@ -2783,10 +2776,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->hw_features |= NETIF_F_GSO_PARTIAL |
IGBVF_GSO_PARTIAL_FEATURES;
- netdev->features = netdev->hw_features;
-
- if (pci_using_dac)
- netdev->features |= NETIF_F_HIGHDMA;
+ netdev->features = netdev->hw_features | NETIF_F_HIGHDMA;
netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
netdev->mpls_features |= NETIF_F_HW_CSUM;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 2f17f36e94fd..74b2c590ed5d 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -505,6 +505,9 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring)
u8 index = rx_ring->queue_index;
int size, desc_len, res;
+ /* XDP RX-queue info */
+ if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index,
rx_ring->q_vector->napi.napi_id);
if (res < 0) {
@@ -2446,19 +2449,20 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
struct xdp_buff *xdp)
{
+ unsigned int totalsize = xdp->data_end - xdp->data_meta;
unsigned int metasize = xdp->data - xdp->data_meta;
- unsigned int datasize = xdp->data_end - xdp->data;
- unsigned int totalsize = metasize + datasize;
struct sk_buff *skb;
- skb = __napi_alloc_skb(&ring->q_vector->napi,
- xdp->data_end - xdp->data_hard_start,
+ net_prefetch(xdp->data_meta);
+
+ skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
- skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
- memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize);
+ memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+ ALIGN(totalsize, sizeof(long)));
+
if (metasize) {
skb_metadata_set(skb, metasize);
__skb_pull(skb, metasize);
@@ -6251,23 +6255,17 @@ static int igc_probe(struct pci_dev *pdev,
struct net_device *netdev;
struct igc_hw *hw;
const struct igc_info *ei = igc_info_tbl[ent->driver_data];
- int err, pci_using_dac;
+ int err;
err = pci_enable_device_mem(pdev);
if (err)
return err;
- pci_using_dac = 0;
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (!err) {
- pci_using_dac = 1;
- } else {
- err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev,
- "No usable DMA configuration, aborting\n");
- goto err_dma;
- }
+ if (err) {
+ dev_err(&pdev->dev,
+ "No usable DMA configuration, aborting\n");
+ goto err_dma;
}
err = pci_request_mem_regions(pdev, igc_driver_name);
@@ -6367,8 +6365,7 @@ static int igc_probe(struct pci_dev *pdev,
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
netdev->hw_features |= netdev->features;
- if (pci_using_dac)
- netdev->features |= NETIF_F_HIGHDMA;
+ netdev->features |= NETIF_F_HIGHDMA;
netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
netdev->mpls_features |= NETIF_F_HW_CSUM;
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index 99d481904ce6..affdefcca7e3 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -361,7 +361,6 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct net_device *netdev = NULL;
struct ixgb_adapter *adapter;
static int cards_found = 0;
- int pci_using_dac;
u8 addr[ETH_ALEN];
int i;
int err;
@@ -370,16 +369,10 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
- pci_using_dac = 0;
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (!err) {
- pci_using_dac = 1;
- } else {
- err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
- if (err) {
- pr_err("No usable DMA configuration, aborting\n");
- goto err_dma_mask;
- }
+ if (err) {
+ pr_err("No usable DMA configuration, aborting\n");
+ goto err_dma_mask;
}
err = pci_request_regions(pdev, ixgb_driver_name);
@@ -444,10 +437,8 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
NETIF_F_HW_VLAN_CTAG_FILTER;
netdev->hw_features |= NETIF_F_RXCSUM;
- if (pci_using_dac) {
- netdev->features |= NETIF_F_HIGHDMA;
- netdev->vlan_features |= NETIF_F_HIGHDMA;
- }
+ netdev->features |= NETIF_F_HIGHDMA;
+ netdev->vlan_features |= NETIF_F_HIGHDMA;
/* MTU range: 68 - 16114 */
netdev->min_mtu = ETH_MIN_MTU;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 89b467006291..2c8a4a06f56a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10632,9 +10632,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct ixgbe_adapter *adapter = NULL;
struct ixgbe_hw *hw;
const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];
- int i, err, pci_using_dac, expected_gts;
unsigned int indices = MAX_TX_QUEUES;
u8 part_str[IXGBE_PBANUM_LENGTH];
+ int i, err, expected_gts;
bool disable_dev = false;
#ifdef IXGBE_FCOE
u16 device_caps;
@@ -10654,16 +10654,11 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
- if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
- pci_using_dac = 1;
- } else {
- err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev,
- "No usable DMA configuration, aborting\n");
- goto err_dma;
- }
- pci_using_dac = 0;
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev,
+ "No usable DMA configuration, aborting\n");
+ goto err_dma;
}
err = pci_request_mem_regions(pdev, ixgbe_driver_name);
@@ -10861,8 +10856,7 @@ skip_sriov:
netdev->hw_features |= NETIF_F_NTUPLE |
NETIF_F_HW_TC;
- if (pci_using_dac)
- netdev->features |= NETIF_F_HIGHDMA;
+ netdev->features |= NETIF_F_HIGHDMA;
netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
netdev->hw_enc_features |= netdev->vlan_features;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index b3fd8e5cd85b..ee28929b9c5f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -207,26 +207,28 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
}
static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring,
- struct ixgbe_rx_buffer *bi)
+ const struct xdp_buff *xdp)
{
- unsigned int metasize = bi->xdp->data - bi->xdp->data_meta;
- unsigned int datasize = bi->xdp->data_end - bi->xdp->data;
+ unsigned int totalsize = xdp->data_end - xdp->data_meta;
+ unsigned int metasize = xdp->data - xdp->data_meta;
struct sk_buff *skb;
+ net_prefetch(xdp->data_meta);
+
/* allocate a skb to store the frags */
- skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
- bi->xdp->data_end - bi->xdp->data_hard_start,
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
- skb_reserve(skb, bi->xdp->data - bi->xdp->data_hard_start);
- memcpy(__skb_put(skb, datasize), bi->xdp->data, datasize);
- if (metasize)
+ memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+ ALIGN(totalsize, sizeof(long)));
+
+ if (metasize) {
skb_metadata_set(skb, metasize);
+ __skb_pull(skb, metasize);
+ }
- xsk_buff_free(bi->xdp);
- bi->xdp = NULL;
return skb;
}
@@ -317,12 +319,15 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
}
/* XDP_PASS path */
- skb = ixgbe_construct_skb_zc(rx_ring, bi);
+ skb = ixgbe_construct_skb_zc(rx_ring, bi->xdp);
if (!skb) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
break;
}
+ xsk_buff_free(bi->xdp);
+ bi->xdp = NULL;
+
cleaned_count++;
ixgbe_inc_ntc(rx_ring);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 0f293acd17e8..17fbc450da61 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -4512,22 +4512,17 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct ixgbevf_adapter *adapter = NULL;
struct ixgbe_hw *hw = NULL;
const struct ixgbevf_info *ii = ixgbevf_info_tbl[ent->driver_data];
- int err, pci_using_dac;
bool disable_dev = false;
+ int err;
err = pci_enable_device(pdev);
if (err)
return err;
- if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
- pci_using_dac = 1;
- } else {
- err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev, "No usable DMA configuration, aborting\n");
- goto err_dma;
- }
- pci_using_dac = 0;
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev, "No usable DMA configuration, aborting\n");
+ goto err_dma;
}
err = pci_request_regions(pdev, ixgbevf_driver_name);
@@ -4607,10 +4602,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->hw_features |= NETIF_F_GSO_PARTIAL |
IXGBEVF_GSO_PARTIAL_FEATURES;
- netdev->features = netdev->hw_features;
-
- if (pci_using_dac)
- netdev->features |= NETIF_F_HIGHDMA;
+ netdev->features = netdev->hw_features | NETIF_F_HIGHDMA;
netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
netdev->mpls_features |= NETIF_F_SG |
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 83c8908f0cc7..f1335a1ed695 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1884,8 +1884,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
bytes_compl += buf->skb->len;
pkts_compl++;
dev_kfree_skb_any(buf->skb);
- } else if (buf->type == MVNETA_TYPE_XDP_TX ||
- buf->type == MVNETA_TYPE_XDP_NDO) {
+ } else if ((buf->type == MVNETA_TYPE_XDP_TX ||
+ buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) {
if (napi && buf->type == MVNETA_TYPE_XDP_TX)
xdp_return_frame_rx_napi(buf->xdpf);
else
@@ -2060,61 +2060,104 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
static void
mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
- struct xdp_buff *xdp, struct skb_shared_info *sinfo,
- int sync_len)
+ struct xdp_buff *xdp, int sync_len)
{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
int i;
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+
for (i = 0; i < sinfo->nr_frags; i++)
page_pool_put_full_page(rxq->page_pool,
skb_frag_page(&sinfo->frags[i]), true);
+
+out:
page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data),
sync_len, true);
}
static int
mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
- struct xdp_frame *xdpf, bool dma_map)
+ struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map)
{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+ struct device *dev = pp->dev->dev.parent;
struct mvneta_tx_desc *tx_desc;
- struct mvneta_tx_buf *buf;
- dma_addr_t dma_addr;
+ int i, num_frames = 1;
+ struct page *page;
- if (txq->count >= txq->tx_stop_threshold)
+ if (unlikely(xdp_frame_has_frags(xdpf)))
+ num_frames += sinfo->nr_frags;
+
+ if (txq->count + num_frames >= txq->size)
return MVNETA_XDP_DROPPED;
- tx_desc = mvneta_txq_next_desc_get(txq);
+ for (i = 0; i < num_frames; i++) {
+ struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+ skb_frag_t *frag = NULL;
+ int len = xdpf->len;
+ dma_addr_t dma_addr;
- buf = &txq->buf[txq->txq_put_index];
- if (dma_map) {
- /* ndo_xdp_xmit */
- dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data,
- xdpf->len, DMA_TO_DEVICE);
- if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) {
- mvneta_txq_desc_put(txq);
- return MVNETA_XDP_DROPPED;
+ if (unlikely(i)) { /* paged area */
+ frag = &sinfo->frags[i - 1];
+ len = skb_frag_size(frag);
}
- buf->type = MVNETA_TYPE_XDP_NDO;
- } else {
- struct page *page = virt_to_page(xdpf->data);
- dma_addr = page_pool_get_dma_addr(page) +
- sizeof(*xdpf) + xdpf->headroom;
- dma_sync_single_for_device(pp->dev->dev.parent, dma_addr,
- xdpf->len, DMA_BIDIRECTIONAL);
- buf->type = MVNETA_TYPE_XDP_TX;
- }
- buf->xdpf = xdpf;
+ tx_desc = mvneta_txq_next_desc_get(txq);
+ if (dma_map) {
+ /* ndo_xdp_xmit */
+ void *data;
+
+ data = unlikely(frag) ? skb_frag_address(frag)
+ : xdpf->data;
+ dma_addr = dma_map_single(dev, data, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_addr)) {
+ mvneta_txq_desc_put(txq);
+ goto unmap;
+ }
+
+ buf->type = MVNETA_TYPE_XDP_NDO;
+ } else {
+ page = unlikely(frag) ? skb_frag_page(frag)
+ : virt_to_page(xdpf->data);
+ dma_addr = page_pool_get_dma_addr(page);
+ if (unlikely(frag))
+ dma_addr += skb_frag_off(frag);
+ else
+ dma_addr += sizeof(*xdpf) + xdpf->headroom;
+ dma_sync_single_for_device(dev, dma_addr, len,
+ DMA_BIDIRECTIONAL);
+ buf->type = MVNETA_TYPE_XDP_TX;
+ }
+ buf->xdpf = unlikely(i) ? NULL : xdpf;
- tx_desc->command = MVNETA_TXD_FLZ_DESC;
- tx_desc->buf_phys_addr = dma_addr;
- tx_desc->data_size = xdpf->len;
+ tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC;
+ tx_desc->buf_phys_addr = dma_addr;
+ tx_desc->data_size = len;
+ *nxmit_byte += len;
- mvneta_txq_inc_put(txq);
- txq->pending++;
- txq->count++;
+ mvneta_txq_inc_put(txq);
+ }
+ /*last descriptor */
+ tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
+
+ txq->pending += num_frames;
+ txq->count += num_frames;
return MVNETA_XDP_TX;
+
+unmap:
+ for (i--; i >= 0; i--) {
+ mvneta_txq_desc_put(txq);
+ tx_desc = txq->descs + txq->next_desc_to_proc;
+ dma_unmap_single(dev, tx_desc->buf_phys_addr,
+ tx_desc->data_size,
+ DMA_TO_DEVICE);
+ }
+
+ return MVNETA_XDP_DROPPED;
}
static int
@@ -2123,8 +2166,8 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
struct mvneta_tx_queue *txq;
struct netdev_queue *nq;
+ int cpu, nxmit_byte = 0;
struct xdp_frame *xdpf;
- int cpu;
u32 ret;
xdpf = xdp_convert_buff_to_frame(xdp);
@@ -2136,10 +2179,10 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
nq = netdev_get_tx_queue(pp->dev, txq->id);
__netif_tx_lock(nq, cpu);
- ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false);
+ ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false);
if (ret == MVNETA_XDP_TX) {
u64_stats_update_begin(&stats->syncp);
- stats->es.ps.tx_bytes += xdpf->len;
+ stats->es.ps.tx_bytes += nxmit_byte;
stats->es.ps.tx_packets++;
stats->es.ps.xdp_tx++;
u64_stats_update_end(&stats->syncp);
@@ -2178,11 +2221,11 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame,
__netif_tx_lock(nq, cpu);
for (i = 0; i < num_frame; i++) {
- ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true);
+ ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte,
+ true);
if (ret != MVNETA_XDP_TX)
break;
- nxmit_byte += frames[i]->len;
nxmit++;
}
@@ -2205,7 +2248,6 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
struct bpf_prog *prog, struct xdp_buff *xdp,
u32 frame_sz, struct mvneta_stats *stats)
{
- struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
unsigned int len, data_len, sync;
u32 ret, act;
@@ -2226,7 +2268,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
err = xdp_do_redirect(pp->dev, xdp, prog);
if (unlikely(err)) {
- mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+ mvneta_xdp_put_buff(pp, rxq, xdp, sync);
ret = MVNETA_XDP_DROPPED;
} else {
ret = MVNETA_XDP_REDIR;
@@ -2237,7 +2279,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
case XDP_TX:
ret = mvneta_xdp_xmit_back(pp, xdp);
if (ret != MVNETA_XDP_TX)
- mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+ mvneta_xdp_put_buff(pp, rxq, xdp, sync);
break;
default:
bpf_warn_invalid_xdp_action(pp->dev, prog, act);
@@ -2246,7 +2288,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
trace_xdp_exception(pp->dev, prog, act);
fallthrough;
case XDP_DROP:
- mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+ mvneta_xdp_put_buff(pp, rxq, xdp, sync);
ret = MVNETA_XDP_DROPPED;
stats->xdp_drop++;
break;
@@ -2269,7 +2311,6 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
int data_len = -MVNETA_MH_SIZE, len;
struct net_device *dev = pp->dev;
enum dma_data_direction dma_dir;
- struct skb_shared_info *sinfo;
if (*size > MVNETA_MAX_RX_BUF_SIZE) {
len = MVNETA_MAX_RX_BUF_SIZE;
@@ -2289,11 +2330,9 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
/* Prefetch header */
prefetch(data);
+ xdp_buff_clear_frags_flag(xdp);
xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE,
data_len, false);
-
- sinfo = xdp_get_shared_info_from_buff(xdp);
- sinfo->nr_frags = 0;
}
static void
@@ -2301,9 +2340,9 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
struct mvneta_rx_desc *rx_desc,
struct mvneta_rx_queue *rxq,
struct xdp_buff *xdp, int *size,
- struct skb_shared_info *xdp_sinfo,
struct page *page)
{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
struct net_device *dev = pp->dev;
enum dma_data_direction dma_dir;
int data_len, len;
@@ -2321,25 +2360,25 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
len, dma_dir);
rx_desc->buf_phys_addr = 0;
- if (data_len > 0 && xdp_sinfo->nr_frags < MAX_SKB_FRAGS) {
- skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags++];
+ if (!xdp_buff_has_frags(xdp))
+ sinfo->nr_frags = 0;
+
+ if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
+ skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++];
skb_frag_off_set(frag, pp->rx_offset_correction);
skb_frag_size_set(frag, data_len);
__skb_frag_set_page(frag, page);
+
+ if (!xdp_buff_has_frags(xdp)) {
+ sinfo->xdp_frags_size = *size;
+ xdp_buff_set_frags_flag(xdp);
+ }
+ if (page_is_pfmemalloc(page))
+ xdp_buff_set_frag_pfmemalloc(xdp);
} else {
page_pool_put_full_page(rxq->page_pool, page, true);
}
-
- /* last fragment */
- if (len == *size) {
- struct skb_shared_info *sinfo;
-
- sinfo = xdp_get_shared_info_from_buff(xdp);
- sinfo->nr_frags = xdp_sinfo->nr_frags;
- memcpy(sinfo->frags, xdp_sinfo->frags,
- sinfo->nr_frags * sizeof(skb_frag_t));
- }
*size -= len;
}
@@ -2348,8 +2387,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
struct xdp_buff *xdp, u32 desc_status)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
- int i, num_frags = sinfo->nr_frags;
struct sk_buff *skb;
+ u8 num_frags;
+
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ num_frags = sinfo->nr_frags;
skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
if (!skb)
@@ -2361,13 +2403,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
skb_put(skb, xdp->data_end - xdp->data);
skb->ip_summed = mvneta_rx_csum(pp, desc_status);
- for (i = 0; i < num_frags; i++) {
- skb_frag_t *frag = &sinfo->frags[i];
-
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
- skb_frag_page(frag), skb_frag_off(frag),
- skb_frag_size(frag), PAGE_SIZE);
- }
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ xdp_update_skb_shared_info(skb, num_frags,
+ sinfo->xdp_frags_size,
+ num_frags * xdp->frame_sz,
+ xdp_buff_is_frag_pfmemalloc(xdp));
return skb;
}
@@ -2379,7 +2419,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
{
int rx_proc = 0, rx_todo, refill, size = 0;
struct net_device *dev = pp->dev;
- struct skb_shared_info sinfo;
struct mvneta_stats ps = {};
struct bpf_prog *xdp_prog;
u32 desc_status, frame_sz;
@@ -2388,8 +2427,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq);
xdp_buf.data_hard_start = NULL;
- sinfo.nr_frags = 0;
-
/* Get number of received packets */
rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
@@ -2431,7 +2468,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
}
mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf,
- &size, &sinfo, page);
+ &size, page);
} /* Middle or Last descriptor */
if (!(rx_status & MVNETA_RXD_LAST_DESC))
@@ -2439,7 +2476,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
continue;
if (size) {
- mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+ mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
goto next;
}
@@ -2451,7 +2488,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
if (IS_ERR(skb)) {
struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
- mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+ mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
u64_stats_update_begin(&stats->syncp);
stats->es.skb_alloc_error++;
@@ -2468,11 +2505,10 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
napi_gro_receive(napi, skb);
next:
xdp_buf.data_hard_start = NULL;
- sinfo.nr_frags = 0;
}
if (xdp_buf.data_hard_start)
- mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+ mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
if (ps.xdp_redirect)
xdp_do_flush_map();
@@ -3260,7 +3296,8 @@ static int mvneta_create_page_pool(struct mvneta_port *pp,
return err;
}
- err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0);
+ err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0,
+ PAGE_SIZE);
if (err < 0)
goto err_free_pp;
@@ -3740,6 +3777,7 @@ static void mvneta_percpu_disable(void *arg)
static int mvneta_change_mtu(struct net_device *dev, int mtu)
{
struct mvneta_port *pp = netdev_priv(dev);
+ struct bpf_prog *prog = pp->xdp_prog;
int ret;
if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
@@ -3748,8 +3786,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
}
- if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
- netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
+ if (prog && !prog->aux->xdp_has_frags &&
+ mtu > MVNETA_MAX_RX_BUF_SIZE) {
+ netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n",
+ mtu);
+
return -EINVAL;
}
@@ -3969,6 +4010,15 @@ static const struct phylink_pcs_ops mvneta_phylink_pcs_ops = {
.pcs_an_restart = mvneta_pcs_an_restart,
};
+static struct phylink_pcs *mvneta_mac_select_pcs(struct phylink_config *config,
+ phy_interface_t interface)
+{
+ struct net_device *ndev = to_net_dev(config->dev);
+ struct mvneta_port *pp = netdev_priv(ndev);
+
+ return &pp->phylink_pcs;
+}
+
static int mvneta_mac_prepare(struct phylink_config *config, unsigned int mode,
phy_interface_t interface)
{
@@ -4169,13 +4219,14 @@ static void mvneta_mac_link_up(struct phylink_config *config,
mvneta_port_up(pp);
if (phy && pp->eee_enabled) {
- pp->eee_active = phy_init_eee(phy, 0) >= 0;
+ pp->eee_active = phy_init_eee(phy, false) >= 0;
mvneta_set_eee(pp, pp->eee_active && pp->tx_lpi_enabled);
}
}
static const struct phylink_mac_ops mvneta_phylink_ops = {
.validate = phylink_generic_validate,
+ .mac_select_pcs = mvneta_mac_select_pcs,
.mac_prepare = mvneta_mac_prepare,
.mac_config = mvneta_mac_config,
.mac_finish = mvneta_mac_finish,
@@ -4490,8 +4541,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
struct mvneta_port *pp = netdev_priv(dev);
struct bpf_prog *old_prog;
- if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
- NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
+ if (prog && !prog->aux->xdp_has_frags &&
+ dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
+ NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags");
return -EOPNOTSUPP;
}
@@ -5321,26 +5373,62 @@ static int mvneta_probe(struct platform_device *pdev)
if (!dev)
return -ENOMEM;
- dev->irq = irq_of_parse_and_map(dn, 0);
- if (dev->irq == 0)
- return -EINVAL;
+ dev->tx_queue_len = MVNETA_MAX_TXD;
+ dev->watchdog_timeo = 5 * HZ;
+ dev->netdev_ops = &mvneta_netdev_ops;
+ dev->ethtool_ops = &mvneta_eth_tool_ops;
+
+ pp = netdev_priv(dev);
+ spin_lock_init(&pp->lock);
+ pp->dn = dn;
+
+ pp->rxq_def = rxq_def;
+ pp->indir[0] = rxq_def;
err = of_get_phy_mode(dn, &phy_mode);
if (err) {
dev_err(&pdev->dev, "incorrect phy-mode\n");
- goto err_free_irq;
+ return err;
}
+ pp->phy_interface = phy_mode;
+
comphy = devm_of_phy_get(&pdev->dev, dn, NULL);
- if (comphy == ERR_PTR(-EPROBE_DEFER)) {
- err = -EPROBE_DEFER;
- goto err_free_irq;
- } else if (IS_ERR(comphy)) {
+ if (comphy == ERR_PTR(-EPROBE_DEFER))
+ return -EPROBE_DEFER;
+
+ if (IS_ERR(comphy))
comphy = NULL;
+
+ pp->comphy = comphy;
+
+ pp->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(pp->base))
+ return PTR_ERR(pp->base);
+
+ /* Get special SoC configurations */
+ if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
+ pp->neta_armada3700 = true;
+
+ dev->irq = irq_of_parse_and_map(dn, 0);
+ if (dev->irq == 0)
+ return -EINVAL;
+
+ pp->clk = devm_clk_get(&pdev->dev, "core");
+ if (IS_ERR(pp->clk))
+ pp->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(pp->clk)) {
+ err = PTR_ERR(pp->clk);
+ goto err_free_irq;
}
- pp = netdev_priv(dev);
- spin_lock_init(&pp->lock);
+ clk_prepare_enable(pp->clk);
+
+ pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
+ if (!IS_ERR(pp->clk_bus))
+ clk_prepare_enable(pp->clk_bus);
+
+ pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops;
pp->phylink_config.dev = &dev->dev;
pp->phylink_config.type = PHYLINK_NETDEV;
@@ -5377,55 +5465,16 @@ static int mvneta_probe(struct platform_device *pdev)
phy_mode, &mvneta_phylink_ops);
if (IS_ERR(phylink)) {
err = PTR_ERR(phylink);
- goto err_free_irq;
- }
-
- dev->tx_queue_len = MVNETA_MAX_TXD;
- dev->watchdog_timeo = 5 * HZ;
- dev->netdev_ops = &mvneta_netdev_ops;
-
- dev->ethtool_ops = &mvneta_eth_tool_ops;
-
- pp->phylink = phylink;
- pp->comphy = comphy;
- pp->phy_interface = phy_mode;
- pp->dn = dn;
-
- pp->rxq_def = rxq_def;
- pp->indir[0] = rxq_def;
-
- /* Get special SoC configurations */
- if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
- pp->neta_armada3700 = true;
-
- pp->clk = devm_clk_get(&pdev->dev, "core");
- if (IS_ERR(pp->clk))
- pp->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(pp->clk)) {
- err = PTR_ERR(pp->clk);
- goto err_free_phylink;
- }
-
- clk_prepare_enable(pp->clk);
-
- pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
- if (!IS_ERR(pp->clk_bus))
- clk_prepare_enable(pp->clk_bus);
-
- pp->base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(pp->base)) {
- err = PTR_ERR(pp->base);
goto err_clk;
}
- pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops;
- phylink_set_pcs(phylink, &pp->phylink_pcs);
+ pp->phylink = phylink;
/* Alloc per-cpu port structure */
pp->ports = alloc_percpu(struct mvneta_pcpu_port);
if (!pp->ports) {
err = -ENOMEM;
- goto err_clk;
+ goto err_free_phylink;
}
/* Alloc per-cpu stats */
@@ -5569,12 +5618,12 @@ err_netdev:
free_percpu(pp->stats);
err_free_ports:
free_percpu(pp->ports);
-err_clk:
- clk_disable_unprepare(pp->clk_bus);
- clk_disable_unprepare(pp->clk);
err_free_phylink:
if (pp->phylink)
phylink_destroy(pp->phylink);
+err_clk:
+ clk_disable_unprepare(pp->clk_bus);
+ clk_disable_unprepare(pp->clk);
err_free_irq:
irq_dispose_mapping(dev->irq);
return err;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 3631d612aaca..25491edc35ce 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -578,31 +578,78 @@ void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable)
}
}
+static int cgx_lmac_get_pause_frm_status(void *cgxd, int lmac_id,
+ u8 *tx_pause, u8 *rx_pause)
+{
+ struct cgx *cgx = cgxd;
+ u64 cfg;
+
+ if (is_dev_rpm(cgx))
+ return 0;
+
+ if (!is_lmac_valid(cgx, lmac_id))
+ return -ENODEV;
+
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
+ *rx_pause = !!(cfg & CGX_SMUX_RX_FRM_CTL_CTL_BCK);
+
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL);
+ *tx_pause = !!(cfg & CGX_SMUX_TX_CTL_L2P_BP_CONV);
+ return 0;
+}
+
/* Enable or disable forwarding received pause frames to Tx block */
void cgx_lmac_enadis_rx_pause_fwding(void *cgxd, int lmac_id, bool enable)
{
struct cgx *cgx = cgxd;
+ u8 rx_pause, tx_pause;
+ bool is_pfc_enabled;
+ struct lmac *lmac;
u64 cfg;
if (!cgx)
return;
- if (enable) {
- cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
- cfg |= CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK;
- cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
+ lmac = lmac_pdata(lmac_id, cgx);
+ if (!lmac)
+ return;
- cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
- cfg |= CGX_SMUX_RX_FRM_CTL_CTL_BCK;
- cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg);
+ /* Pause frames are not enabled just return */
+ if (!bitmap_weight(lmac->rx_fc_pfvf_bmap.bmap, lmac->rx_fc_pfvf_bmap.max))
+ return;
+
+ cgx_lmac_get_pause_frm_status(cgx, lmac_id, &rx_pause, &tx_pause);
+ is_pfc_enabled = rx_pause ? false : true;
+
+ if (enable) {
+ if (!is_pfc_enabled) {
+ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
+ cfg |= CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK;
+ cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
+
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
+ cfg |= CGX_SMUX_RX_FRM_CTL_CTL_BCK;
+ cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg);
+ } else {
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL);
+ cfg |= CGXX_SMUX_CBFC_CTL_BCK_EN;
+ cgx_write(cgx, lmac_id, CGXX_SMUX_CBFC_CTL, cfg);
+ }
} else {
- cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
- cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK;
- cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
- cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
- cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK;
- cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg);
+ if (!is_pfc_enabled) {
+ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
+ cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK;
+ cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
+
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
+ cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK;
+ cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg);
+ } else {
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL);
+ cfg &= ~CGXX_SMUX_CBFC_CTL_BCK_EN;
+ cgx_write(cgx, lmac_id, CGXX_SMUX_CBFC_CTL, cfg);
+ }
}
}
@@ -722,26 +769,6 @@ int cgx_lmac_tx_enable(void *cgxd, int lmac_id, bool enable)
return !!(last & DATA_PKT_TX_EN);
}
-static int cgx_lmac_get_pause_frm_status(void *cgxd, int lmac_id,
- u8 *tx_pause, u8 *rx_pause)
-{
- struct cgx *cgx = cgxd;
- u64 cfg;
-
- if (is_dev_rpm(cgx))
- return 0;
-
- if (!is_lmac_valid(cgx, lmac_id))
- return -ENODEV;
-
- cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
- *rx_pause = !!(cfg & CGX_SMUX_RX_FRM_CTL_CTL_BCK);
-
- cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL);
- *tx_pause = !!(cfg & CGX_SMUX_TX_CTL_L2P_BP_CONV);
- return 0;
-}
-
static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id,
u8 tx_pause, u8 rx_pause)
{
@@ -782,21 +809,8 @@ static void cgx_lmac_pause_frm_config(void *cgxd, int lmac_id, bool enable)
if (!is_lmac_valid(cgx, lmac_id))
return;
- if (enable) {
- /* Enable receive pause frames */
- cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
- cfg |= CGX_SMUX_RX_FRM_CTL_CTL_BCK;
- cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg);
-
- cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
- cfg |= CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK;
- cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
-
- /* Enable pause frames transmission */
- cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL);
- cfg |= CGX_SMUX_TX_CTL_L2P_BP_CONV;
- cgx_write(cgx, lmac_id, CGXX_SMUX_TX_CTL, cfg);
+ if (enable) {
/* Set pause time and interval */
cgx_write(cgx, lmac_id, CGXX_SMUX_TX_PAUSE_PKT_TIME,
DEFAULT_PAUSE_TIME);
@@ -813,21 +827,120 @@ static void cgx_lmac_pause_frm_config(void *cgxd, int lmac_id, bool enable)
cfg &= ~0xFFFFULL;
cgx_write(cgx, lmac_id, CGXX_GMP_GMI_TX_PAUSE_PKT_INTERVAL,
cfg | (DEFAULT_PAUSE_TIME / 2));
- } else {
- /* ALL pause frames received are completely ignored */
- cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
- cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK;
- cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg);
+ }
- cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
- cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK;
- cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
+ /* ALL pause frames received are completely ignored */
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
+ cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK;
+ cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg);
+
+ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
+ cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK;
+ cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
+
+ /* Disable pause frames transmission */
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL);
+ cfg &= ~CGX_SMUX_TX_CTL_L2P_BP_CONV;
+ cgx_write(cgx, lmac_id, CGXX_SMUX_TX_CTL, cfg);
+
+ cfg = cgx_read(cgx, 0, CGXX_CMR_RX_OVR_BP);
+ cfg |= CGX_CMR_RX_OVR_BP_EN(lmac_id);
+ cfg &= ~CGX_CMR_RX_OVR_BP_BP(lmac_id);
+ cgx_write(cgx, 0, CGXX_CMR_RX_OVR_BP, cfg);
+}
+
+int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause,
+ int pfvf_idx)
+{
+ struct cgx *cgx = cgxd;
+ struct lmac *lmac;
+
+ lmac = lmac_pdata(lmac_id, cgx);
+ if (!lmac)
+ return -ENODEV;
+
+ if (!rx_pause)
+ clear_bit(pfvf_idx, lmac->rx_fc_pfvf_bmap.bmap);
+ else
+ set_bit(pfvf_idx, lmac->rx_fc_pfvf_bmap.bmap);
+
+ if (!tx_pause)
+ clear_bit(pfvf_idx, lmac->tx_fc_pfvf_bmap.bmap);
+ else
+ set_bit(pfvf_idx, lmac->tx_fc_pfvf_bmap.bmap);
+
+ /* check if other pfvfs are using flow control */
+ if (!rx_pause && bitmap_weight(lmac->rx_fc_pfvf_bmap.bmap, lmac->rx_fc_pfvf_bmap.max)) {
+ dev_warn(&cgx->pdev->dev,
+ "Receive Flow control disable not permitted as its used by other PFVFs\n");
+ return -EPERM;
+ }
+
+ if (!tx_pause && bitmap_weight(lmac->tx_fc_pfvf_bmap.bmap, lmac->tx_fc_pfvf_bmap.max)) {
+ dev_warn(&cgx->pdev->dev,
+ "Transmit Flow control disable not permitted as its used by other PFVFs\n");
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+int cgx_lmac_pfc_config(void *cgxd, int lmac_id, u8 tx_pause,
+ u8 rx_pause, u16 pfc_en)
+{
+ struct cgx *cgx = cgxd;
+ u64 cfg;
+
+ if (!is_lmac_valid(cgx, lmac_id))
+ return -ENODEV;
+
+ /* Return as no traffic classes are requested */
+ if (tx_pause && !pfc_en)
+ return 0;
+
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL);
- /* Disable pause frames transmission */
- cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL);
- cfg &= ~CGX_SMUX_TX_CTL_L2P_BP_CONV;
- cgx_write(cgx, lmac_id, CGXX_SMUX_TX_CTL, cfg);
+ if (rx_pause) {
+ cfg |= (CGXX_SMUX_CBFC_CTL_RX_EN |
+ CGXX_SMUX_CBFC_CTL_BCK_EN |
+ CGXX_SMUX_CBFC_CTL_DRP_EN);
+ } else {
+ cfg &= ~(CGXX_SMUX_CBFC_CTL_RX_EN |
+ CGXX_SMUX_CBFC_CTL_BCK_EN |
+ CGXX_SMUX_CBFC_CTL_DRP_EN);
}
+
+ if (tx_pause)
+ cfg |= CGXX_SMUX_CBFC_CTL_TX_EN;
+ else
+ cfg &= ~CGXX_SMUX_CBFC_CTL_TX_EN;
+
+ cfg = FIELD_SET(CGX_PFC_CLASS_MASK, pfc_en, cfg);
+
+ cgx_write(cgx, lmac_id, CGXX_SMUX_CBFC_CTL, cfg);
+
+ /* Write source MAC address which will be filled into PFC packet */
+ cfg = cgx_lmac_addr_get(cgx->cgx_id, lmac_id);
+ cgx_write(cgx, lmac_id, CGXX_SMUX_SMAC, cfg);
+
+ return 0;
+}
+
+int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause,
+ u8 *rx_pause)
+{
+ struct cgx *cgx = cgxd;
+ u64 cfg;
+
+ if (!is_lmac_valid(cgx, lmac_id))
+ return -ENODEV;
+
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL);
+
+ *rx_pause = !!(cfg & CGXX_SMUX_CBFC_CTL_RX_EN);
+ *tx_pause = !!(cfg & CGXX_SMUX_CBFC_CTL_TX_EN);
+
+ return 0;
}
void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable)
@@ -1489,6 +1602,16 @@ static int cgx_lmac_init(struct cgx *cgx)
/* Reserve first entry for default MAC address */
set_bit(0, lmac->mac_to_index_bmap.bmap);
+ lmac->rx_fc_pfvf_bmap.max = 128;
+ err = rvu_alloc_bitmap(&lmac->rx_fc_pfvf_bmap);
+ if (err)
+ goto err_dmac_bmap_free;
+
+ lmac->tx_fc_pfvf_bmap.max = 128;
+ err = rvu_alloc_bitmap(&lmac->tx_fc_pfvf_bmap);
+ if (err)
+ goto err_rx_fc_bmap_free;
+
init_waitqueue_head(&lmac->wq_cmd_cmplt);
mutex_init(&lmac->cmd_lock);
spin_lock_init(&lmac->event_cb_lock);
@@ -1505,6 +1628,10 @@ static int cgx_lmac_init(struct cgx *cgx)
return cgx_lmac_verify_fwi_version(cgx);
err_bitmap_free:
+ rvu_free_bitmap(&lmac->tx_fc_pfvf_bmap);
+err_rx_fc_bmap_free:
+ rvu_free_bitmap(&lmac->rx_fc_pfvf_bmap);
+err_dmac_bmap_free:
rvu_free_bitmap(&lmac->mac_to_index_bmap);
err_name_free:
kfree(lmac->name);
@@ -1572,6 +1699,8 @@ static struct mac_ops cgx_mac_ops = {
.mac_enadis_ptp_config = cgx_lmac_ptp_config,
.mac_rx_tx_enable = cgx_lmac_rx_tx_enable,
.mac_tx_enable = cgx_lmac_tx_enable,
+ .pfc_config = cgx_lmac_pfc_config,
+ .mac_get_pfc_frm_cfg = cgx_lmac_get_pfc_frm_cfg,
};
static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
index ab1e4abdea38..bd2f33a26eee 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -76,6 +76,13 @@
#define CGXX_SMUX_TX_CTL 0x20178
#define CGXX_SMUX_TX_PAUSE_PKT_TIME 0x20110
#define CGXX_SMUX_TX_PAUSE_PKT_INTERVAL 0x20120
+#define CGXX_SMUX_SMAC 0x20108
+#define CGXX_SMUX_CBFC_CTL 0x20218
+#define CGXX_SMUX_CBFC_CTL_RX_EN BIT_ULL(0)
+#define CGXX_SMUX_CBFC_CTL_TX_EN BIT_ULL(1)
+#define CGXX_SMUX_CBFC_CTL_DRP_EN BIT_ULL(2)
+#define CGXX_SMUX_CBFC_CTL_BCK_EN BIT_ULL(3)
+#define CGX_PFC_CLASS_MASK GENMASK_ULL(47, 32)
#define CGXX_GMP_GMI_TX_PAUSE_PKT_TIME 0x38230
#define CGXX_GMP_GMI_TX_PAUSE_PKT_INTERVAL 0x38248
#define CGX_SMUX_TX_CTL_L2P_BP_CONV BIT_ULL(7)
@@ -172,4 +179,10 @@ u64 cgx_lmac_read(int cgx_id, int lmac_id, u64 offset);
int cgx_lmac_addr_update(u8 cgx_id, u8 lmac_id, u8 *mac_addr, u8 index);
u64 cgx_read_dmac_ctrl(void *cgxd, int lmac_id);
u64 cgx_read_dmac_entry(void *cgxd, int index);
+int cgx_lmac_pfc_config(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause,
+ u16 pfc_en);
+int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause,
+ u8 *rx_pause);
+int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause,
+ int pfvf_idx);
#endif /* CGX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
index b33e7d1d0851..f30581bf0688 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
@@ -17,6 +17,8 @@
* @resp: command response
* @link_info: link related information
* @mac_to_index_bmap: Mac address to CGX table index mapping
+ * @rx_fc_pfvf_bmap: Receive flow control enabled netdev mapping
+ * @tx_fc_pfvf_bmap: Transmit flow control enabled netdev mapping
* @event_cb: callback for linkchange events
* @event_cb_lock: lock for serializing callback with unregister
* @cgx: parent cgx port
@@ -33,6 +35,8 @@ struct lmac {
u64 resp;
struct cgx_link_user_info link_info;
struct rsrc_bmap mac_to_index_bmap;
+ struct rsrc_bmap rx_fc_pfvf_bmap;
+ struct rsrc_bmap tx_fc_pfvf_bmap;
struct cgx_event_cb event_cb;
/* lock for serializing callback with unregister */
spinlock_t event_cb_lock;
@@ -110,6 +114,12 @@ struct mac_ops {
int (*mac_rx_tx_enable)(void *cgxd, int lmac_id, bool enable);
int (*mac_tx_enable)(void *cgxd, int lmac_id, bool enable);
+ int (*pfc_config)(void *cgxd, int lmac_id,
+ u8 tx_pause, u8 rx_pause, u16 pfc_en);
+
+ int (*mac_get_pfc_frm_cfg)(void *cgxd, int lmac_id,
+ u8 *tx_pause, u8 *rx_pause);
+
};
struct cgx {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 58e2aeebc14f..550cb11197bf 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -172,6 +172,8 @@ M(RPM_STATS, 0x21C, rpm_stats, msg_req, rpm_stats_rsp) \
M(CGX_MAC_ADDR_RESET, 0x21D, cgx_mac_addr_reset, msg_req, msg_rsp) \
M(CGX_MAC_ADDR_UPDATE, 0x21E, cgx_mac_addr_update, cgx_mac_addr_update_req, \
msg_rsp) \
+M(CGX_PRIO_FLOW_CTRL_CFG, 0x21F, cgx_prio_flow_ctrl_cfg, cgx_pfc_cfg, \
+ cgx_pfc_rsp) \
/* NPA mbox IDs (range 0x400 - 0x5FF) */ \
M(NPA_LF_ALLOC, 0x400, npa_lf_alloc, \
npa_lf_alloc_req, npa_lf_alloc_rsp) \
@@ -609,6 +611,21 @@ struct rpm_stats_rsp {
u64 tx_stats[RPM_TX_STATS_COUNT];
};
+struct cgx_pfc_cfg {
+ struct mbox_msghdr hdr;
+ u8 rx_pause;
+ u8 tx_pause;
+ u16 pfc_en; /* bitmap indicating pfc enabled traffic classes */
+};
+
+struct cgx_pfc_rsp {
+ struct mbox_msghdr hdr;
+ u8 rx_pause;
+ u8 tx_pause;
+};
+
+ /* NPA mbox message formats */
+
struct npc_set_pkind {
struct mbox_msghdr hdr;
#define OTX2_PRIV_FLAGS_DEFAULT BIT_ULL(0)
@@ -1603,6 +1620,8 @@ enum cgx_af_status {
LMAC_AF_ERR_INVALID_PARAM = -1101,
LMAC_AF_ERR_PF_NOT_MAPPED = -1102,
LMAC_AF_ERR_PERM_DENIED = -1103,
+ LMAC_AF_ERR_PFC_ENADIS_PERM_DENIED = -1104,
+ LMAC_AF_ERR_8023PAUSE_ENADIS_PERM_DENIED = -1105,
};
#endif /* MBOX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
index 9ea2f6ac38ec..d7a8aad46e12 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
@@ -32,6 +32,8 @@ static struct mac_ops rpm_mac_ops = {
.mac_enadis_ptp_config = rpm_lmac_ptp_config,
.mac_rx_tx_enable = rpm_lmac_rx_tx_enable,
.mac_tx_enable = rpm_lmac_tx_enable,
+ .pfc_config = rpm_lmac_pfc_config,
+ .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg,
};
struct mac_ops *rpm_get_mac_ops(void)
@@ -96,11 +98,20 @@ int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable)
void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable)
{
rpm_t *rpm = rpmd;
+ struct lmac *lmac;
u64 cfg;
if (!rpm)
return;
+ lmac = lmac_pdata(lmac_id, rpm);
+ if (!lmac)
+ return;
+
+ /* Pause frames are not enabled just return */
+ if (!bitmap_weight(lmac->rx_fc_pfvf_bmap.bmap, lmac->rx_fc_pfvf_bmap.max))
+ return;
+
if (enable) {
cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE;
@@ -122,13 +133,93 @@ int rpm_lmac_get_pause_frm_status(void *rpmd, int lmac_id,
return -ENODEV;
cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
- *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE);
+ if (!(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE)) {
+ *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE);
+ *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE);
+ }
- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
- *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE);
return 0;
}
+static void rpm_cfg_pfc_quanta_thresh(rpm_t *rpm, int lmac_id, u16 pfc_en,
+ bool enable)
+{
+ u64 quanta_offset = 0, quanta_thresh = 0, cfg;
+ int i, shift;
+
+ /* Set pause time and interval */
+ for_each_set_bit(i, (unsigned long *)&pfc_en, 16) {
+ switch (i) {
+ case 0:
+ case 1:
+ quanta_offset = RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA;
+ quanta_thresh = RPMX_MTI_MAC100X_CL01_QUANTA_THRESH;
+ break;
+ case 2:
+ case 3:
+ quanta_offset = RPMX_MTI_MAC100X_CL23_PAUSE_QUANTA;
+ quanta_thresh = RPMX_MTI_MAC100X_CL23_QUANTA_THRESH;
+ break;
+ case 4:
+ case 5:
+ quanta_offset = RPMX_MTI_MAC100X_CL45_PAUSE_QUANTA;
+ quanta_thresh = RPMX_MTI_MAC100X_CL45_QUANTA_THRESH;
+ break;
+ case 6:
+ case 7:
+ quanta_offset = RPMX_MTI_MAC100X_CL67_PAUSE_QUANTA;
+ quanta_thresh = RPMX_MTI_MAC100X_CL67_QUANTA_THRESH;
+ break;
+ case 8:
+ case 9:
+ quanta_offset = RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA;
+ quanta_thresh = RPMX_MTI_MAC100X_CL89_QUANTA_THRESH;
+ break;
+ case 10:
+ case 11:
+ quanta_offset = RPMX_MTI_MAC100X_CL1011_PAUSE_QUANTA;
+ quanta_thresh = RPMX_MTI_MAC100X_CL1011_QUANTA_THRESH;
+ break;
+ case 12:
+ case 13:
+ quanta_offset = RPMX_MTI_MAC100X_CL1213_PAUSE_QUANTA;
+ quanta_thresh = RPMX_MTI_MAC100X_CL1213_QUANTA_THRESH;
+ break;
+ case 14:
+ case 15:
+ quanta_offset = RPMX_MTI_MAC100X_CL1415_PAUSE_QUANTA;
+ quanta_thresh = RPMX_MTI_MAC100X_CL1415_QUANTA_THRESH;
+ break;
+ }
+
+ if (!quanta_offset || !quanta_thresh)
+ continue;
+
+ shift = (i % 2) ? 1 : 0;
+ cfg = rpm_read(rpm, lmac_id, quanta_offset);
+ if (enable) {
+ cfg |= ((u64)RPM_DEFAULT_PAUSE_TIME << shift * 16);
+ } else {
+ if (!shift)
+ cfg &= ~GENMASK_ULL(15, 0);
+ else
+ cfg &= ~GENMASK_ULL(31, 16);
+ }
+ rpm_write(rpm, lmac_id, quanta_offset, cfg);
+
+ cfg = rpm_read(rpm, lmac_id, quanta_thresh);
+ if (enable) {
+ cfg |= ((u64)(RPM_DEFAULT_PAUSE_TIME / 2) << shift * 16);
+ } else {
+ if (!shift)
+ cfg &= ~GENMASK_ULL(15, 0);
+ else
+ cfg &= ~GENMASK_ULL(31, 16);
+ }
+ rpm_write(rpm, lmac_id, quanta_thresh, cfg);
+ }
+}
+
int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause,
u8 rx_pause)
{
@@ -152,8 +243,12 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause,
cfg = rpm_read(rpm, 0, RPMX_CMR_RX_OVR_BP);
if (tx_pause) {
+ /* Configure CL0 Pause Quanta & threshold for 802.3X frames */
+ rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 1, true);
cfg &= ~RPMX_CMR_RX_OVR_BP_EN(lmac_id);
} else {
+ /* Disable all Pause Quanta & threshold values */
+ rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false);
cfg |= RPMX_CMR_RX_OVR_BP_EN(lmac_id);
cfg &= ~RPMX_CMR_RX_OVR_BP_BP(lmac_id);
}
@@ -166,56 +261,20 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable)
rpm_t *rpm = rpmd;
u64 cfg;
- if (enable) {
- /* Enable 802.3 pause frame mode */
- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
- cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE;
- rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
-
- /* Enable receive pause frames */
- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
- cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE;
- rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
-
- /* Enable forward pause to TX block */
- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
- cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE;
- rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
-
- /* Enable pause frames transmission */
- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
- cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE;
- rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
-
- /* Set pause time and interval */
- cfg = rpm_read(rpm, lmac_id,
- RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA);
- cfg &= ~0xFFFFULL;
- rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA,
- cfg | RPM_DEFAULT_PAUSE_TIME);
- /* Set pause interval as the hardware default is too short */
- cfg = rpm_read(rpm, lmac_id,
- RPMX_MTI_MAC100X_CL01_QUANTA_THRESH);
- cfg &= ~0xFFFFULL;
- rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_CL01_QUANTA_THRESH,
- cfg | (RPM_DEFAULT_PAUSE_TIME / 2));
-
- } else {
- /* ALL pause frames received are completely ignored */
- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
- cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE;
- rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
+ /* ALL pause frames received are completely ignored */
+ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
+ cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE;
+ rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
- /* Disable forward pause to TX block */
- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
- cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE;
- rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
+ /* Disable forward pause to TX block */
+ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
+ cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE;
+ rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
- /* Disable pause frames transmission */
- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
- cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE;
- rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
- }
+ /* Disable pause frames transmission */
+ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
+ cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE;
+ rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
}
int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat)
@@ -323,3 +382,65 @@ void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable)
cfg &= ~RPMX_RX_TS_PREPEND;
rpm_write(rpm, lmac_id, RPMX_CMRX_CFG, cfg);
}
+
+int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 pfc_en)
+{
+ rpm_t *rpm = rpmd;
+ u64 cfg;
+
+ if (!is_lmac_valid(rpm, lmac_id))
+ return -ENODEV;
+
+ /* reset PFC class quanta and threshold */
+ rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false);
+
+ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
+
+ if (rx_pause) {
+ cfg &= ~(RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE |
+ RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE |
+ RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD);
+ } else {
+ cfg |= (RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE |
+ RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE |
+ RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD);
+ }
+
+ if (tx_pause) {
+ rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, pfc_en, true);
+ cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE;
+ } else {
+ rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xfff, false);
+ cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE;
+ }
+
+ if (!rx_pause && !tx_pause)
+ cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE;
+ else
+ cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE;
+
+ rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
+
+ cfg = rpm_read(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL);
+ cfg = FIELD_SET(RPM_PFC_CLASS_MASK, pfc_en, cfg);
+ rpm_write(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL, cfg);
+
+ return 0;
+}
+
+int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, u8 *rx_pause)
+{
+ rpm_t *rpm = rpmd;
+ u64 cfg;
+
+ if (!is_lmac_valid(rpm, lmac_id))
+ return -ENODEV;
+
+ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
+ if (cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE) {
+ *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE);
+ *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE);
+ }
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
index ff580311edd0..9ab8d49dd180 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
@@ -33,7 +33,21 @@
#define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE BIT_ULL(8)
#define RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE BIT_ULL(19)
#define RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA 0x80A8
+#define RPMX_MTI_MAC100X_CL23_PAUSE_QUANTA 0x80B0
+#define RPMX_MTI_MAC100X_CL45_PAUSE_QUANTA 0x80B8
+#define RPMX_MTI_MAC100X_CL67_PAUSE_QUANTA 0x80C0
#define RPMX_MTI_MAC100X_CL01_QUANTA_THRESH 0x80C8
+#define RPMX_MTI_MAC100X_CL23_QUANTA_THRESH 0x80D0
+#define RPMX_MTI_MAC100X_CL45_QUANTA_THRESH 0x80D8
+#define RPMX_MTI_MAC100X_CL67_QUANTA_THRESH 0x80E0
+#define RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA 0x8108
+#define RPMX_MTI_MAC100X_CL1011_PAUSE_QUANTA 0x8110
+#define RPMX_MTI_MAC100X_CL1213_PAUSE_QUANTA 0x8118
+#define RPMX_MTI_MAC100X_CL1415_PAUSE_QUANTA 0x8120
+#define RPMX_MTI_MAC100X_CL89_QUANTA_THRESH 0x8128
+#define RPMX_MTI_MAC100X_CL1011_QUANTA_THRESH 0x8130
+#define RPMX_MTI_MAC100X_CL1213_QUANTA_THRESH 0x8138
+#define RPMX_MTI_MAC100X_CL1415_QUANTA_THRESH 0x8140
#define RPM_DEFAULT_PAUSE_TIME 0xFFFF
#define RPMX_CMR_RX_OVR_BP 0x4120
#define RPMX_CMR_RX_OVR_BP_EN(x) BIT_ULL((x) + 8)
@@ -45,6 +59,18 @@
#define RPM_LMAC_FWI 0xa
#define RPM_TX_EN BIT_ULL(0)
#define RPM_RX_EN BIT_ULL(1)
+#define RPMX_CMRX_PRT_CBFC_CTL 0x5B08
+#define RPMX_CMRX_PRT_CBFC_CTL_LOGL_EN_RX_SHIFT 33
+#define RPMX_CMRX_PRT_CBFC_CTL_PHYS_BP_SHIFT 16
+#define RPMX_CMRX_PRT_CBFC_CTL_LOGL_EN_TX_SHIFT 0
+#define RPM_PFC_CLASS_MASK GENMASK_ULL(48, 33)
+#define RPMX_MTI_MAC100X_CL89_QUANTA_THRESH 0x8128
+#define RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_PAD_EN BIT_ULL(11)
+#define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE BIT_ULL(8)
+#define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD BIT_ULL(7)
+#define RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA 0x80A8
+#define RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA 0x8108
+#define RPM_DEFAULT_PAUSE_TIME 0xFFFF
/* Function Declarations */
int rpm_get_nr_lmacs(void *rpmd);
@@ -61,4 +87,8 @@ int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat);
void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable);
int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable);
int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable);
+int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause,
+ u16 pfc_en);
+int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause,
+ u8 *rx_pause);
#endif /* RPM_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 5ed94cfb47d2..513b43ecd5be 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -807,6 +807,9 @@ u32 rvu_cgx_get_fifolen(struct rvu *rvu);
void *rvu_first_cgx_pdata(struct rvu *rvu);
int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id);
int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable);
+int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause,
+ u16 pfc_en);
+int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause);
int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf,
int type);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index 8a7ac5a8b821..9ffe99830e34 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -863,6 +863,45 @@ int rvu_mbox_handler_cgx_intlbk_disable(struct rvu *rvu, struct msg_req *req,
return 0;
}
+int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause)
+{
+ int pf = rvu_get_pf(pcifunc);
+ u8 rx_pfc = 0, tx_pfc = 0;
+ struct mac_ops *mac_ops;
+ u8 cgx_id, lmac_id;
+ void *cgxd;
+
+ if (!is_mac_feature_supported(rvu, pf, RVU_LMAC_FEAT_FC))
+ return 0;
+
+ /* This msg is expected only from PF/VFs that are mapped to CGX LMACs,
+ * if received from other PF/VF simply ACK, nothing to do.
+ */
+ if (!is_pf_cgxmapped(rvu, pf))
+ return LMAC_AF_ERR_PF_NOT_MAPPED;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+ cgxd = rvu_cgx_pdata(cgx_id, rvu);
+ mac_ops = get_mac_ops(cgxd);
+
+ mac_ops->mac_get_pfc_frm_cfg(cgxd, lmac_id, &tx_pfc, &rx_pfc);
+ if (tx_pfc || rx_pfc) {
+ dev_warn(rvu->dev,
+ "Can not configure 802.3X flow control as PFC frames are enabled");
+ return LMAC_AF_ERR_8023PAUSE_ENADIS_PERM_DENIED;
+ }
+
+ mutex_lock(&rvu->rsrc_lock);
+ if (verify_lmac_fc_cfg(cgxd, lmac_id, tx_pause, rx_pause,
+ pcifunc & RVU_PFVF_FUNC_MASK)) {
+ mutex_unlock(&rvu->rsrc_lock);
+ return LMAC_AF_ERR_PERM_DENIED;
+ }
+ mutex_unlock(&rvu->rsrc_lock);
+
+ return mac_ops->mac_enadis_pause_frm(cgxd, lmac_id, tx_pause, rx_pause);
+}
+
int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu,
struct cgx_pause_frm_cfg *req,
struct cgx_pause_frm_cfg *rsp)
@@ -870,11 +909,9 @@ int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu,
int pf = rvu_get_pf(req->hdr.pcifunc);
struct mac_ops *mac_ops;
u8 cgx_id, lmac_id;
+ int err = 0;
void *cgxd;
- if (!is_mac_feature_supported(rvu, pf, RVU_LMAC_FEAT_FC))
- return 0;
-
/* This msg is expected only from PF/VFs that are mapped to CGX LMACs,
* if received from other PF/VF simply ACK, nothing to do.
*/
@@ -886,13 +923,11 @@ int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu,
mac_ops = get_mac_ops(cgxd);
if (req->set)
- mac_ops->mac_enadis_pause_frm(cgxd, lmac_id,
- req->tx_pause, req->rx_pause);
+ err = rvu_cgx_cfg_pause_frm(rvu, req->hdr.pcifunc, req->tx_pause, req->rx_pause);
else
- mac_ops->mac_get_pause_frm_status(cgxd, lmac_id,
- &rsp->tx_pause,
- &rsp->rx_pause);
- return 0;
+ mac_ops->mac_get_pause_frm_status(cgxd, lmac_id, &rsp->tx_pause, &rsp->rx_pause);
+
+ return err;
}
int rvu_mbox_handler_cgx_get_phy_fec_stats(struct rvu *rvu, struct msg_req *req,
@@ -1079,3 +1114,67 @@ int rvu_mbox_handler_cgx_mac_addr_update(struct rvu *rvu,
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
return cgx_lmac_addr_update(cgx_id, lmac_id, req->mac_addr, req->index);
}
+
+int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause,
+ u8 rx_pause, u16 pfc_en)
+{
+ int pf = rvu_get_pf(pcifunc);
+ u8 rx_8023 = 0, tx_8023 = 0;
+ struct mac_ops *mac_ops;
+ u8 cgx_id, lmac_id;
+ void *cgxd;
+
+ /* This msg is expected only from PF/VFs that are mapped to CGX LMACs,
+ * if received from other PF/VF simply ACK, nothing to do.
+ */
+ if (!is_pf_cgxmapped(rvu, pf))
+ return -ENODEV;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+ cgxd = rvu_cgx_pdata(cgx_id, rvu);
+ mac_ops = get_mac_ops(cgxd);
+
+ mac_ops->mac_get_pause_frm_status(cgxd, lmac_id, &tx_8023, &rx_8023);
+ if (tx_8023 || rx_8023) {
+ dev_warn(rvu->dev,
+ "Can not configure PFC as 802.3X pause frames are enabled");
+ return LMAC_AF_ERR_PFC_ENADIS_PERM_DENIED;
+ }
+
+ mutex_lock(&rvu->rsrc_lock);
+ if (verify_lmac_fc_cfg(cgxd, lmac_id, tx_pause, rx_pause,
+ pcifunc & RVU_PFVF_FUNC_MASK)) {
+ mutex_unlock(&rvu->rsrc_lock);
+ return LMAC_AF_ERR_PERM_DENIED;
+ }
+ mutex_unlock(&rvu->rsrc_lock);
+
+ return mac_ops->pfc_config(cgxd, lmac_id, tx_pause, rx_pause, pfc_en);
+}
+
+int rvu_mbox_handler_cgx_prio_flow_ctrl_cfg(struct rvu *rvu,
+ struct cgx_pfc_cfg *req,
+ struct cgx_pfc_rsp *rsp)
+{
+ int pf = rvu_get_pf(req->hdr.pcifunc);
+ struct mac_ops *mac_ops;
+ u8 cgx_id, lmac_id;
+ void *cgxd;
+ int err;
+
+ /* This msg is expected only from PF/VFs that are mapped to CGX LMACs,
+ * if received from other PF/VF simply ACK, nothing to do.
+ */
+ if (!is_pf_cgxmapped(rvu, pf))
+ return -ENODEV;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+ cgxd = rvu_cgx_pdata(cgx_id, rvu);
+ mac_ops = get_mac_ops(cgxd);
+
+ err = rvu_cgx_prio_flow_ctrl_cfg(rvu, req->hdr.pcifunc, req->tx_pause,
+ req->rx_pause, req->pfc_en);
+
+ mac_ops->mac_get_pfc_frm_cfg(cgxd, lmac_id, &rsp->tx_pause, &rsp->rx_pause);
+ return err;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 97fb61915379..0fa625e2528e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -296,7 +296,6 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf,
struct rvu_hwinfo *hw = rvu->hw;
struct sdp_node_info *sdp_info;
int pkind, pf, vf, lbkid, vfid;
- struct mac_ops *mac_ops;
u8 cgx_id, lmac_id;
bool from_vf;
int err;
@@ -326,13 +325,6 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf,
cgx_set_pkind(rvu_cgx_pdata(cgx_id, rvu), lmac_id, pkind);
rvu_npc_set_pkind(rvu, pkind, pfvf);
- mac_ops = get_mac_ops(rvu_cgx_pdata(cgx_id, rvu));
-
- /* By default we enable pause frames */
- if ((pcifunc & RVU_PFVF_FUNC_MASK) == 0)
- mac_ops->mac_enadis_pause_frm(rvu_cgx_pdata(cgx_id,
- rvu),
- lmac_id, true, true);
break;
case NIX_INTF_TYPE_LBK:
vf = (pcifunc & RVU_PFVF_FUNC_MASK) - 1;
@@ -533,7 +525,7 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
*/
switch (type) {
case NIX_INTF_TYPE_CGX:
- if ((req->chan_base + req->chan_cnt) > 15)
+ if ((req->chan_base + req->chan_cnt) > 16)
return -EINVAL;
rvu_get_cgx_lmac_id(pfvf->cgx_lmac, &cgx_id, &lmac_id);
/* Assign bpid based on cgx, lmac and chan id */
@@ -4578,6 +4570,12 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf)
pfvf->hw_rx_tstamp_en = false;
}
+ /* reset priority flow control config */
+ rvu_cgx_prio_flow_ctrl_cfg(rvu, pcifunc, 0, 0, 0);
+
+ /* reset 802.3x flow control config */
+ rvu_cgx_cfg_pause_frm(rvu, pcifunc, 0, 0);
+
nix_ctx_free(rvu, pfvf);
nix_free_all_bandprof(rvu, pcifunc);
@@ -5314,6 +5312,7 @@ int rvu_nix_setup_ratelimit_aggr(struct rvu *rvu, u16 pcifunc,
aq_req.ctype = NIX_AQ_CTYPE_BANDPROF;
aq_req.op = NIX_AQ_INSTOP_WRITE;
memcpy(&aq_req.prof, &aq_rsp.prof, sizeof(struct nix_bandprof_s));
+ memset((char *)&aq_req.prof_mask, 0xff, sizeof(struct nix_bandprof_s));
/* Clear higher layer enable bit in the mid profile, just in case */
aq_req.prof.hl_en = 0;
aq_req.prof_mask.hl_en = 1;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
index 0048b5946712..d463dc72d80a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
@@ -11,4 +11,7 @@ rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
otx2_devlink.o
rvu_nicvf-y := otx2_vf.o otx2_devlink.o
+rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o
+rvu_nicvf-$(CONFIG_DCB) += otx2_dcbnl.o
+
ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 66da31f30d3e..2c9760814bc3 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -222,8 +222,11 @@ EXPORT_SYMBOL(otx2_set_mac_address);
int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu)
{
struct nix_frs_cfg *req;
+ u16 maxlen;
int err;
+ maxlen = otx2_get_max_mtu(pfvf) + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN;
+
mutex_lock(&pfvf->mbox.lock);
req = otx2_mbox_alloc_msg_nix_set_hw_frs(&pfvf->mbox);
if (!req) {
@@ -233,6 +236,10 @@ int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu)
req->maxlen = pfvf->netdev->mtu + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN;
+ /* Use max receive length supported by hardware for loopback devices */
+ if (is_otx2_lbkvf(pfvf->pdev))
+ req->maxlen = maxlen;
+
err = otx2_sync_mbox_msg(&pfvf->mbox);
mutex_unlock(&pfvf->mbox.lock);
return err;
@@ -262,6 +269,7 @@ unlock:
mutex_unlock(&pfvf->mbox.lock);
return err;
}
+EXPORT_SYMBOL(otx2_config_pause_frm);
int otx2_set_flowkey_cfg(struct otx2_nic *pfvf)
{
@@ -931,7 +939,11 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
if (!is_otx2_lbkvf(pfvf->pdev)) {
/* Enable receive CQ backpressure */
aq->cq.bp_ena = 1;
+#ifdef CONFIG_DCB
+ aq->cq.bpid = pfvf->bpid[pfvf->queue_to_pfc_map[qidx]];
+#else
aq->cq.bpid = pfvf->bpid[0];
+#endif
/* Set backpressure level is same as cq pass level */
aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
@@ -1211,7 +1223,11 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
*/
if (pfvf->nix_blkaddr == BLKADDR_NIX1)
aq->aura.bp_ena = 1;
+#ifdef CONFIG_DCB
+ aq->aura.nix0_bpid = pfvf->bpid[pfvf->queue_to_pfc_map[aura_id]];
+#else
aq->aura.nix0_bpid = pfvf->bpid[0];
+#endif
/* Set backpressure level for RQ's Aura */
aq->aura.bp = RQ_BP_LVL_AURA;
@@ -1538,11 +1554,18 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable)
return -ENOMEM;
req->chan_base = 0;
- req->chan_cnt = 1;
+#ifdef CONFIG_DCB
+ req->chan_cnt = pfvf->pfc_en ? IEEE_8021QAZ_MAX_TCS : 1;
+ req->bpid_per_chan = pfvf->pfc_en ? 1 : 0;
+#else
+ req->chan_cnt = 1;
req->bpid_per_chan = 0;
+#endif
+
return otx2_sync_mbox_msg(&pfvf->mbox);
}
+EXPORT_SYMBOL(otx2_nix_config_bp);
/* Mbox message handlers */
void mbox_handler_cgx_stats(struct otx2_nic *pfvf,
@@ -1704,6 +1727,56 @@ out:
}
EXPORT_SYMBOL(otx2_get_max_mtu);
+int otx2_handle_ntuple_tc_features(struct net_device *netdev, netdev_features_t features)
+{
+ netdev_features_t changed = features ^ netdev->features;
+ struct otx2_nic *pfvf = netdev_priv(netdev);
+ bool ntuple = !!(features & NETIF_F_NTUPLE);
+ bool tc = !!(features & NETIF_F_HW_TC);
+
+ if ((changed & NETIF_F_NTUPLE) && !ntuple)
+ otx2_destroy_ntuple_flows(pfvf);
+
+ if ((changed & NETIF_F_NTUPLE) && ntuple) {
+ if (!pfvf->flow_cfg->max_flows) {
+ netdev_err(netdev,
+ "Can't enable NTUPLE, MCAM entries not allocated\n");
+ return -EINVAL;
+ }
+ }
+
+ if ((changed & NETIF_F_HW_TC) && tc) {
+ if (!pfvf->flow_cfg->max_flows) {
+ netdev_err(netdev,
+ "Can't enable TC, MCAM entries not allocated\n");
+ return -EINVAL;
+ }
+ }
+
+ if ((changed & NETIF_F_HW_TC) && !tc &&
+ pfvf->flow_cfg && pfvf->flow_cfg->nr_flows) {
+ netdev_err(netdev, "Can't disable TC hardware offload while flows are active\n");
+ return -EBUSY;
+ }
+
+ if ((changed & NETIF_F_NTUPLE) && ntuple &&
+ (netdev->features & NETIF_F_HW_TC) && !(changed & NETIF_F_HW_TC)) {
+ netdev_err(netdev,
+ "Can't enable NTUPLE when TC is active, disable TC and retry\n");
+ return -EINVAL;
+ }
+
+ if ((changed & NETIF_F_HW_TC) && tc &&
+ (netdev->features & NETIF_F_NTUPLE) && !(changed & NETIF_F_NTUPLE)) {
+ netdev_err(netdev,
+ "Can't enable TC when NTUPLE is active, disable NTUPLE and retry\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(otx2_handle_ntuple_tc_features);
+
#define M(_name, _id, _fn_name, _req_type, _rsp_type) \
int __weak \
otx2_mbox_up_handler_ ## _fn_name(struct otx2_nic *pfvf, \
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 14509fc64cce..7724f17ec31f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -178,6 +178,9 @@ struct otx2_hw {
u16 rqpool_cnt;
u16 sqpool_cnt;
+#define OTX2_DEFAULT_RBUF_LEN 2048
+ u16 rbuf_len;
+
/* NPA */
u32 stack_pg_ptrs; /* No of ptrs per stack page */
u32 stack_pg_bytes; /* Size of stack page */
@@ -396,6 +399,11 @@ struct otx2_nic {
/* Devlink */
struct otx2_devlink *dl;
+#ifdef CONFIG_DCB
+ /* PFC */
+ u8 pfc_en;
+ u8 *queue_to_pfc_map;
+#endif
};
static inline bool is_otx2_lbkvf(struct pci_dev *pdev)
@@ -863,6 +871,8 @@ int otx2_enable_rxvlan(struct otx2_nic *pf, bool enable);
int otx2_install_rxvlan_offload_flow(struct otx2_nic *pfvf);
bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, u16 qidx);
u16 otx2_get_max_mtu(struct otx2_nic *pfvf);
+int otx2_handle_ntuple_tc_features(struct net_device *netdev,
+ netdev_features_t features);
/* tc support */
int otx2_init_tc(struct otx2_nic *nic);
void otx2_shutdown_tc(struct otx2_nic *nic);
@@ -876,4 +886,11 @@ int otx2_dmacflt_remove(struct otx2_nic *pf, const u8 *mac, u8 bit_pos);
int otx2_dmacflt_update(struct otx2_nic *pf, u8 *mac, u8 bit_pos);
void otx2_dmacflt_reinstall_flows(struct otx2_nic *pf);
void otx2_dmacflt_update_pfmac_flow(struct otx2_nic *pfvf);
+
+#ifdef CONFIG_DCB
+/* DCB support*/
+void otx2_update_bpid_in_rqctx(struct otx2_nic *pfvf, int vlan_prio, int qidx, bool pfc_enable);
+int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf);
+int otx2_dcbnl_set_ops(struct net_device *dev);
+#endif
#endif /* OTX2_COMMON_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
new file mode 100644
index 000000000000..723d2506d309
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2021 Marvell.
+ *
+ */
+
+#include "otx2_common.h"
+
+int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf)
+{
+ struct cgx_pfc_cfg *req;
+ struct cgx_pfc_rsp *rsp;
+ int err = 0;
+
+ if (is_otx2_lbkvf(pfvf->pdev))
+ return 0;
+
+ mutex_lock(&pfvf->mbox.lock);
+ req = otx2_mbox_alloc_msg_cgx_prio_flow_ctrl_cfg(&pfvf->mbox);
+ if (!req) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ if (pfvf->pfc_en) {
+ req->rx_pause = true;
+ req->tx_pause = true;
+ } else {
+ req->rx_pause = false;
+ req->tx_pause = false;
+ }
+ req->pfc_en = pfvf->pfc_en;
+
+ if (!otx2_sync_mbox_msg(&pfvf->mbox)) {
+ rsp = (struct cgx_pfc_rsp *)
+ otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr);
+ if (req->rx_pause != rsp->rx_pause || req->tx_pause != rsp->tx_pause) {
+ dev_warn(pfvf->dev,
+ "Failed to config PFC\n");
+ err = -EPERM;
+ }
+ }
+unlock:
+ mutex_unlock(&pfvf->mbox.lock);
+ return err;
+}
+
+void otx2_update_bpid_in_rqctx(struct otx2_nic *pfvf, int vlan_prio, int qidx,
+ bool pfc_enable)
+{
+ bool if_up = netif_running(pfvf->netdev);
+ struct npa_aq_enq_req *npa_aq;
+ struct nix_aq_enq_req *aq;
+ int err = 0;
+
+ if (pfvf->queue_to_pfc_map[qidx] && pfc_enable) {
+ dev_warn(pfvf->dev,
+ "PFC enable not permitted as Priority %d already mapped to Queue %d\n",
+ pfvf->queue_to_pfc_map[qidx], qidx);
+ return;
+ }
+
+ if (if_up) {
+ netif_tx_stop_all_queues(pfvf->netdev);
+ netif_carrier_off(pfvf->netdev);
+ }
+
+ pfvf->queue_to_pfc_map[qidx] = vlan_prio;
+
+ aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
+ if (!aq) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ aq->cq.bpid = pfvf->bpid[vlan_prio];
+ aq->cq_mask.bpid = GENMASK(8, 0);
+
+ /* Fill AQ info */
+ aq->qidx = qidx;
+ aq->ctype = NIX_AQ_CTYPE_CQ;
+ aq->op = NIX_AQ_INSTOP_WRITE;
+
+ otx2_sync_mbox_msg(&pfvf->mbox);
+
+ npa_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
+ if (!npa_aq) {
+ err = -ENOMEM;
+ goto out;
+ }
+ npa_aq->aura.nix0_bpid = pfvf->bpid[vlan_prio];
+ npa_aq->aura_mask.nix0_bpid = GENMASK(8, 0);
+
+ /* Fill NPA AQ info */
+ npa_aq->aura_id = qidx;
+ npa_aq->ctype = NPA_AQ_CTYPE_AURA;
+ npa_aq->op = NPA_AQ_INSTOP_WRITE;
+ otx2_sync_mbox_msg(&pfvf->mbox);
+
+out:
+ if (if_up) {
+ netif_carrier_on(pfvf->netdev);
+ netif_tx_start_all_queues(pfvf->netdev);
+ }
+
+ if (err)
+ dev_warn(pfvf->dev,
+ "Updating BPIDs in CQ and Aura contexts of RQ%d failed with err %d\n",
+ qidx, err);
+}
+
+static int otx2_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc)
+{
+ struct otx2_nic *pfvf = netdev_priv(dev);
+
+ pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS;
+ pfc->pfc_en = pfvf->pfc_en;
+
+ return 0;
+}
+
+static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc)
+{
+ struct otx2_nic *pfvf = netdev_priv(dev);
+ int err;
+
+ /* Save PFC configuration to interface */
+ pfvf->pfc_en = pfc->pfc_en;
+
+ err = otx2_config_priority_flow_ctrl(pfvf);
+ if (err)
+ return err;
+
+ /* Request Per channel Bpids */
+ if (pfc->pfc_en)
+ otx2_nix_config_bp(pfvf, true);
+
+ return 0;
+}
+
+static u8 otx2_dcbnl_getdcbx(struct net_device __always_unused *dev)
+{
+ return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+}
+
+static u8 otx2_dcbnl_setdcbx(struct net_device __always_unused *dev, u8 mode)
+{
+ return (mode != (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE)) ? 1 : 0;
+}
+
+static const struct dcbnl_rtnl_ops otx2_dcbnl_ops = {
+ .ieee_getpfc = otx2_dcbnl_ieee_getpfc,
+ .ieee_setpfc = otx2_dcbnl_ieee_setpfc,
+ .getdcbx = otx2_dcbnl_getdcbx,
+ .setdcbx = otx2_dcbnl_setdcbx,
+};
+
+int otx2_dcbnl_set_ops(struct net_device *dev)
+{
+ struct otx2_nic *pfvf = netdev_priv(dev);
+
+ pfvf->queue_to_pfc_map = devm_kzalloc(pfvf->dev, pfvf->hw.rx_queues,
+ GFP_KERNEL);
+ if (!pfvf->queue_to_pfc_map)
+ return -ENOMEM;
+ dev->dcbnl_ops = &otx2_dcbnl_ops;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index d85db90632d6..abe5267210ef 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -371,6 +371,7 @@ static void otx2_get_ringparam(struct net_device *netdev,
ring->rx_pending = qs->rqe_cnt ? qs->rqe_cnt : Q_COUNT(Q_SIZE_256);
ring->tx_max_pending = Q_COUNT(Q_SIZE_MAX);
ring->tx_pending = qs->sqe_cnt ? qs->sqe_cnt : Q_COUNT(Q_SIZE_4K);
+ kernel_ring->rx_buf_len = pfvf->hw.rbuf_len;
}
static int otx2_set_ringparam(struct net_device *netdev,
@@ -379,6 +380,8 @@ static int otx2_set_ringparam(struct net_device *netdev,
struct netlink_ext_ack *extack)
{
struct otx2_nic *pfvf = netdev_priv(netdev);
+ u32 rx_buf_len = kernel_ring->rx_buf_len;
+ u32 old_rx_buf_len = pfvf->hw.rbuf_len;
bool if_up = netif_running(netdev);
struct otx2_qset *qs = &pfvf->qset;
u32 rx_count, tx_count;
@@ -386,6 +389,15 @@ static int otx2_set_ringparam(struct net_device *netdev,
if (ring->rx_mini_pending || ring->rx_jumbo_pending)
return -EINVAL;
+ /* Hardware supports max size of 32k for a receive buffer
+ * and 1536 is typical ethernet frame size.
+ */
+ if (rx_buf_len && (rx_buf_len < 1536 || rx_buf_len > 32768)) {
+ netdev_err(netdev,
+ "Receive buffer range is 1536 - 32768");
+ return -EINVAL;
+ }
+
/* Permitted lengths are 16 64 256 1K 4K 16K 64K 256K 1M */
rx_count = ring->rx_pending;
/* On some silicon variants a skid or reserved CQEs are
@@ -403,7 +415,8 @@ static int otx2_set_ringparam(struct net_device *netdev,
Q_COUNT(Q_SIZE_4K), Q_COUNT(Q_SIZE_MAX));
tx_count = Q_COUNT(Q_SIZE(tx_count, 3));
- if (tx_count == qs->sqe_cnt && rx_count == qs->rqe_cnt)
+ if (tx_count == qs->sqe_cnt && rx_count == qs->rqe_cnt &&
+ rx_buf_len == old_rx_buf_len)
return 0;
if (if_up)
@@ -413,6 +426,8 @@ static int otx2_set_ringparam(struct net_device *netdev,
qs->sqe_cnt = tx_count;
qs->rqe_cnt = rx_count;
+ pfvf->hw.rbuf_len = rx_buf_len;
+
if (if_up)
return netdev->netdev_ops->ndo_open(netdev);
@@ -1207,6 +1222,7 @@ end:
static const struct ethtool_ops otx2_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_MAX_FRAMES,
+ .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN,
.get_link = otx2_get_link,
.get_drvinfo = otx2_get_drvinfo,
.get_strings = otx2_get_strings,
@@ -1326,6 +1342,7 @@ static int otx2vf_get_link_ksettings(struct net_device *netdev,
static const struct ethtool_ops otx2vf_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_MAX_FRAMES,
+ .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN,
.get_link = otx2_get_link,
.get_drvinfo = otx2vf_get_drvinfo,
.get_strings = otx2vf_get_strings,
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
index 77a13fb555fb..54f235c216a9 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
@@ -21,8 +21,10 @@ struct otx2_flow {
u16 entry;
bool is_vf;
u8 rss_ctx_id;
+#define DMAC_FILTER_RULE BIT(0)
+#define PFC_FLOWCTRL_RULE BIT(1)
+ u16 rule_type;
int vf;
- bool dmac_filter;
};
enum dmac_req {
@@ -899,6 +901,9 @@ static int otx2_is_flow_rule_dmacfilter(struct otx2_nic *pfvf,
static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow)
{
u64 ring_cookie = flow->flow_spec.ring_cookie;
+#ifdef CONFIG_DCB
+ int vlan_prio, qidx, pfc_rule = 0;
+#endif
struct npc_install_flow_req *req;
int err, vf = 0;
@@ -940,6 +945,24 @@ static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow)
mutex_unlock(&pfvf->mbox.lock);
return -EINVAL;
}
+
+#ifdef CONFIG_DCB
+ /* Identify PFC rule if PFC enabled and ntuple rule is vlan */
+ if (!vf && (req->features & BIT_ULL(NPC_OUTER_VID)) &&
+ pfvf->pfc_en && req->op != NIX_RX_ACTIONOP_RSS) {
+ vlan_prio = ntohs(req->packet.vlan_tci) &
+ ntohs(req->mask.vlan_tci);
+
+ /* Get the priority */
+ vlan_prio >>= 13;
+ flow->rule_type |= PFC_FLOWCTRL_RULE;
+ /* Check if PFC enabled for this priority */
+ if (pfvf->pfc_en & BIT(vlan_prio)) {
+ pfc_rule = true;
+ qidx = req->index;
+ }
+ }
+#endif
}
/* ethtool ring_cookie has (VF + 1) for VF */
@@ -951,6 +974,12 @@ static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow)
/* Send message to AF */
err = otx2_sync_mbox_msg(&pfvf->mbox);
+
+#ifdef CONFIG_DCB
+ if (!err && pfc_rule)
+ otx2_update_bpid_in_rqctx(pfvf, vlan_prio, qidx, true);
+#endif
+
mutex_unlock(&pfvf->mbox.lock);
return err;
}
@@ -966,7 +995,7 @@ static int otx2_add_flow_with_pfmac(struct otx2_nic *pfvf,
return -ENOMEM;
pf_mac->entry = 0;
- pf_mac->dmac_filter = true;
+ pf_mac->rule_type |= DMAC_FILTER_RULE;
pf_mac->location = pfvf->flow_cfg->max_flows;
memcpy(&pf_mac->flow_spec, &flow->flow_spec,
sizeof(struct ethtool_rx_flow_spec));
@@ -1031,7 +1060,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
eth_hdr = &flow->flow_spec.h_u.ether_spec;
/* Sync dmac filter table with updated fields */
- if (flow->dmac_filter)
+ if (flow->rule_type & DMAC_FILTER_RULE)
return otx2_dmacflt_update(pfvf, eth_hdr->h_dest,
flow->entry);
@@ -1052,7 +1081,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
if (!test_bit(0, &flow_cfg->dmacflt_bmap))
otx2_add_flow_with_pfmac(pfvf, flow);
- flow->dmac_filter = true;
+ flow->rule_type |= DMAC_FILTER_RULE;
flow->entry = find_first_zero_bit(&flow_cfg->dmacflt_bmap,
flow_cfg->dmacflt_max_flows);
fsp->location = flow_cfg->max_flows + flow->entry;
@@ -1120,7 +1149,7 @@ static void otx2_update_rem_pfmac(struct otx2_nic *pfvf, int req)
bool found = false;
list_for_each_entry(iter, &pfvf->flow_cfg->flow_list, list) {
- if (iter->dmac_filter && iter->entry == 0) {
+ if ((iter->rule_type & DMAC_FILTER_RULE) && iter->entry == 0) {
eth_hdr = &iter->flow_spec.h_u.ether_spec;
if (req == DMAC_ADDR_DEL) {
otx2_dmacflt_remove(pfvf, eth_hdr->h_dest,
@@ -1156,7 +1185,7 @@ int otx2_remove_flow(struct otx2_nic *pfvf, u32 location)
if (!flow)
return -ENOENT;
- if (flow->dmac_filter) {
+ if (flow->rule_type & DMAC_FILTER_RULE) {
struct ethhdr *eth_hdr = &flow->flow_spec.h_u.ether_spec;
/* user not allowed to remove dmac filter with interface mac */
@@ -1174,6 +1203,13 @@ int otx2_remove_flow(struct otx2_nic *pfvf, u32 location)
flow_cfg->dmacflt_max_flows) == 1)
otx2_update_rem_pfmac(pfvf, DMAC_ADDR_DEL);
} else {
+#ifdef CONFIG_DCB
+ if (flow->rule_type & PFC_FLOWCTRL_RULE)
+ otx2_update_bpid_in_rqctx(pfvf, 0,
+ flow->flow_spec.ring_cookie,
+ false);
+#endif
+
err = otx2_remove_flow_msg(pfvf, flow->entry, false);
}
@@ -1383,7 +1419,7 @@ void otx2_dmacflt_reinstall_flows(struct otx2_nic *pf)
struct ethhdr *eth_hdr;
list_for_each_entry(iter, &pf->flow_cfg->flow_list, list) {
- if (iter->dmac_filter) {
+ if (iter->rule_type & DMAC_FILTER_RULE) {
eth_hdr = &iter->flow_spec.h_u.ether_spec;
otx2_dmacflt_add(pf, eth_hdr->h_dest,
iter->entry);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index d39341e4ab37..a5369167ab54 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1311,6 +1311,9 @@ static int otx2_get_rbuf_size(struct otx2_nic *pf, int mtu)
int total_size;
int rbuf_size;
+ if (pf->hw.rbuf_len)
+ return ALIGN(pf->hw.rbuf_len, OTX2_ALIGN) + OTX2_HEAD_ROOM;
+
/* The data transferred by NIX to memory consists of actual packet
* plus additional data which has timestamp and/or EDSA/HIGIG2
* headers if interface is configured in corresponding modes.
@@ -1694,9 +1697,6 @@ int otx2_open(struct net_device *netdev)
if (pf->linfo.link_up && !(pf->pcifunc & RVU_PFVF_FUNC_MASK))
otx2_handle_link_event(pf);
- /* Restore pause frame settings */
- otx2_config_pause_frm(pf);
-
/* Install DMAC Filters */
if (pf->flags & OTX2_FLAG_DMACFLTR_SUPPORT)
otx2_dmacflt_reinstall_flows(pf);
@@ -1863,9 +1863,7 @@ static int otx2_set_features(struct net_device *netdev,
netdev_features_t features)
{
netdev_features_t changed = features ^ netdev->features;
- bool ntuple = !!(features & NETIF_F_NTUPLE);
struct otx2_nic *pf = netdev_priv(netdev);
- bool tc = !!(features & NETIF_F_HW_TC);
if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev))
return otx2_cgx_config_loopback(pf,
@@ -1875,46 +1873,7 @@ static int otx2_set_features(struct net_device *netdev,
return otx2_enable_rxvlan(pf,
features & NETIF_F_HW_VLAN_CTAG_RX);
- if ((changed & NETIF_F_NTUPLE) && !ntuple)
- otx2_destroy_ntuple_flows(pf);
-
- if ((changed & NETIF_F_NTUPLE) && ntuple) {
- if (!pf->flow_cfg->max_flows) {
- netdev_err(netdev,
- "Can't enable NTUPLE, MCAM entries not allocated\n");
- return -EINVAL;
- }
- }
-
- if ((changed & NETIF_F_HW_TC) && tc) {
- if (!pf->flow_cfg->max_flows) {
- netdev_err(netdev,
- "Can't enable TC, MCAM entries not allocated\n");
- return -EINVAL;
- }
- }
-
- if ((changed & NETIF_F_HW_TC) && !tc &&
- pf->flow_cfg && pf->flow_cfg->nr_flows) {
- netdev_err(netdev, "Can't disable TC hardware offload while flows are active\n");
- return -EBUSY;
- }
-
- if ((changed & NETIF_F_NTUPLE) && ntuple &&
- (netdev->features & NETIF_F_HW_TC) && !(changed & NETIF_F_HW_TC)) {
- netdev_err(netdev,
- "Can't enable NTUPLE when TC is active, disable TC and retry\n");
- return -EINVAL;
- }
-
- if ((changed & NETIF_F_HW_TC) && tc &&
- (netdev->features & NETIF_F_NTUPLE) && !(changed & NETIF_F_NTUPLE)) {
- netdev_err(netdev,
- "Can't enable TC when NTUPLE is active, disable NTUPLE and retry\n");
- return -EINVAL;
- }
-
- return 0;
+ return otx2_handle_ntuple_tc_features(netdev, features);
}
static void otx2_reset_task(struct work_struct *work)
@@ -2625,6 +2584,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
hw->tx_queues = qcount;
hw->tot_tx_queues = qcount;
hw->max_queues = qcount;
+ hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN;
num_vec = pci_msix_vec_count(pdev);
hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE,
@@ -2778,9 +2738,11 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
/* Enable link notifications */
otx2_cgx_config_linkevents(pf, true);
- /* Enable pause frames by default */
- pf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED;
- pf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED;
+#ifdef CONFIG_DCB
+ err = otx2_dcbnl_set_ops(netdev);
+ if (err)
+ goto err_pf_sriov_init;
+#endif
return 0;
@@ -2925,6 +2887,21 @@ static void otx2_remove(struct pci_dev *pdev)
if (pf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED)
otx2_config_hw_rx_tstamp(pf, false);
+ /* Disable 802.3x pause frames */
+ if (pf->flags & OTX2_FLAG_RX_PAUSE_ENABLED ||
+ (pf->flags & OTX2_FLAG_TX_PAUSE_ENABLED)) {
+ pf->flags &= ~OTX2_FLAG_RX_PAUSE_ENABLED;
+ pf->flags &= ~OTX2_FLAG_TX_PAUSE_ENABLED;
+ otx2_config_pause_frm(pf);
+ }
+
+#ifdef CONFIG_DCB
+ /* Disable PFC config */
+ if (pf->pfc_en) {
+ pf->pfc_en = 0;
+ otx2_config_priority_flow_ctrl(pf);
+ }
+#endif
cancel_work_sync(&pf->reset_task);
/* Disable link notifications */
otx2_cgx_config_linkevents(pf, false);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 626961a41089..0593106d7161 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -58,7 +58,7 @@ int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic)
{
struct otx2_tc_info *tc = &nic->tc_info;
- if (!nic->flow_cfg->max_flows || is_otx2_vf(nic->pcifunc))
+ if (!nic->flow_cfg->max_flows)
return 0;
/* Max flows changed, free the existing bitmap */
@@ -1023,6 +1023,7 @@ int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
return -EOPNOTSUPP;
}
}
+EXPORT_SYMBOL(otx2_setup_tc);
static const struct rhashtable_params tc_flow_ht_params = {
.head_offset = offsetof(struct otx2_tc_flow, node),
@@ -1052,6 +1053,7 @@ int otx2_init_tc(struct otx2_nic *nic)
tc->flow_ht_params = tc_flow_ht_params;
return rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
}
+EXPORT_SYMBOL(otx2_init_tc);
void otx2_shutdown_tc(struct otx2_nic *nic)
{
@@ -1060,3 +1062,4 @@ void otx2_shutdown_tc(struct otx2_nic *nic)
kfree(tc->tc_entries_bitmap);
rhashtable_destroy(&tc->flow_table);
}
+EXPORT_SYMBOL(otx2_shutdown_tc);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index 925b74ebb8b0..a232e202f6a4 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -472,23 +472,7 @@ static void otx2vf_reset_task(struct work_struct *work)
static int otx2vf_set_features(struct net_device *netdev,
netdev_features_t features)
{
- netdev_features_t changed = features ^ netdev->features;
- bool ntuple_enabled = !!(features & NETIF_F_NTUPLE);
- struct otx2_nic *vf = netdev_priv(netdev);
-
- if (changed & NETIF_F_NTUPLE) {
- if (!ntuple_enabled) {
- otx2_mcam_flow_del(vf);
- return 0;
- }
-
- if (!otx2_get_maxflows(vf->flow_cfg)) {
- netdev_err(netdev,
- "Can't enable NTUPLE, MCAM entries not allocated\n");
- return -EINVAL;
- }
- }
- return 0;
+ return otx2_handle_ntuple_tc_features(netdev, features);
}
static const struct net_device_ops otx2vf_netdev_ops = {
@@ -502,6 +486,7 @@ static const struct net_device_ops otx2vf_netdev_ops = {
.ndo_get_stats64 = otx2_get_stats64,
.ndo_tx_timeout = otx2_tx_timeout,
.ndo_eth_ioctl = otx2_ioctl,
+ .ndo_setup_tc = otx2_setup_tc,
};
static int otx2_wq_init(struct otx2_nic *vf)
@@ -586,6 +571,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
hw->tx_queues = qcount;
hw->max_queues = qcount;
hw->tot_tx_queues = qcount;
+ hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN;
hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE,
GFP_KERNEL);
@@ -662,6 +648,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
netdev->hw_features |= NETIF_F_NTUPLE;
netdev->hw_features |= NETIF_F_RXALL;
+ netdev->hw_features |= NETIF_F_HW_TC;
netif_set_gso_max_segs(netdev, OTX2_MAX_GSO_SEGS);
netdev->watchdog_timeo = OTX2_TX_TIMEOUT;
@@ -697,16 +684,24 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
goto err_unreg_netdev;
- err = otx2_register_dl(vf);
+ err = otx2_init_tc(vf);
if (err)
goto err_unreg_netdev;
- /* Enable pause frames by default */
- vf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED;
- vf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED;
+ err = otx2_register_dl(vf);
+ if (err)
+ goto err_shutdown_tc;
+
+#ifdef CONFIG_DCB
+ err = otx2_dcbnl_set_ops(netdev);
+ if (err)
+ goto err_shutdown_tc;
+#endif
return 0;
+err_shutdown_tc:
+ otx2_shutdown_tc(vf);
err_unreg_netdev:
unregister_netdev(netdev);
err_ptp_destroy:
@@ -739,6 +734,22 @@ static void otx2vf_remove(struct pci_dev *pdev)
vf = netdev_priv(netdev);
+ /* Disable 802.3x pause frames */
+ if (vf->flags & OTX2_FLAG_RX_PAUSE_ENABLED ||
+ (vf->flags & OTX2_FLAG_TX_PAUSE_ENABLED)) {
+ vf->flags &= ~OTX2_FLAG_RX_PAUSE_ENABLED;
+ vf->flags &= ~OTX2_FLAG_TX_PAUSE_ENABLED;
+ otx2_config_pause_frm(vf);
+ }
+
+#ifdef CONFIG_DCB
+ /* Disable PFC config */
+ if (vf->pfc_en) {
+ vf->pfc_en = 0;
+ otx2_config_priority_flow_ctrl(vf);
+ }
+#endif
+
cancel_work_sync(&vf->reset_task);
otx2_unregister_dl(vf);
unregister_netdev(netdev);
diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index 89ca7960b225..4cd0747edaff 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1556,6 +1556,7 @@ static int mtk_star_probe(struct platform_device *pdev)
return devm_register_netdev(dev, ndev);
}
+#ifdef CONFIG_OF
static const struct of_device_id mtk_star_of_match[] = {
{ .compatible = "mediatek,mt8516-eth", },
{ .compatible = "mediatek,mt8518-eth", },
@@ -1563,6 +1564,7 @@ static const struct of_device_id mtk_star_of_match[] = {
{ }
};
MODULE_DEVICE_TABLE(of, mtk_star_of_match);
+#endif
static SIMPLE_DEV_PM_OPS(mtk_star_pm_ops,
mtk_star_suspend, mtk_star_resume);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
index b0de6b999675..2b53738938a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
@@ -7,7 +7,8 @@
static bool
tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
return true;
}
@@ -20,7 +21,7 @@ tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state,
{
attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
- attr->flags |= MLX5_ESW_ATTR_FLAG_ACCEPT;
+ attr->flags |= MLX5_ATTR_FLAG_ACCEPT;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
index 26efa33de56f..bfbc91c116a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -16,12 +16,12 @@ struct mlx5e_tc_act_parse_state {
unsigned int num_actions;
struct mlx5e_tc_flow *flow;
struct netlink_ext_ack *extack;
+ bool ct;
bool encap;
bool decap;
bool mpls_push;
bool ptype_host;
const struct ip_tunnel_info *tun_info;
- struct pedit_headers_action hdrs[__PEDIT_CMD_MAX];
int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
int if_count;
struct mlx5_tc_ct_priv *ct_priv;
@@ -30,7 +30,8 @@ struct mlx5e_tc_act_parse_state {
struct mlx5e_tc_act {
bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index);
+ int act_index,
+ struct mlx5_flow_attr *attr);
int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
index 29920ef0180a..c0f08ae6a57f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
@@ -38,11 +38,12 @@ csum_offload_supported(struct mlx5e_priv *priv,
static bool
tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
struct mlx5e_tc_flow *flow = parse_state->flow;
- return csum_offload_supported(flow->priv, flow->attr->action,
+ return csum_offload_supported(flow->priv, attr->action,
act->csum_flags, parse_state->extack);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
index 06ec30cdb269..85f0cb88127f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
@@ -8,8 +8,10 @@
static bool
tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
struct netlink_ext_ack *extack = parse_state->extack;
if (flow_flag_test(parse_state->flow, SAMPLE)) {
@@ -18,6 +20,11 @@ tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state,
return false;
}
+ if (parse_state->ct && !clear_action) {
+ NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supoported");
+ return false;
+ }
+
return true;
}
@@ -27,6 +34,7 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5e_priv *priv,
struct mlx5_flow_attr *attr)
{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
int err;
err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr,
@@ -35,11 +43,16 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
if (err)
return err;
- flow_flag_set(parse_state->flow, CT);
if (mlx5e_is_eswitch_flow(parse_state->flow))
attr->esw_attr->split_count = attr->esw_attr->out_count;
+ if (!clear_action) {
+ attr->flags |= MLX5_ATTR_FLAG_CT;
+ flow_flag_set(parse_state->flow, CT);
+ parse_state->ct = true;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
index 2e29a23bed12..3d5f23636a02 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
@@ -7,7 +7,8 @@
static bool
tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
return true;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
index f44515061228..fb1be822ad25 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
@@ -8,6 +8,7 @@
static int
validate_goto_chain(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack)
{
@@ -32,7 +33,7 @@ validate_goto_chain(struct mlx5e_priv *priv,
}
if (!mlx5_chains_backwards_supported(chains) &&
- dest_chain <= flow->attr->chain) {
+ dest_chain <= attr->chain) {
NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported");
return -EOPNOTSUPP;
}
@@ -43,8 +44,8 @@ validate_goto_chain(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
- if (flow->attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
- MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+ if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
!reformat_and_fwd) {
NL_SET_ERR_MSG_MOD(extack,
"Goto chain is not allowed if action has reformat or decap");
@@ -57,12 +58,13 @@ validate_goto_chain(struct mlx5e_priv *priv,
static bool
tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
struct netlink_ext_ack *extack = parse_state->extack;
struct mlx5e_tc_flow *flow = parse_state->flow;
- if (validate_goto_chain(flow->priv, flow, act, extack))
+ if (validate_goto_chain(flow->priv, flow, attr, act, extack))
return false;
return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
index d775c3d9edf3..e8d227595b3e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
@@ -7,7 +7,8 @@
static bool
tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) {
NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
index c614fc7fdc9c..99fb98b3e71b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -99,7 +99,8 @@ get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev)
static bool
tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
struct netlink_ext_ack *extack = parse_state->extack;
struct mlx5e_tc_flow *flow = parse_state->flow;
@@ -108,8 +109,8 @@ tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5e_priv *priv = flow->priv;
struct mlx5_esw_flow_attr *esw_attr;
- parse_attr = flow->attr->parse_attr;
- esw_attr = flow->attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
if (!out_dev) {
/* out_dev is NULL when filters with
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
index 2c74567b6d25..16681cf6e93e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
@@ -7,7 +7,8 @@
static bool
tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
struct netlink_ext_ack *extack = parse_state->extack;
struct mlx5e_tc_flow *flow = parse_state->flow;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
index 784fc4f68b1e..40332949509a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
@@ -8,7 +8,8 @@
static bool
tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
struct netlink_ext_ack *extack = parse_state->extack;
struct mlx5e_priv *priv = parse_state->flow->priv;
@@ -36,13 +37,13 @@ tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
static bool
tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
struct netlink_ext_ack *extack = parse_state->extack;
- struct mlx5e_tc_flow *flow = parse_state->flow;
struct net_device *filter_dev;
- filter_dev = flow->attr->parse_attr->filter_dev;
+ filter_dev = attr->parse_attr->filter_dev;
/* we only support mpls pop if it is the first action
* and the filter net device is bareudp. Subsequent
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
index 79addbbef087..39f8f71bed9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
@@ -46,9 +46,9 @@ static int
parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
const struct flow_action_entry *act, int namespace,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
struct netlink_ext_ack *extack)
{
+ struct pedit_headers_action *hdrs = parse_attr->hdrs;
u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
u8 htype = act->mangle.htype;
int err = -EOPNOTSUPP;
@@ -110,20 +110,20 @@ int
mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
const struct flow_action_entry *act, int namespace,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
return parse_pedit_to_reformat(act, parse_attr, extack);
- return parse_pedit_to_modify_hdr(priv, act, namespace, parse_attr, hdrs, extack);
+ return parse_pedit_to_modify_hdr(priv, act, namespace, parse_attr, extack);
}
static bool
tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
return true;
}
@@ -141,8 +141,7 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
ns_type = mlx5e_get_flow_namespace(flow);
- err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type,
- attr->parse_attr, parse_state->hdrs,
+ err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr,
flow, parse_state->extack);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
index da8ab03af58f..258f030a2dc6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
@@ -25,7 +25,6 @@ int
mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
const struct flow_action_entry *act, int namespace,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
index 0819110193dc..6454b031ff7a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
@@ -7,7 +7,8 @@
static bool
tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
return true;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
index 1c32e24e528d..9dd244147385 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
@@ -7,16 +7,16 @@
static bool
tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
struct netlink_ext_ack *extack = parse_state->extack;
- struct mlx5e_tc_flow *flow = parse_state->flow;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct net_device *out_dev = act->dev;
struct mlx5_esw_flow_attr *esw_attr;
- parse_attr = flow->attr->parse_attr;
- esw_attr = flow->attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
if (!out_dev)
return false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
index 6699bdf5cf01..539fea13ce9f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
@@ -8,7 +8,8 @@
static bool
tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
struct netlink_ext_ack *extack = parse_state->extack;
@@ -27,11 +28,7 @@ tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5e_priv *priv,
struct mlx5_flow_attr *attr)
{
- struct mlx5e_sample_attr *sample_attr;
-
- sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL);
- if (!sample_attr)
- return -ENOMEM;
+ struct mlx5e_sample_attr *sample_attr = &attr->sample_attr;
sample_attr->rate = act->sample.rate;
sample_attr->group_num = act->sample.psample_group->group_num;
@@ -39,7 +36,7 @@ tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state,
if (act->sample.truncate)
sample_attr->trunc_size = act->sample.trunc_size;
- attr->sample_attr = sample_attr;
+ attr->flags |= MLX5_ATTR_FLAG_SAMPLE;
flow_flag_set(parse_state->flow, SAMPLE);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
index 046b64c2cec4..9ea293fdc434 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
@@ -7,7 +7,8 @@
static bool
tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
struct netlink_ext_ack *extack = parse_state->extack;
@@ -27,7 +28,7 @@ tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state,
{
attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
- attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+ attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
index 6f4a2cf46afd..b4fa2de9711d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
@@ -8,7 +8,8 @@
static bool
tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
if (!act->tunnel) {
NL_SET_ERR_MSG_MOD(parse_state->extack,
@@ -34,7 +35,8 @@ tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
static bool
tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
return true;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
index 70fc0c2d8813..6378b7558ba2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
@@ -9,7 +9,6 @@
static int
add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
u32 *action, struct netlink_ext_ack *extack)
{
const struct flow_action_entry prio_tag_act = {
@@ -26,7 +25,7 @@ add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
};
return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
- &prio_tag_act, parse_attr, hdrs, action,
+ &prio_tag_act, parse_attr, action,
extack);
}
@@ -151,7 +150,8 @@ mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
static bool
tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
return true;
}
@@ -170,8 +170,8 @@ tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
/* Replace vlan pop+push with vlan modify */
attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act,
- attr->parse_attr, parse_state->hdrs,
- &attr->action, parse_state->extack);
+ attr->parse_attr, &attr->action,
+ parse_state->extack);
} else {
err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action,
parse_state->extack);
@@ -191,7 +191,6 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5_flow_attr *attr)
{
struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
- struct pedit_headers_action *hdrs = parse_state->hdrs;
struct netlink_ext_ack *extack = parse_state->extack;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
int err;
@@ -202,7 +201,7 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
* tag rewrite.
*/
attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
- err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
+ err = add_vlan_prio_tag_rewrite_action(priv, parse_attr,
&attr->action, extack);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
index 3d62f13ab61f..2fa58c6f44eb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
@@ -24,7 +24,6 @@ int
mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
const struct flow_action_entry *act,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
u32 *action, struct netlink_ext_ack *extack);
#endif /* __MLX5_EN_TC_ACT_VLAN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
index 63e36e7f53e3..28444d4ffd73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
@@ -12,7 +12,6 @@ int
mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
const struct flow_action_entry *act,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
u32 *action, struct netlink_ext_ack *extack)
{
u16 mask16 = VLAN_VID_MASK;
@@ -44,7 +43,7 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
return -EOPNOTSUPP;
}
- err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr, hdrs,
+ err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr,
NULL, extack);
*action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
@@ -54,7 +53,8 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
static bool
tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
- int act_index)
+ int act_index,
+ struct mlx5_flow_attr *attr)
{
return true;
}
@@ -69,8 +69,7 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
int err;
ns_type = mlx5e_get_flow_namespace(parse_state->flow);
- err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act,
- attr->parse_attr, parse_state->hdrs,
+ err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr,
&attr->action, parse_state->extack);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
index 31b4e39be2d3..9e0e229cf164 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
@@ -101,6 +101,7 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at
post_attr->inner_match_level = MLX5_MATCH_NONE;
post_attr->outer_match_level = MLX5_MATCH_NONE;
post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
+ post_attr->flags &= ~MLX5_ATTR_FLAG_SAMPLE;
handle->ns_type = post_act->ns_type;
/* Splits were handled before post action */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
index ff4b4f8a5a9d..32230e677029 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
@@ -403,7 +403,7 @@ add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
post_attr->chain = 0;
post_attr->prio = 0;
post_attr->ft = default_tbl;
- post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+ post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT;
/* When offloading sample and encap action, if there is no valid
* neigh data struct, a slow path rule is offloaded first. Source
@@ -492,8 +492,7 @@ del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
struct mlx5_flow_handle *
mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- u32 tunnel_id)
+ struct mlx5_flow_attr *attr)
{
struct mlx5e_post_act_handle *post_act_handle = NULL;
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
@@ -502,6 +501,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
struct mlx5e_sample_flow *sample_flow;
struct mlx5e_sample_attr *sample_attr;
struct mlx5_flow_attr *pre_attr;
+ u32 tunnel_id = attr->tunnel_id;
struct mlx5_eswitch *esw;
u32 default_tbl_id;
u32 obj_id;
@@ -513,7 +513,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
if (!sample_flow)
return ERR_PTR(-ENOMEM);
- sample_attr = attr->sample_attr;
+ sample_attr = &attr->sample_attr;
sample_attr->sample_flow = sample_flow;
/* For NICs with reg_c_preserve support or decap action, use
@@ -546,6 +546,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
err = PTR_ERR(sample_flow->sampler);
goto err_sampler;
}
+ sample_attr->sampler_id = sample_flow->sampler->sampler_id;
/* Create an id mapping reg_c0 value to sample object. */
restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
@@ -580,13 +581,12 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
if (tunnel_id)
pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
- pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE;
+ pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE;
pre_attr->inner_match_level = attr->inner_match_level;
pre_attr->outer_match_level = attr->outer_match_level;
pre_attr->chain = attr->chain;
pre_attr->prio = attr->prio;
- pre_attr->sample_attr = attr->sample_attr;
- sample_attr->sampler_id = sample_flow->sampler->sampler_id;
+ pre_attr->sample_attr = *sample_attr;
pre_esw_attr = pre_attr->esw_attr;
pre_esw_attr->in_mdev = esw_attr->in_mdev;
pre_esw_attr->in_rep = esw_attr->in_rep;
@@ -633,11 +633,11 @@ mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
* will hit fw syndromes.
*/
esw = tc_psample->esw;
- sample_flow = attr->sample_attr->sample_flow;
+ sample_flow = attr->sample_attr.sample_flow;
mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr);
sample_restore_put(tc_psample, sample_flow->restore);
- mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id);
+ mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id);
sampler_put(tc_psample, sample_flow->sampler);
if (sample_flow->post_act_handle)
mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
index 9ef8a49d7801..a569367eae4d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
@@ -26,8 +26,7 @@ void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj
struct mlx5_flow_handle *
mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- u32 tunnel_id);
+ struct mlx5_flow_attr *attr);
void
mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv,
@@ -45,8 +44,7 @@ mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
static inline struct mlx5_flow_handle *
mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- u32 tunnel_id)
+ struct mlx5_flow_attr *attr)
{ return ERR_PTR(-EOPNOTSUPP); }
static inline void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 4a0d38d219ed..0f4d3b9dd979 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -809,7 +809,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
attr->outer_match_level = MLX5_MATCH_L4;
attr->counter = entry->counter->counter;
- attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+ attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
attr->esw_attr->in_mdev = priv->mdev;
@@ -1787,7 +1787,6 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
*/
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *orig_spec,
struct mlx5_flow_attr *attr)
{
@@ -1871,12 +1870,10 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
*/
if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
attr->chain == 0) {
- u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
-
err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
ct_priv->ns_type,
TUNNEL_TO_REG,
- tun_id);
+ attr->tunnel_id);
if (err) {
ct_dbg("Failed to set tunnel register mapping");
goto err_mapping;
@@ -1926,87 +1923,19 @@ err_ft:
return ERR_PTR(err);
}
-static struct mlx5_flow_handle *
-__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5_flow_spec *orig_spec,
- struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_acts)
-{
- struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
- u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
- struct mlx5_flow_attr *pre_ct_attr;
- struct mlx5_modify_hdr *mod_hdr;
- struct mlx5_flow_handle *rule;
- struct mlx5_ct_flow *ct_flow;
- int err;
-
- ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
- if (!ct_flow)
- return ERR_PTR(-ENOMEM);
-
- /* Base esw attributes on original rule attribute */
- pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
- if (!pre_ct_attr) {
- err = -ENOMEM;
- goto err_attr;
- }
-
- memcpy(pre_ct_attr, attr, attr_sz);
-
- mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
- mod_acts->num_actions,
- mod_acts->actions);
- if (IS_ERR(mod_hdr)) {
- err = PTR_ERR(mod_hdr);
- ct_dbg("Failed to add create ct clear mod hdr");
- goto err_mod_hdr;
- }
-
- pre_ct_attr->modify_hdr = mod_hdr;
-
- rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- ct_dbg("Failed to add ct clear rule");
- goto err_insert;
- }
-
- attr->ct_attr.ct_flow = ct_flow;
- ct_flow->pre_ct_attr = pre_ct_attr;
- ct_flow->pre_ct_rule = rule;
- return rule;
-
-err_insert:
- mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
-err_mod_hdr:
- netdev_warn(priv->netdev,
- "Failed to offload ct clear flow, err %d\n", err);
- kfree(pre_ct_attr);
-err_attr:
- kfree(ct_flow);
-
- return ERR_PTR(err);
-}
-
struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
- bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
struct mlx5_flow_handle *rule;
if (!priv)
return ERR_PTR(-EOPNOTSUPP);
mutex_lock(&priv->control_lock);
-
- if (clear_action)
- rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
- else
- rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
+ rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
mutex_unlock(&priv->control_lock);
return rule;
@@ -2014,14 +1943,13 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5e_tc_flow *flow,
- struct mlx5_ct_flow *ct_flow)
+ struct mlx5_ct_flow *ct_flow,
+ struct mlx5_flow_attr *attr)
{
struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
- mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
- pre_ct_attr);
+ mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
if (ct_flow->post_act_handle) {
@@ -2036,7 +1964,6 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr)
{
struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
@@ -2048,7 +1975,7 @@ mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
return;
mutex_lock(&priv->control_lock);
- __mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
+ __mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
mutex_unlock(&priv->control_lock);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 99662af1e41a..2b21c7b97a52 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -116,13 +116,11 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr);
bool
@@ -183,7 +181,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
static inline struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
@@ -193,7 +190,6 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
static inline void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr)
{
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
index f832c26ff2c3..9ffba584b982 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -37,6 +37,7 @@ struct mlx5e_tc_flow_parse_attr {
const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
struct net_device *filter_dev;
struct mlx5_flow_spec spec;
+ struct pedit_headers_action hdrs[__PEDIT_CMD_MAX];
struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
struct ethhdr eth;
@@ -107,10 +108,19 @@ struct mlx5e_tc_flow {
struct rcu_head rcu_head;
struct completion init_done;
struct completion del_hw_done;
- int tunnel_id; /* the mapped tunnel id of this flow */
struct mlx5_flow_attr *attr;
};
+struct mlx5_flow_handle *
+mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer);
struct mlx5_flow_handle *
@@ -173,6 +183,7 @@ struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec);
+
void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 9918ed8c059b..1f8d339ff0c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -488,12 +488,14 @@ static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
int out_index);
void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index)
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ int out_index)
{
struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- if (flow->attr->esw_attr->dests[out_index].flags &
+ if (attr->esw_attr->dests[out_index].flags &
MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
mlx5e_detach_encap_route(priv, flow, out_index);
@@ -733,6 +735,7 @@ static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
struct mlx5e_encap_entry *e,
bool new_encap_entry,
unsigned long tbl_time_before,
@@ -740,6 +743,7 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
struct net_device *mirred_dev,
int out_index,
struct netlink_ext_ack *extack,
@@ -748,7 +752,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5_flow_attr *attr = flow->attr;
const struct ip_tunnel_info *tun_info;
unsigned long tbl_time_before = 0;
struct mlx5e_encap_entry *e;
@@ -834,8 +837,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
e->compl_result = 1;
attach_flow:
- err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
- out_index);
+ err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
+ tbl_time_before, out_index);
if (err)
goto out_err;
@@ -1198,6 +1201,7 @@ out:
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
struct mlx5e_encap_entry *e,
bool new_encap_entry,
unsigned long tbl_time_before,
@@ -1206,7 +1210,6 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
unsigned long tbl_time_after = tbl_time_before;
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5_flow_attr *attr = flow->attr;
const struct ip_tunnel_info *tun_info;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5e_route_entry *r;
@@ -1377,7 +1380,7 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
continue;
}
- err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
if (err) {
mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
index 3391504d9a08..d542b8476491 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
@@ -7,15 +7,19 @@
#include "tc_priv.h"
void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index);
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ int out_index);
int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
struct net_device *mirred_dev,
int out_index,
struct netlink_ext_ack *extack,
struct net_device **encap_dev,
bool *encap_valid);
+
int mlx5e_attach_decap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 2022fa4a9598..099d4ce16049 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -295,13 +295,62 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv,
if (is_mdev_switchdev_mode(priv->mdev)) {
mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
-
return;
}
mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}
+struct mlx5_flow_handle *
+mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (attr->flags & MLX5_ATTR_FLAG_CT) {
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts =
+ &attr->parse_attr->mod_hdr_acts;
+
+ return mlx5_tc_ct_flow_offload(get_ct_priv(priv),
+ spec, attr,
+ mod_hdr_acts);
+ }
+
+ if (!is_mdev_switchdev_mode(priv->mdev))
+ return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
+
+ if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
+ return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);
+
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+}
+
+void
+mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (attr->flags & MLX5_ATTR_FLAG_CT) {
+ mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr);
+ return;
+ }
+
+ if (!is_mdev_switchdev_mode(priv->mdev)) {
+ mlx5e_del_offloaded_nic_rule(priv, rule, attr);
+ return;
+ }
+
+ if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
+ mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
+ return;
+ }
+
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+}
+
int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
@@ -1039,6 +1088,21 @@ err_ft_get:
}
static int
+alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
+ struct mlx5_flow_attr *attr)
+
+{
+ struct mlx5_fc *counter;
+
+ counter = mlx5_fc_create(counter_dev, true);
+ if (IS_ERR(counter))
+ return PTR_ERR(counter);
+
+ attr->counter = counter;
+ return 0;
+}
+
+static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
@@ -1046,7 +1110,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_core_dev *dev = priv->mdev;
- struct mlx5_fc *counter;
int err;
parse_attr = attr->parse_attr;
@@ -1058,11 +1121,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(dev, true);
- if (IS_ERR(counter))
- return PTR_ERR(counter);
-
- attr->counter = counter;
+ err = alloc_flow_attr_counter(dev, attr);
+ if (err)
+ return err;
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
@@ -1072,8 +1133,8 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
return err;
}
- if (flow_flag_test(flow, CT))
- flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec,
+ if (attr->flags & MLX5_ATTR_FLAG_CT)
+ flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec,
attr, &parse_attr->mod_hdr_acts);
else
flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
@@ -1107,8 +1168,8 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
flow_flag_clear(flow, OFFLOADED);
- if (flow_flag_test(flow, CT))
- mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
+ if (attr->flags & MLX5_ATTR_FLAG_CT)
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
else if (!IS_ERR_OR_NULL(flow->rule[0]))
mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
@@ -1142,40 +1203,27 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr)
{
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
struct mlx5_flow_handle *rule;
- if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
+ if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
- if (flow_flag_test(flow, CT)) {
- mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
-
- rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
- flow, spec, attr,
- mod_hdr_acts);
- } else if (flow_flag_test(flow, SAMPLE)) {
- rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr,
- mlx5e_tc_get_flow_tun_id(flow));
- } else {
- rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
- }
+ rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);
if (IS_ERR(rule))
return rule;
if (attr->esw_attr->split_count) {
flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
- if (IS_ERR(flow->rule[1])) {
- if (flow_flag_test(flow, CT))
- mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
- else
- mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
- return flow->rule[1];
- }
+ if (IS_ERR(flow->rule[1]))
+ goto err_rule1;
}
return rule;
+
+err_rule1:
+ mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
+ return flow->rule[1];
}
void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
@@ -1184,19 +1232,13 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
{
flow_flag_clear(flow, OFFLOADED);
- if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
- goto offload_rule_0;
+ if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
+ return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
if (attr->esw_attr->split_count)
mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
- if (flow_flag_test(flow, CT))
- mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
- else if (flow_flag_test(flow, SAMPLE))
- mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
- else
-offload_rule_0:
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
}
struct mlx5_flow_handle *
@@ -1214,7 +1256,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
slow_attr->esw_attr->split_count = 0;
- slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+ slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
if (!IS_ERR(rule))
@@ -1239,7 +1281,7 @@ void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
slow_attr->esw_attr->split_count = 0;
- slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+ slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
flow_flag_clear(flow, SLOW);
kfree(slow_attr);
@@ -1348,10 +1390,10 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
}
int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow)
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr)
{
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts;
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
struct mlx5_modify_hdr *mod_hdr;
mod_hdr = mlx5_modify_header_alloc(priv->mdev,
@@ -1361,13 +1403,101 @@ int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
if (IS_ERR(mod_hdr))
return PTR_ERR(mod_hdr);
- WARN_ON(flow->attr->modify_hdr);
- flow->attr->modify_hdr = mod_hdr;
+ WARN_ON(attr->modify_hdr);
+ attr->modify_hdr = mod_hdr;
return 0;
}
static int
+set_encap_dests(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack,
+ bool *encap_valid,
+ bool *vf_tun)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct net_device *encap_dev = NULL;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *out_priv;
+ int out_index;
+ int err = 0;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+ *vf_tun = false;
+ *encap_valid = true;
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ struct net_device *out_dev;
+ int mirred_ifindex;
+
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ mirred_ifindex = parse_attr->mirred_ifindex[out_index];
+ out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
+ if (!out_dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
+ err = -ENODEV;
+ goto out;
+ }
+ err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
+ extack, &encap_dev, encap_valid);
+ dev_put(out_dev);
+ if (err)
+ goto out;
+
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
+ !esw_attr->dest_int_port)
+ *vf_tun = true;
+
+ out_priv = netdev_priv(encap_dev);
+ rpriv = out_priv->ppriv;
+ esw_attr->dests[out_index].rep = rpriv->rep;
+ esw_attr->dests[out_index].mdev = out_priv->mdev;
+ }
+
+ if (*vf_tun && esw_attr->out_count > 1) {
+ NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+out:
+ return err;
+}
+
+static void
+clean_encap_dests(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ bool *vf_tun)
+{
+ struct mlx5_esw_flow_attr *esw_attr;
+ int out_index;
+
+ esw_attr = attr->esw_attr;
+ *vf_tun = false;
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
+ !esw_attr->dest_int_port)
+ *vf_tun = true;
+
+ mlx5e_detach_encap(priv, flow, attr, out_index);
+ kfree(attr->parse_attr->tun_info[out_index]);
+ }
+}
+
+static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
@@ -1375,15 +1505,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
- bool vf_tun = false, encap_valid = true;
- struct net_device *encap_dev = NULL;
struct mlx5_esw_flow_attr *esw_attr;
- struct mlx5e_rep_priv *rpriv;
- struct mlx5e_priv *out_priv;
- struct mlx5_fc *counter;
+ bool vf_tun, encap_valid;
u32 max_prio, max_chain;
int err = 0;
- int out_index;
parse_attr = attr->parse_attr;
esw_attr = attr->esw_attr;
@@ -1472,50 +1597,17 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
esw_attr->int_port = int_port;
}
- for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
- struct net_device *out_dev;
- int mirred_ifindex;
-
- if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
- continue;
-
- mirred_ifindex = parse_attr->mirred_ifindex[out_index];
- out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
- if (!out_dev) {
- NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
- err = -ENODEV;
- goto err_out;
- }
- err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
- extack, &encap_dev, &encap_valid);
- dev_put(out_dev);
- if (err)
- goto err_out;
-
- if (esw_attr->dests[out_index].flags &
- MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
- !esw_attr->dest_int_port)
- vf_tun = true;
- out_priv = netdev_priv(encap_dev);
- rpriv = out_priv->ppriv;
- esw_attr->dests[out_index].rep = rpriv->rep;
- esw_attr->dests[out_index].mdev = out_priv->mdev;
- }
-
- if (vf_tun && esw_attr->out_count > 1) {
- NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
- err = -EOPNOTSUPP;
+ err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun);
+ if (err)
goto err_out;
- }
err = mlx5_eswitch_add_vlan_action(esw, attr);
if (err)
goto err_out;
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
- !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
if (vf_tun) {
- err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
if (err)
goto err_out;
} else {
@@ -1526,13 +1618,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(esw_attr->counter_dev, true);
- if (IS_ERR(counter)) {
- err = PTR_ERR(counter);
+ err = alloc_flow_attr_counter(esw_attr->counter_dev, attr);
+ if (err)
goto err_out;
- }
-
- attr->counter = counter;
}
/* we get here if one of the following takes place:
@@ -1576,8 +1664,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
- bool vf_tun = false;
- int out_index;
+ bool vf_tun;
esw_attr = attr->esw_attr;
mlx5e_put_flow_tunnel_id(flow);
@@ -1601,16 +1688,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
if (flow->decap_route)
mlx5e_detach_decap_route(priv, flow);
- for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
- if (esw_attr->dests[out_index].flags &
- MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
- !esw_attr->dest_int_port)
- vf_tun = true;
- if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
- mlx5e_detach_encap(priv, flow, out_index);
- kfree(attr->parse_attr->tun_info[out_index]);
- }
- }
+ clean_encap_dests(priv, flow, attr, &vf_tun);
mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
@@ -1634,7 +1712,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
if (flow_flag_test(flow, L3_TO_L2_DECAP))
mlx5e_detach_decap(priv, flow);
- kfree(attr->sample_attr);
kvfree(attr->esw_attr->rx_tun_attr);
kvfree(attr->parse_attr);
kfree(flow->attr);
@@ -1854,7 +1931,7 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
}
- flow->tunnel_id = value;
+ flow->attr->tunnel_id = value;
return 0;
err_set:
@@ -1868,8 +1945,8 @@ err_enc_opts:
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
{
- u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
- u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
+ u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
+ u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
struct mlx5_eswitch *esw;
@@ -1885,11 +1962,6 @@ static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
enc_opts_id);
}
-u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
-{
- return flow->tunnel_id;
-}
-
void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
struct flow_match_basic *match, bool outer,
void *headers_c, void *headers_v)
@@ -2811,14 +2883,15 @@ static unsigned long mask_to_le(unsigned long mask, int size)
return mask;
}
+
static int offload_pedit_fields(struct mlx5e_priv *priv,
int namespace,
- struct pedit_headers_action *hdrs,
struct mlx5e_tc_flow_parse_attr *parse_attr,
u32 *action_flags,
struct netlink_ext_ack *extack)
{
struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
+ struct pedit_headers_action *hdrs = parse_attr->hdrs;
void *headers_c, *headers_v, *action, *vals_p;
u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
struct mlx5e_tc_mod_hdr_acts *mod_acts;
@@ -2944,35 +3017,43 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
static const struct pedit_headers zero_masks = {};
-static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct pedit_headers_action *hdrs,
- u32 *action_flags,
- struct netlink_ext_ack *extack)
+static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct netlink_ext_ack *extack)
{
struct pedit_headers *cmd_masks;
- int err;
u8 cmd;
- err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
- action_flags, extack);
- if (err < 0)
- goto out_dealloc_parsed_actions;
-
for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
- cmd_masks = &hdrs[cmd].masks;
+ cmd_masks = &parse_attr->hdrs[cmd].masks;
if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
- NL_SET_ERR_MSG_MOD(extack,
- "attempt to offload an unsupported field");
+ NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
16, 1, cmd_masks, sizeof(zero_masks), true);
- err = -EOPNOTSUPP;
- goto out_dealloc_parsed_actions;
+ return -EOPNOTSUPP;
}
}
return 0;
+}
+
+static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action_flags,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
+ if (err)
+ goto out_dealloc_parsed_actions;
+
+ err = verify_offload_pedit_fields(priv, parse_attr, extack);
+ if (err)
+ goto out_dealloc_parsed_actions;
+
+ return 0;
out_dealloc_parsed_actions:
mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
@@ -3257,7 +3338,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
return -EOPNOTSUPP;
}
- if (!tc_act->can_offload(parse_state, act, i))
+ if (!tc_act->can_offload(parse_state, act, i, attr))
return -EOPNOTSUPP;
err = tc_act->parse_action(parse_state, act, priv, attr);
@@ -3268,7 +3349,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
flow_action_for_each(i, act, flow_action) {
tc_act = mlx5e_tc_act_get(act->id, ns_type);
if (!tc_act || !tc_act->post_parse ||
- !tc_act->can_offload(parse_state, act, i))
+ !tc_act->can_offload(parse_state, act, i, attr))
continue;
err = tc_act->post_parse(parse_state, priv, attr);
@@ -3283,10 +3364,10 @@ static int
actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr,
- struct pedit_headers_action *hdrs,
struct netlink_ext_ack *extack)
{
struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct pedit_headers_action *hdrs = parse_attr->hdrs;
enum mlx5_flow_namespace_type ns_type;
int err;
@@ -3296,8 +3377,7 @@ actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
ns_type = mlx5e_get_flow_namespace(flow);
- err = alloc_tc_pedit_action(priv, ns_type, parse_attr, hdrs,
- &attr->action, extack);
+ err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
if (err)
return err;
@@ -3345,7 +3425,6 @@ parse_tc_nic_actions(struct mlx5e_priv *priv,
struct mlx5e_tc_act_parse_state *parse_state;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
- struct pedit_headers_action *hdrs;
int err;
err = flow_action_supported(flow_action, extack);
@@ -3357,13 +3436,12 @@ parse_tc_nic_actions(struct mlx5e_priv *priv,
parse_state = &parse_attr->parse_state;
mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
parse_state->ct_priv = get_ct_priv(priv);
- hdrs = parse_state->hdrs;
err = parse_tc_actions(parse_state, flow_action);
if (err)
return err;
- err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack);
+ err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
if (err)
return err;
@@ -3468,7 +3546,6 @@ parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
- struct pedit_headers_action *hdrs;
int err;
err = flow_action_supported(flow_action, extack);
@@ -3480,7 +3557,6 @@ parse_tc_fdb_actions(struct mlx5e_priv *priv,
parse_state = &parse_attr->parse_state;
mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
parse_state->ct_priv = get_ct_priv(priv);
- hdrs = parse_state->hdrs;
err = parse_tc_actions(parse_state, flow_action);
if (err)
@@ -3494,7 +3570,7 @@ parse_tc_fdb_actions(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
- err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack);
+ err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 5ffae9b13066..c6221728b767 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -71,7 +71,7 @@ struct mlx5_flow_attr {
struct mlx5_fc *counter;
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_ct_attr ct_attr;
- struct mlx5e_sample_attr *sample_attr;
+ struct mlx5e_sample_attr sample_attr;
struct mlx5e_tc_flow_parse_attr *parse_attr;
u32 chain;
u16 prio;
@@ -82,6 +82,7 @@ struct mlx5_flow_attr {
u8 outer_match_level;
u8 ip_version;
u8 tun_ip_version;
+ int tunnel_id; /* mapped tunnel id */
u32 flags;
union {
struct mlx5_esw_flow_attr esw_attr[0];
@@ -89,6 +90,23 @@ struct mlx5_flow_attr {
};
};
+enum {
+ MLX5_ATTR_FLAG_VLAN_HANDLED = BIT(0),
+ MLX5_ATTR_FLAG_SLOW_PATH = BIT(1),
+ MLX5_ATTR_FLAG_NO_IN_PORT = BIT(2),
+ MLX5_ATTR_FLAG_SRC_REWRITE = BIT(3),
+ MLX5_ATTR_FLAG_SAMPLE = BIT(4),
+ MLX5_ATTR_FLAG_ACCEPT = BIT(5),
+ MLX5_ATTR_FLAG_CT = BIT(6),
+};
+
+/* Returns true if any of the flags that require skipping further TC/NF processing are set. */
+static inline bool
+mlx5e_tc_attr_flags_skip(u32 attr_flags)
+{
+ return attr_flags & (MLX5_ATTR_FLAG_SLOW_PATH | MLX5_ATTR_FLAG_ACCEPT);
+}
+
struct mlx5_rx_tun_attr {
u16 decap_vport;
union {
@@ -243,11 +261,8 @@ int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
u32 data);
int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow);
-
-struct mlx5e_tc_flow;
-u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow);
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr);
void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
struct flow_match_basic *match, bool outer,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
index c275fe028b6d..0abef71cb839 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
@@ -86,7 +86,7 @@ mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
mlx5_eswitch_is_vf_vport(esw, vport_num) &&
esw->dev == dest_mdev &&
attr->ip_version &&
- attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
+ attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE;
}
u16
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index ead5e8acc8be..44321cdfe928 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -448,22 +448,6 @@ enum {
MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2),
};
-enum {
- MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0),
- MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1),
- MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2),
- MLX5_ESW_ATTR_FLAG_SRC_REWRITE = BIT(3),
- MLX5_ESW_ATTR_FLAG_SAMPLE = BIT(4),
- MLX5_ESW_ATTR_FLAG_ACCEPT = BIT(5),
-};
-
-/* Returns true if any of the flags that require skipping further TC/NF processing are set. */
-static inline bool
-mlx5_esw_attr_flags_skip(u32 attr_flags)
-{
- return attr_flags & (MLX5_ESW_ATTR_FLAG_SLOW_PATH | MLX5_ESW_ATTR_FLAG_ACCEPT);
-}
-
struct mlx5_esw_flow_attr {
struct mlx5_eswitch_rep *in_rep;
struct mlx5_core_dev *in_mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 9a7b25692505..2b31d8bbd1b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -180,7 +180,7 @@ esw_setup_decap_indir(struct mlx5_eswitch *esw,
{
struct mlx5_flow_table *ft;
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
+ if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
return -EOPNOTSUPP;
ft = mlx5_esw_indir_table_get(esw, attr, spec,
@@ -201,12 +201,12 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
static int
esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
struct mlx5_flow_act *flow_act,
- struct mlx5_flow_attr *attr,
+ u32 sampler_id,
int i)
{
flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
- dest[i].sampler_id = attr->sample_attr->sampler_id;
+ dest[i].sampler_id = sampler_id;
return 0;
}
@@ -297,7 +297,7 @@ esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest,
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
int err;
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
+ if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
return -EOPNOTSUPP;
/* flow steering cannot handle more than one dest with the same ft
@@ -364,7 +364,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest,
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
int j, err;
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
+ if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
return -EOPNOTSUPP;
for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) {
@@ -463,15 +463,16 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) &&
esw_src_port_rewrite_supported(esw))
- attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
+ attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE;
- if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) {
- esw_setup_sampler_dest(dest, flow_act, attr, *i);
+ if (attr->flags & MLX5_ATTR_FLAG_SAMPLE &&
+ !(attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)) {
+ esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i);
(*i)++;
} else if (attr->dest_ft) {
esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i);
(*i)++;
- } else if (mlx5_esw_attr_flags_skip(attr->flags)) {
+ } else if (mlx5e_tc_attr_flags_skip(attr->flags)) {
esw_setup_slow_path_dest(dest, flow_act, chains, *i);
(*i)++;
} else if (attr->dest_chain) {
@@ -498,7 +499,7 @@ esw_cleanup_dests(struct mlx5_eswitch *esw,
if (attr->dest_ft) {
esw_cleanup_decap_indir(esw, attr);
- } else if (!mlx5_esw_attr_flags_skip(attr->flags)) {
+ } else if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
if (attr->dest_chain)
esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0);
else if (esw_is_indir_table(esw, attr))
@@ -589,7 +590,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
else
fdb = attr->ft;
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT))
+ if (!(attr->flags & MLX5_ATTR_FLAG_NO_IN_PORT))
mlx5_eswitch_set_rule_source_port(esw, spec, attr,
esw_attr->in_mdev->priv.eswitch,
esw_attr->in_rep->vport);
@@ -721,7 +722,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
mlx5_del_flow_rules(rule);
- if (!mlx5_esw_attr_flags_skip(attr->flags)) {
+ if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
/* unref the term table */
for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
if (esw_attr->dests[i].termtbl)
@@ -863,7 +864,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
if (err)
goto unlock;
- attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
+ attr->flags &= ~MLX5_ATTR_FLAG_VLAN_HANDLED;
vport = esw_vlan_action_get_vport(esw_attr, push, pop);
@@ -871,7 +872,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
/* tracks VF --> wire rules without vlan push action */
if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
vport->vlan_refcount++;
- attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
+ attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED;
}
goto unlock;
@@ -902,7 +903,7 @@ skip_set_push:
}
out:
if (!err)
- attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
+ attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED;
unlock:
mutex_unlock(&esw->state_lock);
return err;
@@ -921,7 +922,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
return 0;
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_VLAN_HANDLED))
+ if (!(attr->flags & MLX5_ATTR_FLAG_VLAN_HANDLED))
return 0;
push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index 182306bbefaa..ee568bf34ae2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -219,12 +219,14 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) ||
!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) ||
- mlx5_esw_attr_flags_skip(attr->flags) ||
+ mlx5e_tc_attr_flags_skip(attr->flags) ||
(!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port))
return false;
/* push vlan on RX */
- if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
+ !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) &
+ MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX))
return true;
/* hairpin */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index dafe341358c7..a0ac17c3f12f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -152,6 +152,12 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns)
return 0;
}
+static u32 mlx5_cmd_stub_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ return 0;
+}
+
static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
struct mlx5_core_dev *slave,
bool ft_id_valid,
@@ -971,6 +977,12 @@ static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns,
return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
}
+static u32 mlx5_cmd_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ return 0;
+}
+
static const struct mlx5_flow_cmds mlx5_flow_cmds = {
.create_flow_table = mlx5_cmd_create_flow_table,
.destroy_flow_table = mlx5_cmd_destroy_flow_table,
@@ -990,6 +1002,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = {
.set_peer = mlx5_cmd_stub_set_peer,
.create_ns = mlx5_cmd_stub_create_ns,
.destroy_ns = mlx5_cmd_stub_destroy_ns,
+ .get_capabilities = mlx5_cmd_get_capabilities,
};
static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
@@ -1011,6 +1024,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
.set_peer = mlx5_cmd_stub_set_peer,
.create_ns = mlx5_cmd_stub_create_ns,
.destroy_ns = mlx5_cmd_stub_destroy_ns,
+ .get_capabilities = mlx5_cmd_stub_get_capabilities,
};
const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 220ec632d35a..274004e80f03 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -101,6 +101,9 @@ struct mlx5_flow_cmds {
u16 format_id, u32 *match_mask);
int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns,
int definer_id);
+
+ u32 (*get_capabilities)(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type);
};
int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index b628917e38e4..42f878e21fea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -3040,6 +3040,22 @@ void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev)
steering->esw_ingress_root_ns = NULL;
}
+u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+
+ ns = mlx5_get_flow_namespace(dev, type);
+ if (!ns)
+ return 0;
+
+ root = find_root(&ns->node);
+ if (!root)
+ return 0;
+
+ return root->cmds->get_capabilities(root, root->table_type);
+}
+
static int init_egress_root_ns(struct mlx5_flow_steering *steering)
{
int err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 5469b08d635f..c488a7c5b07e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -120,6 +120,11 @@ enum mlx5_flow_steering_mode {
MLX5_FLOW_STEERING_MODE_SMFS
};
+enum mlx5_flow_steering_capabilty {
+ MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0,
+ MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1,
+};
+
struct mlx5_flow_steering {
struct mlx5_core_dev *dev;
enum mlx5_flow_steering_mode mode;
@@ -301,6 +306,8 @@ void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
+u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type);
+
struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index a476da2424f8..033757bfdf64 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -735,6 +735,16 @@ static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns)
return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain);
}
+static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ if (ft_type != FS_FT_FDB ||
+ MLX5_CAP_GEN(ns->dev, steering_format_version) != MLX5_STEERING_FORMAT_CONNECTX_6DX)
+ return 0;
+
+ return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX | MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX;
+}
+
bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
{
return mlx5dr_is_supported(dev);
@@ -759,6 +769,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = {
.set_peer = mlx5_cmd_dr_set_peer,
.create_ns = mlx5_cmd_dr_create_ns,
.destroy_ns = mlx5_cmd_dr_destroy_ns,
+ .get_capabilities = mlx5_cmd_dr_get_capabilities,
};
const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 866b9357939b..f45df5fbdcc0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -212,6 +212,29 @@ struct mlxsw_event_listener_item {
void *priv;
};
+static const u8 mlxsw_core_trap_groups[] = {
+ MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+ MLXSW_REG_HTGT_TRAP_GROUP_CORE_EVENT,
+};
+
+static int mlxsw_core_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+ char htgt_pl[MLXSW_REG_HTGT_LEN];
+ int err;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_core_trap_groups); i++) {
+ mlxsw_reg_htgt_pack(htgt_pl, mlxsw_core_trap_groups[i],
+ MLXSW_REG_HTGT_INVALID_POLICER,
+ MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+ MLXSW_REG_HTGT_DEFAULT_TC);
+ err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
/******************
* EMAD processing
******************/
@@ -777,16 +800,10 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core)
if (err)
goto err_trap_register;
- err = mlxsw_core->driver->basic_trap_groups_set(mlxsw_core);
- if (err)
- goto err_emad_trap_set;
mlxsw_core->emad.use_emad = true;
return 0;
-err_emad_trap_set:
- mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener,
- mlxsw_core);
err_trap_register:
destroy_workqueue(mlxsw_core->emad_wq);
return err;
@@ -1706,7 +1723,7 @@ static void mlxsw_core_health_listener_func(const struct mlxsw_reg_info *reg,
}
static const struct mlxsw_listener mlxsw_core_health_listener =
- MLXSW_EVENTL(mlxsw_core_health_listener_func, MFDE, MFDE);
+ MLXSW_CORE_EVENTL(mlxsw_core_health_listener_func, MFDE);
static int
mlxsw_core_health_fw_fatal_dump_fatal_cause(const char *mfde_pl,
@@ -2122,6 +2139,10 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
}
}
+ err = mlxsw_core_trap_groups_set(mlxsw_core);
+ if (err)
+ goto err_trap_groups_set;
+
err = mlxsw_emad_init(mlxsw_core);
if (err)
goto err_emad_init;
@@ -2181,6 +2202,7 @@ err_fw_rev_validate:
err_register_params:
mlxsw_emad_fini(mlxsw_core);
err_emad_init:
+err_trap_groups_set:
kfree(mlxsw_core->lag.mapping);
err_alloc_lag_mapping:
mlxsw_ports_fini(mlxsw_core, reload);
@@ -2540,6 +2562,45 @@ void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core,
}
EXPORT_SYMBOL(mlxsw_core_trap_unregister);
+int mlxsw_core_traps_register(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_listener *listeners,
+ size_t listeners_count, void *priv)
+{
+ int i, err;
+
+ for (i = 0; i < listeners_count; i++) {
+ err = mlxsw_core_trap_register(mlxsw_core,
+ &listeners[i],
+ priv);
+ if (err)
+ goto err_listener_register;
+ }
+ return 0;
+
+err_listener_register:
+ for (i--; i >= 0; i--) {
+ mlxsw_core_trap_unregister(mlxsw_core,
+ &listeners[i],
+ priv);
+ }
+ return err;
+}
+EXPORT_SYMBOL(mlxsw_core_traps_register);
+
+void mlxsw_core_traps_unregister(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_listener *listeners,
+ size_t listeners_count, void *priv)
+{
+ int i;
+
+ for (i = 0; i < listeners_count; i++) {
+ mlxsw_core_trap_unregister(mlxsw_core,
+ &listeners[i],
+ priv);
+ }
+}
+EXPORT_SYMBOL(mlxsw_core_traps_unregister);
+
int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core,
const struct mlxsw_listener *listener,
bool enabled)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index f30bb8614e69..6d304092f4e7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -163,6 +163,9 @@ struct mlxsw_listener {
.enabled_on_register = true, \
}
+#define MLXSW_CORE_EVENTL(_func, _trap_id) \
+ MLXSW_EVENTL(_func, _trap_id, CORE_EVENT)
+
int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core,
const struct mlxsw_rx_listener *rxl,
void *priv, bool enabled);
@@ -181,6 +184,12 @@ int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core,
void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core,
const struct mlxsw_listener *listener,
void *priv);
+int mlxsw_core_traps_register(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_listener *listeners,
+ size_t listeners_count, void *priv);
+void mlxsw_core_traps_unregister(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_listener *listeners,
+ size_t listeners_count, void *priv);
int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core,
const struct mlxsw_listener *listener,
bool enabled);
@@ -315,7 +324,6 @@ struct mlxsw_driver {
const struct mlxsw_bus_info *mlxsw_bus_info,
struct netlink_ext_ack *extack);
void (*fini)(struct mlxsw_core *mlxsw_core);
- int (*basic_trap_groups_set)(struct mlxsw_core *mlxsw_core);
int (*port_type_set)(struct mlxsw_core *mlxsw_core, u16 local_port,
enum devlink_port_type new_type);
int (*port_split)(struct mlxsw_core *mlxsw_core, u16 local_port,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index 77e82e6cf6e8..fa33caecc91d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -1957,6 +1957,83 @@ int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block,
}
EXPORT_SYMBOL(mlxsw_afa_block_append_mcrouter);
+/* SIP DIP Action
+ * --------------
+ * The SIP_DIP_ACTION is used for modifying the SIP and DIP fields of the
+ * packet, e.g. for NAT. The L3 checksum is updated. Also, if the L4 is TCP or
+ * if the L4 is UDP and the checksum field is not zero, then the L4 checksum is
+ * updated.
+ */
+
+#define MLXSW_AFA_IP_CODE 0x11
+#define MLXSW_AFA_IP_SIZE 2
+
+enum mlxsw_afa_ip_s_d {
+ /* ip refers to dip */
+ MLXSW_AFA_IP_S_D_DIP,
+ /* ip refers to sip */
+ MLXSW_AFA_IP_S_D_SIP,
+};
+
+/* afa_ip_s_d
+ * Source or destination.
+ */
+MLXSW_ITEM32(afa, ip, s_d, 0x00, 31, 1);
+
+enum mlxsw_afa_ip_m_l {
+ /* LSB: ip[63:0] refers to ip[63:0] */
+ MLXSW_AFA_IP_M_L_LSB,
+ /* MSB: ip[63:0] refers to ip[127:64] */
+ MLXSW_AFA_IP_M_L_MSB,
+};
+
+/* afa_ip_m_l
+ * MSB or LSB.
+ */
+MLXSW_ITEM32(afa, ip, m_l, 0x00, 30, 1);
+
+/* afa_ip_ip_63_32
+ * Bits [63:32] in the IP address to change to.
+ */
+MLXSW_ITEM32(afa, ip, ip_63_32, 0x08, 0, 32);
+
+/* afa_ip_ip_31_0
+ * Bits [31:0] in the IP address to change to.
+ */
+MLXSW_ITEM32(afa, ip, ip_31_0, 0x0C, 0, 32);
+
+static void mlxsw_afa_ip_pack(char *payload, enum mlxsw_afa_ip_s_d s_d,
+ enum mlxsw_afa_ip_m_l m_l, u32 ip_31_0,
+ u32 ip_63_32)
+{
+ mlxsw_afa_ip_s_d_set(payload, s_d);
+ mlxsw_afa_ip_m_l_set(payload, m_l);
+ mlxsw_afa_ip_ip_31_0_set(payload, ip_31_0);
+ mlxsw_afa_ip_ip_63_32_set(payload, ip_63_32);
+}
+
+int mlxsw_afa_block_append_ip(struct mlxsw_afa_block *block, bool is_dip,
+ bool is_lsb, u32 val_31_0, u32 val_63_32,
+ struct netlink_ext_ack *extack)
+{
+ enum mlxsw_afa_ip_s_d s_d = is_dip ? MLXSW_AFA_IP_S_D_DIP :
+ MLXSW_AFA_IP_S_D_SIP;
+ enum mlxsw_afa_ip_m_l m_l = is_lsb ? MLXSW_AFA_IP_M_L_LSB :
+ MLXSW_AFA_IP_M_L_MSB;
+ char *act = mlxsw_afa_block_append_action(block,
+ MLXSW_AFA_IP_CODE,
+ MLXSW_AFA_IP_SIZE);
+
+ if (IS_ERR(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot append IP action");
+ return PTR_ERR(act);
+ }
+
+ mlxsw_afa_ip_pack(act, s_d, m_l, val_31_0, val_63_32);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_ip);
+
/* L4 Port Action
* --------------
* The L4_PORT_ACTION is used for modifying the sport and dport fields of the packet, e.g. for NAT.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index 16cbd6acbb01..db58037be46e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -92,6 +92,9 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid,
int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block,
u16 expected_irif, u16 min_mtu,
bool rmid_valid, u32 kvdl_index);
+int mlxsw_afa_block_append_ip(struct mlxsw_afa_block *block, bool is_dip,
+ bool is_lsb, u32 val_31_0, u32 val_63_32,
+ struct netlink_ext_ack *extack);
int mlxsw_afa_block_append_l4port(struct mlxsw_afa_block *block, bool is_dport, u16 l4_port,
struct netlink_ext_ack *extack);
int mlxsw_afa_block_append_police(struct mlxsw_afa_block *block,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index 6dd4ae2f45f4..6ea4bf87be0b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -18,6 +18,7 @@ struct mlxsw_env_module_info {
int num_ports_mapped;
int num_ports_up;
enum ethtool_module_power_mode_policy power_mode_policy;
+ enum mlxsw_reg_pmtm_module_type type;
};
struct mlxsw_env {
@@ -27,14 +28,47 @@ struct mlxsw_env {
struct mlxsw_env_module_info module_info[];
};
-static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id,
- bool *qsfp, bool *cmis)
+static int __mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module)
+{
+ struct mlxsw_env *mlxsw_env = mlxsw_core_env(core);
+ int err;
+
+ switch (mlxsw_env->module_info[module].type) {
+ case MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR:
+ err = -EINVAL;
+ break;
+ default:
+ err = 0;
+ }
+
+ return err;
+}
+
+static int mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module)
+{
+ struct mlxsw_env *mlxsw_env = mlxsw_core_env(core);
+ int err;
+
+ mutex_lock(&mlxsw_env->module_info_lock);
+ err = __mlxsw_env_validate_module_type(core, module);
+ mutex_unlock(&mlxsw_env->module_info_lock);
+
+ return err;
+}
+
+static int
+mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, bool *qsfp,
+ bool *cmis)
{
char mcia_pl[MLXSW_REG_MCIA_LEN];
char *eeprom_tmp;
u8 ident;
int err;
+ err = mlxsw_env_validate_module_type(core, id);
+ if (err)
+ return err;
+
mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1,
MLXSW_REG_MCIA_I2C_ADDR_LOW);
err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl);
@@ -206,7 +240,8 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
return 0;
}
-int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
+int mlxsw_env_get_module_info(struct net_device *netdev,
+ struct mlxsw_core *mlxsw_core, int module,
struct ethtool_modinfo *modinfo)
{
u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE];
@@ -215,6 +250,13 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
unsigned int read_size;
int err;
+ err = mlxsw_env_validate_module_type(mlxsw_core, module);
+ if (err) {
+ netdev_err(netdev,
+ "EEPROM is not equipped on port module type");
+ return err;
+ }
+
err = mlxsw_env_query_module_eeprom(mlxsw_core, module, 0, offset,
module_info, false, &read_size);
if (err)
@@ -356,6 +398,13 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
{
u32 bytes_read = 0;
u16 device_addr;
+ int err;
+
+ err = mlxsw_env_validate_module_type(mlxsw_core, module);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "EEPROM is not equipped on port module type");
+ return err;
+ }
/* Offset cannot be larger than 2 * ETH_MODULE_EEPROM_PAGE_LEN */
device_addr = page->offset;
@@ -364,7 +413,6 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
char mcia_pl[MLXSW_REG_MCIA_LEN];
char *eeprom_tmp;
u8 size;
- int err;
size = min_t(u8, page->length - bytes_read,
MLXSW_REG_MCIA_EEPROM_SIZE);
@@ -419,6 +467,12 @@ int mlxsw_env_reset_module(struct net_device *netdev,
mutex_lock(&mlxsw_env->module_info_lock);
+ err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+ if (err) {
+ netdev_err(netdev, "Reset module is not supported on port module type\n");
+ goto out;
+ }
+
if (mlxsw_env->module_info[module].num_ports_up) {
netdev_err(netdev, "Cannot reset module when ports using it are administratively up\n");
err = -EINVAL;
@@ -461,6 +515,12 @@ mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module,
mutex_lock(&mlxsw_env->module_info_lock);
+ err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Power mode is not supported on port module type");
+ goto out;
+ }
+
params->policy = mlxsw_env->module_info[module].power_mode_policy;
mlxsw_reg_mcion_pack(mcion_pl, module);
@@ -571,6 +631,13 @@ mlxsw_env_set_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module,
mutex_lock(&mlxsw_env->module_info_lock);
+ err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Power mode set is not supported on port module type");
+ goto out;
+ }
+
if (mlxsw_env->module_info[module].power_mode_policy == policy)
goto out;
@@ -661,13 +728,12 @@ static int mlxsw_env_temp_event_set(struct mlxsw_core *mlxsw_core,
return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl);
}
-static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core,
- u8 module_count)
+static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core)
{
int i, err, sensor_index;
bool has_temp_sensor;
- for (i = 0; i < module_count; i++) {
+ for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) {
err = mlxsw_env_module_has_temp_sensor(mlxsw_core, i,
&has_temp_sensor);
if (err)
@@ -759,7 +825,7 @@ mlxsw_env_mtwe_listener_func(const struct mlxsw_reg_info *reg, char *mtwe_pl,
}
static const struct mlxsw_listener mlxsw_env_temp_warn_listener =
- MLXSW_EVENTL(mlxsw_env_mtwe_listener_func, MTWE, MTWE);
+ MLXSW_CORE_EVENTL(mlxsw_env_mtwe_listener_func, MTWE);
static int mlxsw_env_temp_warn_event_register(struct mlxsw_core *mlxsw_core)
{
@@ -849,7 +915,7 @@ mlxsw_env_pmpe_listener_func(const struct mlxsw_reg_info *reg, char *pmpe_pl,
}
static const struct mlxsw_listener mlxsw_env_module_plug_listener =
- MLXSW_EVENTL(mlxsw_env_pmpe_listener_func, PMPE, PMPE);
+ MLXSW_CORE_EVENTL(mlxsw_env_pmpe_listener_func, PMPE);
static int
mlxsw_env_module_plug_event_register(struct mlxsw_core *mlxsw_core)
@@ -876,12 +942,11 @@ mlxsw_env_module_plug_event_unregister(struct mlxsw_env *mlxsw_env)
}
static int
-mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core,
- u8 module_count)
+mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core)
{
int i, err;
- for (i = 0; i < module_count; i++) {
+ for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) {
char pmaos_pl[MLXSW_REG_PMAOS_LEN];
mlxsw_reg_pmaos_pack(pmaos_pl, i);
@@ -999,6 +1064,28 @@ out_unlock:
}
EXPORT_SYMBOL(mlxsw_env_module_port_down);
+static int
+mlxsw_env_module_type_set(struct mlxsw_core *mlxsw_core)
+{
+ struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core);
+ int i;
+
+ for (i = 0; i < mlxsw_env->module_count; i++) {
+ char pmtm_pl[MLXSW_REG_PMTM_LEN];
+ int err;
+
+ mlxsw_reg_pmtm_pack(pmtm_pl, 0, i);
+ err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl);
+ if (err)
+ return err;
+
+ mlxsw_env->module_info[i].type =
+ mlxsw_reg_pmtm_module_type_get(pmtm_pl);
+ }
+
+ return 0;
+}
+
int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env)
{
char mgpir_pl[MLXSW_REG_MGPIR_LEN];
@@ -1037,17 +1124,21 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env)
if (err)
goto err_module_plug_event_register;
- err = mlxsw_env_module_oper_state_event_enable(mlxsw_core,
- env->module_count);
+ err = mlxsw_env_module_oper_state_event_enable(mlxsw_core);
if (err)
goto err_oper_state_event_enable;
- err = mlxsw_env_module_temp_event_enable(mlxsw_core, env->module_count);
+ err = mlxsw_env_module_temp_event_enable(mlxsw_core);
if (err)
goto err_temp_event_enable;
+ err = mlxsw_env_module_type_set(mlxsw_core);
+ if (err)
+ goto err_type_set;
+
return 0;
+err_type_set:
err_temp_event_enable:
err_oper_state_event_enable:
mlxsw_env_module_plug_event_unregister(env);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h
index da121b1a84b4..ec6564e5d2ee 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h
@@ -12,7 +12,8 @@ struct ethtool_eeprom;
int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
int off, int *temp);
-int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
+int mlxsw_env_get_module_info(struct net_device *netdev,
+ struct mlxsw_core *mlxsw_core, int module,
struct ethtool_modinfo *modinfo);
int mlxsw_env_get_module_eeprom(struct net_device *netdev,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index 10d13f5f9c7d..9ac8ce01c061 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -110,7 +110,8 @@ static int mlxsw_m_get_module_info(struct net_device *netdev,
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_get_module_info(core, mlxsw_m_port->module, modinfo);
+ return mlxsw_env_get_module_info(netdev, core, mlxsw_m_port->module,
+ modinfo);
}
static int
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 24cc65018b41..eebd0479b2bc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4482,6 +4482,8 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32);
#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21)
#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 BIT(22)
#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4 BIT(23)
+#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T BIT(24)
+#define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T BIT(25)
#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR BIT(27)
#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR BIT(28)
#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR BIT(29)
@@ -6062,6 +6064,58 @@ static inline void mlxsw_reg_pllp_unpack(char *payload, u8 *label_port,
*slot_index = mlxsw_reg_pllp_slot_index_get(payload);
}
+/* PMTM - Port Module Type Mapping Register
+ * ----------------------------------------
+ * The PMTM register allows query or configuration of module types.
+ * The register can only be set when the module is disabled by PMAOS register
+ */
+#define MLXSW_REG_PMTM_ID 0x5067
+#define MLXSW_REG_PMTM_LEN 0x10
+
+MLXSW_REG_DEFINE(pmtm, MLXSW_REG_PMTM_ID, MLXSW_REG_PMTM_LEN);
+
+/* reg_pmtm_slot_index
+ * Slot index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtm, slot_index, 0x00, 24, 4);
+
+/* reg_pmtm_module
+ * Module number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtm, module, 0x00, 16, 8);
+
+enum mlxsw_reg_pmtm_module_type {
+ MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_4_LANES = 0,
+ MLXSW_REG_PMTM_MODULE_TYPE_QSFP = 1,
+ MLXSW_REG_PMTM_MODULE_TYPE_SFP = 2,
+ MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_SINGLE_LANE = 4,
+ MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_2_LANES = 8,
+ MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP4X = 10,
+ MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP2X = 11,
+ MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP1X = 12,
+ MLXSW_REG_PMTM_MODULE_TYPE_QSFP_DD = 14,
+ MLXSW_REG_PMTM_MODULE_TYPE_OSFP = 15,
+ MLXSW_REG_PMTM_MODULE_TYPE_SFP_DD = 16,
+ MLXSW_REG_PMTM_MODULE_TYPE_DSFP = 17,
+ MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP8X = 18,
+ MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR = 19,
+};
+
+/* reg_pmtm_module_type
+ * Module type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pmtm, module_type, 0x04, 0, 5);
+
+static inline void mlxsw_reg_pmtm_pack(char *payload, u8 slot_index, u8 module)
+{
+ MLXSW_REG_ZERO(pmtm, payload);
+ mlxsw_reg_pmtm_slot_index_set(payload, slot_index);
+ mlxsw_reg_pmtm_module_set(payload, module);
+}
+
/* HTGT - Host Trap Group Table
* ----------------------------
* Configures the properties for forwarding to CPU.
@@ -6087,9 +6141,7 @@ MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4);
enum mlxsw_reg_htgt_trap_group {
MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
- MLXSW_REG_HTGT_TRAP_GROUP_MFDE,
- MLXSW_REG_HTGT_TRAP_GROUP_MTWE,
- MLXSW_REG_HTGT_TRAP_GROUP_PMPE,
+ MLXSW_REG_HTGT_TRAP_GROUP_CORE_EVENT,
MLXSW_REG_HTGT_TRAP_GROUP_SP_STP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
@@ -12568,6 +12620,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(pddr),
MLXSW_REG(pmmp),
MLXSW_REG(pllp),
+ MLXSW_REG(pmtm),
MLXSW_REG(htgt),
MLXSW_REG(hpkt),
MLXSW_REG(rgcr),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
index c7fc650608eb..daacf6291253 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/resources.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -33,6 +33,7 @@ enum mlxsw_res_id {
MLXSW_RES_ID_ACL_MAX_REGIONS,
MLXSW_RES_ID_ACL_MAX_GROUPS,
MLXSW_RES_ID_ACL_MAX_GROUP_SIZE,
+ MLXSW_RES_ID_ACL_MAX_DEFAULT_ACTIONS,
MLXSW_RES_ID_ACL_FLEX_KEYS,
MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE,
MLXSW_RES_ID_ACL_ACTIONS_PER_SET,
@@ -90,6 +91,7 @@ static u16 mlxsw_res_ids[] = {
[MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903,
[MLXSW_RES_ID_ACL_MAX_GROUPS] = 0x2904,
[MLXSW_RES_ID_ACL_MAX_GROUP_SIZE] = 0x2905,
+ [MLXSW_RES_ID_ACL_MAX_DEFAULT_ACTIONS] = 0x2908,
[MLXSW_RES_ID_ACL_FLEX_KEYS] = 0x2910,
[MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE] = 0x2911,
[MLXSW_RES_ID_ACL_ACTIONS_PER_SET] = 0x2912,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index aa411dec62f0..a4b94eecea98 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2148,13 +2148,11 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg,
struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_port *mlxsw_sp_port;
enum mlxsw_reg_pude_oper_status status;
- unsigned int max_ports;
u16 local_port;
- max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
local_port = mlxsw_reg_pude_local_port_get(pude_pl);
- if (WARN_ON_ONCE(!local_port || local_port >= max_ports))
+ if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port)))
return;
mlxsw_sp_port = mlxsw_sp->ports[local_port];
if (!mlxsw_sp_port)
@@ -2393,45 +2391,6 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
return 0;
}
-static int mlxsw_sp_traps_register(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_listener listeners[],
- size_t listeners_count)
-{
- int i;
- int err;
-
- for (i = 0; i < listeners_count; i++) {
- err = mlxsw_core_trap_register(mlxsw_sp->core,
- &listeners[i],
- mlxsw_sp);
- if (err)
- goto err_listener_register;
-
- }
- return 0;
-
-err_listener_register:
- for (i--; i >= 0; i--) {
- mlxsw_core_trap_unregister(mlxsw_sp->core,
- &listeners[i],
- mlxsw_sp);
- }
- return err;
-}
-
-static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_listener listeners[],
- size_t listeners_count)
-{
- int i;
-
- for (i = 0; i < listeners_count; i++) {
- mlxsw_core_trap_unregister(mlxsw_sp->core,
- &listeners[i],
- mlxsw_sp);
- }
-}
-
static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_sp_trap *trap;
@@ -2456,21 +2415,23 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
if (err)
goto err_trap_groups_set;
- err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener,
- ARRAY_SIZE(mlxsw_sp_listener));
+ err = mlxsw_core_traps_register(mlxsw_sp->core, mlxsw_sp_listener,
+ ARRAY_SIZE(mlxsw_sp_listener),
+ mlxsw_sp);
if (err)
goto err_traps_register;
- err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners,
- mlxsw_sp->listeners_count);
+ err = mlxsw_core_traps_register(mlxsw_sp->core, mlxsw_sp->listeners,
+ mlxsw_sp->listeners_count, mlxsw_sp);
if (err)
goto err_extra_traps_init;
return 0;
err_extra_traps_init:
- mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
- ARRAY_SIZE(mlxsw_sp_listener));
+ mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp_listener,
+ ARRAY_SIZE(mlxsw_sp_listener),
+ mlxsw_sp);
err_traps_register:
err_trap_groups_set:
err_cpu_policers_set:
@@ -2480,10 +2441,11 @@ err_cpu_policers_set:
static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
{
- mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp->listeners,
- mlxsw_sp->listeners_count);
- mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
- ARRAY_SIZE(mlxsw_sp_listener));
+ mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp->listeners,
+ mlxsw_sp->listeners_count,
+ mlxsw_sp);
+ mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp_listener,
+ ARRAY_SIZE(mlxsw_sp_listener), mlxsw_sp);
kfree(mlxsw_sp->trap);
}
@@ -2528,42 +2490,6 @@ static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp)
kfree(mlxsw_sp->lags);
}
-static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
-{
- char htgt_pl[MLXSW_REG_HTGT_LEN];
- int err;
-
- mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
- MLXSW_REG_HTGT_INVALID_POLICER,
- MLXSW_REG_HTGT_DEFAULT_PRIORITY,
- MLXSW_REG_HTGT_DEFAULT_TC);
- err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
- if (err)
- return err;
-
- mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MFDE,
- MLXSW_REG_HTGT_INVALID_POLICER,
- MLXSW_REG_HTGT_DEFAULT_PRIORITY,
- MLXSW_REG_HTGT_DEFAULT_TC);
- err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
- if (err)
- return err;
-
- mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MTWE,
- MLXSW_REG_HTGT_INVALID_POLICER,
- MLXSW_REG_HTGT_DEFAULT_PRIORITY,
- MLXSW_REG_HTGT_DEFAULT_TC);
- err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
- if (err)
- return err;
-
- mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_PMPE,
- MLXSW_REG_HTGT_INVALID_POLICER,
- MLXSW_REG_HTGT_DEFAULT_PRIORITY,
- MLXSW_REG_HTGT_DEFAULT_TC);
- return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
-}
-
static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = {
.clock_init = mlxsw_sp1_ptp_clock_init,
.clock_fini = mlxsw_sp1_ptp_clock_fini,
@@ -3677,7 +3603,6 @@ static struct mlxsw_driver mlxsw_sp1_driver = {
.fw_filename = MLXSW_SP1_FW_FILENAME,
.init = mlxsw_sp1_init,
.fini = mlxsw_sp_fini,
- .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set,
.port_split = mlxsw_sp_port_split,
.port_unsplit = mlxsw_sp_port_unsplit,
.sb_pool_get = mlxsw_sp_sb_pool_get,
@@ -3717,7 +3642,6 @@ static struct mlxsw_driver mlxsw_sp2_driver = {
.fw_filename = MLXSW_SP2_FW_FILENAME,
.init = mlxsw_sp2_init,
.fini = mlxsw_sp_fini,
- .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set,
.port_split = mlxsw_sp_port_split,
.port_unsplit = mlxsw_sp_port_unsplit,
.sb_pool_get = mlxsw_sp_sb_pool_get,
@@ -3758,7 +3682,6 @@ static struct mlxsw_driver mlxsw_sp3_driver = {
.fw_filename = MLXSW_SP3_FW_FILENAME,
.init = mlxsw_sp3_init,
.fini = mlxsw_sp_fini,
- .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set,
.port_split = mlxsw_sp_port_split,
.port_unsplit = mlxsw_sp_port_unsplit,
.sb_pool_get = mlxsw_sp_sb_pool_get,
@@ -3797,7 +3720,6 @@ static struct mlxsw_driver mlxsw_sp4_driver = {
.priv_size = sizeof(struct mlxsw_sp),
.init = mlxsw_sp4_init,
.fini = mlxsw_sp_fini,
- .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set,
.port_split = mlxsw_sp_port_split,
.port_unsplit = mlxsw_sp_port_unsplit,
.sb_pool_get = mlxsw_sp_sb_pool_get,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index bb2442e1f705..20588e699588 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -481,6 +481,13 @@ int
mlxsw_sp_port_vlan_classification_set(struct mlxsw_sp_port *mlxsw_sp_port,
bool is_8021ad_tagged,
bool is_8021q_tagged);
+static inline bool
+mlxsw_sp_local_port_is_valid(struct mlxsw_sp *mlxsw_sp, u16 local_port)
+{
+ unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+
+ return local_port < max_ports && local_port;
+}
/* spectrum_buffers.c */
struct mlxsw_sp_hdroom_prio {
@@ -813,6 +820,24 @@ int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core);
/* spectrum2_kvdl.c */
extern const struct mlxsw_sp_kvdl_ops mlxsw_sp2_kvdl_ops;
+enum mlxsw_sp_acl_mangle_field {
+ MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP_SPORT,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP4_SIP,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP4_DIP,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_2,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_4,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_2,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3,
+ MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_4,
+};
+
struct mlxsw_sp_acl_rule_info {
unsigned int priority;
struct mlxsw_afk_element_values values;
@@ -821,9 +846,14 @@ struct mlxsw_sp_acl_rule_info {
ingress_bind_blocker:1,
egress_bind_blocker:1,
counter_valid:1,
- policer_index_valid:1;
+ policer_index_valid:1,
+ ipv6_valid:1;
unsigned int counter_index;
u16 policer_index;
+ struct {
+ u32 prev_val;
+ enum mlxsw_sp_acl_mangle_field prev_field;
+ } ipv6;
};
/* spectrum_flow.c */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
index a9fff8adc75e..d20e794e01ca 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
@@ -213,7 +213,6 @@ mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp1_kvdl_part *part;
bool need_update = true;
unsigned int nr_entries;
- size_t usage_size;
u64 resource_size;
int err;
@@ -225,8 +224,8 @@ mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
}
nr_entries = div_u64(resource_size, info->alloc_size);
- usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
- part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
+ part = kzalloc(struct_size(part, usage, BITS_TO_LONGS(nr_entries)),
+ GFP_KERNEL);
if (!part)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c
index ad69913f19c1..5b0210862655 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c
@@ -77,7 +77,14 @@ static int mlxsw_sp2_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv,
int i;
int err;
+ /* Some TCAM regions are not exposed to the host and used internally
+ * by the device. Allocate KVDL entries for the default actions of
+ * these regions to avoid the host from overwriting them.
+ */
tcam->kvdl_count = _tcam->max_regions;
+ if (MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_DEFAULT_ACTIONS))
+ tcam->kvdl_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ ACL_MAX_DEFAULT_ACTIONS);
err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
tcam->kvdl_count, &tcam->kvdl_index);
if (err)
@@ -97,7 +104,10 @@ static int mlxsw_sp2_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv,
goto err_afa_block_continue;
enc_actions = mlxsw_afa_block_cur_set(afa_block);
- for (i = 0; i < tcam->kvdl_count; i++) {
+ /* Only write to KVDL entries used by TCAM regions exposed to the
+ * host.
+ */
+ for (i = 0; i < _tcam->max_regions; i++) {
mlxsw_reg_pefa_pack(pefa_pl, tcam->kvdl_index + i,
true, enc_actions);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 70c11bfac08f..6c5af018546f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -505,14 +505,6 @@ int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp,
extack);
}
-enum mlxsw_sp_acl_mangle_field {
- MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD,
- MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP,
- MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN,
- MLXSW_SP_ACL_MANGLE_FIELD_IP_SPORT,
- MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT,
-};
-
struct mlxsw_sp_acl_mangle_action {
enum flow_action_mangle_base htype;
/* Offset is u32-aligned. */
@@ -561,6 +553,18 @@ static struct mlxsw_sp_acl_mangle_action mlxsw_sp_acl_mangle_actions[] = {
MLXSW_SP_ACL_MANGLE_ACTION_UDP(0, 0x0000ffff, 16, IP_SPORT),
MLXSW_SP_ACL_MANGLE_ACTION_UDP(0, 0xffff0000, 0, IP_DPORT),
+
+ MLXSW_SP_ACL_MANGLE_ACTION_IP4(12, 0x00000000, 0, IP4_SIP),
+ MLXSW_SP_ACL_MANGLE_ACTION_IP4(16, 0x00000000, 0, IP4_DIP),
+
+ MLXSW_SP_ACL_MANGLE_ACTION_IP6(8, 0x00000000, 0, IP6_SIP_1),
+ MLXSW_SP_ACL_MANGLE_ACTION_IP6(12, 0x00000000, 0, IP6_SIP_2),
+ MLXSW_SP_ACL_MANGLE_ACTION_IP6(16, 0x00000000, 0, IP6_SIP_3),
+ MLXSW_SP_ACL_MANGLE_ACTION_IP6(20, 0x00000000, 0, IP6_SIP_4),
+ MLXSW_SP_ACL_MANGLE_ACTION_IP6(24, 0x00000000, 0, IP6_DIP_1),
+ MLXSW_SP_ACL_MANGLE_ACTION_IP6(28, 0x00000000, 0, IP6_DIP_2),
+ MLXSW_SP_ACL_MANGLE_ACTION_IP6(32, 0x00000000, 0, IP6_DIP_3),
+ MLXSW_SP_ACL_MANGLE_ACTION_IP6(36, 0x00000000, 0, IP6_DIP_4),
};
static int
@@ -599,6 +603,22 @@ static int mlxsw_sp1_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp,
return err;
}
+static int
+mlxsw_sp2_acl_rulei_act_mangle_field_ip_odd(struct mlxsw_sp_acl_rule_info *rulei,
+ enum mlxsw_sp_acl_mangle_field field,
+ u32 val, struct netlink_ext_ack *extack)
+{
+ if (!rulei->ipv6_valid) {
+ rulei->ipv6.prev_val = val;
+ rulei->ipv6_valid = true;
+ rulei->ipv6.prev_field = field;
+ return 0;
+ }
+
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field order");
+ return -EOPNOTSUPP;
+}
+
static int mlxsw_sp2_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_rule_info *rulei,
struct mlxsw_sp_acl_mangle_action *mact,
@@ -615,6 +635,61 @@ static int mlxsw_sp2_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp,
return mlxsw_afa_block_append_l4port(rulei->act_block, false, val, extack);
case MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT:
return mlxsw_afa_block_append_l4port(rulei->act_block, true, val, extack);
+ /* IPv4 fields */
+ case MLXSW_SP_ACL_MANGLE_FIELD_IP4_SIP:
+ return mlxsw_afa_block_append_ip(rulei->act_block, false,
+ true, val, 0, extack);
+ case MLXSW_SP_ACL_MANGLE_FIELD_IP4_DIP:
+ return mlxsw_afa_block_append_ip(rulei->act_block, true,
+ true, val, 0, extack);
+ /* IPv6 fields */
+ case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1:
+ case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3:
+ case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1:
+ case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3:
+ return mlxsw_sp2_acl_rulei_act_mangle_field_ip_odd(rulei,
+ mact->field,
+ val, extack);
+ case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_2:
+ if (rulei->ipv6_valid &&
+ rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1) {
+ rulei->ipv6_valid = false;
+ return mlxsw_afa_block_append_ip(rulei->act_block,
+ false, false, val,
+ rulei->ipv6.prev_val,
+ extack);
+ }
+ break;
+ case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_4:
+ if (rulei->ipv6_valid &&
+ rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3) {
+ rulei->ipv6_valid = false;
+ return mlxsw_afa_block_append_ip(rulei->act_block,
+ false, true, val,
+ rulei->ipv6.prev_val,
+ extack);
+ }
+ break;
+ case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_2:
+ if (rulei->ipv6_valid &&
+ rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1) {
+ rulei->ipv6_valid = false;
+ return mlxsw_afa_block_append_ip(rulei->act_block,
+ true, false, val,
+ rulei->ipv6.prev_val,
+ extack);
+ }
+ break;
+ case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_4:
+ if (rulei->ipv6_valid &&
+ rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3) {
+ rulei->ipv6_valid = false;
+ return mlxsw_afa_block_append_ip(rulei->act_block,
+ true, true, val,
+ rulei->ipv6.prev_val,
+ extack);
+ }
+ break;
default:
break;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
index 20530712eadb..8b5d7f83b9b0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
@@ -1034,13 +1034,10 @@ static int mlxsw_sp_get_module_info(struct net_device *netdev,
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
- int err;
-
- err = mlxsw_env_get_module_info(mlxsw_sp->core,
- mlxsw_sp_port->mapping.module,
- modinfo);
- return err;
+ return mlxsw_env_get_module_info(netdev, mlxsw_sp->core,
+ mlxsw_sp_port->mapping.module,
+ modinfo);
}
static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
@@ -1048,13 +1045,10 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
- int err;
-
- err = mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core,
- mlxsw_sp_port->mapping.module, ee,
- data);
- return err;
+ return mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core,
+ mlxsw_sp_port->mapping.module, ee,
+ data);
}
static int
@@ -1273,12 +1267,22 @@ struct mlxsw_sp1_port_link_mode {
static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = {
{
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T,
+ .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+ .speed = SPEED_100,
+ },
+ {
.mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII |
MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX,
.mask_ethtool = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
.speed = SPEED_1000,
},
{
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T,
+ .mask_ethtool = ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ .speed = SPEED_1000,
+ },
+ {
.mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 |
MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4,
.mask_ethtool = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index bb417db773b9..f54af3d9a03b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -233,6 +233,12 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
return -EOPNOTSUPP;
}
}
+
+ if (rulei->ipv6_valid) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field");
+ return -EOPNOTSUPP;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index 0ff163fbc775..35422e64d89f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -568,12 +568,11 @@ void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress,
u8 domain_number, u16 sequence_id,
u64 timestamp)
{
- unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
struct mlxsw_sp_port *mlxsw_sp_port;
struct mlxsw_sp1_ptp_key key;
u8 types;
- if (WARN_ON_ONCE(local_port >= max_ports))
+ if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port)))
return;
mlxsw_sp_port = mlxsw_sp->ports[local_port];
if (!mlxsw_sp_port)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 65c1724c63b0..bffdb41fc4ed 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2616,7 +2616,6 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
char *sfn_pl, int rec_index,
bool adding)
{
- unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
struct mlxsw_sp_bridge_device *bridge_device;
struct mlxsw_sp_bridge_port *bridge_port;
@@ -2630,7 +2629,7 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &fid, &local_port);
- if (WARN_ON_ONCE(local_port >= max_ports))
+ if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port)))
return;
mlxsw_sp_port = mlxsw_sp->ports[local_port];
if (!mlxsw_sp_port) {
diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c
index 91a755efe2e6..5f1e7b8bad4f 100644
--- a/drivers/net/ethernet/microchip/lan743x_ethtool.c
+++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c
@@ -750,7 +750,7 @@ static int lan743x_ethtool_set_eee(struct net_device *netdev,
}
if (eee->eee_enabled) {
- ret = phy_init_eee(phydev, 0);
+ ret = phy_init_eee(phydev, false);
if (ret) {
netif_err(adapter, drv, adapter->netdev,
"EEE initialization failed\n");
diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile
index 040cfff9f577..a9ffc719aa0e 100644
--- a/drivers/net/ethernet/microchip/lan966x/Makefile
+++ b/drivers/net/ethernet/microchip/lan966x/Makefile
@@ -7,4 +7,5 @@ obj-$(CONFIG_LAN966X_SWITCH) += lan966x-switch.o
lan966x-switch-objs := lan966x_main.o lan966x_phylink.o lan966x_port.o \
lan966x_mac.o lan966x_ethtool.o lan966x_switchdev.o \
- lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o
+ lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o \
+ lan966x_ptp.o
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c
index 614f12c2fe6a..e58a27fd8b50 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c
@@ -545,6 +545,39 @@ static int lan966x_set_pauseparam(struct net_device *dev,
return phylink_ethtool_set_pauseparam(port->phylink, pause);
}
+static int lan966x_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *info)
+{
+ struct lan966x_port *port = netdev_priv(dev);
+ struct lan966x *lan966x = port->lan966x;
+ struct lan966x_phc *phc;
+
+ if (!lan966x->ptp)
+ return ethtool_op_get_ts_info(dev, info);
+
+ phc = &lan966x->phc[LAN966X_PHC_PORT];
+
+ info->phc_index = phc->clock ? ptp_clock_index(phc->clock) : -1;
+ if (info->phc_index == -1) {
+ info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE;
+ return 0;
+ }
+ info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+ info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) |
+ BIT(HWTSTAMP_TX_ONESTEP_SYNC);
+ info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+ BIT(HWTSTAMP_FILTER_ALL);
+
+ return 0;
+}
+
const struct ethtool_ops lan966x_ethtool_ops = {
.get_link_ksettings = lan966x_get_link_ksettings,
.set_link_ksettings = lan966x_set_link_ksettings,
@@ -556,6 +589,7 @@ const struct ethtool_ops lan966x_ethtool_ops = {
.get_eth_mac_stats = lan966x_get_eth_mac_stats,
.get_rmon_stats = lan966x_get_eth_rmon_stats,
.get_link = ethtool_op_get_link,
+ .get_ts_info = lan966x_get_ts_info,
};
static void lan966x_check_stats_work(struct work_struct *work)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 1f60fd125a1d..d62484f14564 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -4,11 +4,13 @@
#include <linux/if_bridge.h>
#include <linux/if_vlan.h>
#include <linux/iopoll.h>
+#include <linux/ip.h>
#include <linux/of_platform.h>
#include <linux/of_net.h>
#include <linux/packing.h>
#include <linux/phy/phy.h>
#include <linux/reset.h>
+#include <net/addrconf.h>
#include "lan966x_main.h"
@@ -44,6 +46,7 @@ static const struct lan966x_main_io_resource lan966x_main_iomap[] = {
{ TARGET_ORG, 0, 1 }, /* 0xe2000000 */
{ TARGET_GCB, 0x4000, 1 }, /* 0xe2004000 */
{ TARGET_QS, 0x8000, 1 }, /* 0xe2008000 */
+ { TARGET_PTP, 0xc000, 1 }, /* 0xe200c000 */
{ TARGET_CHIP_TOP, 0x10000, 1 }, /* 0xe2010000 */
{ TARGET_REW, 0x14000, 1 }, /* 0xe2014000 */
{ TARGET_SYS, 0x28000, 1 }, /* 0xe2028000 */
@@ -201,7 +204,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb,
val = lan_rd(lan966x, QS_INJ_STATUS);
if (!(QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp)) ||
(QS_INJ_STATUS_WMARK_REACHED_GET(val) & BIT(grp)))
- return NETDEV_TX_BUSY;
+ goto err;
/* Write start of frame */
lan_wr(QS_INJ_CTRL_GAP_SIZE_SET(1) |
@@ -213,7 +216,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb,
/* Wait until the fifo is ready */
err = lan966x_port_inj_ready(lan966x, grp);
if (err)
- return NETDEV_TX_BUSY;
+ goto err;
lan_wr((__force u32)ifh[i], lan966x, QS_INJ_WR(grp));
}
@@ -225,7 +228,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb,
/* Wait until the fifo is ready */
err = lan966x_port_inj_ready(lan966x, grp);
if (err)
- return NETDEV_TX_BUSY;
+ goto err;
lan_wr(((u32 *)skb->data)[i], lan966x, QS_INJ_WR(grp));
}
@@ -235,7 +238,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb,
/* Wait until the fifo is ready */
err = lan966x_port_inj_ready(lan966x, grp);
if (err)
- return NETDEV_TX_BUSY;
+ goto err;
lan_wr(0, lan966x, QS_INJ_WR(grp));
++i;
@@ -255,8 +258,19 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb,
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+ LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
+ return NETDEV_TX_OK;
+
dev_consume_skb_any(skb);
return NETDEV_TX_OK;
+
+err:
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+ LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
+ lan966x_ptp_txtstamp_release(port, skb);
+
+ return NETDEV_TX_BUSY;
}
static void lan966x_ifh_set_bypass(void *ifh, u64 bypass)
@@ -289,10 +303,23 @@ static void lan966x_ifh_set_vid(void *ifh, u64 vid)
IFH_POS_TCI, IFH_LEN * 4, PACK, 0);
}
+static void lan966x_ifh_set_rew_op(void *ifh, u64 rew_op)
+{
+ packing(ifh, &rew_op, IFH_POS_REW_CMD + IFH_WID_REW_CMD - 1,
+ IFH_POS_REW_CMD, IFH_LEN * 4, PACK, 0);
+}
+
+static void lan966x_ifh_set_timestamp(void *ifh, u64 timestamp)
+{
+ packing(ifh, &timestamp, IFH_POS_TIMESTAMP + IFH_WID_TIMESTAMP - 1,
+ IFH_POS_TIMESTAMP, IFH_LEN * 4, PACK, 0);
+}
+
static int lan966x_port_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct lan966x_port *port = netdev_priv(dev);
__be32 ifh[IFH_LEN];
+ int err;
memset(ifh, 0x0, sizeof(__be32) * IFH_LEN);
@@ -302,6 +329,15 @@ static int lan966x_port_xmit(struct sk_buff *skb, struct net_device *dev)
lan966x_ifh_set_ipv(ifh, skb->priority >= 7 ? 0x7 : skb->priority);
lan966x_ifh_set_vid(ifh, skb_vlan_tag_get(skb));
+ if (port->lan966x->ptp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
+ err = lan966x_ptp_txtstamp_request(port, skb);
+ if (err)
+ return err;
+
+ lan966x_ifh_set_rew_op(ifh, LAN966X_SKB_CB(skb)->rew_op);
+ lan966x_ifh_set_timestamp(ifh, LAN966X_SKB_CB(skb)->ts_id);
+ }
+
return lan966x_port_ifh_xmit(skb, ifh, dev);
}
@@ -350,6 +386,23 @@ static int lan966x_port_get_parent_id(struct net_device *dev,
return 0;
}
+static int lan966x_port_ioctl(struct net_device *dev, struct ifreq *ifr,
+ int cmd)
+{
+ struct lan966x_port *port = netdev_priv(dev);
+
+ if (!phy_has_hwtstamp(dev->phydev) && port->lan966x->ptp) {
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return lan966x_ptp_hwtstamp_set(port, ifr);
+ case SIOCGHWTSTAMP:
+ return lan966x_ptp_hwtstamp_get(port, ifr);
+ }
+ }
+
+ return phy_mii_ioctl(dev->phydev, ifr, cmd);
+}
+
static const struct net_device_ops lan966x_port_netdev_ops = {
.ndo_open = lan966x_port_open,
.ndo_stop = lan966x_port_stop,
@@ -360,6 +413,7 @@ static const struct net_device_ops lan966x_port_netdev_ops = {
.ndo_get_stats64 = lan966x_stats_get,
.ndo_set_mac_address = lan966x_port_set_mac_address,
.ndo_get_port_parent_id = lan966x_port_get_parent_id,
+ .ndo_eth_ioctl = lan966x_port_ioctl,
};
bool lan966x_netdevice_check(const struct net_device *dev)
@@ -367,6 +421,32 @@ bool lan966x_netdevice_check(const struct net_device *dev)
return dev->netdev_ops == &lan966x_port_netdev_ops;
}
+static bool lan966x_hw_offload(struct lan966x *lan966x, u32 port,
+ struct sk_buff *skb)
+{
+ u32 val;
+
+ /* The IGMP and MLD frames are not forward by the HW if
+ * multicast snooping is enabled, therefor don't mark as
+ * offload to allow the SW to forward the frames accordingly.
+ */
+ val = lan_rd(lan966x, ANA_CPU_FWD_CFG(port));
+ if (!(val & (ANA_CPU_FWD_CFG_IGMP_REDIR_ENA |
+ ANA_CPU_FWD_CFG_MLD_REDIR_ENA)))
+ return true;
+
+ if (skb->protocol == htons(ETH_P_IP) &&
+ ip_hdr(skb)->protocol == IPPROTO_IGMP)
+ return false;
+
+ if (skb->protocol == htons(ETH_P_IPV6) &&
+ ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
+ !ipv6_mc_check_mld(skb))
+ return false;
+
+ return true;
+}
+
static int lan966x_port_xtr_status(struct lan966x *lan966x, u8 grp)
{
return lan_rd(lan966x, QS_XTR_RD(grp));
@@ -434,6 +514,12 @@ static void lan966x_ifh_get_len(void *ifh, u64 *len)
IFH_POS_LEN, IFH_LEN * 4, UNPACK, 0);
}
+static void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp)
+{
+ packing(ifh, timestamp, IFH_POS_TIMESTAMP + IFH_WID_TIMESTAMP - 1,
+ IFH_POS_TIMESTAMP, IFH_LEN * 4, UNPACK, 0);
+}
+
static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args)
{
struct lan966x *lan966x = args;
@@ -443,10 +529,10 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args)
return IRQ_NONE;
do {
+ u64 src_port, len, timestamp;
struct net_device *dev;
struct sk_buff *skb;
int sz = 0, buf_len;
- u64 src_port, len;
u32 ifh[IFH_LEN];
u32 *buf;
u32 val;
@@ -461,6 +547,7 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args)
lan966x_ifh_get_src_port(ifh, &src_port);
lan966x_ifh_get_len(ifh, &len);
+ lan966x_ifh_get_timestamp(ifh, &timestamp);
WARN_ON(src_port >= lan966x->num_phys_ports);
@@ -501,11 +588,17 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args)
*buf = val;
}
+ lan966x_ptp_rxtstamp(lan966x, skb, timestamp);
skb->protocol = eth_type_trans(skb, dev);
- if (lan966x->bridge_mask & BIT(src_port))
+ if (lan966x->bridge_mask & BIT(src_port)) {
skb->offload_fwd_mark = 1;
+ skb_reset_network_header(skb);
+ if (!lan966x_hw_offload(lan966x, src_port, skb))
+ skb->offload_fwd_mark = 0;
+ }
+
netif_rx_ni(skb);
dev->stats.rx_bytes += len;
dev->stats.rx_packets++;
@@ -628,7 +721,6 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p,
}
port->phylink = phylink;
- phylink_set_pcs(phylink, &port->phylink_pcs);
err = register_netdev(dev);
if (err) {
@@ -708,7 +800,7 @@ static void lan966x_init(struct lan966x *lan966x)
/* Setup flooding PGIDs */
lan_wr(ANA_FLOODING_IPMC_FLD_MC4_DATA_SET(PGID_MCIPV4) |
ANA_FLOODING_IPMC_FLD_MC4_CTRL_SET(PGID_MC) |
- ANA_FLOODING_IPMC_FLD_MC6_DATA_SET(PGID_MC) |
+ ANA_FLOODING_IPMC_FLD_MC6_DATA_SET(PGID_MCIPV6) |
ANA_FLOODING_IPMC_FLD_MC6_CTRL_SET(PGID_MC),
lan966x, ANA_FLOODING_IPMC);
@@ -770,6 +862,10 @@ static void lan966x_init(struct lan966x *lan966x)
ANA_PGID_PGID,
lan966x, ANA_PGID(PGID_MCIPV4));
+ lan_rmw(GENMASK(lan966x->num_phys_ports - 1, 0),
+ ANA_PGID_PGID,
+ lan966x, ANA_PGID(PGID_MCIPV6));
+
/* Unicast to all other ports */
lan_rmw(GENMASK(lan966x->num_phys_ports - 1, 0),
ANA_PGID_PGID,
@@ -897,6 +993,17 @@ static int lan966x_probe(struct platform_device *pdev)
return dev_err_probe(&pdev->dev, err, "Unable to use ana irq");
}
+ lan966x->ptp_irq = platform_get_irq_byname(pdev, "ptp");
+ if (lan966x->ptp_irq > 0) {
+ err = devm_request_threaded_irq(&pdev->dev, lan966x->ptp_irq, NULL,
+ lan966x_ptp_irq_handler, IRQF_ONESHOT,
+ "ptp irq", lan966x);
+ if (err)
+ return dev_err_probe(&pdev->dev, err, "Unable to use ptp irq");
+
+ lan966x->ptp = 1;
+ }
+
/* init switch */
lan966x_init(lan966x);
lan966x_stats_init(lan966x);
@@ -931,8 +1038,15 @@ static int lan966x_probe(struct platform_device *pdev)
if (err)
goto cleanup_ports;
+ err = lan966x_ptp_init(lan966x);
+ if (err)
+ goto cleanup_fdb;
+
return 0;
+cleanup_fdb:
+ lan966x_fdb_deinit(lan966x);
+
cleanup_ports:
fwnode_handle_put(portnp);
@@ -958,6 +1072,7 @@ static int lan966x_remove(struct platform_device *pdev)
lan966x_mac_purge_entries(lan966x);
lan966x_mdb_deinit(lan966x);
lan966x_fdb_deinit(lan966x);
+ lan966x_ptp_deinit(lan966x);
return 0;
}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 99c6d0a9f946..058e43531818 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -8,6 +8,7 @@
#include <linux/jiffies.h>
#include <linux/phy.h>
#include <linux/phylink.h>
+#include <linux/ptp_clock_kernel.h>
#include <net/switchdev.h>
#include "lan966x_regs.h"
@@ -50,6 +51,13 @@
#define LAN966X_SPEED_100 2
#define LAN966X_SPEED_10 3
+#define LAN966X_PHC_COUNT 3
+#define LAN966X_PHC_PORT 0
+
+#define IFH_REW_OP_NOOP 0x0
+#define IFH_REW_OP_ONE_STEP_PTP 0x3
+#define IFH_REW_OP_TWO_STEP_PTP 0x4
+
/* MAC table entry types.
* ENTRYTYPE_NORMAL is subject to aging.
* ENTRYTYPE_LOCKED is not subject to aging.
@@ -70,6 +78,24 @@ struct lan966x_stat_layout {
char name[ETH_GSTRING_LEN];
};
+struct lan966x_phc {
+ struct ptp_clock *clock;
+ struct ptp_clock_info info;
+ struct hwtstamp_config hwtstamp_config;
+ struct lan966x *lan966x;
+ u8 index;
+};
+
+struct lan966x_skb_cb {
+ u8 rew_op;
+ u16 ts_id;
+ unsigned long jiffies;
+};
+
+#define LAN966X_PTP_TIMEOUT msecs_to_jiffies(10)
+#define LAN966X_SKB_CB(skb) \
+ ((struct lan966x_skb_cb *)((skb)->cb))
+
struct lan966x {
struct device *dev;
@@ -105,6 +131,7 @@ struct lan966x {
/* interrupts */
int xtr_irq;
int ana_irq;
+ int ptp_irq;
/* worqueue for fdb */
struct workqueue_struct *fdb_work;
@@ -113,6 +140,14 @@ struct lan966x {
/* mdb */
struct list_head mdb_entries;
struct list_head pgid_entries;
+
+ /* ptp */
+ bool ptp;
+ struct lan966x_phc phc[LAN966X_PHC_COUNT];
+ spinlock_t ptp_clock_lock; /* lock for phc */
+ spinlock_t ptp_ts_id_lock; /* lock for ts_id */
+ struct mutex ptp_lock; /* lock for ptp interface state */
+ u16 ptp_skbs;
};
struct lan966x_port_config {
@@ -135,6 +170,7 @@ struct lan966x_port {
bool vlan_aware;
bool learn_ena;
+ bool mcast_ena;
struct phylink_config phylink_config;
struct phylink_pcs phylink_pcs;
@@ -142,6 +178,10 @@ struct lan966x_port {
struct phylink *phylink;
struct phy *serdes;
struct fwnode_handle *fwnode;
+
+ u8 ptp_cmd;
+ u16 ts_id;
+ struct sk_buff_head tx_skbs;
};
extern const struct phylink_mac_ops lan966x_phylink_mac_ops;
@@ -227,6 +267,20 @@ int lan966x_handle_port_mdb_del(struct lan966x_port *port,
const struct switchdev_obj *obj);
void lan966x_mdb_erase_entries(struct lan966x *lan966x, u16 vid);
void lan966x_mdb_write_entries(struct lan966x *lan966x, u16 vid);
+void lan966x_mdb_clear_entries(struct lan966x *lan966x);
+void lan966x_mdb_restore_entries(struct lan966x *lan966x);
+
+int lan966x_ptp_init(struct lan966x *lan966x);
+void lan966x_ptp_deinit(struct lan966x *lan966x);
+int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr);
+int lan966x_ptp_hwtstamp_get(struct lan966x_port *port, struct ifreq *ifr);
+void lan966x_ptp_rxtstamp(struct lan966x *lan966x, struct sk_buff *skb,
+ u64 timestamp);
+int lan966x_ptp_txtstamp_request(struct lan966x_port *port,
+ struct sk_buff *skb);
+void lan966x_ptp_txtstamp_release(struct lan966x_port *port,
+ struct sk_buff *skb);
+irqreturn_t lan966x_ptp_irq_handler(int irq, void *args);
static inline void __iomem *lan_addr(void __iomem *base[],
int id, int tinst, int tcnt,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c
index c68d0a99d292..2af55268bf4d 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c
@@ -504,3 +504,48 @@ void lan966x_mdb_erase_entries(struct lan966x *lan966x, u16 vid)
lan966x_mdb_l2_cpu_remove(lan966x, mdb_entry, type);
}
}
+
+void lan966x_mdb_clear_entries(struct lan966x *lan966x)
+{
+ struct lan966x_mdb_entry *mdb_entry;
+ enum macaccess_entry_type type;
+ unsigned char mac[ETH_ALEN];
+
+ list_for_each_entry(mdb_entry, &lan966x->mdb_entries, list) {
+ type = lan966x_mdb_classify(mdb_entry->mac);
+
+ lan966x_mdb_encode_mac(mac, mdb_entry, type);
+ /* Remove just the MAC entry, still keep the PGID in case of L2
+ * entries because this can be restored at later point
+ */
+ lan966x_mac_forget(lan966x, mac, mdb_entry->vid, type);
+ }
+}
+
+void lan966x_mdb_restore_entries(struct lan966x *lan966x)
+{
+ struct lan966x_mdb_entry *mdb_entry;
+ enum macaccess_entry_type type;
+ unsigned char mac[ETH_ALEN];
+ bool cpu_copy = false;
+
+ list_for_each_entry(mdb_entry, &lan966x->mdb_entries, list) {
+ type = lan966x_mdb_classify(mdb_entry->mac);
+
+ lan966x_mdb_encode_mac(mac, mdb_entry, type);
+ if (type == ENTRYTYPE_MACV4 || type == ENTRYTYPE_MACV6) {
+ /* Copy the frame to CPU only if the CPU is in the VLAN */
+ if (lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
+ mdb_entry->vid) &&
+ mdb_entry->cpu_copy)
+ cpu_copy = true;
+
+ lan966x_mac_ip_learn(lan966x, cpu_copy, mac,
+ mdb_entry->vid, type);
+ } else {
+ lan966x_mac_learn(lan966x, mdb_entry->pgid->index,
+ mdb_entry->mac,
+ mdb_entry->vid, type);
+ }
+ }
+}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c b/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c
index b66a9aa00ea4..38a7e95d69b4 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c
@@ -9,6 +9,14 @@
#include "lan966x_main.h"
+static struct phylink_pcs *lan966x_phylink_mac_select(struct phylink_config *config,
+ phy_interface_t interface)
+{
+ struct lan966x_port *port = netdev_priv(to_net_dev(config->dev));
+
+ return &port->phylink_pcs;
+}
+
static void lan966x_phylink_mac_config(struct phylink_config *config,
unsigned int mode,
const struct phylink_link_state *state)
@@ -114,6 +122,7 @@ static void lan966x_pcs_aneg_restart(struct phylink_pcs *pcs)
const struct phylink_mac_ops lan966x_phylink_mac_ops = {
.validate = phylink_generic_validate,
+ .mac_select_pcs = lan966x_phylink_mac_select,
.mac_config = lan966x_phylink_mac_config,
.mac_prepare = lan966x_phylink_mac_prepare,
.mac_link_down = lan966x_phylink_mac_link_down,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
new file mode 100644
index 000000000000..ae782778d6dd
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/ptp_classify.h>
+
+#include "lan966x_main.h"
+
+#define LAN966X_MAX_PTP_ID 512
+
+/* Represents 1ppm adjustment in 2^59 format with 6.037735849ns as reference
+ * The value is calculated as following: (1/1000000)/((2^-59)/6.037735849)
+ */
+#define LAN966X_1PPM_FORMAT 3480517749723LL
+
+/* Represents 1ppb adjustment in 2^29 format with 6.037735849ns as reference
+ * The value is calculated as following: (1/1000000000)/((2^59)/6.037735849)
+ */
+#define LAN966X_1PPB_FORMAT 3480517749LL
+
+#define TOD_ACC_PIN 0x5
+
+enum {
+ PTP_PIN_ACTION_IDLE = 0,
+ PTP_PIN_ACTION_LOAD,
+ PTP_PIN_ACTION_SAVE,
+ PTP_PIN_ACTION_CLOCK,
+ PTP_PIN_ACTION_DELTA,
+ PTP_PIN_ACTION_TOD
+};
+
+static u64 lan966x_ptp_get_nominal_value(void)
+{
+ u64 res = 0x304d2df1;
+
+ res <<= 32;
+ return res;
+}
+
+int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr)
+{
+ struct lan966x *lan966x = port->lan966x;
+ struct hwtstamp_config cfg;
+ struct lan966x_phc *phc;
+
+ /* For now don't allow to run ptp on ports that are part of a bridge,
+ * because in case of transparent clock the HW will still forward the
+ * frames, so there would be duplicate frames
+ */
+ if (lan966x->bridge_mask & BIT(port->chip_port))
+ return -EINVAL;
+
+ if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+ return -EFAULT;
+
+ switch (cfg.tx_type) {
+ case HWTSTAMP_TX_ON:
+ port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP;
+ break;
+ case HWTSTAMP_TX_ONESTEP_SYNC:
+ port->ptp_cmd = IFH_REW_OP_ONE_STEP_PTP;
+ break;
+ case HWTSTAMP_TX_OFF:
+ port->ptp_cmd = IFH_REW_OP_NOOP;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ switch (cfg.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_NTP_ALL:
+ cfg.rx_filter = HWTSTAMP_FILTER_ALL;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /* Commit back the result & save it */
+ mutex_lock(&lan966x->ptp_lock);
+ phc = &lan966x->phc[LAN966X_PHC_PORT];
+ memcpy(&phc->hwtstamp_config, &cfg, sizeof(cfg));
+ mutex_unlock(&lan966x->ptp_lock);
+
+ return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+int lan966x_ptp_hwtstamp_get(struct lan966x_port *port, struct ifreq *ifr)
+{
+ struct lan966x *lan966x = port->lan966x;
+ struct lan966x_phc *phc;
+
+ phc = &lan966x->phc[LAN966X_PHC_PORT];
+ return copy_to_user(ifr->ifr_data, &phc->hwtstamp_config,
+ sizeof(phc->hwtstamp_config)) ? -EFAULT : 0;
+}
+
+static int lan966x_ptp_classify(struct lan966x_port *port, struct sk_buff *skb)
+{
+ struct ptp_header *header;
+ u8 msgtype;
+ int type;
+
+ if (port->ptp_cmd == IFH_REW_OP_NOOP)
+ return IFH_REW_OP_NOOP;
+
+ type = ptp_classify_raw(skb);
+ if (type == PTP_CLASS_NONE)
+ return IFH_REW_OP_NOOP;
+
+ header = ptp_parse_header(skb, type);
+ if (!header)
+ return IFH_REW_OP_NOOP;
+
+ if (port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
+ return IFH_REW_OP_TWO_STEP_PTP;
+
+ /* If it is sync and run 1 step then set the correct operation,
+ * otherwise run as 2 step
+ */
+ msgtype = ptp_get_msgtype(header, type);
+ if ((msgtype & 0xf) == 0)
+ return IFH_REW_OP_ONE_STEP_PTP;
+
+ return IFH_REW_OP_TWO_STEP_PTP;
+}
+
+static void lan966x_ptp_txtstamp_old_release(struct lan966x_port *port)
+{
+ struct sk_buff *skb, *skb_tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->tx_skbs.lock, flags);
+ skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) {
+ if time_after(LAN966X_SKB_CB(skb)->jiffies + LAN966X_PTP_TIMEOUT,
+ jiffies)
+ break;
+
+ __skb_unlink(skb, &port->tx_skbs);
+ dev_kfree_skb_any(skb);
+ }
+ spin_unlock_irqrestore(&port->tx_skbs.lock, flags);
+}
+
+int lan966x_ptp_txtstamp_request(struct lan966x_port *port,
+ struct sk_buff *skb)
+{
+ struct lan966x *lan966x = port->lan966x;
+ unsigned long flags;
+ u8 rew_op;
+
+ rew_op = lan966x_ptp_classify(port, skb);
+ LAN966X_SKB_CB(skb)->rew_op = rew_op;
+
+ if (rew_op != IFH_REW_OP_TWO_STEP_PTP)
+ return 0;
+
+ lan966x_ptp_txtstamp_old_release(port);
+
+ spin_lock_irqsave(&lan966x->ptp_ts_id_lock, flags);
+ if (lan966x->ptp_skbs == LAN966X_MAX_PTP_ID) {
+ spin_unlock_irqrestore(&lan966x->ptp_ts_id_lock, flags);
+ return -EBUSY;
+ }
+
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+ skb_queue_tail(&port->tx_skbs, skb);
+ LAN966X_SKB_CB(skb)->ts_id = port->ts_id;
+ LAN966X_SKB_CB(skb)->jiffies = jiffies;
+
+ lan966x->ptp_skbs++;
+ port->ts_id++;
+ if (port->ts_id == LAN966X_MAX_PTP_ID)
+ port->ts_id = 0;
+
+ spin_unlock_irqrestore(&lan966x->ptp_ts_id_lock, flags);
+
+ return 0;
+}
+
+void lan966x_ptp_txtstamp_release(struct lan966x_port *port,
+ struct sk_buff *skb)
+{
+ struct lan966x *lan966x = port->lan966x;
+ unsigned long flags;
+
+ spin_lock_irqsave(&lan966x->ptp_ts_id_lock, flags);
+ port->ts_id--;
+ lan966x->ptp_skbs--;
+ skb_unlink(skb, &port->tx_skbs);
+ spin_unlock_irqrestore(&lan966x->ptp_ts_id_lock, flags);
+}
+
+static void lan966x_get_hwtimestamp(struct lan966x *lan966x,
+ struct timespec64 *ts,
+ u32 nsec)
+{
+ /* Read current PTP time to get seconds */
+ unsigned long flags;
+ u32 curr_nsec;
+
+ spin_lock_irqsave(&lan966x->ptp_clock_lock, flags);
+
+ lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_SAVE) |
+ PTP_PIN_CFG_PIN_DOM_SET(LAN966X_PHC_PORT) |
+ PTP_PIN_CFG_PIN_SYNC_SET(0),
+ PTP_PIN_CFG_PIN_ACTION |
+ PTP_PIN_CFG_PIN_DOM |
+ PTP_PIN_CFG_PIN_SYNC,
+ lan966x, PTP_PIN_CFG(TOD_ACC_PIN));
+
+ ts->tv_sec = lan_rd(lan966x, PTP_TOD_SEC_LSB(TOD_ACC_PIN));
+ curr_nsec = lan_rd(lan966x, PTP_TOD_NSEC(TOD_ACC_PIN));
+
+ ts->tv_nsec = nsec;
+
+ /* Sec has incremented since the ts was registered */
+ if (curr_nsec < nsec)
+ ts->tv_sec--;
+
+ spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags);
+}
+
+irqreturn_t lan966x_ptp_irq_handler(int irq, void *args)
+{
+ int budget = LAN966X_MAX_PTP_ID;
+ struct lan966x *lan966x = args;
+
+ while (budget--) {
+ struct sk_buff *skb, *skb_tmp, *skb_match = NULL;
+ struct skb_shared_hwtstamps shhwtstamps;
+ struct lan966x_port *port;
+ struct timespec64 ts;
+ unsigned long flags;
+ u32 val, id, txport;
+ u32 delay;
+
+ val = lan_rd(lan966x, PTP_TWOSTEP_CTRL);
+
+ /* Check if a timestamp can be retrieved */
+ if (!(val & PTP_TWOSTEP_CTRL_VLD))
+ break;
+
+ WARN_ON(val & PTP_TWOSTEP_CTRL_OVFL);
+
+ if (!(val & PTP_TWOSTEP_CTRL_STAMP_TX))
+ continue;
+
+ /* Retrieve the ts Tx port */
+ txport = PTP_TWOSTEP_CTRL_STAMP_PORT_GET(val);
+
+ /* Retrieve its associated skb */
+ port = lan966x->ports[txport];
+
+ /* Retrieve the delay */
+ delay = lan_rd(lan966x, PTP_TWOSTEP_STAMP);
+ delay = PTP_TWOSTEP_STAMP_STAMP_NSEC_GET(delay);
+
+ /* Get next timestamp from fifo, which needs to be the
+ * rx timestamp which represents the id of the frame
+ */
+ lan_rmw(PTP_TWOSTEP_CTRL_NXT_SET(1),
+ PTP_TWOSTEP_CTRL_NXT,
+ lan966x, PTP_TWOSTEP_CTRL);
+
+ val = lan_rd(lan966x, PTP_TWOSTEP_CTRL);
+
+ /* Check if a timestamp can be retried */
+ if (!(val & PTP_TWOSTEP_CTRL_VLD))
+ break;
+
+ /* Read RX timestamping to get the ID */
+ id = lan_rd(lan966x, PTP_TWOSTEP_STAMP);
+
+ spin_lock_irqsave(&port->tx_skbs.lock, flags);
+ skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) {
+ if (LAN966X_SKB_CB(skb)->ts_id != id)
+ continue;
+
+ __skb_unlink(skb, &port->tx_skbs);
+ skb_match = skb;
+ break;
+ }
+ spin_unlock_irqrestore(&port->tx_skbs.lock, flags);
+
+ /* Next ts */
+ lan_rmw(PTP_TWOSTEP_CTRL_NXT_SET(1),
+ PTP_TWOSTEP_CTRL_NXT,
+ lan966x, PTP_TWOSTEP_CTRL);
+
+ if (WARN_ON(!skb_match))
+ continue;
+
+ spin_lock(&lan966x->ptp_ts_id_lock);
+ lan966x->ptp_skbs--;
+ spin_unlock(&lan966x->ptp_ts_id_lock);
+
+ /* Get the h/w timestamp */
+ lan966x_get_hwtimestamp(lan966x, &ts, delay);
+
+ /* Set the timestamp into the skb */
+ shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
+ skb_tstamp_tx(skb_match, &shhwtstamps);
+
+ dev_kfree_skb_any(skb_match);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int lan966x_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+ struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info);
+ struct lan966x *lan966x = phc->lan966x;
+ unsigned long flags;
+ bool neg_adj = 0;
+ u64 tod_inc;
+ u64 ref;
+
+ if (!scaled_ppm)
+ return 0;
+
+ if (scaled_ppm < 0) {
+ neg_adj = 1;
+ scaled_ppm = -scaled_ppm;
+ }
+
+ tod_inc = lan966x_ptp_get_nominal_value();
+
+ /* The multiplication is split in 2 separate additions because of
+ * overflow issues. If scaled_ppm with 16bit fractional part was bigger
+ * than 20ppm then we got overflow.
+ */
+ ref = LAN966X_1PPM_FORMAT * (scaled_ppm >> 16);
+ ref += (LAN966X_1PPM_FORMAT * (0xffff & scaled_ppm)) >> 16;
+ tod_inc = neg_adj ? tod_inc - ref : tod_inc + ref;
+
+ spin_lock_irqsave(&lan966x->ptp_clock_lock, flags);
+
+ lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(1 << BIT(phc->index)),
+ PTP_DOM_CFG_CLKCFG_DIS,
+ lan966x, PTP_DOM_CFG);
+
+ lan_wr((u32)tod_inc & 0xFFFFFFFF, lan966x,
+ PTP_CLK_PER_CFG(phc->index, 0));
+ lan_wr((u32)(tod_inc >> 32), lan966x,
+ PTP_CLK_PER_CFG(phc->index, 1));
+
+ lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(0),
+ PTP_DOM_CFG_CLKCFG_DIS,
+ lan966x, PTP_DOM_CFG);
+
+ spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags);
+
+ return 0;
+}
+
+static int lan966x_ptp_settime64(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info);
+ struct lan966x *lan966x = phc->lan966x;
+ unsigned long flags;
+
+ spin_lock_irqsave(&lan966x->ptp_clock_lock, flags);
+
+ /* Must be in IDLE mode before the time can be loaded */
+ lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_IDLE) |
+ PTP_PIN_CFG_PIN_DOM_SET(phc->index) |
+ PTP_PIN_CFG_PIN_SYNC_SET(0),
+ PTP_PIN_CFG_PIN_ACTION |
+ PTP_PIN_CFG_PIN_DOM |
+ PTP_PIN_CFG_PIN_SYNC,
+ lan966x, PTP_PIN_CFG(TOD_ACC_PIN));
+
+ /* Set new value */
+ lan_wr(PTP_TOD_SEC_MSB_TOD_SEC_MSB_SET(upper_32_bits(ts->tv_sec)),
+ lan966x, PTP_TOD_SEC_MSB(TOD_ACC_PIN));
+ lan_wr(lower_32_bits(ts->tv_sec),
+ lan966x, PTP_TOD_SEC_LSB(TOD_ACC_PIN));
+ lan_wr(ts->tv_nsec, lan966x, PTP_TOD_NSEC(TOD_ACC_PIN));
+
+ /* Apply new values */
+ lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_LOAD) |
+ PTP_PIN_CFG_PIN_DOM_SET(phc->index) |
+ PTP_PIN_CFG_PIN_SYNC_SET(0),
+ PTP_PIN_CFG_PIN_ACTION |
+ PTP_PIN_CFG_PIN_DOM |
+ PTP_PIN_CFG_PIN_SYNC,
+ lan966x, PTP_PIN_CFG(TOD_ACC_PIN));
+
+ spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags);
+
+ return 0;
+}
+
+static int lan966x_ptp_gettime64(struct ptp_clock_info *ptp,
+ struct timespec64 *ts)
+{
+ struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info);
+ struct lan966x *lan966x = phc->lan966x;
+ unsigned long flags;
+ time64_t s;
+ s64 ns;
+
+ spin_lock_irqsave(&lan966x->ptp_clock_lock, flags);
+
+ lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_SAVE) |
+ PTP_PIN_CFG_PIN_DOM_SET(phc->index) |
+ PTP_PIN_CFG_PIN_SYNC_SET(0),
+ PTP_PIN_CFG_PIN_ACTION |
+ PTP_PIN_CFG_PIN_DOM |
+ PTP_PIN_CFG_PIN_SYNC,
+ lan966x, PTP_PIN_CFG(TOD_ACC_PIN));
+
+ s = lan_rd(lan966x, PTP_TOD_SEC_MSB(TOD_ACC_PIN));
+ s <<= 32;
+ s |= lan_rd(lan966x, PTP_TOD_SEC_LSB(TOD_ACC_PIN));
+ ns = lan_rd(lan966x, PTP_TOD_NSEC(TOD_ACC_PIN));
+ ns &= PTP_TOD_NSEC_TOD_NSEC;
+
+ spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags);
+
+ /* Deal with negative values */
+ if ((ns & 0xFFFFFFF0) == 0x3FFFFFF0) {
+ s--;
+ ns &= 0xf;
+ ns += 999999984;
+ }
+
+ set_normalized_timespec64(ts, s, ns);
+ return 0;
+}
+
+static int lan966x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info);
+ struct lan966x *lan966x = phc->lan966x;
+
+ if (delta > -(NSEC_PER_SEC / 2) && delta < (NSEC_PER_SEC / 2)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&lan966x->ptp_clock_lock, flags);
+
+ /* Must be in IDLE mode before the time can be loaded */
+ lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_IDLE) |
+ PTP_PIN_CFG_PIN_DOM_SET(phc->index) |
+ PTP_PIN_CFG_PIN_SYNC_SET(0),
+ PTP_PIN_CFG_PIN_ACTION |
+ PTP_PIN_CFG_PIN_DOM |
+ PTP_PIN_CFG_PIN_SYNC,
+ lan966x, PTP_PIN_CFG(TOD_ACC_PIN));
+
+ lan_wr(PTP_TOD_NSEC_TOD_NSEC_SET(delta),
+ lan966x, PTP_TOD_NSEC(TOD_ACC_PIN));
+
+ /* Adjust time with the value of PTP_TOD_NSEC */
+ lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_DELTA) |
+ PTP_PIN_CFG_PIN_DOM_SET(phc->index) |
+ PTP_PIN_CFG_PIN_SYNC_SET(0),
+ PTP_PIN_CFG_PIN_ACTION |
+ PTP_PIN_CFG_PIN_DOM |
+ PTP_PIN_CFG_PIN_SYNC,
+ lan966x, PTP_PIN_CFG(TOD_ACC_PIN));
+
+ spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags);
+ } else {
+ /* Fall back using lan966x_ptp_settime64 which is not exact */
+ struct timespec64 ts;
+ u64 now;
+
+ lan966x_ptp_gettime64(ptp, &ts);
+
+ now = ktime_to_ns(timespec64_to_ktime(ts));
+ ts = ns_to_timespec64(now + delta);
+
+ lan966x_ptp_settime64(ptp, &ts);
+ }
+
+ return 0;
+}
+
+static struct ptp_clock_info lan966x_ptp_clock_info = {
+ .owner = THIS_MODULE,
+ .name = "lan966x ptp",
+ .max_adj = 200000,
+ .gettime64 = lan966x_ptp_gettime64,
+ .settime64 = lan966x_ptp_settime64,
+ .adjtime = lan966x_ptp_adjtime,
+ .adjfine = lan966x_ptp_adjfine,
+};
+
+static int lan966x_ptp_phc_init(struct lan966x *lan966x,
+ int index,
+ struct ptp_clock_info *clock_info)
+{
+ struct lan966x_phc *phc = &lan966x->phc[index];
+
+ phc->info = *clock_info;
+ phc->clock = ptp_clock_register(&phc->info, lan966x->dev);
+ if (IS_ERR(phc->clock))
+ return PTR_ERR(phc->clock);
+
+ phc->index = index;
+ phc->lan966x = lan966x;
+
+ /* PTP Rx stamping is always enabled. */
+ phc->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+
+ return 0;
+}
+
+int lan966x_ptp_init(struct lan966x *lan966x)
+{
+ u64 tod_adj = lan966x_ptp_get_nominal_value();
+ struct lan966x_port *port;
+ int err, i;
+
+ if (!lan966x->ptp)
+ return 0;
+
+ for (i = 0; i < LAN966X_PHC_COUNT; ++i) {
+ err = lan966x_ptp_phc_init(lan966x, i, &lan966x_ptp_clock_info);
+ if (err)
+ return err;
+ }
+
+ spin_lock_init(&lan966x->ptp_clock_lock);
+ spin_lock_init(&lan966x->ptp_ts_id_lock);
+ mutex_init(&lan966x->ptp_lock);
+
+ /* Disable master counters */
+ lan_wr(PTP_DOM_CFG_ENA_SET(0), lan966x, PTP_DOM_CFG);
+
+ /* Configure the nominal TOD increment per clock cycle */
+ lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(0x7),
+ PTP_DOM_CFG_CLKCFG_DIS,
+ lan966x, PTP_DOM_CFG);
+
+ for (i = 0; i < LAN966X_PHC_COUNT; ++i) {
+ lan_wr((u32)tod_adj & 0xFFFFFFFF, lan966x,
+ PTP_CLK_PER_CFG(i, 0));
+ lan_wr((u32)(tod_adj >> 32), lan966x,
+ PTP_CLK_PER_CFG(i, 1));
+ }
+
+ lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(0),
+ PTP_DOM_CFG_CLKCFG_DIS,
+ lan966x, PTP_DOM_CFG);
+
+ /* Enable master counters */
+ lan_wr(PTP_DOM_CFG_ENA_SET(0x7), lan966x, PTP_DOM_CFG);
+
+ for (i = 0; i < lan966x->num_phys_ports; i++) {
+ port = lan966x->ports[i];
+ if (!port)
+ continue;
+
+ skb_queue_head_init(&port->tx_skbs);
+ }
+
+ return 0;
+}
+
+void lan966x_ptp_deinit(struct lan966x *lan966x)
+{
+ struct lan966x_port *port;
+ int i;
+
+ for (i = 0; i < lan966x->num_phys_ports; i++) {
+ port = lan966x->ports[i];
+ if (!port)
+ continue;
+
+ skb_queue_purge(&port->tx_skbs);
+ }
+
+ for (i = 0; i < LAN966X_PHC_COUNT; ++i)
+ ptp_clock_unregister(lan966x->phc[i].clock);
+}
+
+void lan966x_ptp_rxtstamp(struct lan966x *lan966x, struct sk_buff *skb,
+ u64 timestamp)
+{
+ struct skb_shared_hwtstamps *shhwtstamps;
+ struct lan966x_phc *phc;
+ struct timespec64 ts;
+ u64 full_ts_in_ns;
+
+ if (!lan966x->ptp)
+ return;
+
+ phc = &lan966x->phc[LAN966X_PHC_PORT];
+ lan966x_ptp_gettime64(&phc->info, &ts);
+
+ /* Drop the sub-ns precision */
+ timestamp = timestamp >> 2;
+ if (ts.tv_nsec < timestamp)
+ ts.tv_sec--;
+ ts.tv_nsec = timestamp;
+ full_ts_in_ns = ktime_set(ts.tv_sec, ts.tv_nsec);
+
+ shhwtstamps = skb_hwtstamps(skb);
+ shhwtstamps->hwtstamp = full_ts_in_ns;
+}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
index 797560172aca..0c0b3e173d53 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
@@ -19,6 +19,7 @@ enum lan966x_target {
TARGET_DEV = 13,
TARGET_GCB = 27,
TARGET_ORG = 36,
+ TARGET_PTP = 41,
TARGET_QS = 42,
TARGET_QSYS = 46,
TARGET_REW = 47,
@@ -298,6 +299,24 @@ enum lan966x_target {
/* ANA:PORT:CPU_FWD_CFG */
#define ANA_CPU_FWD_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 96, 0, 1, 4)
+#define ANA_CPU_FWD_CFG_MLD_REDIR_ENA BIT(6)
+#define ANA_CPU_FWD_CFG_MLD_REDIR_ENA_SET(x)\
+ FIELD_PREP(ANA_CPU_FWD_CFG_MLD_REDIR_ENA, x)
+#define ANA_CPU_FWD_CFG_MLD_REDIR_ENA_GET(x)\
+ FIELD_GET(ANA_CPU_FWD_CFG_MLD_REDIR_ENA, x)
+
+#define ANA_CPU_FWD_CFG_IGMP_REDIR_ENA BIT(5)
+#define ANA_CPU_FWD_CFG_IGMP_REDIR_ENA_SET(x)\
+ FIELD_PREP(ANA_CPU_FWD_CFG_IGMP_REDIR_ENA, x)
+#define ANA_CPU_FWD_CFG_IGMP_REDIR_ENA_GET(x)\
+ FIELD_GET(ANA_CPU_FWD_CFG_IGMP_REDIR_ENA, x)
+
+#define ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA BIT(4)
+#define ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA_SET(x)\
+ FIELD_PREP(ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA, x)
+#define ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA_GET(x)\
+ FIELD_GET(ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA, x)
+
#define ANA_CPU_FWD_CFG_SRC_COPY_ENA BIT(3)
#define ANA_CPU_FWD_CFG_SRC_COPY_ENA_SET(x)\
FIELD_PREP(ANA_CPU_FWD_CFG_SRC_COPY_ENA, x)
@@ -559,6 +578,108 @@ enum lan966x_target {
#define DEV_PCS1G_STICKY_LINK_DOWN_STICKY_GET(x)\
FIELD_GET(DEV_PCS1G_STICKY_LINK_DOWN_STICKY, x)
+/* PTP:PTP_CFG:PTP_DOM_CFG */
+#define PTP_DOM_CFG __REG(TARGET_PTP, 0, 1, 512, 0, 1, 16, 12, 0, 1, 4)
+
+#define PTP_DOM_CFG_ENA GENMASK(11, 9)
+#define PTP_DOM_CFG_ENA_SET(x)\
+ FIELD_PREP(PTP_DOM_CFG_ENA, x)
+#define PTP_DOM_CFG_ENA_GET(x)\
+ FIELD_GET(PTP_DOM_CFG_ENA, x)
+
+#define PTP_DOM_CFG_CLKCFG_DIS GENMASK(2, 0)
+#define PTP_DOM_CFG_CLKCFG_DIS_SET(x)\
+ FIELD_PREP(PTP_DOM_CFG_CLKCFG_DIS, x)
+#define PTP_DOM_CFG_CLKCFG_DIS_GET(x)\
+ FIELD_GET(PTP_DOM_CFG_CLKCFG_DIS, x)
+
+/* PTP:PTP_TOD_DOMAINS:CLK_PER_CFG */
+#define PTP_CLK_PER_CFG(g, r) __REG(TARGET_PTP, 0, 1, 528, g, 3, 28, 0, r, 2, 4)
+
+/* PTP:PTP_PINS:PTP_PIN_CFG */
+#define PTP_PIN_CFG(g) __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 0, 0, 1, 4)
+
+#define PTP_PIN_CFG_PIN_ACTION GENMASK(29, 27)
+#define PTP_PIN_CFG_PIN_ACTION_SET(x)\
+ FIELD_PREP(PTP_PIN_CFG_PIN_ACTION, x)
+#define PTP_PIN_CFG_PIN_ACTION_GET(x)\
+ FIELD_GET(PTP_PIN_CFG_PIN_ACTION, x)
+
+#define PTP_PIN_CFG_PIN_SYNC GENMASK(26, 25)
+#define PTP_PIN_CFG_PIN_SYNC_SET(x)\
+ FIELD_PREP(PTP_PIN_CFG_PIN_SYNC, x)
+#define PTP_PIN_CFG_PIN_SYNC_GET(x)\
+ FIELD_GET(PTP_PIN_CFG_PIN_SYNC, x)
+
+#define PTP_PIN_CFG_PIN_DOM GENMASK(17, 16)
+#define PTP_PIN_CFG_PIN_DOM_SET(x)\
+ FIELD_PREP(PTP_PIN_CFG_PIN_DOM, x)
+#define PTP_PIN_CFG_PIN_DOM_GET(x)\
+ FIELD_GET(PTP_PIN_CFG_PIN_DOM, x)
+
+/* PTP:PTP_PINS:PTP_TOD_SEC_MSB */
+#define PTP_TOD_SEC_MSB(g) __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 4, 0, 1, 4)
+
+#define PTP_TOD_SEC_MSB_TOD_SEC_MSB GENMASK(15, 0)
+#define PTP_TOD_SEC_MSB_TOD_SEC_MSB_SET(x)\
+ FIELD_PREP(PTP_TOD_SEC_MSB_TOD_SEC_MSB, x)
+#define PTP_TOD_SEC_MSB_TOD_SEC_MSB_GET(x)\
+ FIELD_GET(PTP_TOD_SEC_MSB_TOD_SEC_MSB, x)
+
+/* PTP:PTP_PINS:PTP_TOD_SEC_LSB */
+#define PTP_TOD_SEC_LSB(g) __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 8, 0, 1, 4)
+
+/* PTP:PTP_PINS:PTP_TOD_NSEC */
+#define PTP_TOD_NSEC(g) __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 12, 0, 1, 4)
+
+#define PTP_TOD_NSEC_TOD_NSEC GENMASK(29, 0)
+#define PTP_TOD_NSEC_TOD_NSEC_SET(x)\
+ FIELD_PREP(PTP_TOD_NSEC_TOD_NSEC, x)
+#define PTP_TOD_NSEC_TOD_NSEC_GET(x)\
+ FIELD_GET(PTP_TOD_NSEC_TOD_NSEC, x)
+
+/* PTP:PTP_TS_FIFO:PTP_TWOSTEP_CTRL */
+#define PTP_TWOSTEP_CTRL __REG(TARGET_PTP, 0, 1, 612, 0, 1, 12, 0, 0, 1, 4)
+
+#define PTP_TWOSTEP_CTRL_NXT BIT(11)
+#define PTP_TWOSTEP_CTRL_NXT_SET(x)\
+ FIELD_PREP(PTP_TWOSTEP_CTRL_NXT, x)
+#define PTP_TWOSTEP_CTRL_NXT_GET(x)\
+ FIELD_GET(PTP_TWOSTEP_CTRL_NXT, x)
+
+#define PTP_TWOSTEP_CTRL_VLD BIT(10)
+#define PTP_TWOSTEP_CTRL_VLD_SET(x)\
+ FIELD_PREP(PTP_TWOSTEP_CTRL_VLD, x)
+#define PTP_TWOSTEP_CTRL_VLD_GET(x)\
+ FIELD_GET(PTP_TWOSTEP_CTRL_VLD, x)
+
+#define PTP_TWOSTEP_CTRL_STAMP_TX BIT(9)
+#define PTP_TWOSTEP_CTRL_STAMP_TX_SET(x)\
+ FIELD_PREP(PTP_TWOSTEP_CTRL_STAMP_TX, x)
+#define PTP_TWOSTEP_CTRL_STAMP_TX_GET(x)\
+ FIELD_GET(PTP_TWOSTEP_CTRL_STAMP_TX, x)
+
+#define PTP_TWOSTEP_CTRL_STAMP_PORT GENMASK(8, 1)
+#define PTP_TWOSTEP_CTRL_STAMP_PORT_SET(x)\
+ FIELD_PREP(PTP_TWOSTEP_CTRL_STAMP_PORT, x)
+#define PTP_TWOSTEP_CTRL_STAMP_PORT_GET(x)\
+ FIELD_GET(PTP_TWOSTEP_CTRL_STAMP_PORT, x)
+
+#define PTP_TWOSTEP_CTRL_OVFL BIT(0)
+#define PTP_TWOSTEP_CTRL_OVFL_SET(x)\
+ FIELD_PREP(PTP_TWOSTEP_CTRL_OVFL, x)
+#define PTP_TWOSTEP_CTRL_OVFL_GET(x)\
+ FIELD_GET(PTP_TWOSTEP_CTRL_OVFL, x)
+
+/* PTP:PTP_TS_FIFO:PTP_TWOSTEP_STAMP */
+#define PTP_TWOSTEP_STAMP __REG(TARGET_PTP, 0, 1, 612, 0, 1, 12, 4, 0, 1, 4)
+
+#define PTP_TWOSTEP_STAMP_STAMP_NSEC GENMASK(31, 2)
+#define PTP_TWOSTEP_STAMP_STAMP_NSEC_SET(x)\
+ FIELD_PREP(PTP_TWOSTEP_STAMP_STAMP_NSEC, x)
+#define PTP_TWOSTEP_STAMP_STAMP_NSEC_GET(x)\
+ FIELD_GET(PTP_TWOSTEP_STAMP_STAMP_NSEC, x)
+
/* DEVCPU_QS:XTR:XTR_GRP_CFG */
#define QS_XTR_GRP_CFG(r) __REG(TARGET_QS, 0, 1, 0, 0, 1, 36, 0, r, 2, 4)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
index 7de55f6a4da8..9fce865287e7 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
@@ -9,6 +9,37 @@ static struct notifier_block lan966x_netdevice_nb __read_mostly;
static struct notifier_block lan966x_switchdev_nb __read_mostly;
static struct notifier_block lan966x_switchdev_blocking_nb __read_mostly;
+static void lan966x_port_set_mcast_ip_flood(struct lan966x_port *port,
+ u32 pgid_ip)
+{
+ struct lan966x *lan966x = port->lan966x;
+ u32 flood_mask_ip;
+
+ flood_mask_ip = lan_rd(lan966x, ANA_PGID(pgid_ip));
+ flood_mask_ip = ANA_PGID_PGID_GET(flood_mask_ip);
+
+ /* If mcast snooping is not enabled then use mcast flood mask
+ * to decide to enable multicast flooding or not.
+ */
+ if (!port->mcast_ena) {
+ u32 flood_mask;
+
+ flood_mask = lan_rd(lan966x, ANA_PGID(PGID_MC));
+ flood_mask = ANA_PGID_PGID_GET(flood_mask);
+
+ if (flood_mask & BIT(port->chip_port))
+ flood_mask_ip |= BIT(port->chip_port);
+ else
+ flood_mask_ip &= ~BIT(port->chip_port);
+ } else {
+ flood_mask_ip &= ~BIT(port->chip_port);
+ }
+
+ lan_rmw(ANA_PGID_PGID_SET(flood_mask_ip),
+ ANA_PGID_PGID,
+ lan966x, ANA_PGID(pgid_ip));
+}
+
static void lan966x_port_set_mcast_flood(struct lan966x_port *port,
bool enabled)
{
@@ -23,6 +54,11 @@ static void lan966x_port_set_mcast_flood(struct lan966x_port *port,
lan_rmw(ANA_PGID_PGID_SET(val),
ANA_PGID_PGID,
port->lan966x, ANA_PGID(PGID_MC));
+
+ if (!port->mcast_ena) {
+ lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV4);
+ lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV6);
+ }
}
static void lan966x_port_set_ucast_flood(struct lan966x_port *port,
@@ -144,6 +180,28 @@ static void lan966x_port_ageing_set(struct lan966x_port *port,
lan966x_mac_set_ageing(port->lan966x, ageing_time);
}
+static void lan966x_port_mc_set(struct lan966x_port *port, bool mcast_ena)
+{
+ struct lan966x *lan966x = port->lan966x;
+
+ port->mcast_ena = mcast_ena;
+ if (mcast_ena)
+ lan966x_mdb_restore_entries(lan966x);
+ else
+ lan966x_mdb_clear_entries(lan966x);
+
+ lan_rmw(ANA_CPU_FWD_CFG_IGMP_REDIR_ENA_SET(mcast_ena) |
+ ANA_CPU_FWD_CFG_MLD_REDIR_ENA_SET(mcast_ena) |
+ ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA_SET(mcast_ena),
+ ANA_CPU_FWD_CFG_IGMP_REDIR_ENA |
+ ANA_CPU_FWD_CFG_MLD_REDIR_ENA |
+ ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA,
+ lan966x, ANA_CPU_FWD_CFG(port->chip_port));
+
+ lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV4);
+ lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV6);
+}
+
static int lan966x_port_attr_set(struct net_device *dev, const void *ctx,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack)
@@ -171,6 +229,9 @@ static int lan966x_port_attr_set(struct net_device *dev, const void *ctx,
lan966x_vlan_port_set_vlan_aware(port, attr->u.vlan_filtering);
lan966x_vlan_port_apply(port);
break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED:
+ lan966x_port_mc_set(port, !attr->u.mc_disabled);
+ break;
default:
err = -EOPNOTSUPP;
break;
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index 16266275dd36..394de85d360d 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -291,7 +291,6 @@ static int sparx5_create_port(struct sparx5 *sparx5,
/* Create a phylink for PHY management. Also handles SFPs */
spx5_port->phylink_config.dev = &spx5_port->ndev->dev;
spx5_port->phylink_config.type = PHYLINK_NETDEV;
- spx5_port->phylink_config.pcs_poll = true;
spx5_port->phylink_config.mac_capabilities = MAC_ASYM_PAUSE |
MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD |
MAC_2500FD | MAC_5000FD | MAC_10000FD | MAC_25000FD;
@@ -328,7 +327,6 @@ static int sparx5_create_port(struct sparx5 *sparx5,
return PTR_ERR(phylink);
spx5_port->phylink = phylink;
- phylink_set_pcs(phylink, &spx5_port->phylink_pcs);
return 0;
}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c
index 8ba33bc1a001..830da0e5ff27 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c
@@ -26,6 +26,15 @@ static bool port_conf_has_changed(struct sparx5_port_config *a, struct sparx5_po
return false;
}
+static struct phylink_pcs *
+sparx5_phylink_mac_select_pcs(struct phylink_config *config,
+ phy_interface_t interface)
+{
+ struct sparx5_port *port = netdev_priv(to_net_dev(config->dev));
+
+ return &port->phylink_pcs;
+}
+
static void sparx5_phylink_mac_config(struct phylink_config *config,
unsigned int mode,
const struct phylink_link_state *state)
@@ -130,6 +139,7 @@ const struct phylink_pcs_ops sparx5_phylink_pcs_ops = {
const struct phylink_mac_ops sparx5_phylink_mac_ops = {
.validate = phylink_generic_validate,
+ .mac_select_pcs = sparx5_phylink_mac_select_pcs,
.mac_config = sparx5_phylink_mac_config,
.mac_link_down = sparx5_phylink_mac_link_down,
.mac_link_up = sparx5_phylink_mac_link_up,
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 636dfef24a6c..49b85ca578b0 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -663,7 +663,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd,
struct gdma_context *gc = gd->gdma_context;
struct hw_channel_context *hwc;
u32 length = gmi->length;
- u32 req_msg_size;
+ size_t req_msg_size;
int err;
int i;
@@ -674,7 +674,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd,
return -EINVAL;
hwc = gc->hwc.driver_data;
- req_msg_size = sizeof(*req) + num_page * sizeof(u64);
+ req_msg_size = struct_size(req, page_addr_list, num_page);
if (req_msg_size > hwc->max_req_msg_size)
return -EINVAL;
diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h
index 9a12607fb511..d36405af9432 100644
--- a/drivers/net/ethernet/microsoft/mana/mana.h
+++ b/drivers/net/ethernet/microsoft/mana/mana.h
@@ -48,7 +48,15 @@ enum TRI_STATE {
#define MAX_PORTS_IN_MANA_DEV 256
-struct mana_stats {
+struct mana_stats_rx {
+ u64 packets;
+ u64 bytes;
+ u64 xdp_drop;
+ u64 xdp_tx;
+ struct u64_stats_sync syncp;
+};
+
+struct mana_stats_tx {
u64 packets;
u64 bytes;
struct u64_stats_sync syncp;
@@ -76,7 +84,7 @@ struct mana_txq {
atomic_t pending_sends;
- struct mana_stats stats;
+ struct mana_stats_tx stats;
};
/* skb data and frags dma mappings */
@@ -298,10 +306,11 @@ struct mana_rxq {
u32 buf_index;
- struct mana_stats stats;
+ struct mana_stats_rx stats;
struct bpf_prog __rcu *bpf_prog;
struct xdp_rxq_info xdp_rxq;
+ struct page *xdp_save_page;
/* MUST BE THE LAST MEMBER:
* Each receive buffer has an associated mana_recv_buf_oob.
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 498d0f999275..b7d3ba1b4d17 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -136,7 +136,7 @@ int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
bool ipv4 = false, ipv6 = false;
struct mana_tx_package pkg = {};
struct netdev_queue *net_txq;
- struct mana_stats *tx_stats;
+ struct mana_stats_tx *tx_stats;
struct gdma_queue *gdma_sq;
unsigned int csum_type;
struct mana_txq *txq;
@@ -299,7 +299,8 @@ static void mana_get_stats64(struct net_device *ndev,
{
struct mana_port_context *apc = netdev_priv(ndev);
unsigned int num_queues = apc->num_queues;
- struct mana_stats *stats;
+ struct mana_stats_rx *rx_stats;
+ struct mana_stats_tx *tx_stats;
unsigned int start;
u64 packets, bytes;
int q;
@@ -310,26 +311,26 @@ static void mana_get_stats64(struct net_device *ndev,
netdev_stats_to_stats64(st, &ndev->stats);
for (q = 0; q < num_queues; q++) {
- stats = &apc->rxqs[q]->stats;
+ rx_stats = &apc->rxqs[q]->stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+ packets = rx_stats->packets;
+ bytes = rx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
st->rx_packets += packets;
st->rx_bytes += bytes;
}
for (q = 0; q < num_queues; q++) {
- stats = &apc->tx_qp[q].txq.stats;
+ tx_stats = &apc->tx_qp[q].txq.stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+ packets = tx_stats->packets;
+ bytes = tx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
st->tx_packets += packets;
st->tx_bytes += bytes;
@@ -986,7 +987,7 @@ static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
struct mana_rxq *rxq)
{
- struct mana_stats *rx_stats = &rxq->stats;
+ struct mana_stats_rx *rx_stats = &rxq->stats;
struct net_device *ndev = rxq->ndev;
uint pkt_len = cqe->ppi[0].pkt_len;
u16 rxq_idx = rxq->rxq_idx;
@@ -1007,7 +1008,7 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);
if (act != XDP_PASS && act != XDP_TX)
- goto drop;
+ goto drop_xdp;
skb = mana_build_skb(buf_va, pkt_len, &xdp);
@@ -1034,6 +1035,14 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3);
}
+ u64_stats_update_begin(&rx_stats->syncp);
+ rx_stats->packets++;
+ rx_stats->bytes += pkt_len;
+
+ if (act == XDP_TX)
+ rx_stats->xdp_tx++;
+ u64_stats_update_end(&rx_stats->syncp);
+
if (act == XDP_TX) {
skb_set_queue_mapping(skb, rxq_idx);
mana_xdp_tx(skb, ndev);
@@ -1042,15 +1051,19 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
napi_gro_receive(napi, skb);
+ return;
+
+drop_xdp:
u64_stats_update_begin(&rx_stats->syncp);
- rx_stats->packets++;
- rx_stats->bytes += pkt_len;
+ rx_stats->xdp_drop++;
u64_stats_update_end(&rx_stats->syncp);
- return;
drop:
- free_page((unsigned long)buf_va);
+ WARN_ON_ONCE(rxq->xdp_save_page);
+ rxq->xdp_save_page = virt_to_page(buf_va);
+
++ndev->stats.rx_dropped;
+
return;
}
@@ -1072,8 +1085,10 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
break;
case CQE_RX_TRUNCATED:
- netdev_err(ndev, "Dropped a truncated packet\n");
- return;
+ ++ndev->stats.rx_dropped;
+ rxbuf_oob = &rxq->rx_oobs[rxq->buf_index];
+ netdev_warn_once(ndev, "Dropped a truncated packet\n");
+ goto drop;
case CQE_RX_COALESCED_4:
netdev_err(ndev, "RX coalescing is unsupported\n");
@@ -1089,9 +1104,6 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
return;
}
- if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY)
- return;
-
pktlen = oob->ppi[0].pkt_len;
if (pktlen == 0) {
@@ -1105,7 +1117,13 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
rxbuf_oob = &rxq->rx_oobs[curr];
WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
- new_page = alloc_page(GFP_ATOMIC);
+ /* Reuse XDP dropped page if available */
+ if (rxq->xdp_save_page) {
+ new_page = rxq->xdp_save_page;
+ rxq->xdp_save_page = NULL;
+ } else {
+ new_page = alloc_page(GFP_ATOMIC);
+ }
if (new_page) {
da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize,
@@ -1135,6 +1153,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
mana_rx_skb(old_buf, oob, rxq);
+drop:
mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
mana_post_pkt_rxq(rxq);
@@ -1392,6 +1411,9 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
mana_deinit_cq(apc, &rxq->rx_cq);
+ if (rxq->xdp_save_page)
+ __free_page(rxq->xdp_save_page);
+
for (i = 0; i < rxq->num_rx_buf; i++) {
rx_oob = &rxq->rx_oobs[i];
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index c3c81ae3fafd..e13f2453eabb 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -23,7 +23,7 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset)
if (stringset != ETH_SS_STATS)
return -EINVAL;
- return ARRAY_SIZE(mana_eth_stats) + num_queues * 4;
+ return ARRAY_SIZE(mana_eth_stats) + num_queues * 6;
}
static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
@@ -46,6 +46,10 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
p += ETH_GSTRING_LEN;
sprintf(p, "rx_%d_bytes", i);
p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_%d_xdp_drop", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_%d_xdp_tx", i);
+ p += ETH_GSTRING_LEN;
}
for (i = 0; i < num_queues; i++) {
@@ -62,9 +66,12 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
struct mana_port_context *apc = netdev_priv(ndev);
unsigned int num_queues = apc->num_queues;
void *eth_stats = &apc->eth_stats;
- struct mana_stats *stats;
+ struct mana_stats_rx *rx_stats;
+ struct mana_stats_tx *tx_stats;
unsigned int start;
u64 packets, bytes;
+ u64 xdp_drop;
+ u64 xdp_tx;
int q, i = 0;
if (!apc->port_is_up)
@@ -74,26 +81,30 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset);
for (q = 0; q < num_queues; q++) {
- stats = &apc->rxqs[q]->stats;
+ rx_stats = &apc->rxqs[q]->stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+ packets = rx_stats->packets;
+ bytes = rx_stats->bytes;
+ xdp_drop = rx_stats->xdp_drop;
+ xdp_tx = rx_stats->xdp_tx;
+ } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
data[i++] = packets;
data[i++] = bytes;
+ data[i++] = xdp_drop;
+ data[i++] = xdp_tx;
}
for (q = 0; q < num_queues; q++) {
- stats = &apc->tx_qp[q].txq.stats;
+ tx_stats = &apc->tx_qp[q].txq.stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+ packets = tx_stats->packets;
+ bytes = tx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
data[i++] = packets;
data[i++] = bytes;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index 0a326e04e692..cd50db779dda 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -356,7 +356,7 @@ __nfp_tun_add_route_to_cache(struct list_head *route_list,
return 0;
}
- entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC);
+ entry = kmalloc(struct_size(entry, ip_add, add_len), GFP_ATOMIC);
if (!entry) {
spin_unlock_bh(list_lock);
return -ENOMEM;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 3d61a8cb60b0..50007cc5b580 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -1,8 +1,7 @@
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
-/*
- * nfp_net_ctrl.h
+/* nfp_net_ctrl.h
* Netronome network device driver: Control BAR layout
* Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
* Jason McMullan <jason.mcmullan@netronome.com>
@@ -15,30 +14,24 @@
#include <linux/types.h>
-/**
- * Configuration BAR size.
+/* Configuration BAR size.
*
* The configuration BAR is 8K in size, but due to
* THB-350, 32k needs to be reserved.
*/
#define NFP_NET_CFG_BAR_SZ (32 * 1024)
-/**
- * Offset in Freelist buffer where packet starts on RX
- */
+/* Offset in Freelist buffer where packet starts on RX */
#define NFP_NET_RX_OFFSET 32
-/**
- * LSO parameters
+/* LSO parameters
* %NFP_NET_LSO_MAX_HDR_SZ: Maximum header size supported for LSO frames
* %NFP_NET_LSO_MAX_SEGS: Maximum number of segments LSO frame can produce
*/
#define NFP_NET_LSO_MAX_HDR_SZ 255
#define NFP_NET_LSO_MAX_SEGS 64
-/**
- * Prepend field types
- */
+/* Prepend field types */
#define NFP_NET_META_FIELD_SIZE 4
#define NFP_NET_META_HASH 1 /* next field carries hash type */
#define NFP_NET_META_MARK 2
@@ -49,9 +42,7 @@
#define NFP_META_PORT_ID_CTRL ~0U
-/**
- * Hash type pre-pended when a RSS hash was computed
- */
+/* Hash type pre-pended when a RSS hash was computed */
#define NFP_NET_RSS_NONE 0
#define NFP_NET_RSS_IPV4 1
#define NFP_NET_RSS_IPV6 2
@@ -63,16 +54,14 @@
#define NFP_NET_RSS_IPV6_UDP 8
#define NFP_NET_RSS_IPV6_EX_UDP 9
-/**
- * Ring counts
+/* Ring counts
* %NFP_NET_TXR_MAX: Maximum number of TX rings
* %NFP_NET_RXR_MAX: Maximum number of RX rings
*/
#define NFP_NET_TXR_MAX 64
#define NFP_NET_RXR_MAX 64
-/**
- * Read/Write config words (0x0000 - 0x002c)
+/* Read/Write config words (0x0000 - 0x002c)
* %NFP_NET_CFG_CTRL: Global control
* %NFP_NET_CFG_UPDATE: Indicate which fields are updated
* %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
@@ -147,8 +136,7 @@
#define NFP_NET_CFG_LSC 0x0020
#define NFP_NET_CFG_MACADDR 0x0024
-/**
- * Read-only words (0x0030 - 0x0050):
+/* Read-only words (0x0030 - 0x0050):
* %NFP_NET_CFG_VERSION: Firmware version number
* %NFP_NET_CFG_STS: Status
* %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL)
@@ -193,36 +181,31 @@
#define NFP_NET_CFG_START_TXQ 0x0048
#define NFP_NET_CFG_START_RXQ 0x004c
-/**
- * Prepend configuration
+/* Prepend configuration
*/
#define NFP_NET_CFG_RX_OFFSET 0x0050
#define NFP_NET_CFG_RX_OFFSET_DYNAMIC 0 /* Prepend mode */
-/**
- * RSS capabilities
+/* RSS capabilities
* %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as
* %NFP_NET_CFG_RSS_HFUNC)
*/
#define NFP_NET_CFG_RSS_CAP 0x0054
#define NFP_NET_CFG_RSS_CAP_HFUNC 0xff000000
-/**
- * TLV area start
+/* TLV area start
* %NFP_NET_CFG_TLV_BASE: start anchor of the TLV area
*/
#define NFP_NET_CFG_TLV_BASE 0x0058
-/**
- * VXLAN/UDP encap configuration
+/* VXLAN/UDP encap configuration
* %NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports
* %NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes
*/
#define NFP_NET_CFG_VXLAN_PORT 0x0060
#define NFP_NET_CFG_VXLAN_SZ 0x0008
-/**
- * BPF section
+/* BPF section
* %NFP_NET_CFG_BPF_ABI: BPF ABI version
* %NFP_NET_CFG_BPF_CAP: BPF capabilities
* %NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes
@@ -247,14 +230,12 @@
#define NFP_NET_CFG_BPF_CFG_MASK 7ULL
#define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK)
-/**
- * 40B reserved for future use (0x0098 - 0x00c0)
+/* 40B reserved for future use (0x0098 - 0x00c0)
*/
#define NFP_NET_CFG_RESERVED 0x0098
#define NFP_NET_CFG_RESERVED_SZ 0x0028
-/**
- * RSS configuration (0x0100 - 0x01ac):
+/* RSS configuration (0x0100 - 0x01ac):
* Used only when NFP_NET_CFG_CTRL_RSS is enabled
* %NFP_NET_CFG_RSS_CFG: RSS configuration word
* %NFP_NET_CFG_RSS_KEY: RSS "secret" key
@@ -281,8 +262,7 @@
NFP_NET_CFG_RSS_KEY_SZ)
#define NFP_NET_CFG_RSS_ITBL_SZ 0x80
-/**
- * TX ring configuration (0x200 - 0x800)
+/* TX ring configuration (0x200 - 0x800)
* %NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration
* %NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries)
* %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries)
@@ -301,8 +281,7 @@
#define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \
((_x) * 0x4))
-/**
- * RX ring configuration (0x0800 - 0x0c00)
+/* RX ring configuration (0x0800 - 0x0c00)
* %NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration
* %NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries)
* %NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries)
@@ -318,8 +297,7 @@
#define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \
((_x) * 0x4))
-/**
- * Interrupt Control/Cause registers (0x0c00 - 0x0d00)
+/* Interrupt Control/Cause registers (0x0c00 - 0x0d00)
* These registers are only used when MSI-X auto-masking is not
* enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index
* by MSI-X entry and are 1B in size. If an entry is zero, the
@@ -334,8 +312,7 @@
#define NFP_NET_CFG_ICR_RXTX 0x1
#define NFP_NET_CFG_ICR_LSC 0x2
-/**
- * General device stats (0x0d00 - 0x0d90)
+/* General device stats (0x0d00 - 0x0d90)
* all counters are 64bit.
*/
#define NFP_NET_CFG_STATS_BASE 0x0d00
@@ -368,8 +345,7 @@
#define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0)
#define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8)
-/**
- * Per ring stats (0x1000 - 0x1800)
+/* Per ring stats (0x1000 - 0x1800)
* options, 64bit per entry
* %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count)
* %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count)
@@ -381,8 +357,7 @@
#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \
((_x) * 0x10))
-/**
- * General use mailbox area (0x1800 - 0x19ff)
+/* General use mailbox area (0x1800 - 0x19ff)
* 4B used for update command and 4B return code
* followed by a max of 504B of variable length value
*/
@@ -399,8 +374,7 @@
#define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET 5
#define NFP_NET_CFG_MBOX_CMD_TLV_CMSG 6
-/**
- * VLAN filtering using general use mailbox
+/* VLAN filtering using general use mailbox
* %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox
* %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter
* %NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter
@@ -411,8 +385,7 @@
#define NFP_NET_CFG_VLAN_FILTER_PROTO (NFP_NET_CFG_VLAN_FILTER + 2)
#define NFP_NET_CFG_VLAN_FILTER_SZ 0x0004
-/**
- * TLV capabilities
+/* TLV capabilities
* %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV
* %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
* %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV
@@ -438,8 +411,7 @@
#define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000
#define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff
-/**
- * Capability TLV types
+/* Capability TLV types
*
* %NFP_NET_CFG_TLV_TYPE_UNKNOWN:
* Special TLV type to catch bugs, should never be encountered. Drivers should
@@ -512,8 +484,7 @@
struct device;
-/**
- * struct nfp_net_tlv_caps - parsed control BAR TLV capabilities
+/* struct nfp_net_tlv_caps - parsed control BAR TLV capabilities
* @me_freq_mhz: ME clock_freq (MHz)
* @mbox_off: vNIC mailbox area offset
* @mbox_len: vNIC mailbox area length
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
index a3db0cbf6425..786be58a907e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
@@ -4,8 +4,7 @@
#ifndef _NFP_NET_SRIOV_H_
#define _NFP_NET_SRIOV_H_
-/**
- * SRIOV VF configuration.
+/* SRIOV VF configuration.
* The configuration memory begins with a mailbox region for communication with
* the firmware followed by individual VF entries.
*/
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index ae4da189d955..df316b9e891d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -132,8 +132,7 @@ void nfp_devlink_port_unregister(struct nfp_port *port);
void nfp_devlink_port_type_eth_set(struct nfp_port *port);
void nfp_devlink_port_type_clear(struct nfp_port *port);
-/**
- * Mac stats (0x0000 - 0x0200)
+/* Mac stats (0x0000 - 0x0200)
* all counters are 64bit.
*/
#define NFP_MAC_STATS_BASE 0x0000
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
index 10e7d8b21c46..730fea214b8a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
@@ -513,7 +513,7 @@ nfp_nsp_command_buf_dma_sg(struct nfp_nsp *nsp,
dma_size = BIT_ULL(dma_order);
nseg = DIV_ROUND_UP(max_size, chunk_size);
- chunks = kzalloc(array_size(sizeof(*chunks), nseg), GFP_KERNEL);
+ chunks = kcalloc(nseg, sizeof(*chunks), GFP_KERNEL);
if (!chunks)
return -ENOMEM;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
index 5e25411ff02f..602f4d45d529 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic.h
@@ -18,7 +18,7 @@ struct ionic_lif;
#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF 0x1002
#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003
-#define DEVCMD_TIMEOUT 10
+#define DEVCMD_TIMEOUT 5
#define IONIC_ADMINQ_TIME_SLICE msecs_to_jiffies(100)
#define IONIC_PHC_UPDATE_NS 10000000000 /* 10s in nanoseconds */
@@ -78,6 +78,9 @@ void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode,
u8 status, int err);
int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait);
+int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_wait);
+void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status,
+ int err);
int ionic_set_dma_mask(struct ionic *ionic);
int ionic_setup(struct ionic *ionic);
@@ -89,4 +92,6 @@ int ionic_port_identify(struct ionic *ionic);
int ionic_port_init(struct ionic *ionic);
int ionic_port_reset(struct ionic *ionic);
+const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr);
+
#endif /* _IONIC_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index 7e296fa71b36..6ffc62c41165 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -109,8 +109,8 @@ void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page)
static void ionic_vf_dealloc_locked(struct ionic *ionic)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR };
struct ionic_vf *v;
- dma_addr_t dma = 0;
int i;
if (!ionic->vfs)
@@ -120,9 +120,8 @@ static void ionic_vf_dealloc_locked(struct ionic *ionic)
v = &ionic->vfs[i];
if (v->stats_pa) {
- (void)ionic_set_vf_config(ionic, i,
- IONIC_VF_ATTR_STATSADDR,
- (u8 *)&dma);
+ vfc.stats_pa = 0;
+ (void)ionic_set_vf_config(ionic, i, &vfc);
dma_unmap_single(ionic->dev, v->stats_pa,
sizeof(v->stats), DMA_FROM_DEVICE);
v->stats_pa = 0;
@@ -143,6 +142,7 @@ static void ionic_vf_dealloc(struct ionic *ionic)
static int ionic_vf_alloc(struct ionic *ionic, int num_vfs)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR };
struct ionic_vf *v;
int err = 0;
int i;
@@ -166,9 +166,10 @@ static int ionic_vf_alloc(struct ionic *ionic, int num_vfs)
}
ionic->num_vfs++;
+
/* ignore failures from older FW, we just won't get stats */
- (void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR,
- (u8 *)&v->stats_pa);
+ vfc.stats_pa = cpu_to_le64(v->stats_pa);
+ (void)ionic_set_vf_config(ionic, i, &vfc);
}
out:
@@ -331,6 +332,9 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_deregister_lifs;
}
+ mod_timer(&ionic->watchdog_timer,
+ round_jiffies(jiffies + ionic->watchdog_period));
+
return 0;
err_out_deregister_lifs:
@@ -348,7 +352,6 @@ err_out_port_reset:
err_out_reset:
ionic_reset(ionic);
err_out_teardown:
- del_timer_sync(&ionic->watchdog_timer);
pci_clear_master(pdev);
/* Don't fail the probe for these errors, keep
* the hw interface around for inspection
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index d57e80d44c9d..52a1b5cfd8e7 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -33,7 +33,8 @@ static void ionic_watchdog_cb(struct timer_list *t)
!test_bit(IONIC_LIF_F_FW_RESET, lif->state))
ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
- if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state)) {
+ if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state) &&
+ !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) {
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work) {
netdev_err(lif->netdev, "rxmode change dropped\n");
@@ -46,6 +47,24 @@ static void ionic_watchdog_cb(struct timer_list *t)
}
}
+static void ionic_watchdog_init(struct ionic *ionic)
+{
+ struct ionic_dev *idev = &ionic->idev;
+
+ timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
+ ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
+
+ /* set times to ensure the first check will proceed */
+ atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
+ idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
+ /* init as ready, so no transition if the first check succeeds */
+ idev->last_fw_hb = 0;
+ idev->fw_hb_ready = true;
+ idev->fw_status_ready = true;
+ idev->fw_generation = IONIC_FW_STS_F_GENERATION &
+ ioread8(&idev->dev_info_regs->fw_status);
+}
+
void ionic_init_devinfo(struct ionic *ionic)
{
struct ionic_dev *idev = &ionic->idev;
@@ -109,21 +128,7 @@ int ionic_dev_setup(struct ionic *ionic)
return -EFAULT;
}
- timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
- ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
-
- /* set times to ensure the first check will proceed */
- atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
- idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
- /* init as ready, so no transition if the first check succeeds */
- idev->last_fw_hb = 0;
- idev->fw_hb_ready = true;
- idev->fw_status_ready = true;
- idev->fw_generation = IONIC_FW_STS_F_GENERATION &
- ioread8(&idev->dev_info_regs->fw_status);
-
- mod_timer(&ionic->watchdog_timer,
- round_jiffies(jiffies + ionic->watchdog_period));
+ ionic_watchdog_init(ionic);
idev->db_pages = bar->vaddr;
idev->phy_db_pages = bar->bus_addr;
@@ -132,10 +137,21 @@ int ionic_dev_setup(struct ionic *ionic)
}
/* Devcmd Interface */
+bool ionic_is_fw_running(struct ionic_dev *idev)
+{
+ u8 fw_status = ioread8(&idev->dev_info_regs->fw_status);
+
+ /* firmware is useful only if the running bit is set and
+ * fw_status != 0xff (bad PCI read)
+ */
+ return (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);
+}
+
int ionic_heartbeat_check(struct ionic *ionic)
{
- struct ionic_dev *idev = &ionic->idev;
unsigned long check_time, last_check_time;
+ struct ionic_dev *idev = &ionic->idev;
+ struct ionic_lif *lif = ionic->lif;
bool fw_status_ready = true;
bool fw_hb_ready;
u8 fw_generation;
@@ -155,13 +171,10 @@ do_check_time:
goto do_check_time;
}
- /* firmware is useful only if the running bit is set and
- * fw_status != 0xff (bad PCI read)
- * If fw_status is not ready don't bother with the generation.
- */
fw_status = ioread8(&idev->dev_info_regs->fw_status);
- if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) {
+ /* If fw_status is not ready don't bother with the generation */
+ if (!ionic_is_fw_running(idev)) {
fw_status_ready = false;
} else {
fw_generation = fw_status & IONIC_FW_STS_F_GENERATION;
@@ -176,31 +189,41 @@ do_check_time:
* the down, the next watchdog will see the fw is up
* and the generation value stable, so will trigger
* the fw-up activity.
+ *
+ * If we had already moved to FW_RESET from a RESET event,
+ * it is possible that we never saw the fw_status go to 0,
+ * so we fake the current idev->fw_status_ready here to
+ * force the transition and get FW up again.
*/
- fw_status_ready = false;
+ if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+ idev->fw_status_ready = false; /* go to running */
+ else
+ fw_status_ready = false; /* go to down */
}
}
/* is this a transition? */
if (fw_status_ready != idev->fw_status_ready) {
- struct ionic_lif *lif = ionic->lif;
bool trigger = false;
- idev->fw_status_ready = fw_status_ready;
-
- if (!fw_status_ready) {
- dev_info(ionic->dev, "FW stopped %u\n", fw_status);
- if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
- trigger = true;
- } else {
- dev_info(ionic->dev, "FW running %u\n", fw_status);
- if (lif && test_bit(IONIC_LIF_F_FW_RESET, lif->state))
- trigger = true;
+ if (!fw_status_ready && lif &&
+ !test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+ !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+ dev_info(ionic->dev, "FW stopped 0x%02x\n", fw_status);
+ trigger = true;
+
+ } else if (fw_status_ready && lif &&
+ test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+ !test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+ dev_info(ionic->dev, "FW running 0x%02x\n", fw_status);
+ trigger = true;
}
if (trigger) {
struct ionic_deferred_work *work;
+ idev->fw_status_ready = fw_status_ready;
+
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (work) {
work->type = IONIC_DW_TYPE_LIF_RESET;
@@ -210,12 +233,14 @@ do_check_time:
}
}
- if (!fw_status_ready)
+ if (!idev->fw_status_ready)
return -ENXIO;
- /* wait at least one watchdog period since the last heartbeat */
+ /* Because of some variability in the actual FW heartbeat, we
+ * wait longer than the DEVCMD_TIMEOUT before checking again.
+ */
last_check_time = idev->last_hb_time;
- if (time_before(check_time, last_check_time + ionic->watchdog_period))
+ if (time_before(check_time, last_check_time + DEVCMD_TIMEOUT * 2 * HZ))
return 0;
fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
@@ -392,60 +417,63 @@ void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type)
}
/* VF commands */
-int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data)
+int ionic_set_vf_config(struct ionic *ionic, int vf,
+ struct ionic_vf_setattr_cmd *vfc)
{
union ionic_dev_cmd cmd = {
.vf_setattr.opcode = IONIC_CMD_VF_SETATTR,
- .vf_setattr.attr = attr,
+ .vf_setattr.attr = vfc->attr,
.vf_setattr.vf_index = cpu_to_le16(vf),
};
int err;
+ memcpy(cmd.vf_setattr.pad, vfc->pad, sizeof(vfc->pad));
+
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_go(&ionic->idev, &cmd);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&ionic->dev_cmd_lock);
+
+ return err;
+}
+
+int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr,
+ struct ionic_vf_getattr_comp *comp)
+{
+ union ionic_dev_cmd cmd = {
+ .vf_getattr.opcode = IONIC_CMD_VF_GETATTR,
+ .vf_getattr.attr = attr,
+ .vf_getattr.vf_index = cpu_to_le16(vf),
+ };
+ int err;
+
+ if (vf >= ionic->num_vfs)
+ return -EINVAL;
+
switch (attr) {
case IONIC_VF_ATTR_SPOOFCHK:
- cmd.vf_setattr.spoofchk = *data;
- dev_dbg(ionic->dev, "%s: vf %d spoof %d\n",
- __func__, vf, *data);
- break;
case IONIC_VF_ATTR_TRUST:
- cmd.vf_setattr.trust = *data;
- dev_dbg(ionic->dev, "%s: vf %d trust %d\n",
- __func__, vf, *data);
- break;
case IONIC_VF_ATTR_LINKSTATE:
- cmd.vf_setattr.linkstate = *data;
- dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n",
- __func__, vf, *data);
- break;
case IONIC_VF_ATTR_MAC:
- ether_addr_copy(cmd.vf_setattr.macaddr, data);
- dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n",
- __func__, vf, data);
- break;
case IONIC_VF_ATTR_VLAN:
- cmd.vf_setattr.vlanid = cpu_to_le16(*(u16 *)data);
- dev_dbg(ionic->dev, "%s: vf %d vlan %d\n",
- __func__, vf, *(u16 *)data);
- break;
case IONIC_VF_ATTR_RATE:
- cmd.vf_setattr.maxrate = cpu_to_le32(*(u32 *)data);
- dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n",
- __func__, vf, *(u32 *)data);
break;
case IONIC_VF_ATTR_STATSADDR:
- cmd.vf_setattr.stats_pa = cpu_to_le64(*(u64 *)data);
- dev_dbg(ionic->dev, "%s: vf %d stats_pa 0x%08llx\n",
- __func__, vf, *(u64 *)data);
- break;
default:
return -EINVAL;
}
mutex_lock(&ionic->dev_cmd_lock);
ionic_dev_cmd_go(&ionic->idev, &cmd);
- err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ err = ionic_dev_cmd_wait_nomsg(ionic, DEVCMD_TIMEOUT);
+ memcpy_fromio(comp, &ionic->idev.dev_cmd_regs->comp.vf_getattr,
+ sizeof(*comp));
mutex_unlock(&ionic->dev_cmd_lock);
+ if (err && comp->status != IONIC_RC_ENOSUPP)
+ ionic_dev_cmd_dev_err_print(ionic, cmd.vf_getattr.opcode,
+ comp->status, err);
+
return err;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index e5acf3bd62b2..563c302eb033 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -318,7 +318,10 @@ void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable);
void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type);
void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type);
-int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data);
+int ionic_set_vf_config(struct ionic *ionic, int vf,
+ struct ionic_vf_setattr_cmd *vfc);
+int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr,
+ struct ionic_vf_getattr_comp *comp);
void ionic_dev_cmd_queue_identify(struct ionic_dev *idev,
u16 lif_type, u8 qtype, u8 qver);
void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver);
@@ -353,5 +356,6 @@ void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start);
void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
unsigned int stop_index);
int ionic_heartbeat_check(struct ionic *ionic);
+bool ionic_is_fw_running(struct ionic_dev *idev);
#endif /* _IONIC_DEV_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 2ff7be17e5af..542e395fb037 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1112,12 +1112,17 @@ static bool ionic_notifyq_service(struct ionic_cq *cq,
ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
break;
case IONIC_EVENT_RESET:
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work) {
- netdev_err(lif->netdev, "Reset event dropped\n");
- } else {
- work->type = IONIC_DW_TYPE_LIF_RESET;
- ionic_lif_deferred_enqueue(&lif->deferred, work);
+ if (lif->ionic->idev.fw_status_ready &&
+ !test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+ !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ netdev_err(lif->netdev, "Reset event dropped\n");
+ clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
+ } else {
+ work->type = IONIC_DW_TYPE_LIF_RESET;
+ ionic_lif_deferred_enqueue(&lif->deferred, work);
+ }
}
break;
default:
@@ -1782,7 +1787,7 @@ static void ionic_lif_quiesce(struct ionic_lif *lif)
err = ionic_adminq_post_wait(lif, &ctx);
if (err)
- netdev_err(lif->netdev, "lif quiesce failed %d\n", err);
+ netdev_dbg(lif->netdev, "lif quiesce failed %d\n", err);
}
static void ionic_txrx_disable(struct ionic_lif *lif)
@@ -2152,6 +2157,76 @@ static int ionic_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd
}
}
+static int ionic_update_cached_vf_config(struct ionic *ionic, int vf)
+{
+ struct ionic_vf_getattr_comp comp = { 0 };
+ int err;
+ u8 attr;
+
+ attr = IONIC_VF_ATTR_VLAN;
+ err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+ if (err && comp.status != IONIC_RC_ENOSUPP)
+ goto err_out;
+ if (!err)
+ ionic->vfs[vf].vlanid = comp.vlanid;
+
+ attr = IONIC_VF_ATTR_SPOOFCHK;
+ err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+ if (err && comp.status != IONIC_RC_ENOSUPP)
+ goto err_out;
+ if (!err)
+ ionic->vfs[vf].spoofchk = comp.spoofchk;
+
+ attr = IONIC_VF_ATTR_LINKSTATE;
+ err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+ if (err && comp.status != IONIC_RC_ENOSUPP)
+ goto err_out;
+ if (!err) {
+ switch (comp.linkstate) {
+ case IONIC_VF_LINK_STATUS_UP:
+ ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_ENABLE;
+ break;
+ case IONIC_VF_LINK_STATUS_DOWN:
+ ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_DISABLE;
+ break;
+ case IONIC_VF_LINK_STATUS_AUTO:
+ ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_AUTO;
+ break;
+ default:
+ dev_warn(ionic->dev, "Unexpected link state %u\n", comp.linkstate);
+ break;
+ }
+ }
+
+ attr = IONIC_VF_ATTR_RATE;
+ err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+ if (err && comp.status != IONIC_RC_ENOSUPP)
+ goto err_out;
+ if (!err)
+ ionic->vfs[vf].maxrate = comp.maxrate;
+
+ attr = IONIC_VF_ATTR_TRUST;
+ err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+ if (err && comp.status != IONIC_RC_ENOSUPP)
+ goto err_out;
+ if (!err)
+ ionic->vfs[vf].trusted = comp.trust;
+
+ attr = IONIC_VF_ATTR_MAC;
+ err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+ if (err && comp.status != IONIC_RC_ENOSUPP)
+ goto err_out;
+ if (!err)
+ ether_addr_copy(ionic->vfs[vf].macaddr, comp.macaddr);
+
+err_out:
+ if (err)
+ dev_err(ionic->dev, "Failed to get %s for VF %d\n",
+ ionic_vf_attr_to_str(attr), vf);
+
+ return err;
+}
+
static int ionic_get_vf_config(struct net_device *netdev,
int vf, struct ifla_vf_info *ivf)
{
@@ -2167,14 +2242,18 @@ static int ionic_get_vf_config(struct net_device *netdev,
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ivf->vf = vf;
- ivf->vlan = le16_to_cpu(ionic->vfs[vf].vlanid);
- ivf->qos = 0;
- ivf->spoofchk = ionic->vfs[vf].spoofchk;
- ivf->linkstate = ionic->vfs[vf].linkstate;
- ivf->max_tx_rate = le32_to_cpu(ionic->vfs[vf].maxrate);
- ivf->trusted = ionic->vfs[vf].trusted;
- ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr);
+ ivf->vf = vf;
+ ivf->qos = 0;
+
+ ret = ionic_update_cached_vf_config(ionic, vf);
+ if (!ret) {
+ ivf->vlan = le16_to_cpu(ionic->vfs[vf].vlanid);
+ ivf->spoofchk = ionic->vfs[vf].spoofchk;
+ ivf->linkstate = ionic->vfs[vf].linkstate;
+ ivf->max_tx_rate = le32_to_cpu(ionic->vfs[vf].maxrate);
+ ivf->trusted = ionic->vfs[vf].trusted;
+ ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr);
+ }
}
up_read(&ionic->vf_op_lock);
@@ -2220,6 +2299,7 @@ static int ionic_get_vf_stats(struct net_device *netdev, int vf,
static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_MAC };
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic *ionic = lif->ionic;
int ret;
@@ -2235,7 +2315,11 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ret = ionic_set_vf_config(ionic, vf, IONIC_VF_ATTR_MAC, mac);
+ ether_addr_copy(vfc.macaddr, mac);
+ dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n",
+ __func__, vf, vfc.macaddr);
+
+ ret = ionic_set_vf_config(ionic, vf, &vfc);
if (!ret)
ether_addr_copy(ionic->vfs[vf].macaddr, mac);
}
@@ -2247,6 +2331,7 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
u8 qos, __be16 proto)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_VLAN };
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic *ionic = lif->ionic;
int ret;
@@ -2269,8 +2354,11 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ret = ionic_set_vf_config(ionic, vf,
- IONIC_VF_ATTR_VLAN, (u8 *)&vlan);
+ vfc.vlanid = cpu_to_le16(vlan);
+ dev_dbg(ionic->dev, "%s: vf %d vlan %d\n",
+ __func__, vf, le16_to_cpu(vfc.vlanid));
+
+ ret = ionic_set_vf_config(ionic, vf, &vfc);
if (!ret)
ionic->vfs[vf].vlanid = cpu_to_le16(vlan);
}
@@ -2282,6 +2370,7 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
static int ionic_set_vf_rate(struct net_device *netdev, int vf,
int tx_min, int tx_max)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_RATE };
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic *ionic = lif->ionic;
int ret;
@@ -2298,8 +2387,11 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf,
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ret = ionic_set_vf_config(ionic, vf,
- IONIC_VF_ATTR_RATE, (u8 *)&tx_max);
+ vfc.maxrate = cpu_to_le32(tx_max);
+ dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n",
+ __func__, vf, le32_to_cpu(vfc.maxrate));
+
+ ret = ionic_set_vf_config(ionic, vf, &vfc);
if (!ret)
lif->ionic->vfs[vf].maxrate = cpu_to_le32(tx_max);
}
@@ -2310,9 +2402,9 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf,
static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_SPOOFCHK };
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic *ionic = lif->ionic;
- u8 data = set; /* convert to u8 for config */
int ret;
if (!netif_device_present(netdev))
@@ -2323,10 +2415,13 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ret = ionic_set_vf_config(ionic, vf,
- IONIC_VF_ATTR_SPOOFCHK, &data);
+ vfc.spoofchk = set;
+ dev_dbg(ionic->dev, "%s: vf %d spoof %d\n",
+ __func__, vf, vfc.spoofchk);
+
+ ret = ionic_set_vf_config(ionic, vf, &vfc);
if (!ret)
- ionic->vfs[vf].spoofchk = data;
+ ionic->vfs[vf].spoofchk = set;
}
up_write(&ionic->vf_op_lock);
@@ -2335,9 +2430,9 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_TRUST };
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic *ionic = lif->ionic;
- u8 data = set; /* convert to u8 for config */
int ret;
if (!netif_device_present(netdev))
@@ -2348,10 +2443,13 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ret = ionic_set_vf_config(ionic, vf,
- IONIC_VF_ATTR_TRUST, &data);
+ vfc.trust = set;
+ dev_dbg(ionic->dev, "%s: vf %d trust %d\n",
+ __func__, vf, vfc.trust);
+
+ ret = ionic_set_vf_config(ionic, vf, &vfc);
if (!ret)
- ionic->vfs[vf].trusted = data;
+ ionic->vfs[vf].trusted = set;
}
up_write(&ionic->vf_op_lock);
@@ -2360,20 +2458,21 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
{
+ struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_LINKSTATE };
struct ionic_lif *lif = netdev_priv(netdev);
struct ionic *ionic = lif->ionic;
- u8 data;
+ u8 vfls;
int ret;
switch (set) {
case IFLA_VF_LINK_STATE_ENABLE:
- data = IONIC_VF_LINK_STATUS_UP;
+ vfls = IONIC_VF_LINK_STATUS_UP;
break;
case IFLA_VF_LINK_STATE_DISABLE:
- data = IONIC_VF_LINK_STATUS_DOWN;
+ vfls = IONIC_VF_LINK_STATUS_DOWN;
break;
case IFLA_VF_LINK_STATE_AUTO:
- data = IONIC_VF_LINK_STATUS_AUTO;
+ vfls = IONIC_VF_LINK_STATUS_AUTO;
break;
default:
return -EINVAL;
@@ -2387,8 +2486,11 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
} else {
- ret = ionic_set_vf_config(ionic, vf,
- IONIC_VF_ATTR_LINKSTATE, &data);
+ vfc.linkstate = vfls;
+ dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n",
+ __func__, vf, vfc.linkstate);
+
+ ret = ionic_set_vf_config(ionic, vf, &vfc);
if (!ret)
ionic->vfs[vf].linkstate = set;
}
@@ -2835,6 +2937,7 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif)
mutex_unlock(&lif->queue_lock);
+ clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
dev_info(ionic->dev, "FW Down: LIFs stopped\n");
}
@@ -2934,8 +3037,6 @@ void ionic_lif_free(struct ionic_lif *lif)
/* unmap doorbell page */
ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
lif->kern_dbpage = NULL;
- kfree(lif->dbid_inuse);
- lif->dbid_inuse = NULL;
mutex_destroy(&lif->config_lock);
mutex_destroy(&lif->queue_lock);
@@ -3135,22 +3236,12 @@ int ionic_lif_init(struct ionic_lif *lif)
return -EINVAL;
}
- lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL);
- if (!lif->dbid_inuse) {
- dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n");
- return -ENOMEM;
- }
-
- /* first doorbell id reserved for kernel (dbid aka pid == zero) */
- set_bit(0, lif->dbid_inuse);
lif->kern_pid = 0;
-
dbpage_num = ionic_db_page_num(lif, lif->kern_pid);
lif->kern_dbpage = ionic_bus_map_dbpage(lif->ionic, dbpage_num);
if (!lif->kern_dbpage) {
dev_err(dev, "Cannot map dbpage, aborting\n");
- err = -ENOMEM;
- goto err_out_free_dbid;
+ return -ENOMEM;
}
err = ionic_lif_adminq_init(lif);
@@ -3186,15 +3277,13 @@ int ionic_lif_init(struct ionic_lif *lif)
return 0;
err_out_notifyq_deinit:
+ napi_disable(&lif->adminqcq->napi);
ionic_lif_qcq_deinit(lif, lif->notifyqcq);
err_out_adminq_deinit:
ionic_lif_qcq_deinit(lif, lif->adminqcq);
ionic_lif_reset(lif);
ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
lif->kern_dbpage = NULL;
-err_out_free_dbid:
- kfree(lif->dbid_inuse);
- lif->dbid_inuse = NULL;
return err;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 9f7ab2f17f93..a53984bf3544 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -135,6 +135,7 @@ enum ionic_lif_state_flags {
IONIC_LIF_F_LINK_CHECK_REQUESTED,
IONIC_LIF_F_FILTER_SYNC_NEEDED,
IONIC_LIF_F_FW_RESET,
+ IONIC_LIF_F_FW_STOPPING,
IONIC_LIF_F_SPLIT_INTR,
IONIC_LIF_F_BROKEN,
IONIC_LIF_F_TX_DIM_INTR,
@@ -213,7 +214,6 @@ struct ionic_lif {
u32 rx_coalesce_hw; /* what the hw is using */
u32 tx_coalesce_usecs; /* what the user asked for */
u32 tx_coalesce_hw; /* what the hw is using */
- unsigned long *dbid_inuse;
unsigned int dbid_count;
struct ionic_phc *phc;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 875f4ec42efe..4029b4e021f8 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -188,6 +188,28 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
}
}
+const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr)
+{
+ switch (attr) {
+ case IONIC_VF_ATTR_SPOOFCHK:
+ return "IONIC_VF_ATTR_SPOOFCHK";
+ case IONIC_VF_ATTR_TRUST:
+ return "IONIC_VF_ATTR_TRUST";
+ case IONIC_VF_ATTR_LINKSTATE:
+ return "IONIC_VF_ATTR_LINKSTATE";
+ case IONIC_VF_ATTR_MAC:
+ return "IONIC_VF_ATTR_MAC";
+ case IONIC_VF_ATTR_VLAN:
+ return "IONIC_VF_ATTR_VLAN";
+ case IONIC_VF_ATTR_RATE:
+ return "IONIC_VF_ATTR_RATE";
+ case IONIC_VF_ATTR_STATSADDR:
+ return "IONIC_VF_ATTR_STATSADDR";
+ default:
+ return "IONIC_VF_ATTR_UNKNOWN";
+ }
+}
+
static void ionic_adminq_flush(struct ionic_lif *lif)
{
struct ionic_desc_info *desc_info;
@@ -215,9 +237,13 @@ static void ionic_adminq_flush(struct ionic_lif *lif)
void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode,
u8 status, int err)
{
+ const char *stat_str;
+
+ stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" :
+ ionic_error_to_str(status);
+
netdev_err(lif->netdev, "%s (%d) failed: %s (%d)\n",
- ionic_opcode_to_str(opcode), opcode,
- ionic_error_to_str(status), err);
+ ionic_opcode_to_str(opcode), opcode, stat_str, err);
}
static int ionic_adminq_check_err(struct ionic_lif *lif,
@@ -318,6 +344,7 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx,
if (do_msg && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
netdev_err(netdev, "Posting of %s (%d) failed: %d\n",
name, ctx->cmd.cmd.opcode, err);
+ ctx->comp.comp.status = IONIC_RC_ERROR;
return err;
}
@@ -331,11 +358,15 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx,
if (remaining)
break;
- /* interrupt the wait if FW stopped */
- if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) {
+ /* force a check of FW status and break out if FW reset */
+ (void)ionic_heartbeat_check(lif->ionic);
+ if ((test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+ !lif->ionic->idev.fw_status_ready) ||
+ test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
if (do_msg)
- netdev_err(netdev, "%s (%d) interrupted, FW in reset\n",
- name, ctx->cmd.cmd.opcode);
+ netdev_warn(netdev, "%s (%d) interrupted, FW in reset\n",
+ name, ctx->cmd.cmd.opcode);
+ ctx->comp.comp.status = IONIC_RC_ERROR;
return -ENXIO;
}
@@ -370,21 +401,34 @@ int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *
static void ionic_dev_cmd_clean(struct ionic *ionic)
{
- union __iomem ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs;
+ struct ionic_dev *idev = &ionic->idev;
- iowrite32(0, &regs->doorbell);
- memset_io(&regs->cmd, 0, sizeof(regs->cmd));
+ iowrite32(0, &idev->dev_cmd_regs->doorbell);
+ memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd));
}
-int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status,
+ int err)
+{
+ const char *stat_str;
+
+ stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" :
+ ionic_error_to_str(status);
+
+ dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n",
+ ionic_opcode_to_str(opcode), opcode, stat_str, err);
+}
+
+static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds,
+ const bool do_msg)
{
struct ionic_dev *idev = &ionic->idev;
unsigned long start_time;
unsigned long max_wait;
unsigned long duration;
+ int done = 0;
+ bool fw_up;
int opcode;
- int hb = 0;
- int done;
int err;
/* Wait for dev cmd to complete, retrying if we get EAGAIN,
@@ -394,31 +438,24 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
try_again:
opcode = readb(&idev->dev_cmd_regs->cmd.cmd.opcode);
start_time = jiffies;
- do {
+ for (fw_up = ionic_is_fw_running(idev);
+ !done && fw_up && time_before(jiffies, max_wait);
+ fw_up = ionic_is_fw_running(idev)) {
done = ionic_dev_cmd_done(idev);
if (done)
break;
usleep_range(100, 200);
-
- /* Don't check the heartbeat on FW_CONTROL commands as they are
- * notorious for interrupting the firmware's heartbeat update.
- */
- if (opcode != IONIC_CMD_FW_CONTROL)
- hb = ionic_heartbeat_check(ionic);
- } while (!done && !hb && time_before(jiffies, max_wait));
+ }
duration = jiffies - start_time;
dev_dbg(ionic->dev, "DEVCMD %s (%d) done=%d took %ld secs (%ld jiffies)\n",
ionic_opcode_to_str(opcode), opcode,
done, duration / HZ, duration);
- if (!done && hb) {
- /* It is possible (but unlikely) that FW was busy and missed a
- * heartbeat check but is still alive and will process this
- * request, so don't clean the dev_cmd in this case.
- */
- dev_dbg(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n",
- ionic_opcode_to_str(opcode), opcode);
+ if (!done && !fw_up) {
+ ionic_dev_cmd_clean(ionic);
+ dev_warn(ionic->dev, "DEVCMD %s (%d) interrupted - FW is down\n",
+ ionic_opcode_to_str(opcode), opcode);
return -ENXIO;
}
@@ -444,9 +481,9 @@ try_again:
}
if (!(opcode == IONIC_CMD_FW_CONTROL && err == IONIC_RC_EAGAIN))
- dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n",
- ionic_opcode_to_str(opcode), opcode,
- ionic_error_to_str(err), err);
+ if (do_msg)
+ ionic_dev_cmd_dev_err_print(ionic, opcode, err,
+ ionic_error_to_errno(err));
return ionic_error_to_errno(err);
}
@@ -454,6 +491,16 @@ try_again:
return 0;
}
+int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+{
+ return __ionic_dev_cmd_wait(ionic, max_seconds, true);
+}
+
+int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_seconds)
+{
+ return __ionic_dev_cmd_wait(ionic, max_seconds, false);
+}
+
int ionic_setup(struct ionic *ionic)
{
int err;
@@ -540,6 +587,9 @@ int ionic_reset(struct ionic *ionic)
struct ionic_dev *idev = &ionic->idev;
int err;
+ if (!ionic_is_fw_running(idev))
+ return 0;
+
mutex_lock(&ionic->dev_cmd_lock);
ionic_dev_cmd_reset(idev);
err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
@@ -612,15 +662,17 @@ int ionic_port_init(struct ionic *ionic)
int ionic_port_reset(struct ionic *ionic)
{
struct ionic_dev *idev = &ionic->idev;
- int err;
+ int err = 0;
if (!idev->port_info)
return 0;
- mutex_lock(&ionic->dev_cmd_lock);
- ionic_dev_cmd_port_reset(idev);
- err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
- mutex_unlock(&ionic->dev_cmd_lock);
+ if (ionic_is_fw_running(idev)) {
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_port_reset(idev);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&ionic->dev_cmd_lock);
+ }
dma_free_coherent(ionic->dev, idev->port_info_sz,
idev->port_info, idev->port_info_pa);
@@ -628,9 +680,6 @@ int ionic_port_reset(struct ionic *ionic)
idev->port_info = NULL;
idev->port_info_pa = 0;
- if (err)
- dev_err(ionic->dev, "Failed to reset port\n");
-
return err;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
index f6e785f949f9..b7363376dfc8 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
@@ -376,10 +376,24 @@ static int ionic_lif_filter_add(struct ionic_lif *lif,
spin_unlock_bh(&lif->rx_filters.lock);
- if (err == -ENOSPC) {
- if (le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN)
- lif->max_vlans = lif->nvlans;
+ /* store the max_vlans limit that we found */
+ if (err == -ENOSPC &&
+ le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN)
+ lif->max_vlans = lif->nvlans;
+
+ /* Prevent unnecessary error messages on recoverable
+ * errors as the filter will get retried on the next
+ * sync attempt.
+ */
+ switch (err) {
+ case -ENOSPC:
+ case -ENXIO:
+ case -ETIMEDOUT:
+ case -EAGAIN:
+ case -EBUSY:
return 0;
+ default:
+ break;
}
ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode,
@@ -494,9 +508,22 @@ static int ionic_lif_filter_del(struct ionic_lif *lif,
spin_unlock_bh(&lif->rx_filters.lock);
if (state != IONIC_FILTER_STATE_NEW) {
- err = ionic_adminq_post_wait(lif, &ctx);
- if (err && err != -EEXIST)
+ err = ionic_adminq_post_wait_nomsg(lif, &ctx);
+
+ switch (err) {
+ /* ignore these errors */
+ case -EEXIST:
+ case -ENXIO:
+ case -ETIMEDOUT:
+ case -EAGAIN:
+ case -EBUSY:
+ case 0:
+ break;
+ default:
+ ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode,
+ ctx.comp.comp.status, err);
return err;
+ }
}
return 0;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 94384f5d2a22..d197a70a49c9 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -669,27 +669,37 @@ dma_fail:
return -EIO;
}
+static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
+ struct ionic_desc_info *desc_info)
+{
+ struct ionic_buf_info *buf_info = desc_info->bufs;
+ struct device *dev = q->dev;
+ unsigned int i;
+
+ if (!desc_info->nbufs)
+ return;
+
+ dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
+ buf_info->len, DMA_TO_DEVICE);
+ buf_info++;
+ for (i = 1; i < desc_info->nbufs; i++, buf_info++)
+ dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
+ buf_info->len, DMA_TO_DEVICE);
+
+ desc_info->nbufs = 0;
+}
+
static void ionic_tx_clean(struct ionic_queue *q,
struct ionic_desc_info *desc_info,
struct ionic_cq_info *cq_info,
void *cb_arg)
{
- struct ionic_buf_info *buf_info = desc_info->bufs;
struct ionic_tx_stats *stats = q_to_tx_stats(q);
struct ionic_qcq *qcq = q_to_qcq(q);
struct sk_buff *skb = cb_arg;
- struct device *dev = q->dev;
- unsigned int i;
u16 qi;
- if (desc_info->nbufs) {
- dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
- buf_info->len, DMA_TO_DEVICE);
- buf_info++;
- for (i = 1; i < desc_info->nbufs; i++, buf_info++)
- dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
- buf_info->len, DMA_TO_DEVICE);
- }
+ ionic_tx_desc_unmap_bufs(q, desc_info);
if (!skb)
return;
@@ -931,8 +941,11 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
err = ionic_tx_tcp_inner_pseudo_csum(skb);
else
err = ionic_tx_tcp_pseudo_csum(skb);
- if (err)
+ if (err) {
+ /* clean up mapping from ionic_tx_map_skb */
+ ionic_tx_desc_unmap_bufs(q, desc_info);
return err;
+ }
if (encap)
hdrlen = skb_inner_transport_header(skb) - skb->data +
@@ -1003,8 +1016,8 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
return 0;
}
-static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
- struct ionic_desc_info *desc_info)
+static void ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
+ struct ionic_desc_info *desc_info)
{
struct ionic_txq_desc *desc = desc_info->txq_desc;
struct ionic_buf_info *buf_info = desc_info->bufs;
@@ -1038,12 +1051,10 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
stats->crc32_csum++;
else
stats->csum++;
-
- return 0;
}
-static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
- struct ionic_desc_info *desc_info)
+static void ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
+ struct ionic_desc_info *desc_info)
{
struct ionic_txq_desc *desc = desc_info->txq_desc;
struct ionic_buf_info *buf_info = desc_info->bufs;
@@ -1074,12 +1085,10 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
desc->csum_offset = 0;
stats->csum_none++;
-
- return 0;
}
-static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
- struct ionic_desc_info *desc_info)
+static void ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
+ struct ionic_desc_info *desc_info)
{
struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc;
struct ionic_buf_info *buf_info = &desc_info->bufs[1];
@@ -1093,31 +1102,24 @@ static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
}
stats->frags += skb_shinfo(skb)->nr_frags;
-
- return 0;
}
static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
{
struct ionic_desc_info *desc_info = &q->info[q->head_idx];
struct ionic_tx_stats *stats = q_to_tx_stats(q);
- int err;
if (unlikely(ionic_tx_map_skb(q, skb, desc_info)))
return -EIO;
/* set up the initial descriptor */
if (skb->ip_summed == CHECKSUM_PARTIAL)
- err = ionic_tx_calc_csum(q, skb, desc_info);
+ ionic_tx_calc_csum(q, skb, desc_info);
else
- err = ionic_tx_calc_no_csum(q, skb, desc_info);
- if (err)
- return err;
+ ionic_tx_calc_no_csum(q, skb, desc_info);
/* add frags */
- err = ionic_tx_skb_frags(q, skb, desc_info);
- if (err)
- return err;
+ ionic_tx_skb_frags(q, skb, desc_info);
skb_tx_timestamp(skb);
stats->pkts++;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index cc4ec2bb36db..672480c9d195 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -3098,6 +3098,9 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
continue;
}
+ /* Some flows may keep variable set */
+ p_hwfn->mcp_info->mcp_handling_status = 0;
+
rc = qed_calc_hw_mode(p_hwfn);
if (rc)
return rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index da1eadabcb41..9fb1fa479d4b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -140,7 +140,7 @@ static struct qed_mcp_cmd_elem *qed_mcp_cmd_get_elem(struct qed_hwfn *p_hwfn,
int qed_mcp_free(struct qed_hwfn *p_hwfn)
{
if (p_hwfn->mcp_info) {
- struct qed_mcp_cmd_elem *p_cmd_elem, *p_tmp;
+ struct qed_mcp_cmd_elem *p_cmd_elem = NULL, *p_tmp;
kfree(p_hwfn->mcp_info->mfw_mb_cur);
kfree(p_hwfn->mcp_info->mfw_mb_shadow);
@@ -249,6 +249,7 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
/* Initialize the MFW spinlock */
spin_lock_init(&p_info->cmd_lock);
spin_lock_init(&p_info->link_lock);
+ spin_lock_init(&p_info->unload_lock);
INIT_LIST_HEAD(&p_info->cmd_list);
@@ -614,12 +615,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
usecs);
}
-int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
- u32 cmd,
- u32 param,
- u32 *o_mcp_resp,
- u32 *o_mcp_param)
+static int _qed_mcp_cmd(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 cmd,
+ u32 param,
+ u32 *o_mcp_resp,
+ u32 *o_mcp_param,
+ bool can_sleep)
{
struct qed_mcp_mb_params mb_params;
int rc;
@@ -627,6 +629,7 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
memset(&mb_params, 0, sizeof(mb_params));
mb_params.cmd = cmd;
mb_params.param = param;
+ mb_params.flags = can_sleep ? QED_MB_FLAG_CAN_SLEEP : 0;
rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
if (rc)
@@ -638,6 +641,28 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
return 0;
}
+int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 cmd,
+ u32 param,
+ u32 *o_mcp_resp,
+ u32 *o_mcp_param)
+{
+ return (_qed_mcp_cmd(p_hwfn, p_ptt, cmd, param,
+ o_mcp_resp, o_mcp_param, true));
+}
+
+int qed_mcp_cmd_nosleep(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 cmd,
+ u32 param,
+ u32 *o_mcp_resp,
+ u32 *o_mcp_param)
+{
+ return (_qed_mcp_cmd(p_hwfn, p_ptt, cmd, param,
+ o_mcp_resp, o_mcp_param, false));
+}
+
static int
qed_mcp_nvm_wr_cmd(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
@@ -1071,10 +1096,15 @@ int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
return 0;
}
+#define MFW_COMPLETION_MAX_ITER 5000
+#define MFW_COMPLETION_INTERVAL_MS 1
+
int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_mcp_mb_params mb_params;
+ u32 cnt = MFW_COMPLETION_MAX_ITER;
u32 wol_param;
+ int rc;
switch (p_hwfn->cdev->wol_config) {
case QED_OV_WOL_DISABLED:
@@ -1097,7 +1127,23 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
mb_params.param = wol_param;
mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK;
- return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+ spin_lock_bh(&p_hwfn->mcp_info->unload_lock);
+ set_bit(QED_MCP_BYPASS_PROC_BIT,
+ &p_hwfn->mcp_info->mcp_handling_status);
+ spin_unlock_bh(&p_hwfn->mcp_info->unload_lock);
+
+ rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+
+ while (test_bit(QED_MCP_IN_PROCESSING_BIT,
+ &p_hwfn->mcp_info->mcp_handling_status) && --cnt)
+ msleep(MFW_COMPLETION_INTERVAL_MS);
+
+ if (!cnt)
+ DP_NOTICE(p_hwfn,
+ "Failed to wait MFW event completion after %d msec\n",
+ MFW_COMPLETION_MAX_ITER * MFW_COMPLETION_INTERVAL_MS);
+
+ return rc;
}
int qed_mcp_unload_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
@@ -1728,8 +1774,8 @@ static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
qed_configure_pf_max_bandwidth(p_hwfn->cdev, p_info->bandwidth_max);
/* Acknowledge the MFW */
- qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BW_UPDATE_ACK, 0, &resp,
- &param);
+ qed_mcp_cmd_nosleep(p_hwfn, p_ptt, DRV_MSG_CODE_BW_UPDATE_ACK, 0, &resp,
+ &param);
}
static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
@@ -1766,8 +1812,8 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
p_hwfn->mcp_info->func_info.ovlan, p_hwfn->hw_info.hw_mode);
/* Acknowledge the MFW */
- qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_S_TAG_UPDATE_ACK, 0,
- &resp, &param);
+ qed_mcp_cmd_nosleep(p_hwfn, p_ptt, DRV_MSG_CODE_S_TAG_UPDATE_ACK, 0,
+ &resp, &param);
}
static void qed_mcp_handle_fan_failure(struct qed_hwfn *p_hwfn,
@@ -1997,6 +2043,19 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
"Msg [%d] - old CMD 0x%02x, new CMD 0x%02x\n",
i, info->mfw_mb_shadow[i], info->mfw_mb_cur[i]);
+ spin_lock_bh(&p_hwfn->mcp_info->unload_lock);
+ if (test_bit(QED_MCP_BYPASS_PROC_BIT,
+ &p_hwfn->mcp_info->mcp_handling_status)) {
+ spin_unlock_bh(&p_hwfn->mcp_info->unload_lock);
+ DP_INFO(p_hwfn,
+ "Msg [%d] is bypassed on unload flow\n", i);
+ continue;
+ }
+
+ set_bit(QED_MCP_IN_PROCESSING_BIT,
+ &p_hwfn->mcp_info->mcp_handling_status);
+ spin_unlock_bh(&p_hwfn->mcp_info->unload_lock);
+
switch (i) {
case MFW_DRV_MSG_LINK_CHANGE:
qed_mcp_handle_link_change(p_hwfn, p_ptt, false);
@@ -2050,6 +2109,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
DP_INFO(p_hwfn, "Unimplemented MFW message %d\n", i);
rc = -EINVAL;
}
+
+ clear_bit(QED_MCP_IN_PROCESSING_BIT,
+ &p_hwfn->mcp_info->mcp_handling_status);
}
/* ACK everything */
@@ -3675,8 +3737,8 @@ static int qed_mcp_resource_cmd(struct qed_hwfn *p_hwfn,
{
int rc;
- rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_RESOURCE_CMD, param,
- p_mcp_resp, p_mcp_param);
+ rc = qed_mcp_cmd_nosleep(p_hwfn, p_ptt, DRV_MSG_CODE_RESOURCE_CMD,
+ param, p_mcp_resp, p_mcp_param);
if (rc)
return rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 369e1892450a..9bd0565fe8ab 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -393,11 +393,12 @@ int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, u32 *p_board_config);
/**
- * qed_mcp_cmd(): General function for sending commands to the MCP
+ * qed_mcp_cmd(): Sleepable function for sending commands to the MCP
* mailbox. It acquire mutex lock for the entire
* operation, from sending the request until the MCP
* response. Waiting for MCP response will be checked up
- * to 5 seconds every 5ms.
+ * to 5 seconds every 10ms. Should not be called from atomic
+ * context.
*
* @p_hwfn: HW device data.
* @p_ptt: PTT required for register access.
@@ -417,6 +418,31 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
u32 *o_mcp_param);
/**
+ * qed_mcp_cmd_nosleep(): Function for sending commands to the MCP
+ * mailbox. It acquire mutex lock for the entire
+ * operation, from sending the request until the MCP
+ * response. Waiting for MCP response will be checked up
+ * to 5 seconds every 10us. Should be called when sleep
+ * is not allowed.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
+ * @cmd: command to be sent to the MCP.
+ * @param: Optional param
+ * @o_mcp_resp: The MCP response code (exclude sequence).
+ * @o_mcp_param: Optional parameter provided by the MCP
+ * response
+ *
+ * Return: Int - 0 - Operation was successul.
+ */
+int qed_mcp_cmd_nosleep(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 cmd,
+ u32 param,
+ u32 *o_mcp_resp,
+ u32 *o_mcp_param);
+
+/**
* qed_mcp_drain(): drains the nig, allowing completion to pass in
* case of pauses.
* (Should be called only from sleepable context)
@@ -762,6 +788,14 @@ struct qed_mcp_info {
/* S/N for debug data mailbox commands */
atomic_t dbg_data_seq;
+
+ /* Spinlock used to sync the flag mcp_handling_status with
+ * the mfw events handler
+ */
+ spinlock_t unload_lock;
+ unsigned long mcp_handling_status;
+#define QED_MCP_BYPASS_PROC_BIT 0
+#define QED_MCP_IN_PROCESSING_BIT 1
};
struct qed_mcp_mb_params {
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 19e2621e0645..67014eb76969 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2667,10 +2667,7 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_38:
rtl_eri_set_bits(tp, 0xd4, 0x0c00);
break;
- case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53:
- rtl_eri_set_bits(tp, 0xd4, 0x1f80);
- break;
- case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63:
r8168_mac_ocp_modify(tp, 0xc0ac, 0, 0x1f80);
break;
default:
@@ -2678,13 +2675,48 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp)
}
}
+static void rtl_disable_exit_l1(struct rtl8169_private *tp)
+{
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
+ rtl_eri_clear_bits(tp, 0xd4, 0x1f00);
+ break;
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63:
+ r8168_mac_ocp_modify(tp, 0xc0ac, 0x1f80, 0);
+ break;
+ default:
+ break;
+ }
+}
+
static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
{
/* Don't enable ASPM in the chip if OS can't control ASPM */
if (enable && tp->aspm_manageable) {
RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);
RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
+
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_45 ... RTL_GIGA_MAC_VER_48:
+ case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+ /* reset ephy tx/rx disable timer */
+ r8168_mac_ocp_modify(tp, 0xe094, 0xff00, 0);
+ /* chip can trigger L1.2 */
+ r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, BIT(2));
+ break;
+ default:
+ break;
+ }
} else {
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_45 ... RTL_GIGA_MAC_VER_48:
+ case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+ r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, 0);
+ break;
+ default:
+ break;
+ }
+
RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
}
@@ -4683,7 +4715,7 @@ static void rtl8169_down(struct rtl8169_private *tp)
rtl_pci_commit(tp);
rtl8169_cleanup(tp, true);
-
+ rtl_disable_exit_l1(tp);
rtl_prepare_power_down(tp);
}
@@ -4843,8 +4875,6 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp)
rtl8169_down(tp);
}
-#ifdef CONFIG_PM
-
static int rtl8169_runtime_resume(struct device *dev)
{
struct rtl8169_private *tp = dev_get_drvdata(dev);
@@ -4860,7 +4890,7 @@ static int rtl8169_runtime_resume(struct device *dev)
return 0;
}
-static int __maybe_unused rtl8169_suspend(struct device *device)
+static int rtl8169_suspend(struct device *device)
{
struct rtl8169_private *tp = dev_get_drvdata(device);
@@ -4873,7 +4903,7 @@ static int __maybe_unused rtl8169_suspend(struct device *device)
return 0;
}
-static int __maybe_unused rtl8169_resume(struct device *device)
+static int rtl8169_resume(struct device *device)
{
struct rtl8169_private *tp = dev_get_drvdata(device);
@@ -4915,13 +4945,11 @@ static int rtl8169_runtime_idle(struct device *device)
}
static const struct dev_pm_ops rtl8169_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume)
- SET_RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume,
- rtl8169_runtime_idle)
+ SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume)
+ RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume,
+ rtl8169_runtime_idle)
};
-#endif /* CONFIG_PM */
-
static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
{
/* WoL fails with 8168b when the receiver is disabled. */
@@ -5255,6 +5283,16 @@ done:
rtl_rar_set(tp, mac_addr);
}
+/* register is set if system vendor successfully tested ASPM 1.2 */
+static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
+{
+ if (tp->mac_version >= RTL_GIGA_MAC_VER_60 &&
+ r8168_mac_ocp_read(tp, 0xc0b2) & 0xf)
+ return true;
+
+ return false;
+}
+
static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct rtl8169_private *tp;
@@ -5333,7 +5371,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
* Chips from RTL8168h partially have issues with L1.2, but seem
* to work fine with L1 and L1.1.
*/
- if (tp->mac_version >= RTL_GIGA_MAC_VER_45)
+ if (rtl_aspm_is_safe(tp))
+ rc = 0;
+ else if (tp->mac_version >= RTL_GIGA_MAC_VER_45)
rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
else
rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
@@ -5460,9 +5500,7 @@ static struct pci_driver rtl8169_pci_driver = {
.probe = rtl_init_one,
.remove = rtl_remove_one,
.shutdown = rtl_shutdown,
-#ifdef CONFIG_PM
- .driver.pm = &rtl8169_pm_ops,
-#endif
+ .driver.pm = pm_ptr(&rtl8169_pm_ops),
};
module_pci_driver(rtl8169_pci_driver);
diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c
index f7ad5487879b..15c295f90196 100644
--- a/drivers/net/ethernet/realtek/r8169_phy_config.c
+++ b/drivers/net/ethernet/realtek/r8169_phy_config.c
@@ -429,15 +429,6 @@ static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = {
{ 0x0d, 0xf880 }
};
-static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = {
- { 0x1f, 0x0002 },
- { 0x05, 0x669a },
- { 0x1f, 0x0005 },
- { 0x05, 0x8330 },
- { 0x06, 0x669a },
- { 0x1f, 0x0002 }
-};
-
static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp,
struct phy_device *phydev,
u16 val)
@@ -455,6 +446,29 @@ static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp,
r8169_apply_firmware(tp);
}
+static void rtl8168d_1_common(struct phy_device *phydev)
+{
+ u16 val;
+
+ phy_write_paged(phydev, 0x0002, 0x05, 0x669a);
+ r8168d_phy_param(phydev, 0x8330, 0xffff, 0x669a);
+ phy_write(phydev, 0x1f, 0x0002);
+
+ val = phy_read(phydev, 0x0d);
+
+ if ((val & 0x00ff) != 0x006c) {
+ static const u16 set[] = {
+ 0x0065, 0x0066, 0x0067, 0x0068,
+ 0x0069, 0x006a, 0x006b, 0x006c
+ };
+ int i;
+
+ val &= 0xff00;
+ for (i = 0; i < ARRAY_SIZE(set); i++)
+ phy_write(phydev, 0x0d, val | set[i]);
+ }
+}
+
static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp,
struct phy_device *phydev)
{
@@ -469,25 +483,7 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp,
phy_modify(phydev, 0x0c, 0x5d00, 0xa200);
if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) {
- int val;
-
- rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1);
-
- val = phy_read(phydev, 0x0d);
-
- if ((val & 0x00ff) != 0x006c) {
- static const u32 set[] = {
- 0x0065, 0x0066, 0x0067, 0x0068,
- 0x0069, 0x006a, 0x006b, 0x006c
- };
- int i;
-
- phy_write(phydev, 0x1f, 0x0002);
-
- val &= 0xff00;
- for (i = 0; i < ARRAY_SIZE(set); i++)
- phy_write(phydev, 0x0d, val | set[i]);
- }
+ rtl8168d_1_common(phydev);
} else {
phy_write_paged(phydev, 0x0002, 0x05, 0x6662);
r8168d_phy_param(phydev, 0x8330, 0xffff, 0x6662);
@@ -513,24 +509,7 @@ static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp,
rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_0);
if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) {
- int val;
-
- rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1);
-
- val = phy_read(phydev, 0x0d);
- if ((val & 0x00ff) != 0x006c) {
- static const u32 set[] = {
- 0x0065, 0x0066, 0x0067, 0x0068,
- 0x0069, 0x006a, 0x006b, 0x006c
- };
- int i;
-
- phy_write(phydev, 0x1f, 0x0002);
-
- val &= 0xff00;
- for (i = 0; i < ARRAY_SIZE(set); i++)
- phy_write(phydev, 0x0d, val | set[i]);
- }
+ rtl8168d_1_common(phydev);
} else {
phy_write_paged(phydev, 0x0002, 0x05, 0x2642);
r8168d_phy_param(phydev, 0x8330, 0xffff, 0x2642);
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index b215cde68e10..24e2635c4c80 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1432,11 +1432,7 @@ static int ravb_phy_init(struct net_device *ndev)
* at this time.
*/
if (soc_device_match(r8a7795es10)) {
- err = phy_set_max_speed(phydev, SPEED_100);
- if (err) {
- netdev_err(ndev, "failed to limit PHY to 100Mbit/s\n");
- goto err_phy_disconnect;
- }
+ phy_set_max_speed(phydev, SPEED_100);
netdev_info(ndev, "limited PHY to 100Mbit/s\n");
}
@@ -1457,8 +1453,6 @@ static int ravb_phy_init(struct net_device *ndev)
return 0;
-err_phy_disconnect:
- phy_disconnect(phydev);
err_deregister_fixed_link:
if (of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
@@ -2854,7 +2848,6 @@ static int ravb_wol_restore(struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
- int ret;
if (info->nc_queues)
napi_enable(&priv->napi[RAVB_NC]);
@@ -2863,9 +2856,7 @@ static int ravb_wol_restore(struct net_device *ndev)
/* Disable MagicPacket */
ravb_modify(ndev, ECMR, ECMR_MPDE, 0);
- ret = ravb_close(ndev);
- if (ret < 0)
- return ret;
+ ravb_close(ndev);
return disable_irq_wake(priv->emac_irq);
}
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index d947a628e166..67ade78fb767 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2026,14 +2026,8 @@ static int sh_eth_phy_init(struct net_device *ndev)
}
/* mask with MAC supported features */
- if (mdp->cd->register_type != SH_ETH_REG_GIGABIT) {
- int err = phy_set_max_speed(phydev, SPEED_100);
- if (err) {
- netdev_err(ndev, "failed to limit PHY to 100 Mbit/s\n");
- phy_disconnect(phydev);
- return err;
- }
- }
+ if (mdp->cd->register_type != SH_ETH_REG_GIGABIT)
+ phy_set_max_speed(phydev, SPEED_100);
phy_attached_info(phydev);
@@ -3450,9 +3444,7 @@ static int sh_eth_wol_restore(struct net_device *ndev)
* both be reset and all registers restored. This is what
* happens during suspend and resume without WoL enabled.
*/
- ret = sh_eth_close(ndev);
- if (ret < 0)
- return ret;
+ sh_eth_close(ndev);
ret = sh_eth_open(ndev);
if (ret < 0)
return ret;
@@ -3464,7 +3456,7 @@ static int sh_eth_suspend(struct device *dev)
{
struct net_device *ndev = dev_get_drvdata(dev);
struct sh_eth_private *mdp = netdev_priv(ndev);
- int ret = 0;
+ int ret;
if (!netif_running(ndev))
return 0;
@@ -3483,7 +3475,7 @@ static int sh_eth_resume(struct device *dev)
{
struct net_device *ndev = dev_get_drvdata(dev);
struct sh_eth_private *mdp = netdev_priv(ndev);
- int ret = 0;
+ int ret;
if (!netif_running(ndev))
return 0;
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 32161a56726c..77a0d9d7e65a 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -127,7 +127,7 @@ bool sxgbe_eee_init(struct sxgbe_priv_data * const priv)
/* MAC core supports the EEE feature. */
if (priv->hw_cap.eee) {
/* Check if the PHY supports EEE */
- if (phy_init_eee(ndev->phydev, 1))
+ if (phy_init_eee(ndev->phydev, true))
return false;
priv->eee_active = 1;
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index cf366ed2557c..50d535981a35 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -3990,6 +3990,30 @@ static unsigned int ef10_check_caps(const struct efx_nic *efx,
}
}
+static unsigned int efx_ef10_recycle_ring_size(const struct efx_nic *efx)
+{
+ unsigned int ret = EFX_RECYCLE_RING_SIZE_10G;
+
+ /* There is no difference between PFs and VFs. The side is based on
+ * the maximum link speed of a given NIC.
+ */
+ switch (efx->pci_dev->device & 0xfff) {
+ case 0x0903: /* Farmingdale can do up to 10G */
+ break;
+ case 0x0923: /* Greenport can do up to 40G */
+ case 0x0a03: /* Medford can do up to 40G */
+ ret *= 4;
+ break;
+ default: /* Medford2 can do up to 100G */
+ ret *= 10;
+ }
+
+ if (IS_ENABLED(CONFIG_PPC64))
+ ret *= 4;
+
+ return ret;
+}
+
#define EF10_OFFLOAD_FEATURES \
(NETIF_F_IP_CSUM | \
NETIF_F_HW_VLAN_CTAG_FILTER | \
@@ -4106,6 +4130,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
.check_caps = ef10_check_caps,
.print_additional_fwver = efx_ef10_print_additional_fwver,
.sensor_event = efx_mcdi_sensor_event,
+ .rx_recycle_ring_size = efx_ef10_recycle_ring_size,
};
const struct efx_nic_type efx_hunt_a0_nic_type = {
@@ -4243,4 +4268,5 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.check_caps = ef10_check_caps,
.print_additional_fwver = efx_ef10_print_additional_fwver,
.sensor_event = efx_mcdi_sensor_event,
+ .rx_recycle_ring_size = efx_ef10_recycle_ring_size,
};
diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c
index f79b14a119ae..a07cbf45a326 100644
--- a/drivers/net/ethernet/sfc/ef100_nic.c
+++ b/drivers/net/ethernet/sfc/ef100_nic.c
@@ -23,6 +23,7 @@
#include "ef100_rx.h"
#include "ef100_tx.h"
#include "ef100_netdev.h"
+#include "rx_common.h"
#define EF100_MAX_VIS 4096
#define EF100_NUM_MCDI_BUFFERS 1
@@ -696,6 +697,12 @@ static unsigned int ef100_check_caps(const struct efx_nic *efx,
}
}
+static unsigned int efx_ef100_recycle_ring_size(const struct efx_nic *efx)
+{
+ /* Maximum link speed for Riverhead is 100G */
+ return 10 * EFX_RECYCLE_RING_SIZE_10G;
+}
+
/* NIC level access functions
*/
#define EF100_OFFLOAD_FEATURES (NETIF_F_HW_CSUM | NETIF_F_RXCSUM | \
@@ -770,6 +777,7 @@ const struct efx_nic_type ef100_pf_nic_type = {
.rx_push_rss_context_config = efx_mcdi_rx_push_rss_context_config,
.rx_pull_rss_context_config = efx_mcdi_rx_pull_rss_context_config,
.rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts,
+ .rx_recycle_ring_size = efx_ef100_recycle_ring_size,
.reconfigure_mac = ef100_reconfigure_mac,
.reconfigure_port = efx_mcdi_port_reconfigure,
@@ -849,6 +857,7 @@ const struct efx_nic_type ef100_vf_nic_type = {
.rx_pull_rss_config = efx_mcdi_rx_pull_rss_config,
.rx_push_rss_config = efx_mcdi_pf_rx_push_rss_config,
.rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts,
+ .rx_recycle_ring_size = efx_ef100_recycle_ring_size,
.reconfigure_mac = ef100_reconfigure_mac,
.test_nvram = efx_new_mcdi_nvram_test_all,
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index cc15ee8812d9..c75dc75e2857 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1282,6 +1282,7 @@ struct efx_udp_tunnel {
* @udp_tnl_has_port: Check if a port has been added as UDP tunnel
* @print_additional_fwver: Dump NIC-specific additional FW version info
* @sensor_event: Handle a sensor event from MCDI
+ * @rx_recycle_ring_size: Size of the RX recycle ring
* @revision: Hardware architecture revision
* @txd_ptr_tbl_base: TX descriptor ring base address
* @rxd_ptr_tbl_base: RX descriptor ring base address
@@ -1460,6 +1461,7 @@ struct efx_nic_type {
size_t (*print_additional_fwver)(struct efx_nic *efx, char *buf,
size_t len);
void (*sensor_event)(struct efx_nic *efx, efx_qword_t *ev);
+ unsigned int (*rx_recycle_ring_size)(const struct efx_nic *efx);
int revision;
unsigned int txd_ptr_tbl_base;
diff --git a/drivers/net/ethernet/sfc/nic_common.h b/drivers/net/ethernet/sfc/nic_common.h
index b9cafe9cd568..0cef35c0c559 100644
--- a/drivers/net/ethernet/sfc/nic_common.h
+++ b/drivers/net/ethernet/sfc/nic_common.h
@@ -195,6 +195,11 @@ static inline void efx_sensor_event(struct efx_nic *efx, efx_qword_t *ev)
efx->type->sensor_event(efx, ev);
}
+static inline unsigned int efx_rx_recycle_ring_size(const struct efx_nic *efx)
+{
+ return efx->type->rx_recycle_ring_size(efx);
+}
+
/* Some statistics are computed as A - B where A and B each increase
* linearly with some hardware counter(s) and the counters are read
* asynchronously. If the counters contributing to B are always read
diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c
index 633ca77a26fd..1b22c7be0088 100644
--- a/drivers/net/ethernet/sfc/rx_common.c
+++ b/drivers/net/ethernet/sfc/rx_common.c
@@ -23,13 +23,6 @@ module_param(rx_refill_threshold, uint, 0444);
MODULE_PARM_DESC(rx_refill_threshold,
"RX descriptor ring refill threshold (%)");
-/* Number of RX buffers to recycle pages for. When creating the RX page recycle
- * ring, this number is divided by the number of buffers per page to calculate
- * the number of pages to store in the RX page recycle ring.
- */
-#define EFX_RECYCLE_RING_SIZE_IOMMU 4096
-#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)
-
/* RX maximum head room required.
*
* This must be at least 1 to prevent overflow, plus one packet-worth
@@ -141,16 +134,7 @@ static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue)
unsigned int bufs_in_recycle_ring, page_ring_size;
struct efx_nic *efx = rx_queue->efx;
- /* Set the RX recycle ring size */
-#ifdef CONFIG_PPC64
- bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
-#else
- if (iommu_present(&pci_bus_type))
- bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
- else
- bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;
-#endif /* CONFIG_PPC64 */
-
+ bufs_in_recycle_ring = efx_rx_recycle_ring_size(efx);
page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
efx->rx_bufs_per_page);
rx_queue->page_ring = kcalloc(page_ring_size,
diff --git a/drivers/net/ethernet/sfc/rx_common.h b/drivers/net/ethernet/sfc/rx_common.h
index 207ccd8ba062..fbd2769307f9 100644
--- a/drivers/net/ethernet/sfc/rx_common.h
+++ b/drivers/net/ethernet/sfc/rx_common.h
@@ -18,6 +18,12 @@
#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
EFX_RX_USR_BUF_SIZE)
+/* Number of RX buffers to recycle pages for. When creating the RX page recycle
+ * ring, this number is divided by the number of buffers per page to calculate
+ * the number of pages to store in the RX page recycle ring.
+ */
+#define EFX_RECYCLE_RING_SIZE_10G 256
+
static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
{
return page_address(buf->page) + buf->page_offset;
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 16347a6d0c47..ce3060e15b54 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -25,6 +25,7 @@
#include "mcdi_port_common.h"
#include "selftest.h"
#include "siena_sriov.h"
+#include "rx_common.h"
/* Hardware control for SFC9000 family including SFL9021 (aka Siena). */
@@ -958,6 +959,12 @@ static unsigned int siena_check_caps(const struct efx_nic *efx,
return 0;
}
+static unsigned int efx_siena_recycle_ring_size(const struct efx_nic *efx)
+{
+ /* Maximum link speed is 10G */
+ return EFX_RECYCLE_RING_SIZE_10G;
+}
+
/**************************************************************************
*
* Revision-dependent attributes used by efx.c and nic.c
@@ -1098,4 +1105,5 @@ const struct efx_nic_type siena_a0_nic_type = {
.rx_hash_key_size = 16,
.check_caps = siena_check_caps,
.sensor_event = efx_mcdi_sensor_event,
+ .rx_recycle_ring_size = efx_siena_recycle_ring_size,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 8e8778cfbbad..5943ff9f21c2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -383,10 +383,10 @@ static int intel_crosststamp(ktime_t *device,
/* Repeat until the timestamps are from the FIFO last segment */
for (i = 0; i < num_snapshot; i++) {
- spin_lock_irqsave(&priv->ptp_lock, flags);
+ read_lock_irqsave(&priv->ptp_lock, flags);
stmmac_get_ptptime(priv, ptpaddr, &ptp_time);
*device = ns_to_ktime(ptp_time);
- spin_unlock_irqrestore(&priv->ptp_lock, flags);
+ read_unlock_irqrestore(&priv->ptp_lock, flags);
get_arttime(priv->mii, intel_priv->mdio_adhoc_addr, &art_time);
*system = convert_art_to_tsc(art_time);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 09644ab0d87a..f86cc83003f2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -16,6 +16,7 @@
#include <linux/of_net.h>
#include <linux/phy.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/regulator/consumer.h>
#include <linux/regmap.h>
#include <linux/stmmac.h>
@@ -57,7 +58,6 @@ struct emac_variant {
};
/* struct sunxi_priv_data - hold all sunxi private data
- * @tx_clk: reference to MAC TX clock
* @ephy_clk: reference to the optional EPHY clock for the internal PHY
* @regulator: reference to the optional regulator
* @rst_ephy: reference to the optional EPHY reset for the internal PHY
@@ -68,7 +68,6 @@ struct emac_variant {
* @mux_handle: Internal pointer used by mdio-mux lib
*/
struct sunxi_priv_data {
- struct clk *tx_clk;
struct clk *ephy_clk;
struct regulator *regulator;
struct reset_control *rst_ephy;
@@ -579,22 +578,14 @@ static int sun8i_dwmac_init(struct platform_device *pdev, void *priv)
}
}
- ret = clk_prepare_enable(gmac->tx_clk);
- if (ret) {
- dev_err(&pdev->dev, "Could not enable AHB clock\n");
- goto err_disable_regulator;
- }
-
if (gmac->use_internal_phy) {
ret = sun8i_dwmac_power_internal_phy(netdev_priv(ndev));
if (ret)
- goto err_disable_clk;
+ goto err_disable_regulator;
}
return 0;
-err_disable_clk:
- clk_disable_unprepare(gmac->tx_clk);
err_disable_regulator:
if (gmac->regulator)
regulator_disable(gmac->regulator);
@@ -1043,8 +1034,6 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv)
if (gmac->variant->soc_has_internal_phy)
sun8i_dwmac_unpower_internal_phy(gmac);
- clk_disable_unprepare(gmac->tx_clk);
-
if (gmac->regulator)
regulator_disable(gmac->regulator);
}
@@ -1167,12 +1156,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
return -EINVAL;
}
- gmac->tx_clk = devm_clk_get(dev, "stmmaceth");
- if (IS_ERR(gmac->tx_clk)) {
- dev_err(dev, "Could not get TX clock\n");
- return PTR_ERR(gmac->tx_clk);
- }
-
/* Optional regulator for PHY */
gmac->regulator = devm_regulator_get_optional(dev, "phy");
if (IS_ERR(gmac->regulator)) {
@@ -1254,6 +1237,12 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
ndev = dev_get_drvdata(&pdev->dev);
priv = netdev_priv(ndev);
+ /* the MAC is runtime suspended after stmmac_dvr_probe(), so we
+ * need to ensure the MAC resume back before other operations such
+ * as reset.
+ */
+ pm_runtime_get_sync(&pdev->dev);
+
/* The mux must be registered after parent MDIO
* so after stmmac_dvr_probe()
*/
@@ -1272,12 +1261,15 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
goto dwmac_remove;
}
+ pm_runtime_put(&pdev->dev);
+
return 0;
dwmac_mux:
reset_control_put(gmac->rst_ephy);
clk_put(gmac->ephy_clk);
dwmac_remove:
+ pm_runtime_put_noidle(&pdev->dev);
stmmac_dvr_remove(&pdev->dev);
dwmac_exit:
sun8i_dwmac_exit(pdev, gmac);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 5b195d5051d6..57970ae2178d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -263,7 +263,7 @@ struct stmmac_priv {
u32 adv_ts;
int use_riwt;
int irq_wake;
- spinlock_t ptp_lock;
+ rwlock_t ptp_lock;
/* Protects auxiliary snapshot registers from concurrent access. */
struct mutex aux_ts_lock;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index a7ec9f4d46ce..22fea0f67245 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -196,9 +196,9 @@ static void timestamp_interrupt(struct stmmac_priv *priv)
GMAC_TIMESTAMP_ATSNS_SHIFT;
for (i = 0; i < num_snapshot; i++) {
- spin_lock_irqsave(&priv->ptp_lock, flags);
+ read_lock_irqsave(&priv->ptp_lock, flags);
get_ptptime(priv->ptpaddr, &ptp_time);
- spin_unlock_irqrestore(&priv->ptp_lock, flags);
+ read_unlock_irqrestore(&priv->ptp_lock, flags);
event.type = PTP_CLOCK_EXTTS;
event.index = 0;
event.timestamp = ptp_time;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index bde76ea2deec..b745d624b2cb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -938,105 +938,15 @@ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex)
priv->pause, tx_cnt);
}
-static void stmmac_validate(struct phylink_config *config,
- unsigned long *supported,
- struct phylink_link_state *state)
+static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config,
+ phy_interface_t interface)
{
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mac_supported) = { 0, };
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
- int tx_cnt = priv->plat->tx_queues_to_use;
- int max_speed = priv->plat->max_speed;
-
- phylink_set(mac_supported, 10baseT_Half);
- phylink_set(mac_supported, 10baseT_Full);
- phylink_set(mac_supported, 100baseT_Half);
- phylink_set(mac_supported, 100baseT_Full);
- phylink_set(mac_supported, 1000baseT_Half);
- phylink_set(mac_supported, 1000baseT_Full);
- phylink_set(mac_supported, 1000baseKX_Full);
-
- phylink_set(mac_supported, Autoneg);
- phylink_set(mac_supported, Pause);
- phylink_set(mac_supported, Asym_Pause);
- phylink_set_port_modes(mac_supported);
-
- /* Cut down 1G if asked to */
- if ((max_speed > 0) && (max_speed < 1000)) {
- phylink_set(mask, 1000baseT_Full);
- phylink_set(mask, 1000baseX_Full);
- } else if (priv->plat->has_gmac4) {
- if (!max_speed || max_speed >= 2500) {
- phylink_set(mac_supported, 2500baseT_Full);
- phylink_set(mac_supported, 2500baseX_Full);
- }
- } else if (priv->plat->has_xgmac) {
- if (!max_speed || (max_speed >= 2500)) {
- phylink_set(mac_supported, 2500baseT_Full);
- phylink_set(mac_supported, 2500baseX_Full);
- }
- if (!max_speed || (max_speed >= 5000)) {
- phylink_set(mac_supported, 5000baseT_Full);
- }
- if (!max_speed || (max_speed >= 10000)) {
- phylink_set(mac_supported, 10000baseSR_Full);
- phylink_set(mac_supported, 10000baseLR_Full);
- phylink_set(mac_supported, 10000baseER_Full);
- phylink_set(mac_supported, 10000baseLRM_Full);
- phylink_set(mac_supported, 10000baseT_Full);
- phylink_set(mac_supported, 10000baseKX4_Full);
- phylink_set(mac_supported, 10000baseKR_Full);
- }
- if (!max_speed || (max_speed >= 25000)) {
- phylink_set(mac_supported, 25000baseCR_Full);
- phylink_set(mac_supported, 25000baseKR_Full);
- phylink_set(mac_supported, 25000baseSR_Full);
- }
- if (!max_speed || (max_speed >= 40000)) {
- phylink_set(mac_supported, 40000baseKR4_Full);
- phylink_set(mac_supported, 40000baseCR4_Full);
- phylink_set(mac_supported, 40000baseSR4_Full);
- phylink_set(mac_supported, 40000baseLR4_Full);
- }
- if (!max_speed || (max_speed >= 50000)) {
- phylink_set(mac_supported, 50000baseCR2_Full);
- phylink_set(mac_supported, 50000baseKR2_Full);
- phylink_set(mac_supported, 50000baseSR2_Full);
- phylink_set(mac_supported, 50000baseKR_Full);
- phylink_set(mac_supported, 50000baseSR_Full);
- phylink_set(mac_supported, 50000baseCR_Full);
- phylink_set(mac_supported, 50000baseLR_ER_FR_Full);
- phylink_set(mac_supported, 50000baseDR_Full);
- }
- if (!max_speed || (max_speed >= 100000)) {
- phylink_set(mac_supported, 100000baseKR4_Full);
- phylink_set(mac_supported, 100000baseSR4_Full);
- phylink_set(mac_supported, 100000baseCR4_Full);
- phylink_set(mac_supported, 100000baseLR4_ER4_Full);
- phylink_set(mac_supported, 100000baseKR2_Full);
- phylink_set(mac_supported, 100000baseSR2_Full);
- phylink_set(mac_supported, 100000baseCR2_Full);
- phylink_set(mac_supported, 100000baseLR2_ER2_FR2_Full);
- phylink_set(mac_supported, 100000baseDR2_Full);
- }
- }
-
- /* Half-Duplex can only work with single queue */
- if (tx_cnt > 1) {
- phylink_set(mask, 10baseT_Half);
- phylink_set(mask, 100baseT_Half);
- phylink_set(mask, 1000baseT_Half);
- }
-
- linkmode_and(supported, supported, mac_supported);
- linkmode_andnot(supported, supported, mask);
- linkmode_and(state->advertising, state->advertising, mac_supported);
- linkmode_andnot(state->advertising, state->advertising, mask);
+ if (!priv->hw->xpcs)
+ return NULL;
- /* If PCS is supported, check which modes it supports. */
- if (priv->hw->xpcs)
- xpcs_validate(priv->hw->xpcs, supported, state);
+ return &priv->hw->xpcs->pcs;
}
static void stmmac_mac_config(struct phylink_config *config, unsigned int mode,
@@ -1175,7 +1085,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
}
static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
- .validate = stmmac_validate,
+ .validate = phylink_generic_validate,
+ .mac_select_pcs = stmmac_mac_select_pcs,
.mac_config = stmmac_mac_config,
.mac_link_down = stmmac_mac_link_down,
.mac_link_up = stmmac_mac_link_up,
@@ -1255,12 +1166,12 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
{
struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data;
struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node);
+ int max_speed = priv->plat->max_speed;
int mode = priv->plat->phy_interface;
struct phylink *phylink;
priv->phylink_config.dev = &priv->dev->dev;
priv->phylink_config.type = PHYLINK_NETDEV;
- priv->phylink_config.pcs_poll = true;
if (priv->plat->mdio_bus_data)
priv->phylink_config.ovr_an_inband =
mdio_bus_data->xpcs_an_inband;
@@ -1268,14 +1179,50 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
if (!fwnode)
fwnode = dev_fwnode(priv->device);
+ /* Set the platform/firmware specified interface mode */
+ __set_bit(mode, priv->phylink_config.supported_interfaces);
+
+ /* If we have an xpcs, it defines which PHY interfaces are supported. */
+ if (priv->hw->xpcs)
+ xpcs_get_interfaces(priv->hw->xpcs,
+ priv->phylink_config.supported_interfaces);
+
+ priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100;
+
+ if (!max_speed || max_speed >= 1000)
+ priv->phylink_config.mac_capabilities |= MAC_1000;
+
+ if (priv->plat->has_gmac4) {
+ if (!max_speed || max_speed >= 2500)
+ priv->phylink_config.mac_capabilities |= MAC_2500FD;
+ } else if (priv->plat->has_xgmac) {
+ if (!max_speed || max_speed >= 2500)
+ priv->phylink_config.mac_capabilities |= MAC_2500FD;
+ if (!max_speed || max_speed >= 5000)
+ priv->phylink_config.mac_capabilities |= MAC_5000FD;
+ if (!max_speed || max_speed >= 10000)
+ priv->phylink_config.mac_capabilities |= MAC_10000FD;
+ if (!max_speed || max_speed >= 25000)
+ priv->phylink_config.mac_capabilities |= MAC_25000FD;
+ if (!max_speed || max_speed >= 40000)
+ priv->phylink_config.mac_capabilities |= MAC_40000FD;
+ if (!max_speed || max_speed >= 50000)
+ priv->phylink_config.mac_capabilities |= MAC_50000FD;
+ if (!max_speed || max_speed >= 100000)
+ priv->phylink_config.mac_capabilities |= MAC_100000FD;
+ }
+
+ /* Half-Duplex can only work with single queue */
+ if (priv->plat->tx_queues_to_use > 1)
+ priv->phylink_config.mac_capabilities &=
+ ~(MAC_10HD | MAC_100HD | MAC_1000HD);
+
phylink = phylink_create(&priv->phylink_config, fwnode,
mode, &stmmac_phylink_mac_ops);
if (IS_ERR(phylink))
return PTR_ERR(phylink);
- if (priv->hw->xpcs)
- phylink_set_pcs(phylink, &priv->hw->xpcs->pcs);
-
priv->phylink = phylink;
return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 1c9f02f9c317..e45fb191d8e6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -39,9 +39,9 @@ static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb)
diff = div_u64(adj, 1000000000ULL);
addend = neg_adj ? (addend - diff) : (addend + diff);
- spin_lock_irqsave(&priv->ptp_lock, flags);
+ write_lock_irqsave(&priv->ptp_lock, flags);
stmmac_config_addend(priv, priv->ptpaddr, addend);
- spin_unlock_irqrestore(&priv->ptp_lock, flags);
+ write_unlock_irqrestore(&priv->ptp_lock, flags);
return 0;
}
@@ -86,9 +86,9 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta)
mutex_unlock(&priv->plat->est->lock);
}
- spin_lock_irqsave(&priv->ptp_lock, flags);
+ write_lock_irqsave(&priv->ptp_lock, flags);
stmmac_adjust_systime(priv, priv->ptpaddr, sec, nsec, neg_adj, xmac);
- spin_unlock_irqrestore(&priv->ptp_lock, flags);
+ write_unlock_irqrestore(&priv->ptp_lock, flags);
/* Caculate new basetime and re-configured EST after PTP time adjust. */
if (est_rst) {
@@ -137,9 +137,9 @@ static int stmmac_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts)
unsigned long flags;
u64 ns = 0;
- spin_lock_irqsave(&priv->ptp_lock, flags);
+ read_lock_irqsave(&priv->ptp_lock, flags);
stmmac_get_systime(priv, priv->ptpaddr, &ns);
- spin_unlock_irqrestore(&priv->ptp_lock, flags);
+ read_unlock_irqrestore(&priv->ptp_lock, flags);
*ts = ns_to_timespec64(ns);
@@ -162,9 +162,9 @@ static int stmmac_set_time(struct ptp_clock_info *ptp,
container_of(ptp, struct stmmac_priv, ptp_clock_ops);
unsigned long flags;
- spin_lock_irqsave(&priv->ptp_lock, flags);
+ write_lock_irqsave(&priv->ptp_lock, flags);
stmmac_init_systime(priv, priv->ptpaddr, ts->tv_sec, ts->tv_nsec);
- spin_unlock_irqrestore(&priv->ptp_lock, flags);
+ write_unlock_irqrestore(&priv->ptp_lock, flags);
return 0;
}
@@ -194,12 +194,12 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
cfg->period.tv_sec = rq->perout.period.sec;
cfg->period.tv_nsec = rq->perout.period.nsec;
- spin_lock_irqsave(&priv->ptp_lock, flags);
+ write_lock_irqsave(&priv->ptp_lock, flags);
ret = stmmac_flex_pps_config(priv, priv->ioaddr,
rq->perout.index, cfg, on,
priv->sub_second_inc,
priv->systime_flags);
- spin_unlock_irqrestore(&priv->ptp_lock, flags);
+ write_unlock_irqrestore(&priv->ptp_lock, flags);
break;
case PTP_CLK_REQ_EXTTS:
priv->plat->ext_snapshot_en = on;
@@ -314,7 +314,7 @@ void stmmac_ptp_register(struct stmmac_priv *priv)
stmmac_ptp_clock_ops.n_per_out = priv->dma_cap.pps_out_num;
stmmac_ptp_clock_ops.n_ext_ts = priv->dma_cap.aux_snapshot_n;
- spin_lock_init(&priv->ptp_lock);
+ rwlock_init(&priv->ptp_lock);
mutex_init(&priv->aux_ts_lock);
priv->ptp_clock_ops = stmmac_ptp_clock_ops;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index be3cb63675a5..9f1759593b94 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -1777,9 +1777,9 @@ static int stmmac_test_tbs(struct stmmac_priv *priv)
if (ret)
return ret;
- spin_lock_irqsave(&priv->ptp_lock, flags);
+ read_lock_irqsave(&priv->ptp_lock, flags);
stmmac_get_systime(priv, priv->ptpaddr, &curr_time);
- spin_unlock_irqrestore(&priv->ptp_lock, flags);
+ read_unlock_irqrestore(&priv->ptp_lock, flags);
if (!curr_time) {
ret = -EOPNOTSUPP;
@@ -1799,9 +1799,9 @@ static int stmmac_test_tbs(struct stmmac_priv *priv)
goto fail_disable;
/* Check if expected time has elapsed */
- spin_lock_irqsave(&priv->ptp_lock, flags);
+ read_lock_irqsave(&priv->ptp_lock, flags);
stmmac_get_systime(priv, priv->ptpaddr, &curr_time);
- spin_unlock_irqrestore(&priv->ptp_lock, flags);
+ read_unlock_irqrestore(&priv->ptp_lock, flags);
if ((curr_time - start_time) < STMMAC_TBS_LT_OFFSET)
ret = -EINVAL;
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index 5b4d153b1492..40108968b350 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -386,6 +386,7 @@ struct axidma_bd {
* @phylink: Pointer to phylink instance
* @phylink_config: phylink configuration settings
* @pcs_phy: Reference to PCS/PMA PHY if used
+ * @pcs: phylink pcs structure for PCS PHY
* @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core
* @axi_clk: AXI4-Lite bus clock
* @misc_clks: Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks)
@@ -434,6 +435,7 @@ struct axienet_local {
struct phylink_config phylink_config;
struct mdio_device *pcs_phy;
+ struct phylink_pcs pcs;
bool switch_x_sgmii;
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 377c94ec2486..de0a6372ae0e 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1537,78 +1537,78 @@ static const struct ethtool_ops axienet_ethtool_ops = {
.nway_reset = axienet_ethtools_nway_reset,
};
-static void axienet_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state)
+static struct axienet_local *pcs_to_axienet_local(struct phylink_pcs *pcs)
{
- struct net_device *ndev = to_net_dev(config->dev);
- struct axienet_local *lp = netdev_priv(ndev);
+ return container_of(pcs, struct axienet_local, pcs);
+}
- switch (state->interface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- phylink_mii_c22_pcs_get_state(lp->pcs_phy, state);
- break;
- default:
- break;
- }
+static void axienet_pcs_get_state(struct phylink_pcs *pcs,
+ struct phylink_link_state *state)
+{
+ struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
+
+ phylink_mii_c22_pcs_get_state(pcs_phy, state);
}
-static void axienet_mac_an_restart(struct phylink_config *config)
+static void axienet_pcs_an_restart(struct phylink_pcs *pcs)
{
- struct net_device *ndev = to_net_dev(config->dev);
- struct axienet_local *lp = netdev_priv(ndev);
+ struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
- phylink_mii_c22_pcs_an_restart(lp->pcs_phy);
+ phylink_mii_c22_pcs_an_restart(pcs_phy);
}
-static int axienet_mac_prepare(struct phylink_config *config, unsigned int mode,
- phy_interface_t iface)
+static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface,
+ const unsigned long *advertising,
+ bool permit_pause_to_mac)
{
- struct net_device *ndev = to_net_dev(config->dev);
+ struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
+ struct net_device *ndev = pcs_to_axienet_local(pcs)->ndev;
struct axienet_local *lp = netdev_priv(ndev);
int ret;
- switch (iface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- if (!lp->switch_x_sgmii)
- return 0;
-
- ret = mdiobus_write(lp->pcs_phy->bus,
- lp->pcs_phy->addr,
- XLNX_MII_STD_SELECT_REG,
- iface == PHY_INTERFACE_MODE_SGMII ?
+ if (lp->switch_x_sgmii) {
+ ret = mdiodev_write(pcs_phy, XLNX_MII_STD_SELECT_REG,
+ interface == PHY_INTERFACE_MODE_SGMII ?
XLNX_MII_STD_SELECT_SGMII : 0);
- if (ret < 0)
- netdev_warn(ndev, "Failed to switch PHY interface: %d\n",
+ if (ret < 0) {
+ netdev_warn(ndev,
+ "Failed to switch PHY interface: %d\n",
ret);
- return ret;
- default:
- return 0;
+ return ret;
+ }
}
+
+ ret = phylink_mii_c22_pcs_config(pcs_phy, mode, interface, advertising);
+ if (ret < 0)
+ netdev_warn(ndev, "Failed to configure PCS: %d\n", ret);
+
+ return ret;
}
-static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
- const struct phylink_link_state *state)
+static const struct phylink_pcs_ops axienet_pcs_ops = {
+ .pcs_get_state = axienet_pcs_get_state,
+ .pcs_config = axienet_pcs_config,
+ .pcs_an_restart = axienet_pcs_an_restart,
+};
+
+static struct phylink_pcs *axienet_mac_select_pcs(struct phylink_config *config,
+ phy_interface_t interface)
{
struct net_device *ndev = to_net_dev(config->dev);
struct axienet_local *lp = netdev_priv(ndev);
- int ret;
- switch (state->interface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- ret = phylink_mii_c22_pcs_config(lp->pcs_phy, mode,
- state->interface,
- state->advertising);
- if (ret < 0)
- netdev_warn(ndev, "Failed to configure PCS: %d\n",
- ret);
- break;
+ if (interface == PHY_INTERFACE_MODE_1000BASEX ||
+ interface == PHY_INTERFACE_MODE_SGMII)
+ return &lp->pcs;
- default:
- break;
- }
+ return NULL;
+}
+
+static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
+ const struct phylink_link_state *state)
+{
+ /* nothing meaningful to do */
}
static void axienet_mac_link_down(struct phylink_config *config,
@@ -1663,9 +1663,7 @@ static void axienet_mac_link_up(struct phylink_config *config,
static const struct phylink_mac_ops axienet_phylink_ops = {
.validate = phylink_generic_validate,
- .mac_pcs_get_state = axienet_mac_pcs_get_state,
- .mac_an_restart = axienet_mac_an_restart,
- .mac_prepare = axienet_mac_prepare,
+ .mac_select_pcs = axienet_mac_select_pcs,
.mac_config = axienet_mac_config,
.mac_link_down = axienet_mac_link_down,
.mac_link_up = axienet_mac_link_up,
@@ -2079,12 +2077,12 @@ static int axienet_probe(struct platform_device *pdev)
ret = -EPROBE_DEFER;
goto cleanup_mdio;
}
- lp->phylink_config.pcs_poll = true;
+ lp->pcs.ops = &axienet_pcs_ops;
+ lp->pcs.poll = true;
}
lp->phylink_config.dev = &ndev->dev;
lp->phylink_config.type = PHYLINK_NETDEV;
- lp->phylink_config.legacy_pre_march2020 = true;
lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
MAC_10FD | MAC_100FD | MAC_1000FD;
diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index ebd287039a54..5805e4a56385 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -1514,10 +1514,9 @@ acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level,
{
struct acpi_device *device;
bool *found = context;
- int result;
- result = acpi_bus_get_device(obj_handle, &device);
- if (result)
+ device = acpi_fetch_acpi_dev(obj_handle);
+ if (!device)
return AE_OK;
if (strcmp(acpi_device_hid(device), ACPI_MOTHERBOARD_RESOURCE_HID))
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index afa81a9480cc..e675d1016c3c 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -154,19 +154,15 @@ static void free_netvsc_device(struct rcu_head *head)
kfree(nvdev->extension);
- if (nvdev->recv_original_buf) {
- hv_unmap_memory(nvdev->recv_buf);
+ if (nvdev->recv_original_buf)
vfree(nvdev->recv_original_buf);
- } else {
+ else
vfree(nvdev->recv_buf);
- }
- if (nvdev->send_original_buf) {
- hv_unmap_memory(nvdev->send_buf);
+ if (nvdev->send_original_buf)
vfree(nvdev->send_original_buf);
- } else {
+ else
vfree(nvdev->send_buf);
- }
bitmap_free(nvdev->send_section_map);
@@ -765,6 +761,12 @@ void netvsc_device_remove(struct hv_device *device)
netvsc_teardown_send_gpadl(device, net_device, ndev);
}
+ if (net_device->recv_original_buf)
+ hv_unmap_memory(net_device->recv_buf);
+
+ if (net_device->send_original_buf)
+ hv_unmap_memory(net_device->send_buf);
+
/* Release all resources */
free_netvsc_device_rcu(net_device);
}
@@ -1821,6 +1823,12 @@ cleanup:
netif_napi_del(&net_device->chan_table[0].napi);
cleanup2:
+ if (net_device->recv_original_buf)
+ hv_unmap_memory(net_device->recv_buf);
+
+ if (net_device->send_original_buf)
+ hv_unmap_memory(net_device->send_buf);
+
free_netvsc_device(&net_device->rcu);
return ERR_PTR(ret);
diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c
index 2f5e7b31032a..07bafbf94680 100644
--- a/drivers/net/ieee802154/atusb.c
+++ b/drivers/net/ieee802154/atusb.c
@@ -74,81 +74,6 @@ struct atusb_chip_data {
int (*set_txpower)(struct ieee802154_hw*, s32);
};
-/* ----- USB commands without data ----------------------------------------- */
-
-/* To reduce the number of error checks in the code, we record the first error
- * in atusb->err and reject all subsequent requests until the error is cleared.
- */
-
-static int atusb_control_msg(struct atusb *atusb, unsigned int pipe,
- __u8 request, __u8 requesttype,
- __u16 value, __u16 index,
- void *data, __u16 size, int timeout)
-{
- struct usb_device *usb_dev = atusb->usb_dev;
- int ret;
-
- if (atusb->err)
- return atusb->err;
-
- ret = usb_control_msg(usb_dev, pipe, request, requesttype,
- value, index, data, size, timeout);
- if (ret < size) {
- ret = ret < 0 ? ret : -ENODATA;
-
- atusb->err = ret;
- dev_err(&usb_dev->dev,
- "%s: req 0x%02x val 0x%x idx 0x%x, error %d\n",
- __func__, request, value, index, ret);
- }
- return ret;
-}
-
-static int atusb_command(struct atusb *atusb, u8 cmd, u8 arg)
-{
- struct usb_device *usb_dev = atusb->usb_dev;
-
- dev_dbg(&usb_dev->dev, "%s: cmd = 0x%x\n", __func__, cmd);
- return atusb_control_msg(atusb, usb_sndctrlpipe(usb_dev, 0),
- cmd, ATUSB_REQ_TO_DEV, arg, 0, NULL, 0, 1000);
-}
-
-static int atusb_write_reg(struct atusb *atusb, u8 reg, u8 value)
-{
- struct usb_device *usb_dev = atusb->usb_dev;
-
- dev_dbg(&usb_dev->dev, "%s: 0x%02x <- 0x%02x\n", __func__, reg, value);
- return atusb_control_msg(atusb, usb_sndctrlpipe(usb_dev, 0),
- ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV,
- value, reg, NULL, 0, 1000);
-}
-
-static int atusb_read_reg(struct atusb *atusb, u8 reg)
-{
- struct usb_device *usb_dev = atusb->usb_dev;
- int ret;
- u8 *buffer;
- u8 value;
-
- buffer = kmalloc(1, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- dev_dbg(&usb_dev->dev, "%s: reg = 0x%x\n", __func__, reg);
- ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0),
- ATUSB_REG_READ, ATUSB_REQ_FROM_DEV,
- 0, reg, buffer, 1, 1000);
-
- if (ret >= 0) {
- value = buffer[0];
- kfree(buffer);
- return value;
- } else {
- kfree(buffer);
- return ret;
- }
-}
-
static int atusb_write_subreg(struct atusb *atusb, u8 reg, u8 mask,
u8 shift, u8 value)
{
@@ -158,7 +83,10 @@ static int atusb_write_subreg(struct atusb *atusb, u8 reg, u8 mask,
dev_dbg(&usb_dev->dev, "%s: 0x%02x <- 0x%02x\n", __func__, reg, value);
- orig = atusb_read_reg(atusb, reg);
+ ret = usb_control_msg_recv(usb_dev, 0, ATUSB_REG_READ, ATUSB_REQ_FROM_DEV,
+ 0, reg, &orig, 1, 1000, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
/* Write the value only into that part of the register which is allowed
* by the mask. All other bits stay as before.
@@ -167,7 +95,8 @@ static int atusb_write_subreg(struct atusb *atusb, u8 reg, u8 mask,
tmp |= (value << shift) & mask;
if (tmp != orig)
- ret = atusb_write_reg(atusb, reg, tmp);
+ ret = usb_control_msg_send(usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV,
+ tmp, reg, NULL, 0, 1000, GFP_KERNEL);
return ret;
}
@@ -176,12 +105,16 @@ static int atusb_read_subreg(struct atusb *lp,
unsigned int addr, unsigned int mask,
unsigned int shift)
{
- int rc;
+ int reg, ret;
+
+ ret = usb_control_msg_recv(lp->usb_dev, 0, ATUSB_REG_READ, ATUSB_REQ_FROM_DEV,
+ 0, addr, &reg, 1, 1000, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
- rc = atusb_read_reg(lp, addr);
- rc = (rc & mask) >> shift;
+ reg = (reg & mask) >> shift;
- return rc;
+ return reg;
}
static int atusb_get_and_clear_error(struct atusb *atusb)
@@ -419,16 +352,22 @@ static int atusb_set_hw_addr_filt(struct ieee802154_hw *hw,
u16 addr = le16_to_cpu(filt->short_addr);
dev_vdbg(dev, "%s called for saddr\n", __func__);
- atusb_write_reg(atusb, RG_SHORT_ADDR_0, addr);
- atusb_write_reg(atusb, RG_SHORT_ADDR_1, addr >> 8);
+ usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV,
+ addr, RG_SHORT_ADDR_0, NULL, 0, 1000, GFP_KERNEL);
+
+ usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV,
+ addr >> 8, RG_SHORT_ADDR_1, NULL, 0, 1000, GFP_KERNEL);
}
if (changed & IEEE802154_AFILT_PANID_CHANGED) {
u16 pan = le16_to_cpu(filt->pan_id);
dev_vdbg(dev, "%s called for pan id\n", __func__);
- atusb_write_reg(atusb, RG_PAN_ID_0, pan);
- atusb_write_reg(atusb, RG_PAN_ID_1, pan >> 8);
+ usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV,
+ pan, RG_PAN_ID_0, NULL, 0, 1000, GFP_KERNEL);
+
+ usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV,
+ pan >> 8, RG_PAN_ID_1, NULL, 0, 1000, GFP_KERNEL);
}
if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) {
@@ -437,7 +376,9 @@ static int atusb_set_hw_addr_filt(struct ieee802154_hw *hw,
memcpy(addr, &filt->ieee_addr, IEEE802154_EXTENDED_ADDR_LEN);
dev_vdbg(dev, "%s called for IEEE addr\n", __func__);
for (i = 0; i < 8; i++)
- atusb_write_reg(atusb, RG_IEEE_ADDR_0 + i, addr[i]);
+ usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV,
+ addr[i], RG_IEEE_ADDR_0 + i, NULL, 0,
+ 1000, GFP_KERNEL);
}
if (changed & IEEE802154_AFILT_PANC_CHANGED) {
@@ -459,7 +400,8 @@ static int atusb_start(struct ieee802154_hw *hw)
dev_dbg(&usb_dev->dev, "%s\n", __func__);
schedule_delayed_work(&atusb->work, 0);
- atusb_command(atusb, ATUSB_RX_MODE, 1);
+ usb_control_msg_send(atusb->usb_dev, 0, ATUSB_RX_MODE, ATUSB_REQ_TO_DEV, 1, 0,
+ NULL, 0, 1000, GFP_KERNEL);
ret = atusb_get_and_clear_error(atusb);
if (ret < 0)
usb_kill_anchored_urbs(&atusb->idle_urbs);
@@ -473,7 +415,8 @@ static void atusb_stop(struct ieee802154_hw *hw)
dev_dbg(&usb_dev->dev, "%s\n", __func__);
usb_kill_anchored_urbs(&atusb->idle_urbs);
- atusb_command(atusb, ATUSB_RX_MODE, 0);
+ usb_control_msg_send(atusb->usb_dev, 0, ATUSB_RX_MODE, ATUSB_REQ_TO_DEV, 0, 0,
+ NULL, 0, 1000, GFP_KERNEL);
atusb_get_and_clear_error(atusb);
}
@@ -580,9 +523,11 @@ atusb_set_cca_mode(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca)
static int hulusb_set_cca_ed_level(struct atusb *lp, int rssi_base_val)
{
- unsigned int cca_ed_thres;
+ int cca_ed_thres;
cca_ed_thres = atusb_read_subreg(lp, SR_CCA_ED_THRES);
+ if (cca_ed_thres < 0)
+ return cca_ed_thres;
switch (rssi_base_val) {
case -98:
@@ -799,18 +744,13 @@ static int atusb_get_and_show_revision(struct atusb *atusb)
{
struct usb_device *usb_dev = atusb->usb_dev;
char *hw_name;
- unsigned char *buffer;
+ unsigned char buffer[3];
int ret;
- buffer = kmalloc(3, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
/* Get a couple of the ATMega Firmware values */
- ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0),
- ATUSB_ID, ATUSB_REQ_FROM_DEV, 0, 0,
- buffer, 3, 1000);
- if (ret >= 0) {
+ ret = usb_control_msg_recv(atusb->usb_dev, 0, ATUSB_ID, ATUSB_REQ_FROM_DEV, 0, 0,
+ buffer, 3, 1000, GFP_KERNEL);
+ if (!ret) {
atusb->fw_ver_maj = buffer[0];
atusb->fw_ver_min = buffer[1];
atusb->fw_hw_type = buffer[2];
@@ -849,7 +789,6 @@ static int atusb_get_and_show_revision(struct atusb *atusb)
dev_info(&usb_dev->dev, "Please update to version 0.2 or newer");
}
- kfree(buffer);
return ret;
}
@@ -863,7 +802,6 @@ static int atusb_get_and_show_build(struct atusb *atusb)
if (!build)
return -ENOMEM;
- /* We cannot call atusb_control_msg() here, since this request may read various length data */
ret = usb_control_msg(atusb->usb_dev, usb_rcvctrlpipe(usb_dev, 0), ATUSB_BUILD,
ATUSB_REQ_FROM_DEV, 0, 0, build, ATUSB_BUILD_SIZE, 1000);
if (ret >= 0) {
@@ -881,14 +819,27 @@ static int atusb_get_and_conf_chip(struct atusb *atusb)
u8 man_id_0, man_id_1, part_num, version_num;
const char *chip;
struct ieee802154_hw *hw = atusb->hw;
+ int ret;
- man_id_0 = atusb_read_reg(atusb, RG_MAN_ID_0);
- man_id_1 = atusb_read_reg(atusb, RG_MAN_ID_1);
- part_num = atusb_read_reg(atusb, RG_PART_NUM);
- version_num = atusb_read_reg(atusb, RG_VERSION_NUM);
+ ret = usb_control_msg_recv(usb_dev, 0, ATUSB_REG_READ, ATUSB_REQ_FROM_DEV,
+ 0, RG_MAN_ID_0, &man_id_0, 1, 1000, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
- if (atusb->err)
- return atusb->err;
+ ret = usb_control_msg_recv(usb_dev, 0, ATUSB_REG_READ, ATUSB_REQ_FROM_DEV,
+ 0, RG_MAN_ID_1, &man_id_1, 1, 1000, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ ret = usb_control_msg_recv(usb_dev, 0, ATUSB_REG_READ, ATUSB_REQ_FROM_DEV,
+ 0, RG_PART_NUM, &part_num, 1, 1000, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ ret = usb_control_msg_recv(usb_dev, 0, ATUSB_REG_READ, ATUSB_REQ_FROM_DEV,
+ 0, RG_VERSION_NUM, &version_num, 1, 1000, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT |
IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS;
@@ -969,7 +920,7 @@ fail:
static int atusb_set_extended_addr(struct atusb *atusb)
{
struct usb_device *usb_dev = atusb->usb_dev;
- unsigned char *buffer;
+ unsigned char buffer[IEEE802154_EXTENDED_ADDR_LEN];
__le64 extended_addr;
u64 addr;
int ret;
@@ -982,18 +933,12 @@ static int atusb_set_extended_addr(struct atusb *atusb)
return 0;
}
- buffer = kmalloc(IEEE802154_EXTENDED_ADDR_LEN, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
/* Firmware is new enough so we fetch the address from EEPROM */
- ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0),
- ATUSB_EUI64_READ, ATUSB_REQ_FROM_DEV, 0, 0,
- buffer, IEEE802154_EXTENDED_ADDR_LEN, 1000);
+ ret = usb_control_msg_recv(atusb->usb_dev, 0, ATUSB_EUI64_READ, ATUSB_REQ_FROM_DEV, 0, 0,
+ buffer, IEEE802154_EXTENDED_ADDR_LEN, 1000, GFP_KERNEL);
if (ret < 0) {
dev_err(&usb_dev->dev, "failed to fetch extended address, random address set\n");
ieee802154_random_extended_addr(&atusb->hw->phy->perm_extended_addr);
- kfree(buffer);
return ret;
}
@@ -1009,7 +954,6 @@ static int atusb_set_extended_addr(struct atusb *atusb)
&addr);
}
- kfree(buffer);
return ret;
}
@@ -1051,7 +995,8 @@ static int atusb_probe(struct usb_interface *interface,
hw->parent = &usb_dev->dev;
- atusb_command(atusb, ATUSB_RF_RESET, 0);
+ usb_control_msg_send(atusb->usb_dev, 0, ATUSB_RF_RESET, ATUSB_REQ_TO_DEV, 0, 0,
+ NULL, 0, 1000, GFP_KERNEL);
atusb_get_and_conf_chip(atusb);
atusb_get_and_show_revision(atusb);
atusb_get_and_show_build(atusb);
@@ -1076,7 +1021,9 @@ static int atusb_probe(struct usb_interface *interface,
* explicitly. Any resets after that will send us straight to TRX_OFF,
* making the command below redundant.
*/
- atusb_write_reg(atusb, RG_TRX_STATE, STATE_FORCE_TRX_OFF);
+ usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV,
+ STATE_FORCE_TRX_OFF, RG_TRX_STATE, NULL, 0, 1000, GFP_KERNEL);
+
msleep(1); /* reset => TRX_OFF, tTR13 = 37 us */
#if 0
@@ -1104,7 +1051,8 @@ static int atusb_probe(struct usb_interface *interface,
atusb_write_subreg(atusb, SR_RX_SAFE_MODE, 1);
#endif
- atusb_write_reg(atusb, RG_IRQ_MASK, 0xff);
+ usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV,
+ 0xff, RG_IRQ_MASK, NULL, 0, 1000, GFP_KERNEL);
ret = atusb_get_and_clear_error(atusb);
if (!ret)
diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c
index 36f1c5aa98fc..38c217bd7c82 100644
--- a/drivers/net/ieee802154/mac802154_hwsim.c
+++ b/drivers/net/ieee802154/mac802154_hwsim.c
@@ -791,7 +791,7 @@ static int hwsim_add_one(struct genl_info *info, struct device *dev,
phy->idx = idx;
INIT_LIST_HEAD(&phy->edges);
- hw->flags = IEEE802154_HW_PROMISCUOUS;
+ hw->flags = IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_RX_DROP_BAD_CKSUM;
hw->parent = dev;
err = ieee802154_register_hw(hw);
diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c
index 1544564bc283..87e1d43c118c 100644
--- a/drivers/net/ipa/gsi_trans.c
+++ b/drivers/net/ipa/gsi_trans.c
@@ -320,6 +320,17 @@ gsi_trans_tre_release(struct gsi_trans_info *trans_info, u32 tre_count)
atomic_add(tre_count, &trans_info->tre_avail);
}
+/* Return true if no transactions are allocated, false otherwise */
+bool gsi_channel_trans_idle(struct gsi *gsi, u32 channel_id)
+{
+ u32 tre_max = gsi_channel_tre_max(gsi, channel_id);
+ struct gsi_trans_info *trans_info;
+
+ trans_info = &gsi->channel[channel_id].trans_info;
+
+ return atomic_read(&trans_info->tre_avail) == tre_max;
+}
+
/* Allocate a GSI transaction on a channel */
struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id,
u32 tre_count,
diff --git a/drivers/net/ipa/gsi_trans.h b/drivers/net/ipa/gsi_trans.h
index 17fd1822d8a9..af379b49299e 100644
--- a/drivers/net/ipa/gsi_trans.h
+++ b/drivers/net/ipa/gsi_trans.h
@@ -130,6 +130,16 @@ void *gsi_trans_pool_alloc_dma(struct gsi_trans_pool *pool, dma_addr_t *addr);
void gsi_trans_pool_exit_dma(struct device *dev, struct gsi_trans_pool *pool);
/**
+ * gsi_channel_trans_idle() - Return whether no transactions are allocated
+ * @gsi: GSI pointer
+ * @channel_id: Channel the transaction is associated with
+ *
+ * Return: True if no transactions are allocated, false otherwise
+ *
+ */
+bool gsi_channel_trans_idle(struct gsi *gsi, u32 channel_id);
+
+/**
* gsi_channel_trans_alloc() - Allocate a GSI transaction on a channel
* @gsi: GSI pointer
* @channel_id: Channel the transaction is associated with
diff --git a/drivers/net/ipa/ipa_data-v3.1.c b/drivers/net/ipa/ipa_data-v3.1.c
index 06ddb85f39b2..8ff351aefd23 100644
--- a/drivers/net/ipa/ipa_data-v3.1.c
+++ b/drivers/net/ipa/ipa_data-v3.1.c
@@ -101,6 +101,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.aggregation = true,
.status_enable = true,
.rx = {
+ .buffer_size = 8192,
.pad_align = ilog2(sizeof(u32)),
},
},
@@ -148,6 +149,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.qmap = true,
.aggregation = true,
.rx = {
+ .buffer_size = 8192,
.aggr_close_eof = true,
},
},
diff --git a/drivers/net/ipa/ipa_data-v3.5.1.c b/drivers/net/ipa/ipa_data-v3.5.1.c
index 760c22bbdf70..d1c466abddb2 100644
--- a/drivers/net/ipa/ipa_data-v3.5.1.c
+++ b/drivers/net/ipa/ipa_data-v3.5.1.c
@@ -92,6 +92,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.aggregation = true,
.status_enable = true,
.rx = {
+ .buffer_size = 8192,
.pad_align = ilog2(sizeof(u32)),
},
},
@@ -140,6 +141,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.qmap = true,
.aggregation = true,
.rx = {
+ .buffer_size = 8192,
.aggr_close_eof = true,
},
},
diff --git a/drivers/net/ipa/ipa_data-v4.11.c b/drivers/net/ipa/ipa_data-v4.11.c
index fea91451a0c3..b1991cc6f0ca 100644
--- a/drivers/net/ipa/ipa_data-v4.11.c
+++ b/drivers/net/ipa/ipa_data-v4.11.c
@@ -86,6 +86,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.aggregation = true,
.status_enable = true,
.rx = {
+ .buffer_size = 8192,
.pad_align = ilog2(sizeof(u32)),
},
},
@@ -133,6 +134,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.qmap = true,
.aggregation = true,
.rx = {
+ .buffer_size = 32768,
.aggr_close_eof = true,
},
},
diff --git a/drivers/net/ipa/ipa_data-v4.2.c b/drivers/net/ipa/ipa_data-v4.2.c
index 2a231e79d5e1..1190a43e8743 100644
--- a/drivers/net/ipa/ipa_data-v4.2.c
+++ b/drivers/net/ipa/ipa_data-v4.2.c
@@ -82,6 +82,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.aggregation = true,
.status_enable = true,
.rx = {
+ .buffer_size = 8192,
.pad_align = ilog2(sizeof(u32)),
},
},
@@ -130,6 +131,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.qmap = true,
.aggregation = true,
.rx = {
+ .buffer_size = 8192,
.aggr_close_eof = true,
},
},
diff --git a/drivers/net/ipa/ipa_data-v4.5.c b/drivers/net/ipa/ipa_data-v4.5.c
index 2da2c4194f2e..944f72b0f285 100644
--- a/drivers/net/ipa/ipa_data-v4.5.c
+++ b/drivers/net/ipa/ipa_data-v4.5.c
@@ -95,6 +95,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.aggregation = true,
.status_enable = true,
.rx = {
+ .buffer_size = 8192,
.pad_align = ilog2(sizeof(u32)),
},
},
@@ -142,6 +143,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.qmap = true,
.aggregation = true,
.rx = {
+ .buffer_size = 8192,
.aggr_close_eof = true,
},
},
diff --git a/drivers/net/ipa/ipa_data-v4.9.c b/drivers/net/ipa/ipa_data-v4.9.c
index 2421b5abb5d4..16786bff7ef8 100644
--- a/drivers/net/ipa/ipa_data-v4.9.c
+++ b/drivers/net/ipa/ipa_data-v4.9.c
@@ -87,6 +87,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.aggregation = true,
.status_enable = true,
.rx = {
+ .buffer_size = 8192,
.pad_align = ilog2(sizeof(u32)),
},
},
@@ -134,6 +135,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
.qmap = true,
.aggregation = true,
.rx = {
+ .buffer_size = 8192,
.aggr_close_eof = true,
},
},
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 6d329e9ce5d2..dbbeecf6df29 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -112,6 +112,7 @@ struct ipa_endpoint_tx_data {
/**
* struct ipa_endpoint_rx_data - configuration data for RX endpoints
+ * @buffer_size: requested receive buffer size (bytes)
* @pad_align: power-of-2 boundary to which packet payload is aligned
* @aggr_close_eof: whether aggregation closes on end-of-frame
*
@@ -125,6 +126,7 @@ struct ipa_endpoint_tx_data {
* a "frame" consisting of several transfers has ended.
*/
struct ipa_endpoint_rx_data {
+ u32 buffer_size;
u32 pad_align;
bool aggr_close_eof;
};
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 68291a3efd04..888e94278a84 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -25,10 +25,8 @@
#define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0)
-#define IPA_REPLENISH_BATCH 16
-
-/* RX buffer is 1 page (or a power-of-2 contiguous pages) */
-#define IPA_RX_BUFFER_SIZE 8192 /* PAGE_SIZE > 4096 wastes a LOT */
+/* Hardware is told about receive buffers once a "batch" has been queued */
+#define IPA_REPLENISH_BATCH 16 /* Must be non-zero */
/* The amount of RX buffer space consumed by standard skb overhead */
#define IPA_RX_BUFFER_OVERHEAD (PAGE_SIZE - SKB_MAX_ORDER(NET_SKB_PAD, 0))
@@ -75,6 +73,14 @@ struct ipa_status {
#define IPA_STATUS_FLAGS1_RT_RULE_ID_FMASK GENMASK(31, 22)
#define IPA_STATUS_FLAGS2_TAG_FMASK GENMASK_ULL(63, 16)
+static u32 aggr_byte_limit_max(enum ipa_version version)
+{
+ if (version < IPA_VERSION_4_5)
+ return field_max(aggr_byte_limit_fmask(true));
+
+ return field_max(aggr_byte_limit_fmask(false));
+}
+
static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count,
const struct ipa_gsi_endpoint_data *all_data,
const struct ipa_gsi_endpoint_data *data)
@@ -87,6 +93,9 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count,
return true;
if (!data->toward_ipa) {
+ u32 buffer_size;
+ u32 limit;
+
if (data->endpoint.filter_support) {
dev_err(dev, "filtering not supported for "
"RX endpoint %u\n",
@@ -94,6 +103,41 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count,
return false;
}
+ /* Nothing more to check for non-AP RX */
+ if (data->ee_id != GSI_EE_AP)
+ return true;
+
+ buffer_size = data->endpoint.config.rx.buffer_size;
+ /* The buffer size must hold an MTU plus overhead */
+ limit = IPA_MTU + IPA_RX_BUFFER_OVERHEAD;
+ if (buffer_size < limit) {
+ dev_err(dev, "RX buffer size too small for RX endpoint %u (%u < %u)\n",
+ data->endpoint_id, buffer_size, limit);
+ return false;
+ }
+
+ /* For an endpoint supporting receive aggregation, the
+ * aggregation byte limit defines the point at which an
+ * aggregation window will close. It is programmed into the
+ * IPA hardware as a number of KB. We don't use "hard byte
+ * limit" aggregation, so we need to supply enough space in
+ * a receive buffer to hold a complete MTU plus normal skb
+ * overhead *after* that aggregation byte limit has been
+ * crossed.
+ *
+ * This check just ensures the receive buffer size doesn't
+ * exceed what's representable in the aggregation limit field.
+ */
+ if (data->endpoint.config.aggregation) {
+ limit += SZ_1K * aggr_byte_limit_max(ipa->version);
+ if (buffer_size > limit) {
+ dev_err(dev, "RX buffer size too large for aggregated RX endpoint %u (%u > %u)\n",
+ data->endpoint_id, buffer_size, limit);
+
+ return false;
+ }
+ }
+
return true; /* Nothing more to check for RX */
}
@@ -156,21 +200,12 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count,
return true;
}
-static u32 aggr_byte_limit_max(enum ipa_version version)
-{
- if (version < IPA_VERSION_4_5)
- return field_max(aggr_byte_limit_fmask(true));
-
- return field_max(aggr_byte_limit_fmask(false));
-}
-
static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count,
const struct ipa_gsi_endpoint_data *data)
{
const struct ipa_gsi_endpoint_data *dp = data;
struct device *dev = &ipa->pdev->dev;
enum ipa_endpoint_name name;
- u32 limit;
if (count > IPA_ENDPOINT_COUNT) {
dev_err(dev, "too many endpoints specified (%u > %u)\n",
@@ -178,26 +213,6 @@ static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count,
return false;
}
- /* The aggregation byte limit defines the point at which an
- * aggregation window will close. It is programmed into the
- * IPA hardware as a number of KB. We don't use "hard byte
- * limit" aggregation, which means that we need to supply
- * enough space in a receive buffer to hold a complete MTU
- * plus normal skb overhead *after* that aggregation byte
- * limit has been crossed.
- *
- * This check ensures we don't define a receive buffer size
- * that would exceed what we can represent in the field that
- * is used to program its size.
- */
- limit = aggr_byte_limit_max(ipa->version) * SZ_1K;
- limit += IPA_MTU + IPA_RX_BUFFER_OVERHEAD;
- if (limit < IPA_RX_BUFFER_SIZE) {
- dev_err(dev, "buffer size too big for aggregation (%u > %u)\n",
- IPA_RX_BUFFER_SIZE, limit);
- return false;
- }
-
/* Make sure needed endpoints have defined data */
if (ipa_gsi_endpoint_data_empty(&data[IPA_ENDPOINT_AP_COMMAND_TX])) {
dev_err(dev, "command TX endpoint not defined\n");
@@ -723,13 +738,15 @@ static void ipa_endpoint_init_aggr(struct ipa_endpoint *endpoint)
if (endpoint->data->aggregation) {
if (!endpoint->toward_ipa) {
+ const struct ipa_endpoint_rx_data *rx_data;
bool close_eof;
u32 limit;
+ rx_data = &endpoint->data->rx;
val |= u32_encode_bits(IPA_ENABLE_AGGR, AGGR_EN_FMASK);
val |= u32_encode_bits(IPA_GENERIC, AGGR_TYPE_FMASK);
- limit = ipa_aggr_size_kb(IPA_RX_BUFFER_SIZE);
+ limit = ipa_aggr_size_kb(rx_data->buffer_size);
val |= aggr_byte_limit_encoded(version, limit);
limit = IPA_AGGR_TIME_LIMIT;
@@ -737,7 +754,7 @@ static void ipa_endpoint_init_aggr(struct ipa_endpoint *endpoint)
/* AGGR_PKT_LIMIT is 0 (unlimited) */
- close_eof = endpoint->data->rx.aggr_close_eof;
+ close_eof = rx_data->aggr_close_eof;
val |= aggr_sw_eof_active_encoded(version, close_eof);
/* AGGR_HARD_BYTE_LIMIT_ENABLE is 0 */
@@ -1020,134 +1037,98 @@ static void ipa_endpoint_status(struct ipa_endpoint *endpoint)
iowrite32(val, ipa->reg_virt + offset);
}
-static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint)
+static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint,
+ struct gsi_trans *trans)
{
- struct gsi_trans *trans;
- bool doorbell = false;
struct page *page;
+ u32 buffer_size;
u32 offset;
u32 len;
int ret;
- page = dev_alloc_pages(get_order(IPA_RX_BUFFER_SIZE));
+ buffer_size = endpoint->data->rx.buffer_size;
+ page = dev_alloc_pages(get_order(buffer_size));
if (!page)
return -ENOMEM;
- trans = ipa_endpoint_trans_alloc(endpoint, 1);
- if (!trans)
- goto err_free_pages;
-
/* Offset the buffer to make space for skb headroom */
offset = NET_SKB_PAD;
- len = IPA_RX_BUFFER_SIZE - offset;
+ len = buffer_size - offset;
ret = gsi_trans_page_add(trans, page, len, offset);
if (ret)
- goto err_trans_free;
- trans->data = page; /* transaction owns page now */
-
- if (++endpoint->replenish_ready == IPA_REPLENISH_BATCH) {
- doorbell = true;
- endpoint->replenish_ready = 0;
- }
-
- gsi_trans_commit(trans, doorbell);
-
- return 0;
-
-err_trans_free:
- gsi_trans_free(trans);
-err_free_pages:
- __free_pages(page, get_order(IPA_RX_BUFFER_SIZE));
+ __free_pages(page, get_order(buffer_size));
+ else
+ trans->data = page; /* transaction owns page now */
- return -ENOMEM;
+ return ret;
}
/**
* ipa_endpoint_replenish() - Replenish endpoint receive buffers
* @endpoint: Endpoint to be replenished
- * @add_one: Whether this is replacing a just-consumed buffer
*
* The IPA hardware can hold a fixed number of receive buffers for an RX
* endpoint, based on the number of entries in the underlying channel ring
* buffer. If an endpoint's "backlog" is non-zero, it indicates how many
* more receive buffers can be supplied to the hardware. Replenishing for
- * an endpoint can be disabled, in which case requests to replenish a
- * buffer are "saved", and transferred to the backlog once it is re-enabled
- * again.
+ * an endpoint can be disabled, in which case buffers are not queued to
+ * the hardware.
*/
-static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, bool add_one)
+static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint)
{
- struct gsi *gsi;
- u32 backlog;
- int delta;
+ struct gsi_trans *trans;
- if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) {
- if (add_one)
- atomic_inc(&endpoint->replenish_saved);
+ if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags))
return;
- }
- /* If already active, just update the backlog */
- if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) {
- if (add_one)
- atomic_inc(&endpoint->replenish_backlog);
+ /* Skip it if it's already active */
+ if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags))
return;
- }
- while (atomic_dec_not_zero(&endpoint->replenish_backlog))
- if (ipa_endpoint_replenish_one(endpoint))
+ while ((trans = ipa_endpoint_trans_alloc(endpoint, 1))) {
+ bool doorbell;
+
+ if (ipa_endpoint_replenish_one(endpoint, trans))
goto try_again_later;
- clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
- if (add_one)
- atomic_inc(&endpoint->replenish_backlog);
+ /* Ring the doorbell if we've got a full batch */
+ doorbell = !(++endpoint->replenish_count % IPA_REPLENISH_BATCH);
+ gsi_trans_commit(trans, doorbell);
+ }
+
+ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
return;
try_again_later:
+ gsi_trans_free(trans);
clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
- /* The last one didn't succeed, so fix the backlog */
- delta = add_one ? 2 : 1;
- backlog = atomic_add_return(delta, &endpoint->replenish_backlog);
-
/* Whenever a receive buffer transaction completes we'll try to
* replenish again. It's unlikely, but if we fail to supply even
* one buffer, nothing will trigger another replenish attempt.
- * Receive buffer transactions use one TRE, so schedule work to
- * try replenishing again if our backlog is *all* available TREs.
+ * If the hardware has no receive buffers queued, schedule work to
+ * try replenishing again.
*/
- gsi = &endpoint->ipa->gsi;
- if (backlog == gsi_channel_tre_max(gsi, endpoint->channel_id))
+ if (gsi_channel_trans_idle(&endpoint->ipa->gsi, endpoint->channel_id))
schedule_delayed_work(&endpoint->replenish_work,
msecs_to_jiffies(1));
}
static void ipa_endpoint_replenish_enable(struct ipa_endpoint *endpoint)
{
- struct gsi *gsi = &endpoint->ipa->gsi;
- u32 max_backlog;
- u32 saved;
-
set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
- while ((saved = atomic_xchg(&endpoint->replenish_saved, 0)))
- atomic_add(saved, &endpoint->replenish_backlog);
/* Start replenishing if hardware currently has no buffers */
- max_backlog = gsi_channel_tre_max(gsi, endpoint->channel_id);
- if (atomic_read(&endpoint->replenish_backlog) == max_backlog)
- ipa_endpoint_replenish(endpoint, false);
+ if (gsi_channel_trans_idle(&endpoint->ipa->gsi, endpoint->channel_id))
+ ipa_endpoint_replenish(endpoint);
}
static void ipa_endpoint_replenish_disable(struct ipa_endpoint *endpoint)
{
- u32 backlog;
-
clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
- while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0)))
- atomic_add(backlog, &endpoint->replenish_saved);
}
static void ipa_endpoint_replenish_work(struct work_struct *work)
@@ -1157,7 +1138,7 @@ static void ipa_endpoint_replenish_work(struct work_struct *work)
endpoint = container_of(dwork, struct ipa_endpoint, replenish_work);
- ipa_endpoint_replenish(endpoint, false);
+ ipa_endpoint_replenish(endpoint);
}
static void ipa_endpoint_skb_copy(struct ipa_endpoint *endpoint,
@@ -1183,15 +1164,16 @@ static void ipa_endpoint_skb_copy(struct ipa_endpoint *endpoint,
static bool ipa_endpoint_skb_build(struct ipa_endpoint *endpoint,
struct page *page, u32 len)
{
+ u32 buffer_size = endpoint->data->rx.buffer_size;
struct sk_buff *skb;
/* Nothing to do if there's no netdev */
if (!endpoint->netdev)
return false;
- WARN_ON(len > SKB_WITH_OVERHEAD(IPA_RX_BUFFER_SIZE - NET_SKB_PAD));
+ WARN_ON(len > SKB_WITH_OVERHEAD(buffer_size - NET_SKB_PAD));
- skb = build_skb(page_address(page), IPA_RX_BUFFER_SIZE);
+ skb = build_skb(page_address(page), buffer_size);
if (skb) {
/* Reserve the headroom and account for the data */
skb_reserve(skb, NET_SKB_PAD);
@@ -1289,8 +1271,9 @@ static bool ipa_endpoint_status_drop(struct ipa_endpoint *endpoint,
static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint,
struct page *page, u32 total_len)
{
+ u32 buffer_size = endpoint->data->rx.buffer_size;
void *data = page_address(page) + NET_SKB_PAD;
- u32 unused = IPA_RX_BUFFER_SIZE - total_len;
+ u32 unused = buffer_size - total_len;
u32 resid = total_len;
while (resid) {
@@ -1360,10 +1343,8 @@ static void ipa_endpoint_rx_complete(struct ipa_endpoint *endpoint,
{
struct page *page;
- ipa_endpoint_replenish(endpoint, true);
-
if (trans->cancelled)
- return;
+ goto done;
/* Parse or build a socket buffer using the actual received length */
page = trans->data;
@@ -1371,6 +1352,8 @@ static void ipa_endpoint_rx_complete(struct ipa_endpoint *endpoint,
ipa_endpoint_status_parse(endpoint, page, trans->len);
else if (ipa_endpoint_skb_build(endpoint, page, trans->len))
trans->data = NULL; /* Pages have been consumed */
+done:
+ ipa_endpoint_replenish(endpoint);
}
void ipa_endpoint_trans_complete(struct ipa_endpoint *endpoint,
@@ -1398,8 +1381,11 @@ void ipa_endpoint_trans_release(struct ipa_endpoint *endpoint,
} else {
struct page *page = trans->data;
- if (page)
- __free_pages(page, get_order(IPA_RX_BUFFER_SIZE));
+ if (page) {
+ u32 buffer_size = endpoint->data->rx.buffer_size;
+
+ __free_pages(page, get_order(buffer_size));
+ }
}
}
@@ -1704,9 +1690,6 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint)
*/
clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
- atomic_set(&endpoint->replenish_saved,
- gsi_channel_tre_max(gsi, endpoint->channel_id));
- atomic_set(&endpoint->replenish_backlog, 0);
INIT_DELAYED_WORK(&endpoint->replenish_work,
ipa_endpoint_replenish_work);
}
@@ -1882,6 +1865,8 @@ u32 ipa_endpoint_init(struct ipa *ipa, u32 count,
enum ipa_endpoint_name name;
u32 filter_map;
+ BUILD_BUG_ON(!IPA_REPLENISH_BATCH);
+
if (!ipa_endpoint_data_valid(ipa, count, data))
return 0; /* Error */
diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
index 0313cdc607de..12fd5b16c18e 100644
--- a/drivers/net/ipa/ipa_endpoint.h
+++ b/drivers/net/ipa/ipa_endpoint.h
@@ -65,9 +65,7 @@ enum ipa_replenish_flag {
* @evt_ring_id: GSI event ring used by the endpoint
* @netdev: Network device pointer, if endpoint uses one
* @replenish_flags: Replenishing state flags
- * @replenish_ready: Number of replenish transactions without doorbell
- * @replenish_saved: Replenish requests held while disabled
- * @replenish_backlog: Number of buffers needed to fill hardware queue
+ * @replenish_count: Total number of replenish transactions committed
* @replenish_work: Work item used for repeated replenish failures
*/
struct ipa_endpoint {
@@ -86,9 +84,7 @@ struct ipa_endpoint {
/* Receive buffer replenishing for RX endpoints */
DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT);
- u32 replenish_ready;
- atomic_t replenish_saved;
- atomic_t replenish_backlog;
+ u64 replenish_count;
struct delayed_work replenish_work; /* global wq */
};
diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c
index 7ab4e26db08c..7aafc221b5cf 100644
--- a/drivers/net/mdio/mdio-xgene.c
+++ b/drivers/net/mdio/mdio-xgene.c
@@ -285,7 +285,8 @@ static acpi_status acpi_register_phy(acpi_handle handle, u32 lvl,
const union acpi_object *obj;
u32 phy_addr;
- if (acpi_bus_get_device(handle, &adev))
+ adev = acpi_fetch_acpi_dev(handle);
+ if (!adev)
return AE_OK;
if (acpi_dev_get_property(adev, "phy-channel", ACPI_TYPE_INTEGER, &obj))
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index cd6742e6ba8b..61418d4dc0cd 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -632,35 +632,43 @@ static void xpcs_resolve_pma(struct dw_xpcs *xpcs,
}
}
-void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
- struct phylink_link_state *state)
+static int xpcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
+ const struct phylink_link_state *state)
{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported) = { 0, };
const struct xpcs_compat *compat;
+ struct dw_xpcs *xpcs;
int i;
- /* phylink expects us to report all supported modes with
- * PHY_INTERFACE_MODE_NA, just don't limit the supported and
- * advertising masks and exit.
- */
- if (state->interface == PHY_INTERFACE_MODE_NA)
- return;
-
- linkmode_zero(xpcs_supported);
-
+ xpcs = phylink_pcs_to_xpcs(pcs);
compat = xpcs_find_compat(xpcs->id, state->interface);
- /* Populate the supported link modes for this
- * PHY interface type
+ /* Populate the supported link modes for this PHY interface type.
+ * FIXME: what about the port modes and autoneg bit? This masks
+ * all those away.
*/
if (compat)
for (i = 0; compat->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++)
set_bit(compat->supported[i], xpcs_supported);
linkmode_and(supported, supported, xpcs_supported);
- linkmode_and(state->advertising, state->advertising, xpcs_supported);
+
+ return 0;
+}
+
+void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces)
+{
+ int i, j;
+
+ for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) {
+ const struct xpcs_compat *compat = &xpcs->id->compat[i];
+
+ for (j = 0; j < compat->num_interfaces; j++)
+ if (compat->interface[j] < PHY_INTERFACE_MODE_MAX)
+ __set_bit(compat->interface[j], interfaces);
+ }
}
-EXPORT_SYMBOL_GPL(xpcs_validate);
+EXPORT_SYMBOL_GPL(xpcs_get_interfaces);
int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable)
{
@@ -1106,6 +1114,7 @@ static const struct xpcs_id xpcs_id_list[] = {
};
static const struct phylink_pcs_ops xpcs_phylink_ops = {
+ .pcs_validate = xpcs_validate,
.pcs_config = xpcs_config,
.pcs_get_state = xpcs_get_state,
.pcs_link_up = xpcs_link_up,
diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c
index 968dd43a2b1e..a8db1a19011b 100644
--- a/drivers/net/phy/aquantia_main.c
+++ b/drivers/net/phy/aquantia_main.c
@@ -533,9 +533,7 @@ static int aqcs109_config_init(struct phy_device *phydev)
* PMA speed ability bits are the same for all members of the family,
* AQCS109 however supports speeds up to 2.5G only.
*/
- ret = phy_set_max_speed(phydev, SPEED_2500);
- if (ret)
- return ret;
+ phy_set_max_speed(phydev, SPEED_2500);
return aqr107_set_downshift(phydev, MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT);
}
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 29aa811af430..73926006d319 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -19,6 +19,8 @@
#include <linux/regulator/of_regulator.h>
#include <linux/regulator/driver.h>
#include <linux/regulator/consumer.h>
+#include <linux/phylink.h>
+#include <linux/sfp.h>
#include <dt-bindings/net/qca-ar803x.h>
#define AT803X_SPECIFIC_FUNCTION_CONTROL 0x10
@@ -51,6 +53,8 @@
#define AT803X_INTR_ENABLE_PAGE_RECEIVED BIT(12)
#define AT803X_INTR_ENABLE_LINK_FAIL BIT(11)
#define AT803X_INTR_ENABLE_LINK_SUCCESS BIT(10)
+#define AT803X_INTR_ENABLE_LINK_FAIL_BX BIT(8)
+#define AT803X_INTR_ENABLE_LINK_SUCCESS_BX BIT(7)
#define AT803X_INTR_ENABLE_WIRESPEED_DOWNGRADE BIT(5)
#define AT803X_INTR_ENABLE_POLARITY_CHANGED BIT(1)
#define AT803X_INTR_ENABLE_WOL BIT(0)
@@ -85,7 +89,17 @@
#define AT803X_DEBUG_DATA 0x1E
#define AT803X_MODE_CFG_MASK 0x0F
-#define AT803X_MODE_CFG_SGMII 0x01
+#define AT803X_MODE_CFG_BASET_RGMII 0x00
+#define AT803X_MODE_CFG_BASET_SGMII 0x01
+#define AT803X_MODE_CFG_BX1000_RGMII_50OHM 0x02
+#define AT803X_MODE_CFG_BX1000_RGMII_75OHM 0x03
+#define AT803X_MODE_CFG_BX1000_CONV_50OHM 0x04
+#define AT803X_MODE_CFG_BX1000_CONV_75OHM 0x05
+#define AT803X_MODE_CFG_FX100_RGMII_50OHM 0x06
+#define AT803X_MODE_CFG_FX100_CONV_50OHM 0x07
+#define AT803X_MODE_CFG_RGMII_AUTO_MDET 0x0B
+#define AT803X_MODE_CFG_FX100_RGMII_75OHM 0x0E
+#define AT803X_MODE_CFG_FX100_CONV_75OHM 0x0F
#define AT803X_PSSR 0x11 /*PHY-Specific Status Register*/
#define AT803X_PSSR_MR_AN_COMPLETE 0x0200
@@ -283,6 +297,8 @@ struct at803x_priv {
u16 clk_25m_mask;
u8 smarteee_lpi_tw_1g;
u8 smarteee_lpi_tw_100m;
+ bool is_fiber;
+ bool is_1000basex;
struct regulator_dev *vddio_rdev;
struct regulator_dev *vddh_rdev;
struct regulator *vddio;
@@ -650,6 +666,55 @@ static int at8031_register_regulators(struct phy_device *phydev)
return 0;
}
+static int at803x_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
+{
+ struct phy_device *phydev = upstream;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_support);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_support);
+ phy_interface_t iface;
+
+ linkmode_zero(phy_support);
+ phylink_set(phy_support, 1000baseX_Full);
+ phylink_set(phy_support, 1000baseT_Full);
+ phylink_set(phy_support, Autoneg);
+ phylink_set(phy_support, Pause);
+ phylink_set(phy_support, Asym_Pause);
+
+ linkmode_zero(sfp_support);
+ sfp_parse_support(phydev->sfp_bus, id, sfp_support);
+ /* Some modules support 10G modes as well as others we support.
+ * Mask out non-supported modes so the correct interface is picked.
+ */
+ linkmode_and(sfp_support, phy_support, sfp_support);
+
+ if (linkmode_empty(sfp_support)) {
+ dev_err(&phydev->mdio.dev, "incompatible SFP module inserted\n");
+ return -EINVAL;
+ }
+
+ iface = sfp_select_interface(phydev->sfp_bus, sfp_support);
+
+ /* Only 1000Base-X is supported by AR8031/8033 as the downstream SerDes
+ * interface for use with SFP modules.
+ * However, some copper modules detected as having a preferred SGMII
+ * interface do default to and function in 1000Base-X mode, so just
+ * print a warning and allow such modules, as they may have some chance
+ * of working.
+ */
+ if (iface == PHY_INTERFACE_MODE_SGMII)
+ dev_warn(&phydev->mdio.dev, "module may not function if 1000Base-X not supported\n");
+ else if (iface != PHY_INTERFACE_MODE_1000BASEX)
+ return -EINVAL;
+
+ return 0;
+}
+
+static const struct sfp_upstream_ops at803x_sfp_ops = {
+ .attach = phy_sfp_attach,
+ .detach = phy_sfp_detach,
+ .module_insert = at803x_sfp_insert,
+};
+
static int at803x_parse_dt(struct phy_device *phydev)
{
struct device_node *node = phydev->mdio.dev.of_node;
@@ -757,6 +822,11 @@ static int at803x_parse_dt(struct phy_device *phydev)
phydev_err(phydev, "failed to get VDDIO regulator\n");
return PTR_ERR(priv->vddio);
}
+
+ /* Only AR8031/8033 support 1000Base-X for SFP modules */
+ ret = phy_sfp_probe(phydev, &at803x_sfp_ops);
+ if (ret < 0)
+ return ret;
}
return 0;
@@ -784,16 +854,24 @@ static int at803x_probe(struct phy_device *phydev)
return ret;
}
- /* Some bootloaders leave the fiber page selected.
- * Switch to the copper page, as otherwise we read
- * the PHY capabilities from the fiber side.
- */
if (phydev->drv->phy_id == ATH8031_PHY_ID) {
- phy_lock_mdio_bus(phydev);
- ret = at803x_write_page(phydev, AT803X_PAGE_COPPER);
- phy_unlock_mdio_bus(phydev);
- if (ret)
+ int ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
+ int mode_cfg;
+
+ if (ccr < 0)
goto err;
+ mode_cfg = ccr & AT803X_MODE_CFG_MASK;
+
+ switch (mode_cfg) {
+ case AT803X_MODE_CFG_BX1000_RGMII_50OHM:
+ case AT803X_MODE_CFG_BX1000_RGMII_75OHM:
+ priv->is_1000basex = true;
+ fallthrough;
+ case AT803X_MODE_CFG_FX100_RGMII_50OHM:
+ case AT803X_MODE_CFG_FX100_RGMII_75OHM:
+ priv->is_fiber = true;
+ break;
+ }
}
return 0;
@@ -815,6 +893,7 @@ static void at803x_remove(struct phy_device *phydev)
static int at803x_get_features(struct phy_device *phydev)
{
+ struct at803x_priv *priv = phydev->priv;
int err;
err = genphy_read_abilities(phydev);
@@ -841,12 +920,13 @@ static int at803x_get_features(struct phy_device *phydev)
* As a result of that, ESTATUS_1000_XFULL is set
* to 1 even when operating in copper TP mode.
*
- * Remove this mode from the supported link modes,
- * as this driver currently only supports copper
- * operation.
+ * Remove this mode from the supported link modes
+ * when not operating in 1000BaseX mode.
*/
- linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
- phydev->supported);
+ if (!priv->is_1000basex)
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+ phydev->supported);
+
return 0;
}
@@ -910,8 +990,27 @@ static int at8031_pll_config(struct phy_device *phydev)
static int at803x_config_init(struct phy_device *phydev)
{
+ struct at803x_priv *priv = phydev->priv;
int ret;
+ if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+ /* Some bootloaders leave the fiber page selected.
+ * Switch to the appropriate page (fiber or copper), as otherwise we
+ * read the PHY capabilities from the wrong page.
+ */
+ phy_lock_mdio_bus(phydev);
+ ret = at803x_write_page(phydev,
+ priv->is_fiber ? AT803X_PAGE_FIBER :
+ AT803X_PAGE_COPPER);
+ phy_unlock_mdio_bus(phydev);
+ if (ret)
+ return ret;
+
+ ret = at8031_pll_config(phydev);
+ if (ret < 0)
+ return ret;
+ }
+
/* The RX and TX delay default is:
* after HW reset: RX delay enabled and TX delay disabled
* after SW reset: RX delay enabled, while TX delay retains the
@@ -941,12 +1040,6 @@ static int at803x_config_init(struct phy_device *phydev)
if (ret < 0)
return ret;
- if (phydev->drv->phy_id == ATH8031_PHY_ID) {
- ret = at8031_pll_config(phydev);
- if (ret < 0)
- return ret;
- }
-
/* Ar803x extended next page bit is enabled by default. Cisco
* multigig switches read this bit and attempt to negotiate 10Gbps
* rates even if the next page bit is disabled. This is incorrect
@@ -967,6 +1060,7 @@ static int at803x_ack_interrupt(struct phy_device *phydev)
static int at803x_config_intr(struct phy_device *phydev)
{
+ struct at803x_priv *priv = phydev->priv;
int err;
int value;
@@ -983,6 +1077,10 @@ static int at803x_config_intr(struct phy_device *phydev)
value |= AT803X_INTR_ENABLE_DUPLEX_CHANGED;
value |= AT803X_INTR_ENABLE_LINK_FAIL;
value |= AT803X_INTR_ENABLE_LINK_SUCCESS;
+ if (priv->is_fiber) {
+ value |= AT803X_INTR_ENABLE_LINK_FAIL_BX;
+ value |= AT803X_INTR_ENABLE_LINK_SUCCESS_BX;
+ }
err = phy_write(phydev, AT803X_INTR_ENABLE, value);
} else {
@@ -1115,8 +1213,12 @@ static int at803x_read_specific_status(struct phy_device *phydev)
static int at803x_read_status(struct phy_device *phydev)
{
+ struct at803x_priv *priv = phydev->priv;
int err, old_link = phydev->link;
+ if (priv->is_1000basex)
+ return genphy_c37_read_status(phydev);
+
/* Update the link, but return if there was an error */
err = genphy_update_link(phydev);
if (err)
@@ -1170,6 +1272,7 @@ static int at803x_config_mdix(struct phy_device *phydev, u8 ctrl)
static int at803x_config_aneg(struct phy_device *phydev)
{
+ struct at803x_priv *priv = phydev->priv;
int ret;
ret = at803x_config_mdix(phydev, phydev->mdix_ctrl);
@@ -1186,6 +1289,9 @@ static int at803x_config_aneg(struct phy_device *phydev)
return ret;
}
+ if (priv->is_1000basex)
+ return genphy_c37_config_aneg(phydev);
+
/* Do not restart auto-negotiation by setting ret to 0 defautly,
* when calling __genphy_config_aneg later.
*/
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 271fc01f7f7f..2001f3329133 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -243,7 +243,7 @@ size_t phy_speeds(unsigned int *speeds, size_t size,
return count;
}
-static int __set_linkmode_max_speed(u32 max_speed, unsigned long *addr)
+static void __set_linkmode_max_speed(u32 max_speed, unsigned long *addr)
{
const struct phy_setting *p;
int i;
@@ -254,13 +254,11 @@ static int __set_linkmode_max_speed(u32 max_speed, unsigned long *addr)
else
break;
}
-
- return 0;
}
-static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
+static void __set_phy_supported(struct phy_device *phydev, u32 max_speed)
{
- return __set_linkmode_max_speed(max_speed, phydev->supported);
+ __set_linkmode_max_speed(max_speed, phydev->supported);
}
/**
@@ -273,17 +271,11 @@ static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
* is connected to a 1G PHY. This function allows the MAC to indicate its
* maximum speed, and so limit what the PHY will advertise.
*/
-int phy_set_max_speed(struct phy_device *phydev, u32 max_speed)
+void phy_set_max_speed(struct phy_device *phydev, u32 max_speed)
{
- int err;
-
- err = __set_phy_supported(phydev, max_speed);
- if (err)
- return err;
+ __set_phy_supported(phydev, max_speed);
phy_advertise_supported(phydev);
-
- return 0;
}
EXPORT_SYMBOL(phy_set_max_speed);
@@ -440,7 +432,9 @@ int phy_speed_down_core(struct phy_device *phydev)
if (min_common_speed == SPEED_UNKNOWN)
return -EINVAL;
- return __set_linkmode_max_speed(min_common_speed, phydev->advertising);
+ __set_linkmode_max_speed(min_common_speed, phydev->advertising);
+
+ return 0;
}
static void mmd_phy_indirect(struct mii_bus *bus, int phy_addr, int devad,
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 420201858564..5b53a3e23c89 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -132,17 +132,6 @@ void phylink_set_port_modes(unsigned long *mask)
}
EXPORT_SYMBOL_GPL(phylink_set_port_modes);
-void phylink_set_10g_modes(unsigned long *mask)
-{
- phylink_set(mask, 10000baseT_Full);
- phylink_set(mask, 10000baseCR_Full);
- phylink_set(mask, 10000baseSR_Full);
- phylink_set(mask, 10000baseLR_Full);
- phylink_set(mask, 10000baseLRM_Full);
- phylink_set(mask, 10000baseER_Full);
-}
-EXPORT_SYMBOL_GPL(phylink_set_10g_modes);
-
static int phylink_is_empty_linkmode(const unsigned long *linkmode)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(tmp) = { 0, };
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index b554054a7560..e62fc4f2aee0 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -358,6 +358,7 @@ config USB_NET_SMSC95XX
select BITREVERSE
select CRC16
select CRC32
+ imply NET_SELFTESTS
help
This option adds support for SMSC LAN95XX based USB 2.0
10/100 Ethernet adapters.
diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h
index 2a1e31defe71..4334aafab59a 100644
--- a/drivers/net/usb/asix.h
+++ b/drivers/net/usb/asix.h
@@ -192,8 +192,8 @@ extern const struct driver_info ax88172a_info;
/* ASIX specific flags */
#define FLAG_EEPROM_MAC (1UL << 0) /* init device MAC from eeprom */
-int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
- u16 size, void *data, int in_pm);
+int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
+ u16 size, void *data, int in_pm);
int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
u16 size, void *data, int in_pm);
diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index 71682970be58..524805285019 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -11,8 +11,8 @@
#define AX_HOST_EN_RETRIES 30
-int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
- u16 size, void *data, int in_pm)
+int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
+ u16 size, void *data, int in_pm)
{
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
@@ -27,9 +27,12 @@ int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
value, index, data, size);
- if (unlikely(ret < 0))
+ if (unlikely(ret < size)) {
+ ret = ret < 0 ? ret : -ENODATA;
+
netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n",
index, ret);
+ }
return ret;
}
@@ -79,7 +82,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm)
0, 0, 1, &smsr, in_pm);
if (ret == -ENODEV)
break;
- else if (ret < sizeof(smsr))
+ else if (ret < 0)
continue;
else if (smsr & AX_HOST_EN)
break;
@@ -579,8 +582,12 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc)
return ret;
}
- asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id,
- (__u16)loc, 2, &res, 1);
+ ret = asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id,
+ (__u16)loc, 2, &res, 1);
+ if (ret < 0) {
+ mutex_unlock(&dev->phy_mutex);
+ return ret;
+ }
asix_set_hw_mii(dev, 1);
mutex_unlock(&dev->phy_mutex);
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 4514d35ef4c4..6ea44e53713a 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -755,7 +755,12 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
priv->phy_addr = ret;
priv->embd_phy = ((priv->phy_addr & 0x1f) == 0x10);
- asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0);
+ ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0);
+ if (ret < 0) {
+ netdev_dbg(dev->net, "Failed to read STATMNGSTS_REG: %d\n", ret);
+ return ret;
+ }
+
chipcode &= AX_CHIPCODE_MASK;
ret = (chipcode == AX_AX88772_CHIPCODE) ? ax88772_hw_reset(dev, 0) :
@@ -858,7 +863,6 @@ static int marvell_phy_init(struct usbnet *dev)
reg = asix_mdio_read(dev->net, dev->mii.phy_id,
MII_MARVELL_LED_CTRL);
netdev_dbg(dev->net, "MII_MARVELL_LED_CTRL (2) = 0x%04x\n", reg);
- reg &= 0xfc0f;
}
return 0;
@@ -920,11 +924,21 @@ static int ax88178_reset(struct usbnet *dev)
int gpio0 = 0;
u32 phyid;
- asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0);
+ ret = asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0);
+ if (ret < 0) {
+ netdev_dbg(dev->net, "Failed to read GPIOS: %d\n", ret);
+ return ret;
+ }
+
netdev_dbg(dev->net, "GPIO Status: 0x%04x\n", status);
asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL, 0);
- asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0);
+ ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0);
+ if (ret < 0) {
+ netdev_dbg(dev->net, "Failed to read EEPROM: %d\n", ret);
+ return ret;
+ }
+
asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL, 0);
netdev_dbg(dev->net, "EEPROM index 0x17 is 0x%04x\n", eeprom);
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 82bb5ed94c48..a7c1434fe2da 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -21,6 +21,7 @@
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/ipv6_stubs.h>
+#include <net/ndisc.h>
/* alternative VLAN for IP session 0 if not untagged */
#define MBIM_IPS0_VID 4094
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index bc1e3dd67c04..5567220e9d16 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -20,6 +20,8 @@
#include <linux/of_net.h>
#include <linux/mdio.h>
#include <linux/phy.h>
+#include <net/selftests.h>
+
#include "smsc95xx.h"
#define SMSC_CHIPNAME "smsc95xx"
@@ -727,6 +729,26 @@ static u32 smsc95xx_get_link(struct net_device *net)
return net->phydev->link;
}
+static void smsc95xx_ethtool_get_strings(struct net_device *netdev, u32 sset,
+ u8 *data)
+{
+ switch (sset) {
+ case ETH_SS_TEST:
+ net_selftest_get_strings(data);
+ break;
+ }
+}
+
+static int smsc95xx_ethtool_get_sset_count(struct net_device *ndev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_TEST:
+ return net_selftest_get_count();
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct ethtool_ops smsc95xx_ethtool_ops = {
.get_link = smsc95xx_get_link,
.nway_reset = phy_ethtool_nway_reset,
@@ -743,6 +765,9 @@ static const struct ethtool_ops smsc95xx_ethtool_ops = {
.get_link_ksettings = phy_ethtool_get_link_ksettings,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_ts_info = ethtool_op_get_ts_info,
+ .self_test = net_selftest,
+ .get_strings = smsc95xx_ethtool_get_strings,
+ .get_sset_count = smsc95xx_ethtool_get_sset_count,
};
static int smsc95xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
diff --git a/drivers/nfc/st-nci/vendor_cmds.c b/drivers/nfc/st-nci/vendor_cmds.c
index 30d2912d1a05..6335d7afca24 100644
--- a/drivers/nfc/st-nci/vendor_cmds.c
+++ b/drivers/nfc/st-nci/vendor_cmds.c
@@ -456,7 +456,7 @@ static const struct nfc_vendor_cmd st_nci_vendor_cmds[] = {
int st_nci_vendor_cmds_init(struct nci_dev *ndev)
{
- return nfc_set_vendor_cmds(ndev->nfc_dev, st_nci_vendor_cmds,
+ return nci_set_vendor_cmds(ndev, st_nci_vendor_cmds,
sizeof(st_nci_vendor_cmds));
}
EXPORT_SYMBOL(st_nci_vendor_cmds_init);
diff --git a/drivers/nfc/st21nfca/vendor_cmds.c b/drivers/nfc/st21nfca/vendor_cmds.c
index 74882866dbaf..bfa418d4c6b0 100644
--- a/drivers/nfc/st21nfca/vendor_cmds.c
+++ b/drivers/nfc/st21nfca/vendor_cmds.c
@@ -358,7 +358,7 @@ int st21nfca_vendor_cmds_init(struct nfc_hci_dev *hdev)
struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
init_completion(&info->vendor_info.req_completion);
- return nfc_set_vendor_cmds(hdev->ndev, st21nfca_vendor_cmds,
- sizeof(st21nfca_vendor_cmds));
+ return nfc_hci_set_vendor_cmds(hdev, st21nfca_vendor_cmds,
+ sizeof(st21nfca_vendor_cmds));
}
EXPORT_SYMBOL(st21nfca_vendor_cmds_init);
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 0e4bc8b9329d..b6f2cfd15dd2 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -317,11 +317,18 @@ no_memory:
}
EXPORT_SYMBOL(ptp_clock_register);
+static int unregister_vclock(struct device *dev, void *data)
+{
+ struct ptp_clock *ptp = dev_get_drvdata(dev);
+
+ ptp_vclock_unregister(info_to_vclock(ptp->info));
+ return 0;
+}
+
int ptp_clock_unregister(struct ptp_clock *ptp)
{
if (ptp_vclock_in_use(ptp)) {
- pr_err("ptp: virtual clock in use\n");
- return -EBUSY;
+ device_for_each_child(&ptp->dev, NULL, unregister_vclock);
}
ptp->defunct = 1;
diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c
index 8070f3fd98f0..7d4da9e605ef 100644
--- a/drivers/ptp/ptp_pch.c
+++ b/drivers/ptp/ptp_pch.c
@@ -10,9 +10,10 @@
#include <linux/device.h>
#include <linux/err.h>
-#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -100,7 +101,6 @@ struct pch_ts_regs {
#define PCH_ECS_ETH (1 << 0)
#define PCH_ECS_CAN (1 << 1)
-#define PCH_STATION_BYTES 6
#define PCH_IEEE1588_ETH (1 << 0)
#define PCH_IEEE1588_CAN (1 << 1)
@@ -115,8 +115,6 @@ struct pch_dev {
int exts0_enabled;
int exts1_enabled;
- u32 mem_base;
- u32 mem_size;
u32 irq;
struct pci_dev *pdev;
spinlock_t register_lock;
@@ -148,28 +146,15 @@ static inline void pch_eth_enable_set(struct pch_dev *chip)
static u64 pch_systime_read(struct pch_ts_regs __iomem *regs)
{
u64 ns;
- u32 lo, hi;
- lo = ioread32(&regs->systime_lo);
- hi = ioread32(&regs->systime_hi);
+ ns = ioread64_lo_hi(&regs->systime_lo);
- ns = ((u64) hi) << 32;
- ns |= lo;
- ns <<= TICKS_NS_SHIFT;
-
- return ns;
+ return ns << TICKS_NS_SHIFT;
}
static void pch_systime_write(struct pch_ts_regs __iomem *regs, u64 ns)
{
- u32 hi, lo;
-
- ns >>= TICKS_NS_SHIFT;
- hi = ns >> 32;
- lo = ns & 0xffffffff;
-
- iowrite32(lo, &regs->systime_lo);
- iowrite32(hi, &regs->systime_hi);
+ iowrite64_lo_hi(ns >> TICKS_NS_SHIFT, &regs->systime_lo);
}
static inline void pch_block_reset(struct pch_dev *chip)
@@ -235,16 +220,10 @@ u64 pch_rx_snap_read(struct pci_dev *pdev)
{
struct pch_dev *chip = pci_get_drvdata(pdev);
u64 ns;
- u32 lo, hi;
- lo = ioread32(&chip->regs->rx_snap_lo);
- hi = ioread32(&chip->regs->rx_snap_hi);
+ ns = ioread64_lo_hi(&chip->regs->rx_snap_lo);
- ns = ((u64) hi) << 32;
- ns |= lo;
- ns <<= TICKS_NS_SHIFT;
-
- return ns;
+ return ns << TICKS_NS_SHIFT;
}
EXPORT_SYMBOL(pch_rx_snap_read);
@@ -252,16 +231,10 @@ u64 pch_tx_snap_read(struct pci_dev *pdev)
{
struct pch_dev *chip = pci_get_drvdata(pdev);
u64 ns;
- u32 lo, hi;
-
- lo = ioread32(&chip->regs->tx_snap_lo);
- hi = ioread32(&chip->regs->tx_snap_hi);
- ns = ((u64) hi) << 32;
- ns |= lo;
- ns <<= TICKS_NS_SHIFT;
+ ns = ioread64_lo_hi(&chip->regs->tx_snap_lo);
- return ns;
+ return ns << TICKS_NS_SHIFT;
}
EXPORT_SYMBOL(pch_tx_snap_read);
@@ -292,8 +265,9 @@ static void pch_reset(struct pch_dev *chip)
*/
int pch_set_station_address(u8 *addr, struct pci_dev *pdev)
{
- s32 i;
struct pch_dev *chip = pci_get_drvdata(pdev);
+ bool valid;
+ u64 mac;
/* Verify the parameter */
if ((chip->regs == NULL) || addr == (u8 *)NULL) {
@@ -301,37 +275,15 @@ int pch_set_station_address(u8 *addr, struct pci_dev *pdev)
"invalid params returning PCH_INVALIDPARAM\n");
return PCH_INVALIDPARAM;
}
- /* For all station address bytes */
- for (i = 0; i < PCH_STATION_BYTES; i++) {
- u32 val;
- s32 tmp;
-
- tmp = hex_to_bin(addr[i * 3]);
- if (tmp < 0) {
- dev_err(&pdev->dev,
- "invalid params returning PCH_INVALIDPARAM\n");
- return PCH_INVALIDPARAM;
- }
- val = tmp * 16;
- tmp = hex_to_bin(addr[(i * 3) + 1]);
- if (tmp < 0) {
- dev_err(&pdev->dev,
- "invalid params returning PCH_INVALIDPARAM\n");
- return PCH_INVALIDPARAM;
- }
- val += tmp;
- /* Expects ':' separated addresses */
- if ((i < 5) && (addr[(i * 3) + 2] != ':')) {
- dev_err(&pdev->dev,
- "invalid params returning PCH_INVALIDPARAM\n");
- return PCH_INVALIDPARAM;
- }
- /* Ideally we should set the address only after validating
- entire string */
- dev_dbg(&pdev->dev, "invoking pch_station_set\n");
- iowrite32(val, &chip->regs->ts_st[i]);
+ valid = mac_pton(addr, (u8 *)&mac);
+ if (!valid) {
+ dev_err(&pdev->dev, "invalid params returning PCH_INVALIDPARAM\n");
+ return PCH_INVALIDPARAM;
}
+
+ dev_dbg(&pdev->dev, "invoking pch_station_set\n");
+ iowrite64_lo_hi(mac, &chip->regs->ts_st);
return 0;
}
EXPORT_SYMBOL(pch_set_station_address);
@@ -344,19 +296,16 @@ static irqreturn_t isr(int irq, void *priv)
struct pch_dev *pch_dev = priv;
struct pch_ts_regs __iomem *regs = pch_dev->regs;
struct ptp_clock_event event;
- u32 ack = 0, lo, hi, val;
+ u32 ack = 0, val;
val = ioread32(&regs->event);
if (val & PCH_TSE_SNS) {
ack |= PCH_TSE_SNS;
if (pch_dev->exts0_enabled) {
- hi = ioread32(&regs->asms_hi);
- lo = ioread32(&regs->asms_lo);
event.type = PTP_CLOCK_EXTTS;
event.index = 0;
- event.timestamp = ((u64) hi) << 32;
- event.timestamp |= lo;
+ event.timestamp = ioread64_hi_lo(&regs->asms_hi);
event.timestamp <<= TICKS_NS_SHIFT;
ptp_clock_event(pch_dev->ptp_clock, &event);
}
@@ -365,12 +314,9 @@ static irqreturn_t isr(int irq, void *priv)
if (val & PCH_TSE_SNM) {
ack |= PCH_TSE_SNM;
if (pch_dev->exts1_enabled) {
- hi = ioread32(&regs->amms_hi);
- lo = ioread32(&regs->amms_lo);
event.type = PTP_CLOCK_EXTTS;
event.index = 1;
- event.timestamp = ((u64) hi) << 32;
- event.timestamp |= lo;
+ event.timestamp = ioread64_hi_lo(&regs->asms_hi);
event.timestamp <<= TICKS_NS_SHIFT;
ptp_clock_event(pch_dev->ptp_clock, &event);
}
@@ -501,31 +447,12 @@ static const struct ptp_clock_info ptp_pch_caps = {
.enable = ptp_pch_enable,
};
-#define pch_suspend NULL
-#define pch_resume NULL
-
static void pch_remove(struct pci_dev *pdev)
{
struct pch_dev *chip = pci_get_drvdata(pdev);
+ free_irq(pdev->irq, chip);
ptp_clock_unregister(chip->ptp_clock);
- /* free the interrupt */
- if (pdev->irq != 0)
- free_irq(pdev->irq, chip);
-
- /* unmap the virtual IO memory space */
- if (chip->regs != NULL) {
- iounmap(chip->regs);
- chip->regs = NULL;
- }
- /* release the reserved IO memory space */
- if (chip->mem_base != 0) {
- release_mem_region(chip->mem_base, chip->mem_size);
- chip->mem_base = 0;
- }
- pci_disable_device(pdev);
- kfree(chip);
- dev_info(&pdev->dev, "complete\n");
}
static s32
@@ -535,50 +462,29 @@ pch_probe(struct pci_dev *pdev, const struct pci_device_id *id)
unsigned long flags;
struct pch_dev *chip;
- chip = kzalloc(sizeof(struct pch_dev), GFP_KERNEL);
+ chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
if (chip == NULL)
return -ENOMEM;
/* enable the 1588 pci device */
- ret = pci_enable_device(pdev);
+ ret = pcim_enable_device(pdev);
if (ret != 0) {
dev_err(&pdev->dev, "could not enable the pci device\n");
- goto err_pci_en;
+ return ret;
}
- chip->mem_base = pci_resource_start(pdev, IO_MEM_BAR);
- if (!chip->mem_base) {
+ ret = pcim_iomap_regions(pdev, BIT(IO_MEM_BAR), "1588_regs");
+ if (ret) {
dev_err(&pdev->dev, "could not locate IO memory address\n");
- ret = -ENODEV;
- goto err_pci_start;
- }
-
- /* retrieve the available length of the IO memory space */
- chip->mem_size = pci_resource_len(pdev, IO_MEM_BAR);
-
- /* allocate the memory for the device registers */
- if (!request_mem_region(chip->mem_base, chip->mem_size, "1588_regs")) {
- dev_err(&pdev->dev,
- "could not allocate register memory space\n");
- ret = -EBUSY;
- goto err_req_mem_region;
+ return ret;
}
/* get the virtual address to the 1588 registers */
- chip->regs = ioremap(chip->mem_base, chip->mem_size);
-
- if (!chip->regs) {
- dev_err(&pdev->dev, "Could not get virtual address\n");
- ret = -ENOMEM;
- goto err_ioremap;
- }
-
+ chip->regs = pcim_iomap_table(pdev)[IO_MEM_BAR];
chip->caps = ptp_pch_caps;
chip->ptp_clock = ptp_clock_register(&chip->caps, &pdev->dev);
- if (IS_ERR(chip->ptp_clock)) {
- ret = PTR_ERR(chip->ptp_clock);
- goto err_ptp_clock_reg;
- }
+ if (IS_ERR(chip->ptp_clock))
+ return PTR_ERR(chip->ptp_clock);
spin_lock_init(&chip->register_lock);
@@ -598,8 +504,7 @@ pch_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pch_reset(chip);
iowrite32(DEFAULT_ADDEND, &chip->regs->addend);
- iowrite32(1, &chip->regs->trgt_lo);
- iowrite32(0, &chip->regs->trgt_hi);
+ iowrite64_lo_hi(1, &chip->regs->trgt_lo);
iowrite32(PCH_TSE_TTIPEND, &chip->regs->event);
pch_eth_enable_set(chip);
@@ -617,21 +522,7 @@ pch_probe(struct pci_dev *pdev, const struct pci_device_id *id)
err_req_irq:
ptp_clock_unregister(chip->ptp_clock);
-err_ptp_clock_reg:
- iounmap(chip->regs);
- chip->regs = NULL;
-err_ioremap:
- release_mem_region(chip->mem_base, chip->mem_size);
-
-err_req_mem_region:
- chip->mem_base = 0;
-
-err_pci_start:
- pci_disable_device(pdev);
-
-err_pci_en:
- kfree(chip);
dev_err(&pdev->dev, "probe failed(ret=0x%x)\n", ret);
return ret;
@@ -646,33 +537,13 @@ static const struct pci_device_id pch_ieee1588_pcidev_id[] = {
};
MODULE_DEVICE_TABLE(pci, pch_ieee1588_pcidev_id);
-static SIMPLE_DEV_PM_OPS(pch_pm_ops, pch_suspend, pch_resume);
-
static struct pci_driver pch_driver = {
.name = KBUILD_MODNAME,
.id_table = pch_ieee1588_pcidev_id,
.probe = pch_probe,
.remove = pch_remove,
- .driver.pm = &pch_pm_ops,
};
-
-static void __exit ptp_pch_exit(void)
-{
- pci_unregister_driver(&pch_driver);
-}
-
-static s32 __init ptp_pch_init(void)
-{
- s32 ret;
-
- /* register the driver with the pci core */
- ret = pci_register_driver(&pch_driver);
-
- return ret;
-}
-
-module_init(ptp_pch_init);
-module_exit(ptp_pch_exit);
+module_pci_driver(pch_driver);
module_param_string(station,
pch_param.station, sizeof(pch_param.station), 0444);
diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c
index 41b92dc2f011..9233bfedeb17 100644
--- a/drivers/ptp/ptp_sysfs.c
+++ b/drivers/ptp/ptp_sysfs.c
@@ -14,7 +14,7 @@ static ssize_t clock_name_show(struct device *dev,
struct device_attribute *attr, char *page)
{
struct ptp_clock *ptp = dev_get_drvdata(dev);
- return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name);
+ return sysfs_emit(page, "%s\n", ptp->info->name);
}
static DEVICE_ATTR_RO(clock_name);
@@ -387,7 +387,7 @@ static ssize_t ptp_pin_show(struct device *dev, struct device_attribute *attr,
mutex_unlock(&ptp->pincfg_mux);
- return snprintf(page, PAGE_SIZE, "%u %u\n", func, chan);
+ return sysfs_emit(page, "%u %u\n", func, chan);
}
static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c
index ab1d233173e1..cb179a3ea508 100644
--- a/drivers/ptp/ptp_vclock.c
+++ b/drivers/ptp/ptp_vclock.c
@@ -57,6 +57,30 @@ static int ptp_vclock_gettime(struct ptp_clock_info *ptp,
return 0;
}
+static int ptp_vclock_gettimex(struct ptp_clock_info *ptp,
+ struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
+{
+ struct ptp_vclock *vclock = info_to_vclock(ptp);
+ struct ptp_clock *pptp = vclock->pclock;
+ struct timespec64 pts;
+ unsigned long flags;
+ int err;
+ u64 ns;
+
+ err = pptp->info->gettimex64(pptp->info, &pts, sts);
+ if (err)
+ return err;
+
+ spin_lock_irqsave(&vclock->lock, flags);
+ ns = timecounter_cyc2time(&vclock->tc, timespec64_to_ns(&pts));
+ spin_unlock_irqrestore(&vclock->lock, flags);
+
+ *ts = ns_to_timespec64(ns);
+
+ return 0;
+}
+
static int ptp_vclock_settime(struct ptp_clock_info *ptp,
const struct timespec64 *ts)
{
@@ -71,6 +95,28 @@ static int ptp_vclock_settime(struct ptp_clock_info *ptp,
return 0;
}
+static int ptp_vclock_getcrosststamp(struct ptp_clock_info *ptp,
+ struct system_device_crosststamp *xtstamp)
+{
+ struct ptp_vclock *vclock = info_to_vclock(ptp);
+ struct ptp_clock *pptp = vclock->pclock;
+ unsigned long flags;
+ int err;
+ u64 ns;
+
+ err = pptp->info->getcrosststamp(pptp->info, xtstamp);
+ if (err)
+ return err;
+
+ spin_lock_irqsave(&vclock->lock, flags);
+ ns = timecounter_cyc2time(&vclock->tc, ktime_to_ns(xtstamp->device));
+ spin_unlock_irqrestore(&vclock->lock, flags);
+
+ xtstamp->device = ns_to_ktime(ns);
+
+ return 0;
+}
+
static long ptp_vclock_refresh(struct ptp_clock_info *ptp)
{
struct ptp_vclock *vclock = info_to_vclock(ptp);
@@ -84,11 +130,9 @@ static long ptp_vclock_refresh(struct ptp_clock_info *ptp)
static const struct ptp_clock_info ptp_vclock_info = {
.owner = THIS_MODULE,
.name = "ptp virtual clock",
- /* The maximum ppb value that long scaled_ppm can support */
- .max_adj = 32767999,
+ .max_adj = 500000000,
.adjfine = ptp_vclock_adjfine,
.adjtime = ptp_vclock_adjtime,
- .gettime64 = ptp_vclock_gettime,
.settime64 = ptp_vclock_settime,
.do_aux_work = ptp_vclock_refresh,
};
@@ -124,6 +168,12 @@ struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock)
vclock->pclock = pclock;
vclock->info = ptp_vclock_info;
+ if (pclock->info->gettimex64)
+ vclock->info.gettimex64 = ptp_vclock_gettimex;
+ else
+ vclock->info.gettime64 = ptp_vclock_gettime;
+ if (pclock->info->getcrosststamp)
+ vclock->info.getcrosststamp = ptp_vclock_getcrosststamp;
vclock->cc = ptp_vclock_cc;
snprintf(vclock->info.name, PTP_CLOCK_NAME_LEN, "ptp%d_virt",
diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c
index 058b78fac5e3..0a3fb6c115f4 100644
--- a/drivers/soc/fsl/dpio/qbman-portal.c
+++ b/drivers/soc/fsl/dpio/qbman-portal.c
@@ -743,8 +743,8 @@ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
full_mask = s->eqcr.pi_ci_mask;
if (!s->eqcr.available) {
eqcr_ci = s->eqcr.ci;
- p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK;
- s->eqcr.ci = *p & full_mask;
+ s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI);
+ s->eqcr.ci &= full_mask;
s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
eqcr_ci, s->eqcr.ci);
if (!s->eqcr.available) {
@@ -887,8 +887,8 @@ int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
full_mask = s->eqcr.pi_ci_mask;
if (!s->eqcr.available) {
eqcr_ci = s->eqcr.ci;
- p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK;
- s->eqcr.ci = *p & full_mask;
+ s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI);
+ s->eqcr.ci &= full_mask;
s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
eqcr_ci, s->eqcr.ci);
if (!s->eqcr.available)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index b525d8cdc25b..88a51b242adc 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -8,6 +8,7 @@
#include <linux/jump_label.h>
#include <linux/percpu.h>
#include <linux/rbtree.h>
+#include <net/sock.h>
#include <uapi/linux/bpf.h>
struct sock;
@@ -165,11 +166,23 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
void *value, u64 flags);
+/* Opportunistic check to see whether we have any BPF program attached*/
+static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
+ enum cgroup_bpf_attach_type type)
+{
+ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ struct bpf_prog_array *array;
+
+ array = rcu_access_pointer(cgrp->bpf.effective[type]);
+ return array != &bpf_empty_prog_array.hdr;
+}
+
/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(CGROUP_INET_INGRESS)) \
+ if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && \
+ cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) \
__ret = __cgroup_bpf_run_filter_skb(sk, skb, \
CGROUP_INET_INGRESS); \
\
@@ -181,7 +194,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
int __ret = 0; \
if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
typeof(sk) __sk = sk_to_full_sk(sk); \
- if (sk_fullsock(__sk)) \
+ if (sk_fullsock(__sk) && \
+ cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \
__ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
CGROUP_INET_EGRESS); \
} \
@@ -347,7 +361,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
kernel_optval) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT)) \
+ if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT) && \
+ cgroup_bpf_sock_enabled(sock, CGROUP_SETSOCKOPT)) \
__ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
optname, optval, \
optlen, \
@@ -367,7 +382,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
max_optlen, retval) \
({ \
int __ret = retval; \
- if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \
+ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT) && \
+ cgroup_bpf_sock_enabled(sock, CGROUP_GETSOCKOPT)) \
if (!(sock)->sk_prot->bpf_bypass_getsockopt || \
!INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \
tcp_bpf_bypass_getsockopt, \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index fa517ae604ad..2fc7e5c5ef41 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -194,6 +194,17 @@ struct bpf_map {
struct work_struct work;
struct mutex freeze_mutex;
atomic64_t writecnt;
+ /* 'Ownership' of program-containing map is claimed by the first program
+ * that is going to use this map or by the first program which FD is
+ * stored in the map to make sure that all callers and callees have the
+ * same prog type, JITed flag and xdp_has_frags flag.
+ */
+ struct {
+ spinlock_t lock;
+ enum bpf_prog_type type;
+ bool jited;
+ bool xdp_has_frags;
+ } owner;
};
static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -321,7 +332,10 @@ enum bpf_type_flag {
*/
MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS),
- __BPF_TYPE_LAST_FLAG = MEM_ALLOC,
+ /* MEM is in user address space. */
+ MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS),
+
+ __BPF_TYPE_LAST_FLAG = MEM_USER,
};
/* Max number of base types. */
@@ -577,8 +591,7 @@ struct bpf_verifier_ops {
const struct btf *btf,
const struct btf_type *t, int off, int size,
enum bpf_access_type atype,
- u32 *next_btf_id);
- bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner);
+ u32 *next_btf_id, enum bpf_type_flag *flag);
};
struct bpf_prog_offload_ops {
@@ -833,8 +846,8 @@ void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
-int bpf_jit_charge_modmem(u32 pages);
-void bpf_jit_uncharge_modmem(u32 pages);
+int bpf_jit_charge_modmem(u32 size);
+void bpf_jit_uncharge_modmem(u32 size);
bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
#else
static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
@@ -939,6 +952,8 @@ struct bpf_prog_aux {
bool func_proto_unreliable;
bool sleepable;
bool tail_call_reachable;
+ bool xdp_has_frags;
+ bool use_bpf_prog_pack;
struct hlist_node tramp_hlist;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
@@ -999,16 +1014,6 @@ struct bpf_prog_aux {
};
struct bpf_array_aux {
- /* 'Ownership' of prog array is claimed by the first program that
- * is going to use this map or by the first program which FD is
- * stored in the map to make sure that all callers and callees have
- * the same prog type and JITed flag.
- */
- struct {
- spinlock_t lock;
- enum bpf_prog_type type;
- bool jited;
- } owner;
/* Programs with direct jumps into programs part of this array. */
struct list_head poke_progs;
struct bpf_map *map;
@@ -1183,7 +1188,14 @@ struct bpf_event_entry {
struct rcu_head rcu;
};
-bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+static inline bool map_type_contains_progs(struct bpf_map *map)
+{
+ return map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
+ map->map_type == BPF_MAP_TYPE_DEVMAP ||
+ map->map_type == BPF_MAP_TYPE_CPUMAP;
+}
+
+bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp);
int bpf_prog_calc_tag(struct bpf_prog *fp);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
@@ -1225,6 +1237,19 @@ struct bpf_prog_array {
struct bpf_prog_array_item items[];
};
+struct bpf_empty_prog_array {
+ struct bpf_prog_array hdr;
+ struct bpf_prog *null_prog;
+};
+
+/* to avoid allocating empty bpf_prog_array for cgroups that
+ * don't have bpf program attached use one global 'bpf_empty_prog_array'
+ * It will not be modified the caller of bpf_prog_array_alloc()
+ * (since caller requested prog_cnt == 0)
+ * that pointer should be 'freed' by bpf_prog_array_free()
+ */
+extern struct bpf_empty_prog_array bpf_empty_prog_array;
+
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
void bpf_prog_array_free(struct bpf_prog_array *progs);
int bpf_prog_array_length(struct bpf_prog_array *progs);
@@ -1251,6 +1276,7 @@ struct bpf_run_ctx {};
struct bpf_cg_run_ctx {
struct bpf_run_ctx run_ctx;
const struct bpf_prog_array_item *prog_item;
+ int retval;
};
struct bpf_trace_run_ctx {
@@ -1283,19 +1309,19 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
-static __always_inline u32
+static __always_inline int
BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
const void *ctx, bpf_prog_run_fn run_prog,
- u32 *ret_flags)
+ int retval, u32 *ret_flags)
{
const struct bpf_prog_array_item *item;
const struct bpf_prog *prog;
const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx;
struct bpf_cg_run_ctx run_ctx;
- u32 ret = 1;
u32 func_ret;
+ run_ctx.retval = retval;
migrate_disable();
rcu_read_lock();
array = rcu_dereference(array_rcu);
@@ -1304,27 +1330,29 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
while ((prog = READ_ONCE(item->prog))) {
run_ctx.prog_item = item;
func_ret = run_prog(prog, ctx);
- ret &= (func_ret & 1);
+ if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
+ run_ctx.retval = -EPERM;
*(ret_flags) |= (func_ret >> 1);
item++;
}
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
migrate_enable();
- return ret;
+ return run_ctx.retval;
}
-static __always_inline u32
+static __always_inline int
BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
- const void *ctx, bpf_prog_run_fn run_prog)
+ const void *ctx, bpf_prog_run_fn run_prog,
+ int retval)
{
const struct bpf_prog_array_item *item;
const struct bpf_prog *prog;
const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx;
struct bpf_cg_run_ctx run_ctx;
- u32 ret = 1;
+ run_ctx.retval = retval;
migrate_disable();
rcu_read_lock();
array = rcu_dereference(array_rcu);
@@ -1332,13 +1360,14 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
while ((prog = READ_ONCE(item->prog))) {
run_ctx.prog_item = item;
- ret &= run_prog(prog, ctx);
+ if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
+ run_ctx.retval = -EPERM;
item++;
}
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
migrate_enable();
- return ret;
+ return run_ctx.retval;
}
static __always_inline u32
@@ -1391,19 +1420,21 @@ out:
* 0: NET_XMIT_SUCCESS skb should be transmitted
* 1: NET_XMIT_DROP skb should be dropped and cn
* 2: NET_XMIT_CN skb should be transmitted and cn
- * 3: -EPERM skb should be dropped
+ * 3: -err skb should be dropped
*/
#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
({ \
u32 _flags = 0; \
bool _cn; \
u32 _ret; \
- _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
+ _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
_cn = _flags & BPF_RET_SET_CN; \
- if (_ret) \
+ if (_ret && !IS_ERR_VALUE((long)_ret)) \
+ _ret = -EFAULT; \
+ if (!_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \
- _ret = (_cn ? NET_XMIT_DROP : -EPERM); \
+ _ret = (_cn ? NET_XMIT_DROP : _ret); \
_ret; \
})
@@ -1724,7 +1755,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner);
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
@@ -1754,7 +1784,7 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size,
int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, int off, int size,
enum bpf_access_type atype,
- u32 *next_btf_id);
+ u32 *next_btf_id, enum bpf_type_flag *flag);
bool btf_struct_ids_match(struct bpf_verifier_log *log,
const struct btf *btf, u32 id, int off,
const struct btf *need_btf, u32 need_type_id);
@@ -1862,11 +1892,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
return -EOPNOTSUPP;
}
-static inline bool dev_map_can_have_prog(struct bpf_map *map)
-{
- return false;
-}
-
static inline void __dev_flush(void)
{
}
@@ -1930,11 +1955,6 @@ static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
return -EOPNOTSUPP;
}
-static inline bool cpu_map_prog_allowed(struct bpf_map *map)
-{
- return false;
-}
-
static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
enum bpf_prog_type type)
{
@@ -1976,12 +1996,6 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
return -ENOTSUPP;
}
-static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id,
- struct module *owner)
-{
- return false;
-}
-
static inline void bpf_map_put(struct bpf_map *map)
{
}
@@ -2076,6 +2090,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr);
+
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
@@ -2129,6 +2146,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{
return -EOPNOTSUPP;
}
+
+static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
@@ -2227,6 +2250,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
extern const struct bpf_func_proto bpf_find_vma_proto;
extern const struct bpf_func_proto bpf_loop_proto;
extern const struct bpf_func_proto bpf_strncmp_proto;
+extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
const struct bpf_func_proto *tracing_prog_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -2339,6 +2363,8 @@ enum bpf_text_poke_type {
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *addr1, void *addr2);
+void *bpf_arch_text_copy(void *dst, void *src, size_t len);
+
struct btf_id_set;
bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index e9993172f892..7a7be8c057f2 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -521,6 +521,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
int check_ptr_off_reg(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno);
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ u32 regno);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
@@ -564,4 +566,9 @@ static inline u32 type_flag(u32 type)
return type & ~BPF_BASE_TYPE_MASK;
}
+static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
+{
+ return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
+}
+
#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 0c74348cbc9d..36bc09b8e890 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -12,11 +12,33 @@
#define BTF_TYPE_EMIT(type) ((void)(type *)0)
#define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val)
+enum btf_kfunc_type {
+ BTF_KFUNC_TYPE_CHECK,
+ BTF_KFUNC_TYPE_ACQUIRE,
+ BTF_KFUNC_TYPE_RELEASE,
+ BTF_KFUNC_TYPE_RET_NULL,
+ BTF_KFUNC_TYPE_MAX,
+};
+
struct btf;
struct btf_member;
struct btf_type;
union bpf_attr;
struct btf_show;
+struct btf_id_set;
+
+struct btf_kfunc_id_set {
+ struct module *owner;
+ union {
+ struct {
+ struct btf_id_set *check_set;
+ struct btf_id_set *acquire_set;
+ struct btf_id_set *release_set;
+ struct btf_id_set *ret_null_set;
+ };
+ struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX];
+ };
+};
extern const struct file_operations btf_fops;
@@ -216,6 +238,11 @@ static inline bool btf_type_is_var(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
}
+static inline bool btf_type_is_type_tag(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG;
+}
+
/* union is only a special case of struct:
* all its offsetof(member) == 0
*/
@@ -300,6 +327,11 @@ static inline const struct btf_var_secinfo *btf_type_var_secinfo(
return (const struct btf_var_secinfo *)(t + 1);
}
+static inline struct btf_param *btf_params(const struct btf_type *t)
+{
+ return (struct btf_param *)(t + 1);
+}
+
#ifdef CONFIG_BPF_SYSCALL
struct bpf_prog;
@@ -307,6 +339,11 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+ enum bpf_prog_type prog_type,
+ enum btf_kfunc_type type, u32 kfunc_btf_id);
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+ const struct btf_kfunc_id_set *s);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
@@ -318,50 +355,18 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
{
return NULL;
}
-#endif
-
-struct kfunc_btf_id_set {
- struct list_head list;
- struct btf_id_set *set;
- struct module *owner;
-};
-
-struct kfunc_btf_id_list {
- struct list_head list;
- struct mutex mutex;
-};
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s);
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s);
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
- struct module *owner);
-
-extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
-extern struct kfunc_btf_id_list prog_test_kfunc_list;
-#else
-static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s)
-{
-}
-static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s)
+static inline bool btf_kfunc_id_set_contains(const struct btf *btf,
+ enum bpf_prog_type prog_type,
+ enum btf_kfunc_type type,
+ u32 kfunc_btf_id)
{
+ return false;
}
-static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
- u32 kfunc_id, struct module *owner)
+static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+ const struct btf_kfunc_id_set *s)
{
- return false;
+ return 0;
}
-
-static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused;
-static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused;
#endif
-#define DEFINE_KFUNC_BTF_ID_SET(set, name) \
- struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \
- THIS_MODULE }
-
#endif
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 919c0fde1c51..bc5d9cc34e4c 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -11,6 +11,7 @@ struct btf_id_set {
#ifdef CONFIG_DEBUG_INFO_BTF
#include <linux/compiler.h> /* for __PASTE */
+#include <linux/compiler_attributes.h> /* for __maybe_unused */
/*
* Following macros help to define lists of BTF IDs placed
@@ -146,14 +147,14 @@ extern struct btf_id_set name;
#else
-#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
-#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n];
-#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
-#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1];
-#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
-#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1];
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1];
+#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 };
#define BTF_SET_END(name)
#endif /* CONFIG_DEBUG_INFO_BTF */
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 3c1795fdb568..3f31ff400432 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -31,6 +31,9 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
# define __kernel
# ifdef STRUCTLEAK_PLUGIN
# define __user __attribute__((user))
+# elif defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \
+ __has_attribute(btf_type_tag)
+# define __user __attribute__((btf_type_tag("user")))
# else
# define __user
# endif
diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h
new file mode 100644
index 000000000000..4359fb0221cf
--- /dev/null
+++ b/include/linux/dsa/tag_qca.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __TAG_QCA_H
+#define __TAG_QCA_H
+
+#define QCA_HDR_LEN 2
+#define QCA_HDR_VERSION 0x2
+
+#define QCA_HDR_RECV_VERSION GENMASK(15, 14)
+#define QCA_HDR_RECV_PRIORITY GENMASK(13, 11)
+#define QCA_HDR_RECV_TYPE GENMASK(10, 6)
+#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3)
+#define QCA_HDR_RECV_SOURCE_PORT GENMASK(2, 0)
+
+/* Packet type for recv */
+#define QCA_HDR_RECV_TYPE_NORMAL 0x0
+#define QCA_HDR_RECV_TYPE_MIB 0x1
+#define QCA_HDR_RECV_TYPE_RW_REG_ACK 0x2
+
+#define QCA_HDR_XMIT_VERSION GENMASK(15, 14)
+#define QCA_HDR_XMIT_PRIORITY GENMASK(13, 11)
+#define QCA_HDR_XMIT_CONTROL GENMASK(10, 8)
+#define QCA_HDR_XMIT_FROM_CPU BIT(7)
+#define QCA_HDR_XMIT_DP_BIT GENMASK(6, 0)
+
+/* Packet type for xmit */
+#define QCA_HDR_XMIT_TYPE_NORMAL 0x0
+#define QCA_HDR_XMIT_TYPE_RW_REG 0x1
+
+/* Check code for a valid mgmt packet. Switch will ignore the packet
+ * with this wrong.
+ */
+#define QCA_HDR_MGMT_CHECK_CODE_VAL 0x5
+
+/* Specific define for in-band MDIO read/write with Ethernet packet */
+#define QCA_HDR_MGMT_SEQ_LEN 4 /* 4 byte for the seq */
+#define QCA_HDR_MGMT_COMMAND_LEN 4 /* 4 byte for the command */
+#define QCA_HDR_MGMT_DATA1_LEN 4 /* First 4 byte for the mdio data */
+#define QCA_HDR_MGMT_HEADER_LEN (QCA_HDR_MGMT_SEQ_LEN + \
+ QCA_HDR_MGMT_COMMAND_LEN + \
+ QCA_HDR_MGMT_DATA1_LEN)
+
+#define QCA_HDR_MGMT_DATA2_LEN 12 /* Other 12 byte for the mdio data */
+#define QCA_HDR_MGMT_PADDING_LEN 34 /* Padding to reach the min Ethernet packet */
+
+#define QCA_HDR_MGMT_PKT_LEN (QCA_HDR_MGMT_HEADER_LEN + \
+ QCA_HDR_LEN + \
+ QCA_HDR_MGMT_DATA2_LEN + \
+ QCA_HDR_MGMT_PADDING_LEN)
+
+#define QCA_HDR_MGMT_SEQ_NUM GENMASK(31, 0) /* 63, 32 */
+#define QCA_HDR_MGMT_CHECK_CODE GENMASK(31, 29) /* 31, 29 */
+#define QCA_HDR_MGMT_CMD BIT(28) /* 28 */
+#define QCA_HDR_MGMT_LENGTH GENMASK(23, 20) /* 23, 20 */
+#define QCA_HDR_MGMT_ADDR GENMASK(18, 0) /* 18, 0 */
+
+/* Special struct emulating a Ethernet header */
+struct qca_mgmt_ethhdr {
+ u32 command; /* command bit 31:0 */
+ u32 seq; /* seq 63:32 */
+ u32 mdio_data; /* first 4byte mdio */
+ __be16 hdr; /* qca hdr */
+} __packed;
+
+enum mdio_cmd {
+ MDIO_WRITE = 0x0,
+ MDIO_READ
+};
+
+struct mib_ethhdr {
+ u32 data[3]; /* first 3 mib counter */
+ __be16 hdr; /* qca hdr */
+} __packed;
+
+struct qca_tagger_data {
+ void (*rw_reg_ack_handler)(struct dsa_switch *ds,
+ struct sk_buff *skb);
+ void (*mib_autocast_handler)(struct dsa_switch *ds,
+ struct sk_buff *skb);
+};
+
+#endif /* __TAG_QCA_H */
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 11efc45de66a..e0853f48b75e 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -70,9 +70,11 @@ enum {
/**
* struct kernel_ethtool_ringparam - RX/TX ring configuration
* @rx_buf_len: Current length of buffers on the rx ring.
+ * @tcp_data_split: Scatter packet headers and data to separate buffers
*/
struct kernel_ethtool_ringparam {
u32 rx_buf_len;
+ u8 tcp_data_split;
};
/**
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 71fa57b88bfc..1cb1af917617 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,7 +548,7 @@ struct sock_fprog_kern {
#define BPF_IMAGE_ALIGNMENT 8
struct bpf_binary_header {
- u32 pages;
+ u32 size;
u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
};
@@ -886,17 +886,8 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
{
set_vm_flush_reset_perms(hdr);
- set_memory_ro((unsigned long)hdr, hdr->pages);
- set_memory_x((unsigned long)hdr, hdr->pages);
-}
-
-static inline struct bpf_binary_header *
-bpf_jit_binary_hdr(const struct bpf_prog *fp)
-{
- unsigned long real_start = (unsigned long)fp->bpf_func;
- unsigned long addr = real_start & PAGE_MASK;
-
- return (void *)addr;
+ set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
+ set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
}
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
@@ -1068,6 +1059,18 @@ void *bpf_jit_alloc_exec(unsigned long size);
void bpf_jit_free_exec(void *addr);
void bpf_jit_free(struct bpf_prog *fp);
+struct bpf_binary_header *
+bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image,
+ unsigned int alignment,
+ struct bpf_binary_header **rw_hdr,
+ u8 **rw_image,
+ bpf_jit_fill_hole_t bpf_fill_ill_insns);
+int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
+ struct bpf_binary_header *ro_header,
+ struct bpf_binary_header *rw_header);
+void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
+ struct bpf_binary_header *rw_header);
+
int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
struct bpf_jit_poke_descriptor *poke);
@@ -1356,7 +1359,10 @@ struct bpf_sockopt_kern {
s32 level;
s32 optname;
s32 optlen;
- s32 retval;
+ /* for retval in struct bpf_cg_run_ctx */
+ struct task_struct *current_task;
+ /* Temporary "register" for indirect stores to ppos. */
+ u64 tmp_reg;
};
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index a59d25f19385..16870f86c74d 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -51,7 +51,7 @@ struct ipv6_devconf {
__s32 use_optimistic;
#endif
#ifdef CONFIG_IPV6_MROUTE
- __s32 mc_forwarding;
+ atomic_t mc_forwarding;
#endif
__s32 disable_ipv6;
__s32 drop_unicast_in_l2_multicast;
@@ -371,19 +371,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
return NULL;
}
-static inline struct inet6_request_sock *
- inet6_rsk(const struct request_sock *rsk)
-{
- return NULL;
-}
-
static inline struct raw6_sock *raw6_sk(const struct sock *sk)
{
return NULL;
}
#define inet6_rcv_saddr(__sk) NULL
-#define tcp_twsk_ipv6only(__sk) 0
#define inet_v6_ipv6only(__sk) 0
#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif /* _IPV6_H */
diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h
index f8397f300fcd..15e0e0209da4 100644
--- a/include/linux/linkmode.h
+++ b/include/linux/linkmode.h
@@ -66,11 +66,6 @@ static inline void linkmode_mod_bit(int nr, volatile unsigned long *addr,
linkmode_clear_bit(nr, addr);
}
-static inline void linkmode_change_bit(int nr, volatile unsigned long *addr)
-{
- __change_bit(nr, addr);
-}
-
static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr)
{
return test_bit(nr, addr);
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 12ea29e04293..5ee13083cec7 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -355,56 +355,6 @@ static inline u32 mii_adv_to_ethtool_adv_x(u32 adv)
}
/**
- * mii_lpa_mod_linkmode_adv_sgmii
- * @lp_advertising: pointer to destination link mode.
- * @lpa: value of the MII_LPA register
- *
- * A small helper function that translates MII_LPA bits to
- * linkmode advertisement settings for SGMII.
- * Leaves other bits unchanged.
- */
-static inline void
-mii_lpa_mod_linkmode_lpa_sgmii(unsigned long *lp_advertising, u32 lpa)
-{
- u32 speed_duplex = lpa & LPA_SGMII_DPX_SPD_MASK;
-
- linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, lp_advertising,
- speed_duplex == LPA_SGMII_1000HALF);
-
- linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, lp_advertising,
- speed_duplex == LPA_SGMII_1000FULL);
-
- linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, lp_advertising,
- speed_duplex == LPA_SGMII_100HALF);
-
- linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, lp_advertising,
- speed_duplex == LPA_SGMII_100FULL);
-
- linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, lp_advertising,
- speed_duplex == LPA_SGMII_10HALF);
-
- linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, lp_advertising,
- speed_duplex == LPA_SGMII_10FULL);
-}
-
-/**
- * mii_lpa_to_linkmode_adv_sgmii
- * @advertising: pointer to destination link mode.
- * @lpa: value of the MII_LPA register
- *
- * A small helper function that translates MII_ADVERTISE bits
- * to linkmode advertisement settings when in SGMII mode.
- * Clears the old value of advertising.
- */
-static inline void mii_lpa_to_linkmode_lpa_sgmii(unsigned long *lp_advertising,
- u32 lpa)
-{
- linkmode_zero(lp_advertising);
-
- mii_lpa_mod_linkmode_lpa_sgmii(lp_advertising, lpa);
-}
-
-/**
* mii_adv_mod_linkmode_adv_t
* @advertising:pointer to destination link mode.
* @adv: value of the MII_ADVERTISE register
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 598ac3bcc901..27145c4d6820 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -64,13 +64,6 @@ enum {
};
enum {
- MLX5_MODIFY_TIR_BITMASK_LRO = 0x0,
- MLX5_MODIFY_TIR_BITMASK_INDIRECT_TABLE = 0x1,
- MLX5_MODIFY_TIR_BITMASK_HASH = 0x2,
- MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN = 0x3
-};
-
-enum {
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0,
MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2,
MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3,
diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h
index 6b3267b49755..ed42bd5f639f 100644
--- a/include/linux/net/intel/i40e_client.h
+++ b/include/linux/net/intel/i40e_client.h
@@ -26,11 +26,6 @@ struct i40e_client_version {
u8 rsvd;
};
-enum i40e_client_state {
- __I40E_CLIENT_NULL,
- __I40E_CLIENT_REGISTERED
-};
-
enum i40e_client_instance_state {
__I40E_CLIENT_INSTANCE_NONE,
__I40E_CLIENT_INSTANCE_OPENED,
@@ -190,11 +185,6 @@ struct i40e_client {
const struct i40e_client_ops *ops; /* client ops provided by the client */
};
-static inline bool i40e_client_is_registered(struct i40e_client *client)
-{
- return test_bit(__I40E_CLIENT_REGISTERED, &client->state);
-}
-
void i40e_client_device_register(struct i40e_info *ldev, struct i40e_client *client);
void i40e_client_device_unregister(struct i40e_info *ldev);
diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h
index 1289593411d3..1c1332e4df26 100644
--- a/include/linux/net/intel/iidc.h
+++ b/include/linux/net/intel/iidc.h
@@ -32,6 +32,8 @@ enum iidc_rdma_protocol {
};
#define IIDC_MAX_USER_PRIORITY 8
+#define IIDC_MAX_DSCP_MAPPING 64
+#define IIDC_DSCP_PFC_MODE 0x1
/* Struct to hold per RDMA Qset info */
struct iidc_rdma_qset_params {
@@ -60,6 +62,8 @@ struct iidc_qos_params {
u8 vport_relative_bw;
u8 vport_priority_type;
u8 num_tc;
+ u8 pfc_mode;
+ u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
};
struct iidc_event {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e490b84732d1..5f6e2c0b0c90 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1948,6 +1948,8 @@ enum netdev_ml_priv_type {
* @dev_addr_shadow: Copy of @dev_addr to catch direct writes.
* @linkwatch_dev_tracker: refcount tracker used by linkwatch.
* @watchdog_dev_tracker: refcount tracker used by watchdog.
+ * @dev_registered_tracker: tracker for reference held while
+ * registered
*
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
@@ -2282,6 +2284,7 @@ struct net_device {
u8 dev_addr_shadow[MAX_ADDR_LEN];
netdevice_tracker linkwatch_dev_tracker;
netdevice_tracker watchdog_dev_tracker;
+ netdevice_tracker dev_registered_tracker;
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
@@ -3817,14 +3820,7 @@ extern unsigned int netdev_budget_usecs;
/* Called by rtnetlink.c:rtnl_unlock() */
void netdev_run_todo(void);
-/**
- * dev_put - release reference to device
- * @dev: network device
- *
- * Release reference to device to allow it to be freed.
- * Try using dev_put_track() instead.
- */
-static inline void dev_put(struct net_device *dev)
+static inline void __dev_put(struct net_device *dev)
{
if (dev) {
#ifdef CONFIG_PCPU_DEV_REFCNT
@@ -3835,14 +3831,7 @@ static inline void dev_put(struct net_device *dev)
}
}
-/**
- * dev_hold - get reference to device
- * @dev: network device
- *
- * Hold reference to device to keep it from being freed.
- * Try using dev_hold_track() instead.
- */
-static inline void dev_hold(struct net_device *dev)
+static inline void __dev_hold(struct net_device *dev)
{
if (dev) {
#ifdef CONFIG_PCPU_DEV_REFCNT
@@ -3853,11 +3842,24 @@ static inline void dev_hold(struct net_device *dev)
}
}
+static inline void __netdev_tracker_alloc(struct net_device *dev,
+ netdevice_tracker *tracker,
+ gfp_t gfp)
+{
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+ ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp);
+#endif
+}
+
+/* netdev_tracker_alloc() can upgrade a prior untracked reference
+ * taken by dev_get_by_name()/dev_get_by_index() to a tracked one.
+ */
static inline void netdev_tracker_alloc(struct net_device *dev,
netdevice_tracker *tracker, gfp_t gfp)
{
#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
- ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp);
+ refcount_dec(&dev->refcnt_tracker.no_tracker);
+ __netdev_tracker_alloc(dev, tracker, gfp);
#endif
}
@@ -3873,8 +3875,8 @@ static inline void dev_hold_track(struct net_device *dev,
netdevice_tracker *tracker, gfp_t gfp)
{
if (dev) {
- dev_hold(dev);
- netdev_tracker_alloc(dev, tracker, gfp);
+ __dev_hold(dev);
+ __netdev_tracker_alloc(dev, tracker, gfp);
}
}
@@ -3883,10 +3885,34 @@ static inline void dev_put_track(struct net_device *dev,
{
if (dev) {
netdev_tracker_free(dev, tracker);
- dev_put(dev);
+ __dev_put(dev);
}
}
+/**
+ * dev_hold - get reference to device
+ * @dev: network device
+ *
+ * Hold reference to device to keep it from being freed.
+ * Try using dev_hold_track() instead.
+ */
+static inline void dev_hold(struct net_device *dev)
+{
+ dev_hold_track(dev, NULL, GFP_ATOMIC);
+}
+
+/**
+ * dev_put - release reference to device
+ * @dev: network device
+ *
+ * Release reference to device to allow it to be freed.
+ * Try using dev_put_track() instead.
+ */
+static inline void dev_put(struct net_device *dev)
+{
+ dev_put_track(dev, NULL);
+}
+
static inline void dev_replace_track(struct net_device *odev,
struct net_device *ndev,
netdevice_tracker *tracker,
@@ -3895,11 +3921,11 @@ static inline void dev_replace_track(struct net_device *odev,
if (odev)
netdev_tracker_free(odev, tracker);
- dev_hold(ndev);
- dev_put(odev);
+ __dev_hold(ndev);
+ __dev_put(odev);
if (ndev)
- netdev_tracker_alloc(ndev, tracker, gfp);
+ __netdev_tracker_alloc(ndev, tracker, gfp);
}
/* Carrier loss detection, dial on demand. The functions netif_carrier_on
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 15e71bfff726..c2c6f332fb90 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -379,6 +379,7 @@ struct nf_nat_hook {
unsigned int (*manip_pkt)(struct sk_buff *skb, struct nf_conn *ct,
enum nf_nat_manip_type mtype,
enum ip_conntrack_dir dir);
+ void (*remove_nat_bysrc)(struct nf_conn *ct);
};
extern const struct nf_nat_hook __rcu *nf_nat_hook;
diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h
index a28aa289afdc..c3bdb4370938 100644
--- a/include/linux/netfilter/nf_conntrack_pptp.h
+++ b/include/linux/netfilter/nf_conntrack_pptp.h
@@ -300,26 +300,22 @@ union pptp_ctrl_union {
struct PptpSetLinkInfo setlink;
};
-extern int
-(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
- struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- unsigned int protoff,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq);
-
-extern int
-(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
- struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- unsigned int protoff,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq);
-
-extern void
-(*nf_nat_pptp_hook_exp_gre)(struct nf_conntrack_expect *exp_orig,
- struct nf_conntrack_expect *exp_reply);
-
-extern void
-(*nf_nat_pptp_hook_expectfn)(struct nf_conn *ct,
- struct nf_conntrack_expect *exp);
+struct nf_nat_pptp_hook {
+ int (*outbound)(struct sk_buff *skb,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq);
+ int (*inbound)(struct sk_buff *skb,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq);
+ void (*exp_gre)(struct nf_conntrack_expect *exp_orig,
+ struct nf_conntrack_expect *exp_reply);
+ void (*expectfn)(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp);
+};
+extern const struct nf_nat_pptp_hook __rcu *nf_nat_pptp_hook;
#endif /* _NF_CONNTRACK_PPTP_H */
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 1ec631838af9..bda1c385cffb 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -135,15 +135,6 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
extack->cookie_len = sizeof(cookie);
}
-static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack,
- u32 cookie)
-{
- if (!extack)
- return;
- memcpy(extack->cookie, &cookie, sizeof(cookie));
- extack->cookie_len = sizeof(cookie);
-}
-
void netlink_kernel_release(struct sock *sk);
int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
int netlink_change_ngroups(struct sock *sk, unsigned int groups);
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index add077a81b21..266eb26fb029 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -31,8 +31,7 @@ void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
phy_interface_t interface, int speed, int duplex);
int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
unsigned int mode);
-void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
- struct phylink_link_state *state);
+void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces);
int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
int enable);
struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 6de8d7a90d78..cd08cf1a8b0d 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1661,7 +1661,7 @@ int phy_disable_interrupts(struct phy_device *phydev);
void phy_request_interrupt(struct phy_device *phydev);
void phy_free_interrupt(struct phy_device *phydev);
void phy_print_status(struct phy_device *phydev);
-int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
+void phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
void phy_advertise_supported(struct phy_device *phydev);
void phy_support_sym_pause(struct phy_device *phydev);
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 713a0c928b7c..cca149f78d35 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -582,7 +582,6 @@ int phylink_speed_up(struct phylink *pl);
#define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode)
void phylink_set_port_modes(unsigned long *bits);
-void phylink_set_10g_modes(unsigned long *mask);
void phylink_helper_basex_speed(struct phylink_link_state *state);
void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state,
diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
index 60f3453be23e..9ca353ab712b 100644
--- a/include/linux/ref_tracker.h
+++ b/include/linux/ref_tracker.h
@@ -13,6 +13,8 @@ struct ref_tracker_dir {
spinlock_t lock;
unsigned int quarantine_avail;
refcount_t untracked;
+ refcount_t no_tracker;
+ bool dead;
struct list_head list; /* List of active trackers */
struct list_head quarantine; /* List of dead trackers */
#endif
@@ -26,7 +28,9 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
INIT_LIST_HEAD(&dir->quarantine);
spin_lock_init(&dir->lock);
dir->quarantine_avail = quarantine_count;
+ dir->dead = false;
refcount_set(&dir->untracked, 1);
+ refcount_set(&dir->no_tracker, 1);
stack_depot_init();
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8a636e678902..a5adbf6b51e8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -314,12 +314,38 @@ struct sk_buff;
* used to translate the reason to string.
*/
enum skb_drop_reason {
- SKB_DROP_REASON_NOT_SPECIFIED,
- SKB_DROP_REASON_NO_SOCKET,
- SKB_DROP_REASON_PKT_TOO_SMALL,
- SKB_DROP_REASON_TCP_CSUM,
- SKB_DROP_REASON_SOCKET_FILTER,
- SKB_DROP_REASON_UDP_CSUM,
+ SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */
+ SKB_DROP_REASON_NO_SOCKET, /* socket not found */
+ SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */
+ SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */
+ SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */
+ SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */
+ SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */
+ SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current
+ * host (interface is in promisc
+ * mode)
+ */
+ SKB_DROP_REASON_IP_CSUM, /* IP checksum error */
+ SKB_DROP_REASON_IP_INHDR, /* there is something wrong with
+ * IP header (see
+ * IPSTATS_MIB_INHDRERRORS)
+ */
+ SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed.
+ * see the document for rp_filter
+ * in ip-sysctl.rst for more
+ * information
+ */
+ SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2
+ * is multicast, but L3 is
+ * unicast.
+ */
+ SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */
+ SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */
+ SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buff is full */
+ SKB_DROP_REASON_PROTO_MEM, /* proto memory limition, such as
+ * udp packet drop out of
+ * udp_memory_allocated.
+ */
SKB_DROP_REASON_MAX,
};
@@ -557,6 +583,7 @@ struct skb_shared_info {
* Warning : all fields before dataref are cleared in __alloc_skb()
*/
atomic_t dataref;
+ unsigned int xdp_frags_size;
/* Intermediate layers must ensure that destructor_arg
* remains valid until skb destructor */
@@ -3898,11 +3925,6 @@ static inline ktime_t net_timedelta(ktime_t t)
return ktime_sub(ktime_get_real(), t);
}
-static inline ktime_t net_invalid_timestamp(void)
-{
- return 0;
-}
-
static inline u8 skb_metadata_len(const struct sk_buff *skb)
{
return skb_shinfo(skb)->meta_len;
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 18a717fe62eb..fdb5375f0562 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -29,7 +29,7 @@ struct sk_msg_sg {
u32 end;
u32 size;
u32 copybreak;
- unsigned long copy;
+ DECLARE_BITMAP(copy, MAX_MSG_FRAGS + 2);
/* The extra two elements:
* 1) used for chaining the front and sections when the list becomes
* partitioned (e.g. end < start). The crypto APIs require the
@@ -38,7 +38,6 @@ struct sk_msg_sg {
*/
struct scatterlist data[MAX_MSG_FRAGS + 2];
};
-static_assert(BITS_PER_LONG >= NR_MSG_FRAG_IDS);
/* UAPI in filter.c depends on struct sk_msg_sg being first element. */
struct sk_msg {
@@ -171,11 +170,6 @@ static inline u32 sk_msg_iter_dist(u32 start, u32 end)
#define sk_msg_iter_next(msg, which) \
sk_msg_iter_var_next(msg->sg.which)
-static inline void sk_msg_clear_meta(struct sk_msg *msg)
-{
- memset(&msg->sg, 0, offsetofend(struct sk_msg_sg, copy));
-}
-
static inline void sk_msg_init(struct sk_msg *msg)
{
BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != NR_MSG_FRAG_IDS);
@@ -234,7 +228,7 @@ static inline void sk_msg_compute_data_pointers(struct sk_msg *msg)
{
struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start);
- if (test_bit(msg->sg.start, &msg->sg.copy)) {
+ if (test_bit(msg->sg.start, msg->sg.copy)) {
msg->data = NULL;
msg->data_end = NULL;
} else {
@@ -253,7 +247,7 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page,
sg_set_page(sge, page, len, offset);
sg_unmark_end(sge);
- __set_bit(msg->sg.end, &msg->sg.copy);
+ __set_bit(msg->sg.end, msg->sg.copy);
msg->sg.size += len;
sk_msg_iter_next(msg, end);
}
@@ -262,9 +256,9 @@ static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state)
{
do {
if (copy_state)
- __set_bit(i, &msg->sg.copy);
+ __set_bit(i, msg->sg.copy);
else
- __clear_bit(i, &msg->sg.copy);
+ __clear_bit(i, msg->sg.copy);
sk_msg_iter_var_next(i);
if (i == msg->sg.end)
break;
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 571f605bc91e..382af90320ac 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -88,6 +88,7 @@ struct svc_xprt {
struct list_head xpt_users; /* callbacks on free */
struct net *xpt_net;
+ netns_tracker ns_tracker;
const struct cred *xpt_cred;
struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */
struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 955ea4d7af0b..3cdc8d878d81 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -284,6 +284,7 @@ struct rpc_xprt {
} stat;
struct net *xprt_net;
+ netns_tracker ns_tracker;
const char *servername;
const char *address_strings[RPC_DISPLAY_MAX];
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
diff --git a/include/linux/udp.h b/include/linux/udp.h
index ae66dadd8543..254a2654400f 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -23,11 +23,6 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
return (struct udphdr *)skb_transport_header(skb);
}
-static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb)
-{
- return (struct udphdr *)skb_inner_transport_header(skb);
-}
-
#define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256)
static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 1198a2bfc9bf..739285fe5a2f 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -273,6 +273,23 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
i->count = count;
}
+static inline int
+iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes)
+{
+ size_t shorted = 0;
+ int npages;
+
+ if (iov_iter_count(i) > max_bytes) {
+ shorted = iov_iter_count(i) - max_bytes;
+ iov_iter_truncate(i, max_bytes);
+ }
+ npages = iov_iter_npages(i, INT_MAX);
+ if (shorted)
+ iov_iter_reexpand(i, iov_iter_count(i) + shorted);
+
+ return npages;
+}
+
struct csum_state {
__wsum csum;
size_t off;
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 8221af1811df..0f9790c455bb 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -187,18 +187,12 @@ typedef struct {
typedef struct ax25_route {
struct ax25_route *next;
- refcount_t refcount;
ax25_address callsign;
struct net_device *dev;
ax25_digi *digipeat;
char ip_mode;
} ax25_route;
-static inline void ax25_hold_route(ax25_route *ax25_rt)
-{
- refcount_inc(&ax25_rt->refcount);
-}
-
void __ax25_put_route(ax25_route *ax25_rt);
extern rwlock_t ax25_route_lock;
@@ -213,12 +207,6 @@ static inline void ax25_route_lock_unuse(void)
read_unlock(&ax25_route_lock);
}
-static inline void ax25_put_route(ax25_route *ax25_rt)
-{
- if (refcount_dec_and_test(&ax25_rt->refcount))
- __ax25_put_route(ax25_rt);
-}
-
typedef struct {
char slave; /* slave_mode? */
struct timer_list slave_timer; /* timeout timer */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 586f69d084a2..f5caff1ddb29 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -258,6 +258,15 @@ struct adv_info {
#define HCI_ADV_TX_POWER_NO_PREFERENCE 0x7F
+struct monitored_device {
+ struct list_head list;
+
+ bdaddr_t bdaddr;
+ __u8 addr_type;
+ __u16 handle;
+ bool notified;
+};
+
struct adv_pattern {
struct list_head list;
__u8 ad_type;
@@ -294,6 +303,9 @@ struct adv_monitor {
#define HCI_MAX_SHORT_NAME_LENGTH 10
+#define HCI_CONN_HANDLE_UNSET 0xffff
+#define HCI_CONN_HANDLE_MAX 0x0eff
+
/* Min encryption key size to match with SMP */
#define HCI_MIN_ENC_KEY_SIZE 7
@@ -591,6 +603,9 @@ struct hci_dev {
struct delayed_work interleave_scan;
+ struct list_head monitored_devices;
+ bool advmon_pend_notify;
+
#if IS_ENABLED(CONFIG_BT_LEDS)
struct led_trigger *power_led;
#endif
@@ -1847,6 +1862,8 @@ void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle);
int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip);
int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status);
int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status);
+void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle,
+ bdaddr_t *bdaddr, u8 addr_type);
u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
u16 to_multiplier);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 107b25deae68..99266f7aebdc 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -1104,3 +1104,19 @@ struct mgmt_ev_controller_resume {
#define MGMT_WAKE_REASON_NON_BT_WAKE 0x0
#define MGMT_WAKE_REASON_UNEXPECTED 0x1
#define MGMT_WAKE_REASON_REMOTE_WAKE 0x2
+
+#define MGMT_EV_ADV_MONITOR_DEVICE_FOUND 0x002f
+struct mgmt_ev_adv_monitor_device_found {
+ __le16 monitor_handle;
+ struct mgmt_addr_info addr;
+ __s8 rssi;
+ __le32 flags;
+ __le16 eir_len;
+ __u8 eir[0];
+} __packed;
+
+#define MGMT_EV_ADV_MONITOR_DEVICE_LOST 0x0030
+struct mgmt_ev_adv_monitor_device_lost {
+ __le16 monitor_handle;
+ struct mgmt_addr_info addr;
+} __packed;
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 83cfd2d70247..7dead855a72d 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -699,20 +699,6 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
}
/* Caller must hold rcu_read_lock() for read */
-static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond,
- const u8 *mac)
-{
- struct list_head *iter;
- struct slave *tmp;
-
- bond_for_each_slave_rcu(bond, tmp, iter)
- if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
- return tmp;
-
- return NULL;
-}
-
-/* Caller must hold rcu_read_lock() for read */
static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
{
struct list_head *iter;
diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index 6ed07844eb24..833672d6fbe4 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -227,6 +227,16 @@ static inline void wpan_phy_net_set(struct wpan_phy *wpan_phy, struct net *net)
write_pnet(&wpan_phy->_net, net);
}
+/**
+ * struct ieee802154_addr - IEEE802.15.4 device address
+ * @mode: Address mode from frame header. Can be one of:
+ * - @IEEE802154_ADDR_NONE
+ * - @IEEE802154_ADDR_SHORT
+ * - @IEEE802154_ADDR_LONG
+ * @pan_id: The PAN ID this address belongs to
+ * @short_addr: address if @mode is @IEEE802154_ADDR_SHORT
+ * @extended_addr: address if @mode is @IEEE802154_ADDR_LONG
+ */
struct ieee802154_addr {
u8 mode;
__le16 pan_id;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 57b3e4e7413b..fd1f62a6e0a8 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -278,6 +278,10 @@ struct dsa_port {
u8 devlink_port_setup:1;
+ /* Master state bits, valid only on CPU ports */
+ u8 master_admin_up:1;
+ u8 master_oper_up:1;
+
u8 setup:1;
struct device_node *dn;
@@ -478,6 +482,12 @@ static inline bool dsa_port_is_unused(struct dsa_port *dp)
return dp->type == DSA_PORT_TYPE_UNUSED;
}
+static inline bool dsa_port_master_is_operational(struct dsa_port *dp)
+{
+ return dsa_port_is_cpu(dp) && dp->master_admin_up &&
+ dp->master_oper_up;
+}
+
static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p)
{
return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED;
@@ -581,6 +591,24 @@ static inline bool dsa_is_upstream_port(struct dsa_switch *ds, int port)
return port == dsa_upstream_port(ds, port);
}
+/* Return true if this is a DSA port leading away from the CPU */
+static inline bool dsa_is_downstream_port(struct dsa_switch *ds, int port)
+{
+ return dsa_is_dsa_port(ds, port) && !dsa_is_upstream_port(ds, port);
+}
+
+/* Return the local port used to reach the CPU port */
+static inline unsigned int dsa_switch_upstream_port(struct dsa_switch *ds)
+{
+ struct dsa_port *dp;
+
+ dsa_switch_for_each_available_port(dp, ds) {
+ return dsa_upstream_port(ds, dp->index);
+ }
+
+ return ds->num_ports;
+}
+
/* Return true if @upstream_ds is an upstream switch of @downstream_ds, meaning
* that the routing port from @downstream_ds to @upstream_ds is also the port
* which @downstream_ds uses to reach its dedicated CPU.
@@ -1036,6 +1064,13 @@ struct dsa_switch_ops {
int (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid,
u16 flags);
int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid);
+
+ /*
+ * DSA master tracking operations
+ */
+ void (*master_state_change)(struct dsa_switch *ds,
+ const struct net_device *master,
+ bool operational);
};
#define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \
@@ -1246,7 +1281,7 @@ module_exit(dsa_tag_driver_module_exit)
/**
* module_dsa_tag_drivers() - Helper macro for registering DSA tag
* drivers
- * @__ops_array: Array of tag driver strucutres
+ * @__ops_array: Array of tag driver structures
*
* Helper macro for DSA tag drivers which do not do anything special
* in module init/exit. Each module may only use this macro once, and
diff --git a/include/net/gro.h b/include/net/gro.h
index 8f75802d50fd..a765fedda5c4 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -29,46 +29,50 @@ struct napi_gro_cb {
/* Number of segments aggregated. */
u16 count;
- /* Start offset for remote checksum offload */
- u16 gro_remcsum_start;
+ /* Used in ipv6_gro_receive() and foo-over-udp */
+ u16 proto;
/* jiffies when first packet was created/queued */
unsigned long age;
- /* Used in ipv6_gro_receive() and foo-over-udp */
- u16 proto;
+ /* portion of the cb set to zero at every gro iteration */
+ struct_group(zeroed,
+
+ /* Start offset for remote checksum offload */
+ u16 gro_remcsum_start;
- /* This is non-zero if the packet may be of the same flow. */
- u8 same_flow:1;
+ /* This is non-zero if the packet may be of the same flow. */
+ u8 same_flow:1;
- /* Used in tunnel GRO receive */
- u8 encap_mark:1;
+ /* Used in tunnel GRO receive */
+ u8 encap_mark:1;
- /* GRO checksum is valid */
- u8 csum_valid:1;
+ /* GRO checksum is valid */
+ u8 csum_valid:1;
- /* Number of checksums via CHECKSUM_UNNECESSARY */
- u8 csum_cnt:3;
+ /* Number of checksums via CHECKSUM_UNNECESSARY */
+ u8 csum_cnt:3;
- /* Free the skb? */
- u8 free:2;
+ /* Free the skb? */
+ u8 free:2;
#define NAPI_GRO_FREE 1
#define NAPI_GRO_FREE_STOLEN_HEAD 2
- /* Used in foo-over-udp, set in udp[46]_gro_receive */
- u8 is_ipv6:1;
+ /* Used in foo-over-udp, set in udp[46]_gro_receive */
+ u8 is_ipv6:1;
- /* Used in GRE, set in fou/gue_gro_receive */
- u8 is_fou:1;
+ /* Used in GRE, set in fou/gue_gro_receive */
+ u8 is_fou:1;
- /* Used to determine if flush_id can be ignored */
- u8 is_atomic:1;
+ /* Used to determine if flush_id can be ignored */
+ u8 is_atomic:1;
- /* Number of gro_receive callbacks this packet already went through */
- u8 recursion_counter:4;
+ /* Number of gro_receive callbacks this packet already went through */
+ u8 recursion_counter:4;
- /* GRO is done by frag_list pointer chaining. */
- u8 is_flist:1;
+ /* GRO is done by frag_list pointer chaining. */
+ u8 is_flist:1;
+ );
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 4ad47d9f9d27..3908296d103f 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -285,6 +285,14 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req);
+static inline unsigned long
+reqsk_timeout(struct request_sock *req, unsigned long max_timeout)
+{
+ u64 timeout = (u64)req->timeout << req->num_timeout;
+
+ return (unsigned long)min_t(u64, timeout, max_timeout);
+}
+
static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk)
{
/* The below has to be done to allow calling inet_csk_destroy_sock */
diff --git a/include/net/inet_dscp.h b/include/net/inet_dscp.h
new file mode 100644
index 000000000000..72f250dffada
--- /dev/null
+++ b/include/net/inet_dscp.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * inet_dscp.h: helpers for handling differentiated services codepoints (DSCP)
+ *
+ * DSCP is defined in RFC 2474:
+ *
+ * 0 1 2 3 4 5 6 7
+ * +---+---+---+---+---+---+---+---+
+ * | DSCP | CU |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * DSCP: differentiated services codepoint
+ * CU: currently unused
+ *
+ * The whole DSCP + CU bits form the DS field.
+ * The DS field is also commonly called TOS or Traffic Class (for IPv6).
+ *
+ * Note: the CU bits are now used for Explicit Congestion Notification
+ * (RFC 3168).
+ */
+
+#ifndef _INET_DSCP_H
+#define _INET_DSCP_H
+
+#include <linux/types.h>
+
+/* Special type for storing DSCP values.
+ *
+ * A dscp_t variable stores a DS field with the CU (ECN) bits cleared.
+ * Using dscp_t allows to strictly separate DSCP and ECN bits, thus avoiding
+ * bugs where ECN bits are erroneously taken into account during FIB lookups
+ * or policy routing.
+ *
+ * Note: to get the real DSCP value contained in a dscp_t variable one would
+ * have to do a bit shift after calling inet_dscp_to_dsfield(). We could have
+ * a helper for that, but there's currently no users.
+ */
+typedef u8 __bitwise dscp_t;
+
+#define INET_DSCP_MASK 0xfc
+
+static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield)
+{
+ return (__force dscp_t)(dsfield & INET_DSCP_MASK);
+}
+
+static inline __u8 inet_dscp_to_dsfield(dscp_t dscp)
+{
+ return (__force __u8)dscp;
+}
+
+static inline bool inet_validate_dscp(__u8 val)
+{
+ return !(val & ~INET_DSCP_MASK);
+}
+
+#endif /* _INET_DSCP_H */
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index dfd919b3119e..463ae5d33eb0 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -65,13 +65,13 @@ struct inet_timewait_sock {
/* these three are in inet_sock */
__be16 tw_sport;
/* And these are ours. */
- unsigned int tw_kill : 1,
- tw_transparent : 1,
+ unsigned int tw_transparent : 1,
tw_flowlabel : 20,
- tw_pad : 2, /* 2 bits hole */
+ tw_pad : 3, /* 3 bits hole */
tw_tos : 8;
u32 tw_txhash;
u32 tw_priority;
+ u32 tw_bslot; /* bind bucket slot */
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
};
@@ -110,8 +110,6 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo
void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);
-
static inline
struct net *twsk_net(const struct inet_timewait_sock *twsk)
{
diff --git a/include/net/ip.h b/include/net/ip.h
index b51bae43b0dd..3984f2c39c4b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -517,7 +517,6 @@ void ip_dst_metrics_put(struct dst_entry *dst)
kfree(p);
}
-u32 ip_idents_reserve(u32 hash, int segs);
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);
static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
@@ -712,7 +711,7 @@ int ip_forward(struct sk_buff *skb);
*/
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
- __be32 daddr, struct rtable *rt, int is_frag);
+ __be32 daddr, struct rtable *rt);
int __ip_options_echo(struct net *net, struct ip_options *dopt,
struct sk_buff *skb, const struct ip_options *sopt);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index c4297704bbcb..6a82bcb8813b 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -17,6 +17,7 @@
#include <linux/rcupdate.h>
#include <net/fib_notifier.h>
#include <net/fib_rules.h>
+#include <net/inet_dscp.h>
#include <net/inetpeer.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
@@ -24,7 +25,7 @@
struct fib_config {
u8 fc_dst_len;
- u8 fc_tos;
+ dscp_t fc_dscp;
u8 fc_protocol;
u8 fc_scope;
u8 fc_type;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 3afcb128e064..f693784e1419 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -15,9 +15,9 @@
#include <linux/refcount.h>
#include <linux/jump_label_ratelimit.h>
#include <net/if_inet6.h>
-#include <net/ndisc.h>
#include <net/flow.h>
#include <net/flow_dissector.h>
+#include <net/inet_dscp.h>
#include <net/snmp.h>
#include <net/netns/hash.h>
@@ -437,8 +437,16 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk,
struct ipv6_txoptions *opt,
int newtype,
struct ipv6_opt_hdr *newopt);
-struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
- struct ipv6_txoptions *opt);
+struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space,
+ struct ipv6_txoptions *opt);
+
+static inline struct ipv6_txoptions *
+ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt)
+{
+ if (!opt)
+ return NULL;
+ return __ipv6_fixup_options(opt_space, opt);
+}
bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
const struct inet6_skb_parm *opt);
@@ -967,6 +975,11 @@ static inline u8 ip6_tclass(__be32 flowinfo)
return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT;
}
+static inline dscp_t ip6_dscp(__be32 flowinfo)
+{
+ return inet_dsfield_to_dscp(ip6_tclass(flowinfo));
+}
+
static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel)
{
return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel;
@@ -1020,7 +1033,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
- struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
+ struct ipcm6_cookie *ipc6,
struct rt6_info *rt, unsigned int flags,
struct inet_cork_full *cork);
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 0a4779175a52..5052c66e22d2 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _IPV6_FRAG_H
#define _IPV6_FRAG_H
+#include <linux/icmpv6.h>
#include <linux/kernel.h>
#include <net/addrconf.h>
#include <net/ipv6.h>
diff --git a/include/net/mac802154.h b/include/net/mac802154.h
index d524ffb9eb25..2c3bbc6645ba 100644
--- a/include/net/mac802154.h
+++ b/include/net/mac802154.h
@@ -464,6 +464,12 @@ void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb,
* ieee802154_wake_queue - wake ieee802154 queue
* @hw: pointer as obtained from ieee802154_alloc_hw().
*
+ * Tranceivers usually have either one transmit framebuffer or one framebuffer
+ * for both transmitting and receiving. Hence, the core currently only handles
+ * one frame at a time for each phy, which means we had to stop the queue to
+ * avoid new skb to come during the transmission. The queue then needs to be
+ * woken up after the operation.
+ *
* Drivers should use this function instead of netif_wake_queue.
*/
void ieee802154_wake_queue(struct ieee802154_hw *hw);
@@ -472,6 +478,12 @@ void ieee802154_wake_queue(struct ieee802154_hw *hw);
* ieee802154_stop_queue - stop ieee802154 queue
* @hw: pointer as obtained from ieee802154_alloc_hw().
*
+ * Tranceivers usually have either one transmit framebuffer or one framebuffer
+ * for both transmitting and receiving. Hence, the core currently only handles
+ * one frame at a time for each phy, which means we need to tell upper layers to
+ * stop giving us new skbs while we are busy with the transmitted one. The queue
+ * must then be stopped before transmitting.
+ *
* Drivers should use this function instead of netif_stop_queue.
*/
void ieee802154_stop_queue(struct ieee802154_hw *hw);
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 7e35ec79b909..e80a4baf8379 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -45,6 +45,11 @@ static inline bool mctp_address_ok(mctp_eid_t eid)
return eid >= 8 && eid < 255;
}
+static inline bool mctp_address_matches(mctp_eid_t match, mctp_eid_t eid)
+{
+ return match == eid || match == MCTP_ADDR_ANY;
+}
+
static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb)
{
return (struct mctp_hdr *)skb_network_header(skb);
@@ -121,7 +126,7 @@ struct mctp_sock {
*/
struct mctp_sk_key {
mctp_eid_t peer_addr;
- mctp_eid_t local_addr;
+ mctp_eid_t local_addr; /* MCTP_ADDR_ANY for local owned tags */
__u8 tag; /* incoming tag match; invert TO for local */
/* we hold a ref to sk when set */
@@ -158,6 +163,12 @@ struct mctp_sk_key {
*/
unsigned long dev_flow_state;
struct mctp_dev *dev;
+
+ /* a tag allocated with SIOCMCTPALLOCTAG ioctl will not expire
+ * automatically on timeout or response, instead SIOCMCTPDROPTAG
+ * is used.
+ */
+ bool manual_alloc;
};
struct mctp_skb_cb {
@@ -234,6 +245,9 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
void mctp_key_unref(struct mctp_sk_key *key);
+struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
+ mctp_eid_t daddr, mctp_eid_t saddr,
+ bool manual, u8 *tagp);
/* routing <--> device interface */
unsigned int mctp_default_net(struct net *net);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 5b61c462e534..c4f5601f6e32 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -63,7 +63,7 @@ struct net {
*/
spinlock_t rules_mod_lock;
- unsigned int dev_unreg_count;
+ atomic_t dev_unreg_count;
unsigned int dev_base_seq; /* protected by rtnl_mutex */
int ifindex;
@@ -513,4 +513,10 @@ static inline void fnhe_genid_bump(struct net *net)
atomic_inc(&net->fnhe_genid);
}
+#ifdef CONFIG_NET
+void net_ns_init(void);
+#else
+static inline void net_ns_init(void) {}
+#endif
+
#endif /* __NET_NET_NAMESPACE_H */
diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 7f44a771530e..4b2b7f8914ea 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -78,7 +78,6 @@ static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
void nf_conntrack_acct_pernet_init(struct net *net);
-int nf_conntrack_acct_init(void);
void nf_conntrack_acct_fini(void);
#endif /* _NF_CONNTRACK_ACCT_H */
diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h
new file mode 100644
index 000000000000..a473b56842c5
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_bpf.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NF_CONNTRACK_BPF_H
+#define _NF_CONNTRACK_BPF_H
+
+#include <linux/btf.h>
+#include <linux/kconfig.h>
+
+#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+ (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+extern int register_nf_conntrack_bpf(void);
+
+#else
+
+static inline int register_nf_conntrack_bpf(void)
+{
+ return 0;
+}
+
+#endif
+
+#endif /* _NF_CONNTRACK_BPF_H */
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index d932e22edcb4..6c4c490a3e34 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -21,10 +21,10 @@ enum nf_ct_ecache_state {
struct nf_conntrack_ecache {
unsigned long cache; /* bitops want long */
- u16 missed; /* missed events */
u16 ctmask; /* bitmask of ct events to be delivered */
u16 expmask; /* bitmask of expect events to be delivered */
enum nf_ct_ecache_state state:8;/* ecache state */
+ u32 missed; /* missed events */
u32 portid; /* netlink portid of destroyer */
};
@@ -166,9 +166,6 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state);
void nf_conntrack_ecache_pernet_init(struct net *net);
void nf_conntrack_ecache_pernet_fini(struct net *net);
-int nf_conntrack_ecache_init(void);
-void nf_conntrack_ecache_fini(void);
-
static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net)
{
return net->ct.ecache_dwork_pending;
@@ -194,16 +191,6 @@ static inline void nf_conntrack_ecache_pernet_init(struct net *net)
static inline void nf_conntrack_ecache_pernet_fini(struct net *net)
{
}
-
-static inline int nf_conntrack_ecache_init(void)
-{
- return 0;
-}
-
-static inline void nf_conntrack_ecache_fini(void)
-{
-}
-
static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return false; }
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
#endif /*_NF_CONNTRACK_ECACHE_H*/
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index c7515d82ab06..96635ad2acc7 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -49,7 +49,7 @@ enum nf_ct_ext_id {
struct nf_ct_ext {
u8 offset[NF_CT_EXT_NUM];
u8 len;
- char data[];
+ char data[] __aligned(8);
};
static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id)
@@ -72,23 +72,7 @@ static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id)
#define nf_ct_ext_find(ext, id) \
((id##_TYPE *)__nf_ct_ext_find((ext), (id)))
-/* Destroy all relationships */
-void nf_ct_ext_destroy(struct nf_conn *ct);
-
/* Add this type, returns pointer to data or NULL. */
void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp);
-struct nf_ct_ext_type {
- /* Destroys relationships (can be NULL). */
- void (*destroy)(struct nf_conn *ct);
-
- enum nf_ct_ext_id id;
-
- /* Length and min alignment. */
- u8 len;
- u8 align;
-};
-
-int nf_ct_extend_register(const struct nf_ct_ext_type *type);
-void nf_ct_extend_unregister(const struct nf_ct_ext_type *type);
#endif /* _NF_CONNTRACK_EXTEND_H */
diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h
index ba916411c4e1..3c23298e68ca 100644
--- a/include/net/netfilter/nf_conntrack_labels.h
+++ b/include/net/netfilter/nf_conntrack_labels.h
@@ -45,12 +45,9 @@ int nf_connlabels_replace(struct nf_conn *ct,
#ifdef CONFIG_NF_CONNTRACK_LABELS
int nf_conntrack_labels_init(void);
-void nf_conntrack_labels_fini(void);
int nf_connlabels_get(struct net *net, unsigned int bit);
void nf_connlabels_put(struct net *net);
#else
-static inline int nf_conntrack_labels_init(void) { return 0; }
-static inline void nf_conntrack_labels_fini(void) {}
static inline int nf_connlabels_get(struct net *net, unsigned int bit) { return 0; }
static inline void nf_connlabels_put(struct net *net) {}
#endif
diff --git a/include/net/netfilter/nf_conntrack_seqadj.h b/include/net/netfilter/nf_conntrack_seqadj.h
index 0a10b50537ae..883c414b768e 100644
--- a/include/net/netfilter/nf_conntrack_seqadj.h
+++ b/include/net/netfilter/nf_conntrack_seqadj.h
@@ -42,7 +42,4 @@ int nf_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, unsigned int protoff);
s32 nf_ct_seq_offset(const struct nf_conn *ct, enum ip_conntrack_dir, u32 seq);
-int nf_conntrack_seqadj_init(void);
-void nf_conntrack_seqadj_fini(void);
-
#endif /* _NF_CONNTRACK_SEQADJ_H */
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 659b0ea25b4d..3ea94f6f3844 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -89,23 +89,11 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
}
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-int nf_conntrack_timeout_init(void);
-void nf_conntrack_timeout_fini(void);
void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout);
int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num,
const char *timeout_name);
void nf_ct_destroy_timeout(struct nf_conn *ct);
#else
-static inline int nf_conntrack_timeout_init(void)
-{
- return 0;
-}
-
-static inline void nf_conntrack_timeout_fini(void)
-{
- return;
-}
-
static inline int nf_ct_set_timeout(struct net *net, struct nf_conn *ct,
u8 l3num, u8 l4num,
const char *timeout_name)
@@ -120,8 +108,12 @@ static inline void nf_ct_destroy_timeout(struct nf_conn *ct)
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-extern struct nf_ct_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name);
-extern void (*nf_ct_timeout_put_hook)(struct nf_ct_timeout *timeout);
+struct nf_ct_timeout_hooks {
+ struct nf_ct_timeout *(*timeout_find_get)(struct net *net, const char *name);
+ void (*timeout_put)(struct nf_ct_timeout *timeout);
+};
+
+extern const struct nf_ct_timeout_hooks *nf_ct_timeout_hook;
#endif
#endif /* _NF_CONNTRACK_TIMEOUT_H */
diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h
index 820ea34b6029..57138d974a9f 100644
--- a/include/net/netfilter/nf_conntrack_timestamp.h
+++ b/include/net/netfilter/nf_conntrack_timestamp.h
@@ -40,21 +40,8 @@ struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
void nf_conntrack_tstamp_pernet_init(struct net *net);
-
-int nf_conntrack_tstamp_init(void);
-void nf_conntrack_tstamp_fini(void);
#else
static inline void nf_conntrack_tstamp_pernet_init(struct net *net) {}
-
-static inline int nf_conntrack_tstamp_init(void)
-{
- return 0;
-}
-
-static inline void nf_conntrack_tstamp_fini(void)
-{
- return;
-}
#endif /* CONFIG_NF_CONNTRACK_TIMESTAMP */
#endif /* _NF_CONNTRACK_TSTAMP_H */
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index b6fb1fdff9b2..0ea7c55cea4d 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -42,6 +42,14 @@ struct nft_cmp_fast_expr {
bool inv;
};
+struct nft_cmp16_fast_expr {
+ struct nft_data data;
+ struct nft_data mask;
+ u8 sreg;
+ u8 len;
+ bool inv;
+};
+
struct nft_immediate_expr {
struct nft_data data;
u8 dreg;
@@ -59,6 +67,7 @@ static inline u32 nft_cmp_fast_mask(unsigned int len)
}
extern const struct nft_expr_ops nft_cmp_fast_ops;
+extern const struct nft_expr_ops nft_cmp16_fast_ops;
struct nft_payload {
enum nft_payload_bases base:8;
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 552bc25b1933..388244e315e7 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -10,6 +10,7 @@ struct netns_core {
struct ctl_table_header *sysctl_hdr;
int sysctl_somaxconn;
+ u8 sysctl_txrehash;
#ifdef CONFIG_PROC_FS
struct prot_inuse __percpu *prot_inuse;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 78557643526e..f0687867b5cd 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -31,18 +31,16 @@ struct ping_group_range {
struct inet_hashinfo;
struct inet_timewait_death_row {
- atomic_t tw_count;
- char tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)];
+ refcount_t tw_refcount;
- struct inet_hashinfo *hashinfo;
+ struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp;
int sysctl_max_tw_buckets;
};
struct tcp_fastopen_context;
struct netns_ipv4 {
- /* Please keep tcp_death_row at first field in netns_ipv4 */
- struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp;
+ struct inet_timewait_death_row *tcp_death_row;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
@@ -70,11 +68,9 @@ struct netns_ipv4 {
struct hlist_head *fib_table_hash;
struct sock *fibnl;
- struct sock * __percpu *icmp_sk;
struct sock *mc_autojoin_sk;
struct inet_peer_base *peers;
- struct sock * __percpu *tcp_sk;
struct fqdir *fqdir;
u8 sysctl_icmp_echo_ignore_all;
@@ -87,6 +83,7 @@ struct netns_ipv4 {
u32 ip_rt_min_pmtu;
int ip_rt_mtu_expires;
+ int ip_rt_min_advmss;
struct local_ports ip_local_ports;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index a4b550380316..d145f1966682 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -88,11 +88,15 @@ struct netns_ipv6 {
struct fib6_table *fib6_local_tbl;
struct fib_rules_ops *fib6_rules_ops;
#endif
- struct sock * __percpu *icmp_sk;
struct sock *ndisc_sk;
struct sock *tcp_sk;
struct sock *igmp_sk;
struct sock *mc_autojoin_sk;
+
+ struct hlist_head *inet6_addr_lst;
+ spinlock_t addrconf_hash_lock;
+ struct delayed_work addr_chk_work;
+
#ifdef CONFIG_IPV6_MROUTE
#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
struct mr_table *mrt6;
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 79a805542d0f..97c3c19872ff 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -201,21 +201,67 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
}
#endif
-void page_pool_put_page(struct page_pool *pool, struct page *page,
- unsigned int dma_sync_size, bool allow_direct);
+void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size,
+ bool allow_direct);
-/* Same as above but will try to sync the entire area pool->max_len */
-static inline void page_pool_put_full_page(struct page_pool *pool,
- struct page *page, bool allow_direct)
+static inline void page_pool_fragment_page(struct page *page, long nr)
+{
+ atomic_long_set(&page->pp_frag_count, nr);
+}
+
+static inline long page_pool_defrag_page(struct page *page, long nr)
+{
+ long ret;
+
+ /* If nr == pp_frag_count then we have cleared all remaining
+ * references to the page. No need to actually overwrite it, instead
+ * we can leave this to be overwritten by the calling function.
+ *
+ * The main advantage to doing this is that an atomic_read is
+ * generally a much cheaper operation than an atomic update,
+ * especially when dealing with a page that may be partitioned
+ * into only 2 or 3 pieces.
+ */
+ if (atomic_long_read(&page->pp_frag_count) == nr)
+ return 0;
+
+ ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+ WARN_ON(ret < 0);
+ return ret;
+}
+
+static inline bool page_pool_is_last_frag(struct page_pool *pool,
+ struct page *page)
+{
+ /* If fragments aren't enabled or count is 0 we were the last user */
+ return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+ (page_pool_defrag_page(page, 1) == 0);
+}
+
+static inline void page_pool_put_page(struct page_pool *pool,
+ struct page *page,
+ unsigned int dma_sync_size,
+ bool allow_direct)
{
/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
- page_pool_put_page(pool, page, -1, allow_direct);
+ if (!page_pool_is_last_frag(pool, page))
+ return;
+
+ page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
#endif
}
+/* Same as above but will try to sync the entire area pool->max_len */
+static inline void page_pool_put_full_page(struct page_pool *pool,
+ struct page *page, bool allow_direct)
+{
+ page_pool_put_page(pool, page, -1, allow_direct);
+}
+
/* Same as above but the caller must guarantee safe context. e.g NAPI */
static inline void page_pool_recycle_direct(struct page_pool *pool,
struct page *page)
@@ -243,30 +289,6 @@ static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
page->dma_addr_upper = upper_32_bits(addr);
}
-static inline void page_pool_set_frag_count(struct page *page, long nr)
-{
- atomic_long_set(&page->pp_frag_count, nr);
-}
-
-static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
- long nr)
-{
- long ret;
-
- /* As suggested by Alexander, atomic_long_read() may cover up the
- * reference count errors, so avoid calling atomic_long_read() in
- * the cases of freeing or draining the page_frags, where we would
- * not expect it to match or that are slowpath anyway.
- */
- if (__builtin_constant_p(nr) &&
- atomic_long_read(&page->pp_frag_count) == nr)
- return 0;
-
- ret = atomic_long_sub_return(nr, &page->pp_frag_count);
- WARN_ON(ret < 0);
- return ret;
-}
-
static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 676cb8ea9e15..a3b57a93228a 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -1028,4 +1028,15 @@ struct tc_fifo_qopt_offload {
};
};
+#ifdef CONFIG_NET_CLS_ACT
+DECLARE_STATIC_KEY_FALSE(tc_skb_ext_tc);
+void tc_skb_ext_tc_enable(void);
+void tc_skb_ext_tc_disable(void);
+#define tc_skb_ext_tc_enabled() static_branch_unlikely(&tc_skb_ext_tc)
+#else /* CONFIG_NET_CLS_ACT */
+static inline void tc_skb_ext_tc_enable(void) { }
+static inline void tc_skb_ext_tc_disable(void) { }
+#define tc_skb_ext_tc_enabled() false
+#endif
+
#endif
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 9e7b21c0b3a6..44a35531952e 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -63,12 +63,6 @@ static inline psched_time_t psched_get_time(void)
return PSCHED_NS2TICKS(ktime_get_ns());
}
-static inline psched_tdiff_t
-psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound)
-{
- return min(tv1 - tv2, bound);
-}
-
struct qdisc_watchdog {
u64 last_expires;
struct hrtimer timer;
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 29e41ff3ec93..144c39db9898 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -70,6 +70,7 @@ struct request_sock {
struct saved_syn *saved_syn;
u32 secid;
u32 peer_secid;
+ u32 timeout;
};
static inline struct request_sock *inet_reqsk(const struct sock *sk)
@@ -104,6 +105,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
sk_node_init(&req_to_sk(req)->sk_node);
sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
+ req->timeout = 0;
req->num_timeout = 0;
req->num_retrans = 0;
req->sk = NULL;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 472843eedbae..9bab396c1f3b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -518,11 +518,6 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
BUILD_BUG_ON(sizeof(qcb->data) < sz);
}
-static inline int qdisc_qlen_cpu(const struct Qdisc *q)
-{
- return this_cpu_ptr(q->cpu_qstats)->qlen;
-}
-
static inline int qdisc_qlen(const struct Qdisc *q)
{
return q->q.qlen;
diff --git a/include/net/sock.h b/include/net/sock.h
index ff9b508d9c5f..d6c13f0fba40 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -316,6 +316,7 @@ struct sk_filter;
* @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting
* @sk_txhash: computed flow hash for use on transmit
+ * @sk_txrehash: enable TX hash rethink
* @sk_filter: socket filtering instructions
* @sk_timer: sock cleanup timer
* @sk_stamp: time stamp of last packet received
@@ -491,6 +492,7 @@ struct sock {
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
kuid_t sk_uid;
+ u8 sk_txrehash;
#ifdef CONFIG_NET_RX_BUSY_POLL
u8 sk_prefer_busy_poll;
u16 sk_busy_poll_budget;
@@ -587,6 +589,18 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk)
__tmp | SK_USER_DATA_NOCOPY); \
})
+static inline
+struct net *sock_net(const struct sock *sk)
+{
+ return read_pnet(&sk->sk_net);
+}
+
+static inline
+void sock_net_set(struct sock *sk, struct net *net)
+{
+ write_pnet(&sk->sk_net, net);
+}
+
/*
* SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK
* or not whether his port will be reused by someone else. SK_FORCE_REUSE
@@ -2054,7 +2068,7 @@ static inline void sk_set_txhash(struct sock *sk)
static inline bool sk_rethink_txhash(struct sock *sk)
{
- if (sk->sk_txhash) {
+ if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) {
sk_set_txhash(sk);
return true;
}
@@ -2704,18 +2718,6 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
-static inline
-struct net *sock_net(const struct sock *sk)
-{
- return read_pnet(&sk->sk_net);
-}
-
-static inline
-void sock_net_set(struct sock *sk, struct net *net)
-{
- write_pnet(&sk->sk_net, net);
-}
-
static inline bool
skb_sk_is_prefetched(struct sk_buff *skb)
{
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b9fc978fb2ca..eff2487d972d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2358,7 +2358,7 @@ static inline u32 tcp_timeout_init(struct sock *sk)
if (timeout <= 0)
timeout = TCP_TIMEOUT_INIT;
- return timeout;
+ return min_t(int, timeout, TCP_RTO_MAX);
}
static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 9185e45b997f..a3c53110d30b 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -70,49 +70,6 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
return 0;
}
-/* Slow-path computation of checksum. Socket is locked. */
-static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
-{
- const struct udp_sock *up = udp_sk(skb->sk);
- int cscov = up->len;
- __wsum csum = 0;
-
- if (up->pcflag & UDPLITE_SEND_CC) {
- /*
- * Sender has set `partial coverage' option on UDP-Lite socket.
- * The special case "up->pcslen == 0" signifies full coverage.
- */
- if (up->pcslen < up->len) {
- if (0 < up->pcslen)
- cscov = up->pcslen;
- udp_hdr(skb)->len = htons(up->pcslen);
- }
- /*
- * NOTE: Causes for the error case `up->pcslen > up->len':
- * (i) Application error (will not be penalized).
- * (ii) Payload too big for send buffer: data is split
- * into several packets, each with its own header.
- * In this case (e.g. last segment), coverage may
- * exceed packet length.
- * Since packets with coverage length > packet length are
- * illegal, we fall back to the defaults here.
- */
- }
-
- skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */
-
- skb_queue_walk(&sk->sk_write_queue, skb) {
- const int off = skb_transport_offset(skb);
- const int len = skb->len - off;
-
- csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum);
-
- if ((cscov -= len) <= 0)
- break;
- }
- return csum;
-}
-
/* Fast-path computation of checksum. Socket may not be locked. */
static inline __wsum udplite_csum(struct sk_buff *skb)
{
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 8f0812e4996d..b7721c3e4d1f 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -60,12 +60,20 @@ struct xdp_rxq_info {
u32 reg_state;
struct xdp_mem_info mem;
unsigned int napi_id;
+ u32 frag_size;
} ____cacheline_aligned; /* perf critical, avoid false-sharing */
struct xdp_txq_info {
struct net_device *dev;
};
+enum xdp_buff_flags {
+ XDP_FLAGS_HAS_FRAGS = BIT(0), /* non-linear xdp buff */
+ XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under
+ * pressure
+ */
+};
+
struct xdp_buff {
void *data;
void *data_end;
@@ -74,13 +82,40 @@ struct xdp_buff {
struct xdp_rxq_info *rxq;
struct xdp_txq_info *txq;
u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
+ u32 flags; /* supported values defined in xdp_buff_flags */
};
+static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
+{
+ return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp)
+{
+ xdp->flags |= XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
+{
+ xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+ return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
+static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+ xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
+}
+
static __always_inline void
xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
{
xdp->frame_sz = frame_sz;
xdp->rxq = rxq;
+ xdp->flags = 0;
}
static __always_inline void
@@ -111,6 +146,20 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
return (struct skb_shared_info *)xdp_data_hard_end(xdp);
}
+static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp)
+{
+ unsigned int len = xdp->data_end - xdp->data;
+ struct skb_shared_info *sinfo;
+
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ len += sinfo->xdp_frags_size;
+out:
+ return len;
+}
+
struct xdp_frame {
void *data;
u16 len;
@@ -122,8 +171,19 @@ struct xdp_frame {
*/
struct xdp_mem_info mem;
struct net_device *dev_rx; /* used by cpumap */
+ u32 flags; /* supported values defined in xdp_buff_flags */
};
+static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
+{
+ return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame)
+{
+ return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
#define XDP_BULK_QUEUE_SIZE 16
struct xdp_frame_bulk {
int count;
@@ -159,6 +219,19 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
frame->dev_rx = NULL;
}
+static inline void
+xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
+ unsigned int size, unsigned int truesize,
+ bool pfmemalloc)
+{
+ skb_shinfo(skb)->nr_frags = nr_frags;
+
+ skb->len += size;
+ skb->data_len += size;
+ skb->truesize += truesize;
+ skb->pfmemalloc |= pfmemalloc;
+}
+
/* Avoids inlining WARN macro in fast-path */
void xdp_warn(const char *msg, const char *func, const int line);
#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
@@ -180,6 +253,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
xdp->data_end = frame->data + frame->len;
xdp->data_meta = frame->data - frame->metasize;
xdp->frame_sz = frame->frame_sz;
+ xdp->flags = frame->flags;
}
static inline
@@ -206,6 +280,7 @@ int xdp_update_frame_from_buff(struct xdp_buff *xdp,
xdp_frame->headroom = headroom - sizeof(*xdp_frame);
xdp_frame->metasize = metasize;
xdp_frame->frame_sz = xdp->frame_sz;
+ xdp_frame->flags = xdp->flags;
return 0;
}
@@ -230,6 +305,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
return xdp_frame;
}
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+ struct xdp_buff *xdp);
void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
@@ -246,14 +323,37 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
static inline void xdp_release_frame(struct xdp_frame *xdpf)
{
struct xdp_mem_info *mem = &xdpf->mem;
+ struct skb_shared_info *sinfo;
+ int i;
/* Curr only page_pool needs this */
- if (mem->type == MEM_TYPE_PAGE_POOL)
- __xdp_release_frame(xdpf->data, mem);
+ if (mem->type != MEM_TYPE_PAGE_POOL)
+ return;
+
+ if (likely(!xdp_frame_has_frags(xdpf)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+ __xdp_release_frame(page_address(page), mem);
+ }
+out:
+ __xdp_release_frame(xdpf->data, mem);
+}
+
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index,
+ unsigned int napi_id, u32 frag_size);
+static inline int
+xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index,
+ unsigned int napi_id)
+{
+ return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0);
}
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
- struct net_device *dev, u32 queue_index, unsigned int napi_id);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 443d45951564..4aa031849668 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -13,7 +13,7 @@
void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries);
bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc);
-u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, u32 max);
+u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max);
void xsk_tx_release(struct xsk_buff_pool *pool);
struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
u16 queue_id);
@@ -142,8 +142,7 @@ static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool,
return false;
}
-static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc,
- u32 max)
+static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max)
{
return 0;
}
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index ddeefc4a1040..5554ee75e7da 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -60,6 +60,7 @@ struct xsk_buff_pool {
*/
dma_addr_t *dma_pages;
struct xdp_buff_xsk *heads;
+ struct xdp_desc *tx_descs;
u64 chunk_mask;
u64 addrs_cnt;
u32 free_list_cnt;
diff --git a/include/trace/events/mctp.h b/include/trace/events/mctp.h
index 175b057c507f..165cf25f77a7 100644
--- a/include/trace/events/mctp.h
+++ b/include/trace/events/mctp.h
@@ -15,6 +15,7 @@ enum {
MCTP_TRACE_KEY_REPLIED,
MCTP_TRACE_KEY_INVALIDATED,
MCTP_TRACE_KEY_CLOSED,
+ MCTP_TRACE_KEY_DROPPED,
};
#endif /* __TRACE_MCTP_ENUMS */
@@ -22,6 +23,7 @@ TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_TIMEOUT);
TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_REPLIED);
TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_INVALIDATED);
TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_CLOSED);
+TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_DROPPED);
TRACE_EVENT(mctp_key_acquire,
TP_PROTO(const struct mctp_sk_key *key),
@@ -66,7 +68,8 @@ TRACE_EVENT(mctp_key_release,
{ MCTP_TRACE_KEY_TIMEOUT, "timeout" },
{ MCTP_TRACE_KEY_REPLIED, "replied" },
{ MCTP_TRACE_KEY_INVALIDATED, "invalidated" },
- { MCTP_TRACE_KEY_CLOSED, "closed" })
+ { MCTP_TRACE_KEY_CLOSED, "closed" },
+ { MCTP_TRACE_KEY_DROPPED, "dropped" })
)
);
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index a8a64b97504d..cfcfd26399f7 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -16,6 +16,17 @@
EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \
EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \
EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \
+ EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \
+ EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \
+ EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \
+ EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \
+ EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \
+ EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \
+ UNICAST_IN_L2_MULTICAST) \
+ EM(SKB_DROP_REASON_XFRM_POLICY, XFRM_POLICY) \
+ EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO) \
+ EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF) \
+ EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM) \
EMe(SKB_DROP_REASON_MAX, MAX)
#undef EM
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index c77a1313b3b0..467ca2f28760 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -128,6 +128,8 @@
#define SO_RESERVE_MEM 73
+#define SO_TXREHASH 74
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b0383d371b9a..afe3d0d7f5f2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -330,6 +330,8 @@ union bpf_iter_link_info {
* *ctx_out*, *data_in* and *data_out* must be NULL.
* *repeat* must be zero.
*
+ * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
@@ -1111,6 +1113,11 @@ enum bpf_link_type {
*/
#define BPF_F_SLEEPABLE (1U << 4)
+/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program
+ * fully support xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS (1U << 5)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
@@ -1775,6 +1782,8 @@ union bpf_attr {
* 0 on success, or a negative error in case of failure.
*
* u64 bpf_get_current_pid_tgid(void)
+ * Description
+ * Get the current pid and tgid.
* Return
* A 64-bit integer containing the current tgid and pid, and
* created as such:
@@ -1782,6 +1791,8 @@ union bpf_attr {
* *current_task*\ **->pid**.
*
* u64 bpf_get_current_uid_gid(void)
+ * Description
+ * Get the current uid and gid.
* Return
* A 64-bit integer containing the current GID and UID, and
* created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2256,6 +2267,8 @@ union bpf_attr {
* The 32-bit hash.
*
* u64 bpf_get_current_task(void)
+ * Description
+ * Get the current task.
* Return
* A pointer to the current task struct.
*
@@ -2369,6 +2382,8 @@ union bpf_attr {
* indicate that the hash is outdated and to trigger a
* recalculation the next time the kernel tries to access this
* hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ * Return
+ * void.
*
* long bpf_get_numa_node_id(void)
* Description
@@ -2466,6 +2481,8 @@ union bpf_attr {
* A 8-byte long unique number or 0 if *sk* is NULL.
*
* u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Description
+ * Get the owner UID of the socked associated to *skb*.
* Return
* The owner UID of the socket associated to *skb*. If the socket
* is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -3240,6 +3257,9 @@ union bpf_attr {
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_get_current_cgroup_id(void)
+ * Description
+ * Get the current cgroup id based on the cgroup within which
+ * the current task is running.
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
@@ -5018,6 +5038,54 @@ union bpf_attr {
*
* Return
* The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ * Description
+ * Get the syscall's return value that will be returned to userspace.
+ *
+ * This helper is currently supported by cgroup programs only.
+ * Return
+ * The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ * Description
+ * Set the syscall's return value that will be returned to userspace.
+ *
+ * This helper is currently supported by cgroup programs only.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ * Description
+ * Get the total size of a given xdp buff (linear and paged area)
+ * Return
+ * The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ * Description
+ * This helper is provided as an easy way to load data from a
+ * xdp buffer. It can be used to load *len* bytes from *offset* from
+ * the frame associated to *xdp_md*, into the buffer pointed by
+ * *buf*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ * Description
+ * Store *len* bytes from buffer *buf* into the frame
+ * associated to *xdp_md*, at *offset*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
+ * Description
+ * Read *size* bytes from user space address *user_ptr* in *tsk*'s
+ * address space, and stores the data in *dst*. *flags* is not
+ * used yet and is provided for future extensibility. This helper
+ * can only be used by sleepable programs.
+ * Return
+ * 0 on success, or a negative error in case of failure. On error
+ * *dst* buffer is zeroed out.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5206,6 +5274,12 @@ union bpf_attr {
FN(get_func_arg), \
FN(get_func_ret), \
FN(get_func_arg_cnt), \
+ FN(get_retval), \
+ FN(set_retval), \
+ FN(xdp_get_buff_len), \
+ FN(xdp_load_bytes), \
+ FN(xdp_store_bytes), \
+ FN(copy_from_user_task), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5500,7 +5574,8 @@ struct bpf_sock {
__u32 src_ip4;
__u32 src_ip6[4];
__u32 src_port; /* host byte order */
- __u32 dst_port; /* network byte order */
+ __be16 dst_port; /* network byte order */
+ __u16 :16; /* zero padding */
__u32 dst_ip4;
__u32 dst_ip6[4];
__u32 state;
@@ -6378,7 +6453,8 @@ struct bpf_sk_lookup {
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
__u32 remote_ip4; /* Network byte order */
__u32 remote_ip6[4]; /* Network byte order */
- __u32 remote_port; /* Network byte order */
+ __be16 remote_port; /* Network byte order */
+ __u16 :16; /* Zero padding */
__u32 local_ip4; /* Network byte order */
__u32 local_ip6[4]; /* Network byte order */
__u32 local_port; /* Host byte order */
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index cca6e474a085..417d4280d7b5 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -319,6 +319,12 @@ enum {
/* RINGS */
enum {
+ ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0,
+ ETHTOOL_TCP_DATA_SPLIT_DISABLED,
+ ETHTOOL_TCP_DATA_SPLIT_ENABLED,
+};
+
+enum {
ETHTOOL_A_RINGS_UNSPEC,
ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */
ETHTOOL_A_RINGS_RX_MAX, /* u32 */
@@ -330,6 +336,7 @@ enum {
ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */
ETHTOOL_A_RINGS_TX, /* u32 */
ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */
+ ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */
/* add new constants above here */
__ETHTOOL_A_RINGS_CNT,
diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h
index 829ffdfcacca..38f6a8fdfd34 100644
--- a/include/uapi/linux/ioam6_iptunnel.h
+++ b/include/uapi/linux/ioam6_iptunnel.h
@@ -41,6 +41,15 @@ enum {
/* IOAM Trace Header */
IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */
+ /* Insertion frequency:
+ * "k over n" packets (0 < k <= n)
+ * [0.0001% ... 100%]
+ */
+#define IOAM6_IPTUNNEL_FREQ_MIN 1
+#define IOAM6_IPTUNNEL_FREQ_MAX 1000000
+ IOAM6_IPTUNNEL_FREQ_K, /* u32 */
+ IOAM6_IPTUNNEL_FREQ_N, /* u32 */
+
__IOAM6_IPTUNNEL_MAX,
};
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index 07b0318716fc..154ab56651f1 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -44,7 +44,25 @@ struct sockaddr_mctp_ext {
#define MCTP_TAG_MASK 0x07
#define MCTP_TAG_OWNER 0x08
+#define MCTP_TAG_PREALLOC 0x10
#define MCTP_OPT_ADDR_EXT 1
+#define SIOCMCTPALLOCTAG (SIOCPROTOPRIVATE + 0)
+#define SIOCMCTPDROPTAG (SIOCPROTOPRIVATE + 1)
+
+struct mctp_ioc_tag_ctl {
+ mctp_eid_t peer_addr;
+
+ /* For SIOCMCTPALLOCTAG: must be passed as zero, kernel will
+ * populate with the allocated tag value. Returned tag value will
+ * always have TO and PREALLOC set.
+ *
+ * For SIOCMCTPDROPTAG: userspace provides tag value to drop, from
+ * a prior SIOCMCTPALLOCTAG call (and so must have TO and PREALLOC set).
+ */
+ __u8 tag;
+ __u16 flags;
+};
+
#endif /* __UAPI_MCTP_H */
diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 66048cc5d7b3..1bbea8f0681e 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -93,6 +93,7 @@ enum net_dm_attr {
NET_DM_ATTR_SW_DROPS, /* flag */
NET_DM_ATTR_HW_DROPS, /* flag */
NET_DM_ATTR_FLOW_ACTION_COOKIE, /* binary */
+ NET_DM_ATTR_REASON, /* string */
__NET_DM_ATTR_MAX,
NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h
index aed90c4df0c8..ef7c97f21a15 100644
--- a/include/uapi/linux/netfilter/nfnetlink_queue.h
+++ b/include/uapi/linux/netfilter/nfnetlink_queue.h
@@ -61,6 +61,7 @@ enum nfqnl_attr_type {
NFQA_SECCTX, /* security context string */
NFQA_VLAN, /* nested attribute: packet vlan info */
NFQA_L2HDR, /* full L2 header */
+ NFQA_PRIORITY, /* skb->priority */
__NFQA_MAX
};
diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
index eb0a9a5b6e71..51d6bb2f6765 100644
--- a/include/uapi/linux/socket.h
+++ b/include/uapi/linux/socket.h
@@ -31,4 +31,8 @@ struct __kernel_sockaddr_storage {
#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
+#define SOCK_TXREHASH_DEFAULT ((u8)-1)
+#define SOCK_TXREHASH_DISABLED 0
+#define SOCK_TXREHASH_ENABLED 1
+
#endif /* _UAPI_LINUX_SOCKET_H */
diff --git a/init/Kconfig b/init/Kconfig
index e9119bf54b1f..7328d4f25370 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -86,6 +86,10 @@ config CC_HAS_ASM_INLINE
config CC_HAS_NO_PROFILE_FN_ATTR
def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror)
+config PAHOLE_VERSION
+ int
+ default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE))
+
config CONSTRUCTORS
bool
diff --git a/init/main.c b/init/main.c
index 65fa2e41a9c0..ada50f5a15e4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -99,6 +99,7 @@
#include <linux/kcsan.h>
#include <linux/init_syscalls.h>
#include <linux/stackdepot.h>
+#include <net/net_namespace.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -1116,6 +1117,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
key_init();
security_init();
dbg_late_init();
+ net_ns_init();
vfs_caches_init();
pagecache_init();
signals_init();
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c7a5be3bf8be..7f145aefbff8 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -837,13 +837,12 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
static void *prog_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file, int fd)
{
- struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_prog *prog = bpf_prog_get(fd);
if (IS_ERR(prog))
return prog;
- if (!bpf_prog_array_compatible(array, prog)) {
+ if (!bpf_prog_map_compatible(map, prog)) {
bpf_prog_put(prog);
return ERR_PTR(-EINVAL);
}
@@ -1071,7 +1070,6 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
INIT_WORK(&aux->work, prog_array_map_clear_deferred);
INIT_LIST_HEAD(&aux->poke_progs);
mutex_init(&aux->poke_mutex);
- spin_lock_init(&aux->owner.lock);
map = array_map_alloc(attr);
if (IS_ERR(map)) {
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index b7aef5b3416d..110029ede71e 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -5,6 +5,7 @@
#include <linux/anon_inodes.h>
#include <linux/filter.h>
#include <linux/bpf.h>
+#include <linux/rcupdate_trace.h>
struct bpf_iter_target_info {
struct list_head list;
@@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
{
int ret;
- rcu_read_lock();
- migrate_disable();
- ret = bpf_prog_run(prog, ctx);
- migrate_enable();
- rcu_read_unlock();
+ if (prog->aux->sleepable) {
+ rcu_read_lock_trace();
+ migrate_disable();
+ might_fault();
+ ret = bpf_prog_run(prog, ctx);
+ migrate_enable();
+ rcu_read_unlock_trace();
+ } else {
+ rcu_read_lock();
+ migrate_disable();
+ ret = bpf_prog_run(prog, ctx);
+ migrate_enable();
+ rcu_read_unlock();
+ }
/* bpf program can only return 0 or 1:
* 0 : okay
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index e16dafeb2450..11740b300de9 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -198,6 +198,21 @@
DEFINE_IDR(btf_idr);
DEFINE_SPINLOCK(btf_idr_lock);
+enum btf_kfunc_hook {
+ BTF_KFUNC_HOOK_XDP,
+ BTF_KFUNC_HOOK_TC,
+ BTF_KFUNC_HOOK_STRUCT_OPS,
+ BTF_KFUNC_HOOK_MAX,
+};
+
+enum {
+ BTF_KFUNC_SET_MAX_CNT = 32,
+};
+
+struct btf_kfunc_set_tab {
+ struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX];
+};
+
struct btf {
void *data;
struct btf_type **types;
@@ -212,6 +227,7 @@ struct btf {
refcount_t refcnt;
u32 id;
struct rcu_head rcu;
+ struct btf_kfunc_set_tab *kfunc_set_tab;
/* split BTF support */
struct btf *base_btf;
@@ -403,6 +419,9 @@ static struct btf_type btf_void;
static int btf_resolve(struct btf_verifier_env *env,
const struct btf_type *t, u32 type_id);
+static int btf_func_check(struct btf_verifier_env *env,
+ const struct btf_type *t);
+
static bool btf_type_is_modifier(const struct btf_type *t)
{
/* Some of them is not strictly a C modifier
@@ -579,6 +598,7 @@ static bool btf_type_needs_resolve(const struct btf_type *t)
btf_type_is_struct(t) ||
btf_type_is_array(t) ||
btf_type_is_var(t) ||
+ btf_type_is_func(t) ||
btf_type_is_decl_tag(t) ||
btf_type_is_datasec(t);
}
@@ -1531,8 +1551,30 @@ static void btf_free_id(struct btf *btf)
spin_unlock_irqrestore(&btf_idr_lock, flags);
}
+static void btf_free_kfunc_set_tab(struct btf *btf)
+{
+ struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab;
+ int hook, type;
+
+ if (!tab)
+ return;
+ /* For module BTF, we directly assign the sets being registered, so
+ * there is nothing to free except kfunc_set_tab.
+ */
+ if (btf_is_module(btf))
+ goto free_tab;
+ for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) {
+ for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++)
+ kfree(tab->sets[hook][type]);
+ }
+free_tab:
+ kfree(tab);
+ btf->kfunc_set_tab = NULL;
+}
+
static void btf_free(struct btf *btf)
{
+ btf_free_kfunc_set_tab(btf);
kvfree(btf->types);
kvfree(btf->resolved_sizes);
kvfree(btf->resolved_ids);
@@ -3533,9 +3575,24 @@ static s32 btf_func_check_meta(struct btf_verifier_env *env,
return 0;
}
+static int btf_func_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_type *t = v->t;
+ u32 next_type_id = t->type;
+ int err;
+
+ err = btf_func_check(env, t);
+ if (err)
+ return err;
+
+ env_stack_pop_resolved(env, next_type_id, 0);
+ return 0;
+}
+
static struct btf_kind_operations func_ops = {
.check_meta = btf_func_check_meta,
- .resolve = btf_df_resolve,
+ .resolve = btf_func_resolve,
.check_member = btf_df_check_member,
.check_kflag_member = btf_df_check_kflag_member,
.log_details = btf_ref_type_log,
@@ -4156,7 +4213,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env,
return !btf_resolved_type_id(btf, type_id) &&
!btf_resolved_type_size(btf, type_id);
- if (btf_type_is_decl_tag(t))
+ if (btf_type_is_decl_tag(t) || btf_type_is_func(t))
return btf_resolved_type_id(btf, type_id) &&
!btf_resolved_type_size(btf, type_id);
@@ -4246,12 +4303,6 @@ static int btf_check_all_types(struct btf_verifier_env *env)
if (err)
return err;
}
-
- if (btf_type_is_func(t)) {
- err = btf_func_check(env, t);
- if (err)
- return err;
- }
}
return 0;
@@ -4848,6 +4899,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const char *tname = prog->aux->attach_func_name;
struct bpf_verifier_log *log = info->log;
const struct btf_param *args;
+ const char *tag_value;
u32 nr_args, arg;
int i, ret;
@@ -5000,6 +5052,13 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->btf = btf;
info->btf_id = t->type;
t = btf_type_by_id(btf, t->type);
+
+ if (btf_type_is_type_tag(t)) {
+ tag_value = __btf_name_by_offset(btf, t->name_off);
+ if (strcmp(tag_value, "user") == 0)
+ info->reg_type |= MEM_USER;
+ }
+
/* skip modifiers */
while (btf_type_is_modifier(t)) {
info->btf_id = t->type;
@@ -5026,12 +5085,12 @@ enum bpf_struct_walk_result {
static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, int off, int size,
- u32 *next_btf_id)
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
u32 i, moff, mtrue_end, msize = 0, total_nelems = 0;
const struct btf_type *mtype, *elem_type = NULL;
const struct btf_member *member;
- const char *tname, *mname;
+ const char *tname, *mname, *tag_value;
u32 vlen, elem_id, mid;
again:
@@ -5215,7 +5274,8 @@ error:
}
if (btf_type_is_ptr(mtype)) {
- const struct btf_type *stype;
+ const struct btf_type *stype, *t;
+ enum bpf_type_flag tmp_flag = 0;
u32 id;
if (msize != size || off != moff) {
@@ -5224,9 +5284,19 @@ error:
mname, moff, tname, off, size);
return -EACCES;
}
+
+ /* check __user tag */
+ t = btf_type_by_id(btf, mtype->type);
+ if (btf_type_is_type_tag(t)) {
+ tag_value = __btf_name_by_offset(btf, t->name_off);
+ if (strcmp(tag_value, "user") == 0)
+ tmp_flag = MEM_USER;
+ }
+
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
if (btf_type_is_struct(stype)) {
*next_btf_id = id;
+ *flag = tmp_flag;
return WALK_PTR;
}
}
@@ -5253,13 +5323,14 @@ error:
int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, int off, int size,
enum bpf_access_type atype __maybe_unused,
- u32 *next_btf_id)
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
+ enum bpf_type_flag tmp_flag = 0;
int err;
u32 id;
do {
- err = btf_struct_walk(log, btf, t, off, size, &id);
+ err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag);
switch (err) {
case WALK_PTR:
@@ -5267,6 +5338,7 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
* we're done.
*/
*next_btf_id = id;
+ *flag = tmp_flag;
return PTR_TO_BTF_ID;
case WALK_SCALAR:
return SCALAR_VALUE;
@@ -5311,6 +5383,7 @@ bool btf_struct_ids_match(struct bpf_verifier_log *log,
const struct btf *need_btf, u32 need_type_id)
{
const struct btf_type *type;
+ enum bpf_type_flag flag;
int err;
/* Are we already done? */
@@ -5321,7 +5394,7 @@ again:
type = btf_type_by_id(btf, id);
if (!type)
return false;
- err = btf_struct_walk(log, btf, type, off, 1, &id);
+ err = btf_struct_walk(log, btf, type, off, 1, &id, &flag);
if (err != WALK_STRUCT)
return false;
@@ -5616,17 +5689,45 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
return true;
}
+static bool is_kfunc_arg_mem_size(const struct btf *btf,
+ const struct btf_param *arg,
+ const struct bpf_reg_state *reg)
+{
+ int len, sfx_len = sizeof("__sz") - 1;
+ const struct btf_type *t;
+ const char *param_name;
+
+ t = btf_type_skip_modifiers(btf, arg->type, NULL);
+ if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
+ return false;
+
+ /* In the future, this can be ported to use BTF tagging */
+ param_name = btf_name_by_offset(btf, arg->name_off);
+ if (str_is_empty(param_name))
+ return false;
+ len = strlen(param_name);
+ if (len < sfx_len)
+ return false;
+ param_name += len - sfx_len;
+ if (strncmp(param_name, "__sz", sfx_len))
+ return false;
+
+ return true;
+}
+
static int btf_check_func_arg_match(struct bpf_verifier_env *env,
const struct btf *btf, u32 func_id,
struct bpf_reg_state *regs,
bool ptr_to_mem_ok)
{
struct bpf_verifier_log *log = &env->log;
+ u32 i, nargs, ref_id, ref_obj_id = 0;
bool is_kfunc = btf_is_kernel(btf);
const char *func_name, *ref_tname;
const struct btf_type *t, *ref_t;
const struct btf_param *args;
- u32 i, nargs, ref_id;
+ int ref_regno = 0;
+ bool rel = false;
t = btf_type_by_id(btf, func_id);
if (!t || !btf_type_is_func(t)) {
@@ -5704,6 +5805,16 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
if (reg->type == PTR_TO_BTF_ID) {
reg_btf = reg->btf;
reg_ref_id = reg->btf_id;
+ /* Ensure only one argument is referenced PTR_TO_BTF_ID */
+ if (reg->ref_obj_id) {
+ if (ref_obj_id) {
+ bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+ regno, reg->ref_obj_id, ref_obj_id);
+ return -EFAULT;
+ }
+ ref_regno = regno;
+ ref_obj_id = reg->ref_obj_id;
+ }
} else {
reg_btf = btf_vmlinux;
reg_ref_id = *reg2btf_ids[reg->type];
@@ -5727,17 +5838,33 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
u32 type_size;
if (is_kfunc) {
+ bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
+
/* Permit pointer to mem, but only when argument
* type is pointer to scalar, or struct composed
* (recursively) of scalars.
+ * When arg_mem_size is true, the pointer can be
+ * void *.
*/
if (!btf_type_is_scalar(ref_t) &&
- !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) {
+ !__btf_type_is_scalar_struct(log, btf, ref_t, 0) &&
+ (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
bpf_log(log,
- "arg#%d pointer type %s %s must point to scalar or struct with scalar\n",
- i, btf_type_str(ref_t), ref_tname);
+ "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
+ i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
return -EINVAL;
}
+
+ /* Check for mem, len pair */
+ if (arg_mem_size) {
+ if (check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1)) {
+ bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n",
+ i, i + 1);
+ return -EINVAL;
+ }
+ i++;
+ continue;
+ }
}
resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
@@ -5758,7 +5885,23 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
}
}
- return 0;
+ /* Either both are set, or neither */
+ WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno));
+ if (is_kfunc) {
+ rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_RELEASE, func_id);
+ /* We already made sure ref_obj_id is set only for one argument */
+ if (rel && !ref_obj_id) {
+ bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
+ func_name);
+ return -EINVAL;
+ }
+ /* Allow (!rel && ref_obj_id), so that passing such referenced PTR_TO_BTF_ID to
+ * other kfuncs works
+ */
+ }
+ /* returns argument register number > 0 in case of reference release kfunc */
+ return rel ? ref_regno : 0;
}
/* Compare BTF of a function with given bpf_reg_state.
@@ -6200,12 +6343,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
}
+enum {
+ BTF_MODULE_F_LIVE = (1 << 0),
+};
+
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
struct btf_module {
struct list_head list;
struct module *module;
struct btf *btf;
struct bin_attribute *sysfs_attr;
+ int flags;
};
static LIST_HEAD(btf_modules);
@@ -6233,7 +6381,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
int err = 0;
if (mod->btf_data_size == 0 ||
- (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+ (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE &&
+ op != MODULE_STATE_GOING))
goto out;
switch (op) {
@@ -6292,6 +6441,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
}
break;
+ case MODULE_STATE_LIVE:
+ mutex_lock(&btf_module_mutex);
+ list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+ if (btf_mod->module != module)
+ continue;
+
+ btf_mod->flags |= BTF_MODULE_F_LIVE;
+ break;
+ }
+ mutex_unlock(&btf_module_mutex);
+ break;
case MODULE_STATE_GOING:
mutex_lock(&btf_module_mutex);
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
@@ -6338,7 +6498,12 @@ struct module *btf_try_get_module(const struct btf *btf)
if (btf_mod->btf != btf)
continue;
- if (try_module_get(btf_mod->module))
+ /* We must only consider module whose __init routine has
+ * finished, hence we must check for BTF_MODULE_F_LIVE flag,
+ * which is set from the notifier callback for
+ * MODULE_STATE_LIVE.
+ */
+ if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module))
res = btf_mod->module;
break;
@@ -6349,6 +6514,36 @@ struct module *btf_try_get_module(const struct btf *btf)
return res;
}
+/* Returns struct btf corresponding to the struct module
+ *
+ * This function can return NULL or ERR_PTR. Note that caller must
+ * release reference for struct btf iff btf_is_module is true.
+ */
+static struct btf *btf_get_module_btf(const struct module *module)
+{
+ struct btf *btf = NULL;
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+ struct btf_module *btf_mod, *tmp;
+#endif
+
+ if (!module)
+ return bpf_get_btf_vmlinux();
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+ mutex_lock(&btf_module_mutex);
+ list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+ if (btf_mod->module != module)
+ continue;
+
+ btf_get(btf_mod->btf);
+ btf = btf_mod->btf;
+ break;
+ }
+ mutex_unlock(&btf_module_mutex);
+#endif
+
+ return btf;
+}
+
BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
{
struct btf *btf;
@@ -6416,58 +6611,300 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
BTF_TRACING_TYPE_xxx
#undef BTF_TRACING_TYPE
-/* BTF ID set registration API for modules */
+/* Kernel Function (kfunc) BTF ID set registration API */
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+ enum btf_kfunc_type type,
+ struct btf_id_set *add_set, bool vmlinux_set)
+{
+ struct btf_kfunc_set_tab *tab;
+ struct btf_id_set *set;
+ u32 set_cnt;
+ int ret;
+
+ if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ if (!add_set->cnt)
+ return 0;
+
+ tab = btf->kfunc_set_tab;
+ if (!tab) {
+ tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
+ if (!tab)
+ return -ENOMEM;
+ btf->kfunc_set_tab = tab;
+ }
+
+ set = tab->sets[hook][type];
+ /* Warn when register_btf_kfunc_id_set is called twice for the same hook
+ * for module sets.
+ */
+ if (WARN_ON_ONCE(set && !vmlinux_set)) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ /* We don't need to allocate, concatenate, and sort module sets, because
+ * only one is allowed per hook. Hence, we can directly assign the
+ * pointer and return.
+ */
+ if (!vmlinux_set) {
+ tab->sets[hook][type] = add_set;
+ return 0;
+ }
+
+ /* In case of vmlinux sets, there may be more than one set being
+ * registered per hook. To create a unified set, we allocate a new set
+ * and concatenate all individual sets being registered. While each set
+ * is individually sorted, they may become unsorted when concatenated,
+ * hence re-sorting the final set again is required to make binary
+ * searching the set using btf_id_set_contains function work.
+ */
+ set_cnt = set ? set->cnt : 0;
+
+ if (set_cnt > U32_MAX - add_set->cnt) {
+ ret = -EOVERFLOW;
+ goto end;
+ }
+
+ if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
+ ret = -E2BIG;
+ goto end;
+ }
+
+ /* Grow set */
+ set = krealloc(tab->sets[hook][type],
+ offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!set) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ /* For newly allocated set, initialize set->cnt to 0 */
+ if (!tab->sets[hook][type])
+ set->cnt = 0;
+ tab->sets[hook][type] = set;
+
+ /* Concatenate the two sets */
+ memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0]));
+ set->cnt += add_set->cnt;
+
+ sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL);
+
+ return 0;
+end:
+ btf_free_kfunc_set_tab(btf);
+ return ret;
+}
+
+static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+ const struct btf_kfunc_id_set *kset)
+{
+ bool vmlinux_set = !btf_is_module(btf);
+ int type, ret;
+
+ for (type = 0; type < ARRAY_SIZE(kset->sets); type++) {
+ if (!kset->sets[type])
+ continue;
+
+ ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+static bool __btf_kfunc_id_set_contains(const struct btf *btf,
+ enum btf_kfunc_hook hook,
+ enum btf_kfunc_type type,
+ u32 kfunc_btf_id)
+{
+ struct btf_id_set *set;
+
+ if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX)
+ return false;
+ if (!btf->kfunc_set_tab)
+ return false;
+ set = btf->kfunc_set_tab->sets[hook][type];
+ if (!set)
+ return false;
+ return btf_id_set_contains(set, kfunc_btf_id);
+}
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s)
+static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
{
- mutex_lock(&l->mutex);
- list_add(&s->list, &l->list);
- mutex_unlock(&l->mutex);
+ switch (prog_type) {
+ case BPF_PROG_TYPE_XDP:
+ return BTF_KFUNC_HOOK_XDP;
+ case BPF_PROG_TYPE_SCHED_CLS:
+ return BTF_KFUNC_HOOK_TC;
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ return BTF_KFUNC_HOOK_STRUCT_OPS;
+ default:
+ return BTF_KFUNC_HOOK_MAX;
+ }
}
-EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set);
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s)
+/* Caution:
+ * Reference to the module (obtained using btf_try_get_module) corresponding to
+ * the struct btf *MUST* be held when calling this function from verifier
+ * context. This is usually true as we stash references in prog's kfunc_btf_tab;
+ * keeping the reference for the duration of the call provides the necessary
+ * protection for looking up a well-formed btf->kfunc_set_tab.
+ */
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+ enum bpf_prog_type prog_type,
+ enum btf_kfunc_type type, u32 kfunc_btf_id)
{
- mutex_lock(&l->mutex);
- list_del_init(&s->list);
- mutex_unlock(&l->mutex);
+ enum btf_kfunc_hook hook;
+
+ hook = bpf_prog_type_to_kfunc_hook(prog_type);
+ return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id);
}
-EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set);
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
- struct module *owner)
+/* This function must be invoked only from initcalls/module init functions */
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+ const struct btf_kfunc_id_set *kset)
{
- struct kfunc_btf_id_set *s;
+ enum btf_kfunc_hook hook;
+ struct btf *btf;
+ int ret;
- mutex_lock(&klist->mutex);
- list_for_each_entry(s, &klist->list, list) {
- if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) {
- mutex_unlock(&klist->mutex);
- return true;
+ btf = btf_get_module_btf(kset->owner);
+ if (!btf) {
+ if (!kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
+ pr_err("missing vmlinux BTF, cannot register kfuncs\n");
+ return -ENOENT;
}
+ if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) {
+ pr_err("missing module BTF, cannot register kfuncs\n");
+ return -ENOENT;
+ }
+ return 0;
}
- mutex_unlock(&klist->mutex);
- return false;
+ if (IS_ERR(btf))
+ return PTR_ERR(btf);
+
+ hook = bpf_prog_type_to_kfunc_hook(prog_type);
+ ret = btf_populate_kfunc_set(btf, hook, kset);
+ /* reference is only taken for module BTF */
+ if (btf_is_module(btf))
+ btf_put(btf);
+ return ret;
}
+EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
-#define DEFINE_KFUNC_BTF_ID_LIST(name) \
- struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \
- __MUTEX_INITIALIZER(name.mutex) }; \
- EXPORT_SYMBOL_GPL(name)
+#define MAX_TYPES_ARE_COMPAT_DEPTH 2
-DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
-DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
+static
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id,
+ int level)
+{
+ const struct btf_type *local_type, *targ_type;
+ int depth = 32; /* max recursion depth */
-#endif
+ /* caller made sure that names match (ignoring flavor suffix) */
+ local_type = btf_type_by_id(local_btf, local_id);
+ targ_type = btf_type_by_id(targ_btf, targ_id);
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+recur:
+ depth--;
+ if (depth < 0)
+ return -EINVAL;
+
+ local_type = btf_type_skip_modifiers(local_btf, local_id, &local_id);
+ targ_type = btf_type_skip_modifiers(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ return 1;
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible between each other
+ */
+ return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+ case BTF_KIND_PTR:
+ local_id = local_type->type;
+ targ_id = targ_type->type;
+ goto recur;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *local_p = btf_params(local_type);
+ struct btf_param *targ_p = btf_params(targ_type);
+ __u16 local_vlen = btf_vlen(local_type);
+ __u16 targ_vlen = btf_vlen(targ_type);
+ int i, err;
+
+ if (local_vlen != targ_vlen)
+ return 0;
+
+ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+ if (level <= 0)
+ return -EINVAL;
+
+ btf_type_skip_modifiers(local_btf, local_p->type, &local_id);
+ btf_type_skip_modifiers(targ_btf, targ_p->type, &targ_id);
+ err = __bpf_core_types_are_compat(local_btf, local_id,
+ targ_btf, targ_id,
+ level - 1);
+ if (err <= 0)
+ return err;
+ }
+
+ /* tail recurse for return type check */
+ btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
+ btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
+ goto recur;
+ }
+ default:
+ return 0;
+ }
+}
+
+/* Check local and target types for compatibility. This check is used for
+ * type-based CO-RE relocations and follow slightly different rules than
+ * field-based relocations. This function assumes that root types were already
+ * checked for name match. Beyond that initial root-level name check, names
+ * are completely ignored. Compatibility rules are as follows:
+ * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
+ * kind should match for local and target types (i.e., STRUCT is not
+ * compatible with UNION);
+ * - for ENUMs, the size is ignored;
+ * - for INT, size and signedness are ignored;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - CONST/VOLATILE/RESTRICT modifiers are ignored;
+ * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
+ * - FUNC_PROTOs are compatible if they have compatible signature: same
+ * number of input args and compatible return and argument types.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
const struct btf *targ_btf, __u32 targ_id)
{
- return -EOPNOTSUPP;
+ return __bpf_core_types_are_compat(local_btf, local_id,
+ targ_btf, targ_id,
+ MAX_TYPES_ARE_COMPAT_DEPTH);
}
static bool bpf_core_is_flavor_sep(const char *s)
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 514b4681a90a..098632fdbc45 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1044,7 +1044,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
* NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr
* NET_XMIT_CN (2) - continue with packet output and notify TCP
* to call cwr
- * -EPERM - drop packet
+ * -err - drop packet
*
* For ingress packets, this function will return -EPERM if any
* attached program was found and if it returned != 1 during execution.
@@ -1079,8 +1079,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
} else {
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
- __bpf_prog_run_save_cb);
- ret = (ret == 1 ? 0 : -EPERM);
+ __bpf_prog_run_save_cb, 0);
+ if (ret && !IS_ERR_VALUE((long)ret))
+ ret = -EFAULT;
}
bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset);
@@ -1107,10 +1108,9 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
enum cgroup_bpf_attach_type atype)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- int ret;
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run);
- return ret == 1 ? 0 : -EPERM;
+ return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
+ bpf_prog_run, 0);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
@@ -1142,7 +1142,6 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
};
struct sockaddr_storage unspec;
struct cgroup *cgrp;
- int ret;
/* Check socket family since not all sockets represent network
* endpoint (e.g. AF_UNIX).
@@ -1156,10 +1155,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
}
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
- bpf_prog_run, flags);
-
- return ret == 1 ? 0 : -EPERM;
+ return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
+ bpf_prog_run, 0, flags);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
@@ -1184,11 +1181,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
enum cgroup_bpf_attach_type atype)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- int ret;
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
- bpf_prog_run);
- return ret == 1 ? 0 : -EPERM;
+ return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
+ bpf_prog_run, 0);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
@@ -1201,17 +1196,47 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
.major = major,
.minor = minor,
};
- int allow;
+ int ret;
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
- allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
- bpf_prog_run);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+ bpf_prog_run, 0);
rcu_read_unlock();
- return !allow;
+ return ret;
+}
+
+BPF_CALL_0(bpf_get_retval)
+{
+ struct bpf_cg_run_ctx *ctx =
+ container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+
+ return ctx->retval;
+}
+
+static const struct bpf_func_proto bpf_get_retval_proto = {
+ .func = bpf_get_retval,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
+
+BPF_CALL_1(bpf_set_retval, int, retval)
+{
+ struct bpf_cg_run_ctx *ctx =
+ container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+
+ ctx->retval = retval;
+ return 0;
}
+static const struct bpf_func_proto bpf_set_retval_proto = {
+ .func = bpf_set_retval,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1224,6 +1249,10 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_cgroup_id_proto;
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
+ case BPF_FUNC_get_retval:
+ return &bpf_get_retval_proto;
+ case BPF_FUNC_set_retval:
+ return &bpf_set_retval_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -1337,7 +1366,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+ bpf_prog_run, 0);
rcu_read_unlock();
kfree(ctx.cur_val);
@@ -1350,24 +1380,10 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
kfree(ctx.new_val);
}
- return ret == 1 ? 0 : -EPERM;
+ return ret;
}
#ifdef CONFIG_NET
-static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
- enum cgroup_bpf_attach_type attach_type)
-{
- struct bpf_prog_array *prog_array;
- bool empty;
-
- rcu_read_lock();
- prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]);
- empty = bpf_prog_array_is_empty(prog_array);
- rcu_read_unlock();
-
- return empty;
-}
-
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen,
struct bpf_sockopt_buf *buf)
{
@@ -1426,19 +1442,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
};
int ret, max_optlen;
- /* Opportunistic check to see whether we have any BPF program
- * attached to the hook so we don't waste time allocating
- * memory and locking the socket.
- */
- if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_SETSOCKOPT))
- return 0;
-
/* Allocate a bit more than the initial user buffer for
* BPF program. The canonical use case is overriding
* TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
*/
max_optlen = max_t(int, 16, *optlen);
-
max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
if (max_optlen < 0)
return max_optlen;
@@ -1452,13 +1460,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
lock_sock(sk);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
- &ctx, bpf_prog_run);
+ &ctx, bpf_prog_run, 0);
release_sock(sk);
- if (!ret) {
- ret = -EPERM;
+ if (ret)
goto out;
- }
if (ctx.optlen == -1) {
/* optlen set to -1, bypass kernel */
@@ -1518,19 +1524,11 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
.sk = sk,
.level = level,
.optname = optname,
- .retval = retval,
+ .current_task = current,
};
int ret;
- /* Opportunistic check to see whether we have any BPF program
- * attached to the hook so we don't waste time allocating
- * memory and locking the socket.
- */
- if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_GETSOCKOPT))
- return retval;
-
ctx.optlen = max_optlen;
-
max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
if (max_optlen < 0)
return max_optlen;
@@ -1562,27 +1560,17 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
lock_sock(sk);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
- &ctx, bpf_prog_run);
+ &ctx, bpf_prog_run, retval);
release_sock(sk);
- if (!ret) {
- ret = -EPERM;
+ if (ret < 0)
goto out;
- }
if (ctx.optlen > max_optlen || ctx.optlen < 0) {
ret = -EFAULT;
goto out;
}
- /* BPF programs only allowed to set retval to 0, not some
- * arbitrary value.
- */
- if (ctx.retval != 0 && ctx.retval != retval) {
- ret = -EFAULT;
- goto out;
- }
-
if (ctx.optlen != 0) {
if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
put_user(ctx.optlen, optlen)) {
@@ -1591,8 +1579,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
}
}
- ret = ctx.retval;
-
out:
sockopt_free_buf(&ctx, &buf);
return ret;
@@ -1607,10 +1593,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
.sk = sk,
.level = level,
.optname = optname,
- .retval = retval,
.optlen = *optlen,
.optval = optval,
.optval_end = optval + *optlen,
+ .current_task = current,
};
int ret;
@@ -1623,25 +1609,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
*/
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
- &ctx, bpf_prog_run);
- if (!ret)
- return -EPERM;
+ &ctx, bpf_prog_run, retval);
+ if (ret < 0)
+ return ret;
if (ctx.optlen > *optlen)
return -EFAULT;
- /* BPF programs only allowed to set retval to 0, not some
- * arbitrary value.
- */
- if (ctx.retval != 0 && ctx.retval != retval)
- return -EFAULT;
-
/* BPF programs can shrink the buffer, export the modifications.
*/
if (ctx.optlen != 0)
*optlen = ctx.optlen;
- return ctx.retval;
+ return ret;
}
#endif
@@ -2057,10 +2037,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
break;
case offsetof(struct bpf_sockopt, retval):
- if (type == BPF_WRITE)
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
- else
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
+ BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
+
+ if (type == BPF_WRITE) {
+ int treg = BPF_REG_9;
+
+ if (si->src_reg == treg || si->dst_reg == treg)
+ --treg;
+ if (si->src_reg == treg || si->dst_reg == treg)
+ --treg;
+ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
+ offsetof(struct bpf_sockopt_kern, tmp_reg));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+ treg, si->dst_reg,
+ offsetof(struct bpf_sockopt_kern, current_task));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+ treg, treg,
+ offsetof(struct task_struct, bpf_ctx));
+ *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+ treg, si->src_reg,
+ offsetof(struct bpf_cg_run_ctx, retval));
+ *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
+ offsetof(struct bpf_sockopt_kern, tmp_reg));
+ } else {
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sockopt_kern, current_task));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct task_struct, bpf_ctx));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct bpf_cg_run_ctx, retval));
+ }
break;
case offsetof(struct bpf_sockopt, optval):
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index de3e5bc6781f..42d96549a804 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -537,13 +537,10 @@ long bpf_jit_limit_max __read_mostly;
static void
bpf_prog_ksym_set_addr(struct bpf_prog *prog)
{
- const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
- unsigned long addr = (unsigned long)hdr;
-
WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
prog->aux->ksym.start = (unsigned long) prog->bpf_func;
- prog->aux->ksym.end = addr + hdr->pages * PAGE_SIZE;
+ prog->aux->ksym.end = prog->aux->ksym.start + prog->jited_len;
}
static void
@@ -808,6 +805,137 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
return slot;
}
+/*
+ * BPF program pack allocator.
+ *
+ * Most BPF programs are pretty small. Allocating a hole page for each
+ * program is sometime a waste. Many small bpf program also adds pressure
+ * to instruction TLB. To solve this issue, we introduce a BPF program pack
+ * allocator. The prog_pack allocator uses HPAGE_PMD_SIZE page (2MB on x86)
+ * to host BPF programs.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define BPF_PROG_PACK_SIZE HPAGE_PMD_SIZE
+#else
+#define BPF_PROG_PACK_SIZE PAGE_SIZE
+#endif
+#define BPF_PROG_CHUNK_SHIFT 6
+#define BPF_PROG_CHUNK_SIZE (1 << BPF_PROG_CHUNK_SHIFT)
+#define BPF_PROG_CHUNK_MASK (~(BPF_PROG_CHUNK_SIZE - 1))
+#define BPF_PROG_CHUNK_COUNT (BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE)
+
+struct bpf_prog_pack {
+ struct list_head list;
+ void *ptr;
+ unsigned long bitmap[BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)];
+};
+
+#define BPF_PROG_MAX_PACK_PROG_SIZE BPF_PROG_PACK_SIZE
+#define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE)
+
+static DEFINE_MUTEX(pack_mutex);
+static LIST_HEAD(pack_list);
+
+static struct bpf_prog_pack *alloc_new_pack(void)
+{
+ struct bpf_prog_pack *pack;
+
+ pack = kzalloc(sizeof(*pack), GFP_KERNEL);
+ if (!pack)
+ return NULL;
+ pack->ptr = module_alloc(BPF_PROG_PACK_SIZE);
+ if (!pack->ptr) {
+ kfree(pack);
+ return NULL;
+ }
+ bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE);
+ list_add_tail(&pack->list, &pack_list);
+
+ set_vm_flush_reset_perms(pack->ptr);
+ set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
+ set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
+ return pack;
+}
+
+static void *bpf_prog_pack_alloc(u32 size)
+{
+ unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size);
+ struct bpf_prog_pack *pack;
+ unsigned long pos;
+ void *ptr = NULL;
+
+ if (size > BPF_PROG_MAX_PACK_PROG_SIZE) {
+ size = round_up(size, PAGE_SIZE);
+ ptr = module_alloc(size);
+ if (ptr) {
+ set_vm_flush_reset_perms(ptr);
+ set_memory_ro((unsigned long)ptr, size / PAGE_SIZE);
+ set_memory_x((unsigned long)ptr, size / PAGE_SIZE);
+ }
+ return ptr;
+ }
+ mutex_lock(&pack_mutex);
+ list_for_each_entry(pack, &pack_list, list) {
+ pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
+ nbits, 0);
+ if (pos < BPF_PROG_CHUNK_COUNT)
+ goto found_free_area;
+ }
+
+ pack = alloc_new_pack();
+ if (!pack)
+ goto out;
+
+ pos = 0;
+
+found_free_area:
+ bitmap_set(pack->bitmap, pos, nbits);
+ ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT);
+
+out:
+ mutex_unlock(&pack_mutex);
+ return ptr;
+}
+
+static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
+{
+ struct bpf_prog_pack *pack = NULL, *tmp;
+ unsigned int nbits;
+ unsigned long pos;
+ void *pack_ptr;
+
+ if (hdr->size > BPF_PROG_MAX_PACK_PROG_SIZE) {
+ module_memfree(hdr);
+ return;
+ }
+
+ pack_ptr = (void *)((unsigned long)hdr & ~(BPF_PROG_PACK_SIZE - 1));
+ mutex_lock(&pack_mutex);
+
+ list_for_each_entry(tmp, &pack_list, list) {
+ if (tmp->ptr == pack_ptr) {
+ pack = tmp;
+ break;
+ }
+ }
+
+ if (WARN_ONCE(!pack, "bpf_prog_pack bug\n"))
+ goto out;
+
+ nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
+ pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT;
+
+ bitmap_clear(pack->bitmap, pos, nbits);
+ if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
+ BPF_PROG_CHUNK_COUNT, 0) == 0) {
+ list_del(&pack->list);
+ module_memfree(pack->ptr);
+ kfree(pack);
+ }
+out:
+ mutex_unlock(&pack_mutex);
+}
+
static atomic_long_t bpf_jit_current;
/* Can be overridden by an arch's JIT compiler if it has a custom,
@@ -833,12 +961,11 @@ static int __init bpf_jit_charge_init(void)
}
pure_initcall(bpf_jit_charge_init);
-int bpf_jit_charge_modmem(u32 pages)
+int bpf_jit_charge_modmem(u32 size)
{
- if (atomic_long_add_return(pages, &bpf_jit_current) >
- (bpf_jit_limit >> PAGE_SHIFT)) {
+ if (atomic_long_add_return(size, &bpf_jit_current) > bpf_jit_limit) {
if (!bpf_capable()) {
- atomic_long_sub(pages, &bpf_jit_current);
+ atomic_long_sub(size, &bpf_jit_current);
return -EPERM;
}
}
@@ -846,9 +973,9 @@ int bpf_jit_charge_modmem(u32 pages)
return 0;
}
-void bpf_jit_uncharge_modmem(u32 pages)
+void bpf_jit_uncharge_modmem(u32 size)
{
- atomic_long_sub(pages, &bpf_jit_current);
+ atomic_long_sub(size, &bpf_jit_current);
}
void *__weak bpf_jit_alloc_exec(unsigned long size)
@@ -867,7 +994,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
struct bpf_binary_header *hdr;
- u32 size, hole, start, pages;
+ u32 size, hole, start;
WARN_ON_ONCE(!is_power_of_2(alignment) ||
alignment > BPF_IMAGE_ALIGNMENT);
@@ -877,20 +1004,19 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
* random section of illegal instructions.
*/
size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
- pages = size / PAGE_SIZE;
- if (bpf_jit_charge_modmem(pages))
+ if (bpf_jit_charge_modmem(size))
return NULL;
hdr = bpf_jit_alloc_exec(size);
if (!hdr) {
- bpf_jit_uncharge_modmem(pages);
+ bpf_jit_uncharge_modmem(size);
return NULL;
}
/* Fill space with illegal/arch-dep instructions. */
bpf_fill_ill_insns(hdr, size);
- hdr->pages = pages;
+ hdr->size = size;
hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
PAGE_SIZE - sizeof(*hdr));
start = (get_random_int() % hole) & ~(alignment - 1);
@@ -903,10 +1029,113 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
void bpf_jit_binary_free(struct bpf_binary_header *hdr)
{
- u32 pages = hdr->pages;
+ u32 size = hdr->size;
bpf_jit_free_exec(hdr);
- bpf_jit_uncharge_modmem(pages);
+ bpf_jit_uncharge_modmem(size);
+}
+
+/* Allocate jit binary from bpf_prog_pack allocator.
+ * Since the allocated memory is RO+X, the JIT engine cannot write directly
+ * to the memory. To solve this problem, a RW buffer is also allocated at
+ * as the same time. The JIT engine should calculate offsets based on the
+ * RO memory address, but write JITed program to the RW buffer. Once the
+ * JIT engine finishes, it calls bpf_jit_binary_pack_finalize, which copies
+ * the JITed program to the RO memory.
+ */
+struct bpf_binary_header *
+bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
+ unsigned int alignment,
+ struct bpf_binary_header **rw_header,
+ u8 **rw_image,
+ bpf_jit_fill_hole_t bpf_fill_ill_insns)
+{
+ struct bpf_binary_header *ro_header;
+ u32 size, hole, start;
+
+ WARN_ON_ONCE(!is_power_of_2(alignment) ||
+ alignment > BPF_IMAGE_ALIGNMENT);
+
+ /* add 16 bytes for a random section of illegal instructions */
+ size = round_up(proglen + sizeof(*ro_header) + 16, BPF_PROG_CHUNK_SIZE);
+
+ if (bpf_jit_charge_modmem(size))
+ return NULL;
+ ro_header = bpf_prog_pack_alloc(size);
+ if (!ro_header) {
+ bpf_jit_uncharge_modmem(size);
+ return NULL;
+ }
+
+ *rw_header = kvmalloc(size, GFP_KERNEL);
+ if (!*rw_header) {
+ bpf_prog_pack_free(ro_header);
+ bpf_jit_uncharge_modmem(size);
+ return NULL;
+ }
+
+ /* Fill space with illegal/arch-dep instructions. */
+ bpf_fill_ill_insns(*rw_header, size);
+ (*rw_header)->size = size;
+
+ hole = min_t(unsigned int, size - (proglen + sizeof(*ro_header)),
+ BPF_PROG_CHUNK_SIZE - sizeof(*ro_header));
+ start = (get_random_int() % hole) & ~(alignment - 1);
+
+ *image_ptr = &ro_header->image[start];
+ *rw_image = &(*rw_header)->image[start];
+
+ return ro_header;
+}
+
+/* Copy JITed text from rw_header to its final location, the ro_header. */
+int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
+ struct bpf_binary_header *ro_header,
+ struct bpf_binary_header *rw_header)
+{
+ void *ptr;
+
+ ptr = bpf_arch_text_copy(ro_header, rw_header, rw_header->size);
+
+ kvfree(rw_header);
+
+ if (IS_ERR(ptr)) {
+ bpf_prog_pack_free(ro_header);
+ return PTR_ERR(ptr);
+ }
+ prog->aux->use_bpf_prog_pack = true;
+ return 0;
+}
+
+/* bpf_jit_binary_pack_free is called in two different scenarios:
+ * 1) when the program is freed after;
+ * 2) when the JIT engine fails (before bpf_jit_binary_pack_finalize).
+ * For case 2), we need to free both the RO memory and the RW buffer.
+ * Also, ro_header->size in 2) is not properly set yet, so rw_header->size
+ * is used for uncharge.
+ */
+void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
+ struct bpf_binary_header *rw_header)
+{
+ u32 size = rw_header ? rw_header->size : ro_header->size;
+
+ bpf_prog_pack_free(ro_header);
+ kvfree(rw_header);
+ bpf_jit_uncharge_modmem(size);
+}
+
+static inline struct bpf_binary_header *
+bpf_jit_binary_hdr(const struct bpf_prog *fp)
+{
+ unsigned long real_start = (unsigned long)fp->bpf_func;
+ unsigned long addr;
+
+ if (fp->aux->use_bpf_prog_pack)
+ addr = real_start & BPF_PROG_CHUNK_MASK;
+ else
+ addr = real_start & PAGE_MASK;
+
+ return (void *)addr;
}
/* This symbol is only overridden by archs that have different
@@ -918,7 +1147,10 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
if (fp->jited) {
struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
- bpf_jit_binary_free(hdr);
+ if (fp->aux->use_bpf_prog_pack)
+ bpf_jit_binary_pack_free(hdr, NULL /* rw_buffer */);
+ else
+ bpf_jit_binary_free(hdr);
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
}
@@ -1829,28 +2061,30 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
}
#endif
-bool bpf_prog_array_compatible(struct bpf_array *array,
- const struct bpf_prog *fp)
+bool bpf_prog_map_compatible(struct bpf_map *map,
+ const struct bpf_prog *fp)
{
bool ret;
if (fp->kprobe_override)
return false;
- spin_lock(&array->aux->owner.lock);
-
- if (!array->aux->owner.type) {
+ spin_lock(&map->owner.lock);
+ if (!map->owner.type) {
/* There's no owner yet where we could check for
* compatibility.
*/
- array->aux->owner.type = fp->type;
- array->aux->owner.jited = fp->jited;
+ map->owner.type = fp->type;
+ map->owner.jited = fp->jited;
+ map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
ret = true;
} else {
- ret = array->aux->owner.type == fp->type &&
- array->aux->owner.jited == fp->jited;
+ ret = map->owner.type == fp->type &&
+ map->owner.jited == fp->jited &&
+ map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
}
- spin_unlock(&array->aux->owner.lock);
+ spin_unlock(&map->owner.lock);
+
return ret;
}
@@ -1862,13 +2096,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
mutex_lock(&aux->used_maps_mutex);
for (i = 0; i < aux->used_map_cnt; i++) {
struct bpf_map *map = aux->used_maps[i];
- struct bpf_array *array;
- if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+ if (!map_type_contains_progs(map))
continue;
- array = container_of(map, struct bpf_array, map);
- if (!bpf_prog_array_compatible(array, fp)) {
+ if (!bpf_prog_map_compatible(map, fp)) {
ret = -EINVAL;
goto out;
}
@@ -1968,18 +2200,10 @@ static struct bpf_prog_dummy {
},
};
-/* to avoid allocating empty bpf_prog_array for cgroups that
- * don't have bpf program attached use one global 'empty_prog_array'
- * It will not be modified the caller of bpf_prog_array_alloc()
- * (since caller requested prog_cnt == 0)
- * that pointer should be 'freed' by bpf_prog_array_free()
- */
-static struct {
- struct bpf_prog_array hdr;
- struct bpf_prog *null_prog;
-} empty_prog_array = {
+struct bpf_empty_prog_array bpf_empty_prog_array = {
.null_prog = NULL,
};
+EXPORT_SYMBOL(bpf_empty_prog_array);
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
{
@@ -1989,12 +2213,12 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
(prog_cnt + 1),
flags);
- return &empty_prog_array.hdr;
+ return &bpf_empty_prog_array.hdr;
}
void bpf_prog_array_free(struct bpf_prog_array *progs)
{
- if (!progs || progs == &empty_prog_array.hdr)
+ if (!progs || progs == &bpf_empty_prog_array.hdr)
return;
kfree_rcu(progs, rcu);
}
@@ -2453,6 +2677,11 @@ int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
return -ENOTSUPP;
}
+void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+ return ERR_PTR(-ENOTSUPP);
+}
+
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index b3e6b9422238..650e5d21f90d 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -397,7 +397,8 @@ static int cpu_map_kthread_run(void *data)
return 0;
}
-static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
+static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu,
+ struct bpf_map *map, int fd)
{
struct bpf_prog *prog;
@@ -405,7 +406,8 @@ static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
+ if (prog->expected_attach_type != BPF_XDP_CPUMAP ||
+ !bpf_prog_map_compatible(map, prog)) {
bpf_prog_put(prog);
return -EINVAL;
}
@@ -457,7 +459,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
rcpu->map_id = map->id;
rcpu->value.qsize = value->qsize;
- if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
+ if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd))
goto free_ptr_ring;
/* Setup kthread */
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index fe019dbdb3f0..038f6d7a83e4 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -858,7 +858,8 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
BPF_PROG_TYPE_XDP, false);
if (IS_ERR(prog))
goto err_put_dev;
- if (prog->expected_attach_type != BPF_XDP_DEVMAP)
+ if (prog->expected_attach_type != BPF_XDP_DEVMAP ||
+ !bpf_prog_map_compatible(&dtab->map, prog))
goto err_put_prog;
}
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 01cfdf40c838..4e5969fde0b3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
#include <linux/pid_namespace.h>
#include <linux/proc_ns.h>
#include <linux/security.h>
+#include <linux/btf_ids.h>
#include "../../lib/kstrtox.h"
@@ -671,6 +672,39 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
+ const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
+{
+ int ret;
+
+ /* flags is not used yet */
+ if (unlikely(flags))
+ return -EINVAL;
+
+ if (unlikely(!size))
+ return 0;
+
+ ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
+ if (ret == size)
+ return 0;
+
+ memset(dst, 0, size);
+ /* Return -EFAULT for partial read */
+ return ret < 0 ? ret : -EFAULT;
+}
+
+const struct bpf_func_proto bpf_copy_from_user_task_proto = {
+ .func = bpf_copy_from_user_task,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_BTF_ID,
+ .arg4_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+ .arg5_type = ARG_ANYTHING
+};
+
BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
{
if (cpu >= nr_cpu_ids)
diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile
index 1400ac58178e..baf47d9c7557 100644
--- a/kernel/bpf/preload/Makefile
+++ b/kernel/bpf/preload/Makefile
@@ -1,40 +1,16 @@
# SPDX-License-Identifier: GPL-2.0
LIBBPF_SRCS = $(srctree)/tools/lib/bpf/
-LIBBPF_OUT = $(abspath $(obj))/libbpf
-LIBBPF_A = $(LIBBPF_OUT)/libbpf.a
-LIBBPF_DESTDIR = $(LIBBPF_OUT)
-LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
-
-# Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test
-# in tools/scripts/Makefile.include always succeeds when building the kernel
-# with $(O) pointing to a relative path, as in "make O=build bindeb-pkg".
-$(LIBBPF_A): | $(LIBBPF_OUT)
- $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ \
- DESTDIR=$(LIBBPF_DESTDIR) prefix= \
- $(LIBBPF_OUT)/libbpf.a install_headers
-
-libbpf_hdrs: $(LIBBPF_A)
-
-.PHONY: libbpf_hdrs
-
-$(LIBBPF_OUT):
- $(call msg,MKDIR,$@)
- $(Q)mkdir -p $@
+LIBBPF_INCLUDE = $(LIBBPF_SRCS)/..
userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \
-I $(LIBBPF_INCLUDE) -Wno-unused-result
userprogs := bpf_preload_umd
-clean-files := libbpf/
-
-$(obj)/iterators/iterators.o: | libbpf_hdrs
-
bpf_preload_umd-objs := iterators/iterators.o
-bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz
-$(obj)/bpf_preload_umd: $(LIBBPF_A)
+$(obj)/bpf_preload_umd:
$(obj)/bpf_preload_umd_blob.o: $(obj)/bpf_preload_umd
diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile
index b8bd60511227..bfe24f8c5a20 100644
--- a/kernel/bpf/preload/iterators/Makefile
+++ b/kernel/bpf/preload/iterators/Makefile
@@ -35,15 +35,15 @@ endif
.PHONY: all clean
-all: iterators.skel.h
+all: iterators.lskel.h
clean:
$(call msg,CLEAN)
$(Q)rm -rf $(OUTPUT) iterators
-iterators.skel.h: $(OUTPUT)/iterators.bpf.o | $(BPFTOOL)
+iterators.lskel.h: $(OUTPUT)/iterators.bpf.o | $(BPFTOOL)
$(call msg,GEN-SKEL,$@)
- $(Q)$(BPFTOOL) gen skeleton $< > $@
+ $(Q)$(BPFTOOL) gen skeleton -L $< > $@
$(OUTPUT)/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT)
diff --git a/kernel/bpf/preload/iterators/iterators.c b/kernel/bpf/preload/iterators/iterators.c
index 5d872a705470..4dafe0f4f2b2 100644
--- a/kernel/bpf/preload/iterators/iterators.c
+++ b/kernel/bpf/preload/iterators/iterators.c
@@ -10,20 +10,36 @@
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include <sys/mount.h>
-#include "iterators.skel.h"
+#include "iterators.lskel.h"
#include "bpf_preload_common.h"
int to_kernel = -1;
int from_kernel = 0;
-static int send_link_to_kernel(struct bpf_link *link, const char *link_name)
+static int __bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
+{
+ union bpf_attr attr;
+ int err;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.info.bpf_fd = bpf_fd;
+ attr.info.info_len = *info_len;
+ attr.info.info = (long) info;
+
+ err = skel_sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+ if (!err)
+ *info_len = attr.info.info_len;
+ return err;
+}
+
+static int send_link_to_kernel(int link_fd, const char *link_name)
{
struct bpf_preload_info obj = {};
struct bpf_link_info info = {};
__u32 info_len = sizeof(info);
int err;
- err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len);
+ err = __bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
if (err)
return err;
obj.link_id = info.id;
@@ -37,7 +53,6 @@ static int send_link_to_kernel(struct bpf_link *link, const char *link_name)
int main(int argc, char **argv)
{
- struct rlimit rlim = { RLIM_INFINITY, RLIM_INFINITY };
struct iterators_bpf *skel;
int err, magic;
int debug_fd;
@@ -55,7 +70,6 @@ int main(int argc, char **argv)
printf("bad start magic %d\n", magic);
return 1;
}
- setrlimit(RLIMIT_MEMLOCK, &rlim);
/* libbpf opens BPF object and loads it into the kernel */
skel = iterators_bpf__open_and_load();
if (!skel) {
@@ -72,10 +86,10 @@ int main(int argc, char **argv)
goto cleanup;
/* send two bpf_link IDs with names to the kernel */
- err = send_link_to_kernel(skel->links.dump_bpf_map, "maps.debug");
+ err = send_link_to_kernel(skel->links.dump_bpf_map_fd, "maps.debug");
if (err)
goto cleanup;
- err = send_link_to_kernel(skel->links.dump_bpf_prog, "progs.debug");
+ err = send_link_to_kernel(skel->links.dump_bpf_prog_fd, "progs.debug");
if (err)
goto cleanup;
diff --git a/kernel/bpf/preload/iterators/iterators.lskel.h b/kernel/bpf/preload/iterators/iterators.lskel.h
new file mode 100644
index 000000000000..d90562d672d2
--- /dev/null
+++ b/kernel/bpf/preload/iterators/iterators.lskel.h
@@ -0,0 +1,428 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* THIS FILE IS AUTOGENERATED! */
+#ifndef __ITERATORS_BPF_SKEL_H__
+#define __ITERATORS_BPF_SKEL_H__
+
+#include <stdlib.h>
+#include <bpf/bpf.h>
+#include <bpf/skel_internal.h>
+
+struct iterators_bpf {
+ struct bpf_loader_ctx ctx;
+ struct {
+ struct bpf_map_desc rodata;
+ } maps;
+ struct {
+ struct bpf_prog_desc dump_bpf_map;
+ struct bpf_prog_desc dump_bpf_prog;
+ } progs;
+ struct {
+ int dump_bpf_map_fd;
+ int dump_bpf_prog_fd;
+ } links;
+ struct iterators_bpf__rodata {
+ } *rodata;
+};
+
+static inline int
+iterators_bpf__dump_bpf_map__attach(struct iterators_bpf *skel)
+{
+ int prog_fd = skel->progs.dump_bpf_map.prog_fd;
+ int fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER);
+
+ if (fd > 0)
+ skel->links.dump_bpf_map_fd = fd;
+ return fd;
+}
+
+static inline int
+iterators_bpf__dump_bpf_prog__attach(struct iterators_bpf *skel)
+{
+ int prog_fd = skel->progs.dump_bpf_prog.prog_fd;
+ int fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER);
+
+ if (fd > 0)
+ skel->links.dump_bpf_prog_fd = fd;
+ return fd;
+}
+
+static inline int
+iterators_bpf__attach(struct iterators_bpf *skel)
+{
+ int ret = 0;
+
+ ret = ret < 0 ? ret : iterators_bpf__dump_bpf_map__attach(skel);
+ ret = ret < 0 ? ret : iterators_bpf__dump_bpf_prog__attach(skel);
+ return ret < 0 ? ret : 0;
+}
+
+static inline void
+iterators_bpf__detach(struct iterators_bpf *skel)
+{
+ skel_closenz(skel->links.dump_bpf_map_fd);
+ skel_closenz(skel->links.dump_bpf_prog_fd);
+}
+static void
+iterators_bpf__destroy(struct iterators_bpf *skel)
+{
+ if (!skel)
+ return;
+ iterators_bpf__detach(skel);
+ skel_closenz(skel->progs.dump_bpf_map.prog_fd);
+ skel_closenz(skel->progs.dump_bpf_prog.prog_fd);
+ munmap(skel->rodata, 4096);
+ skel_closenz(skel->maps.rodata.map_fd);
+ free(skel);
+}
+static inline struct iterators_bpf *
+iterators_bpf__open(void)
+{
+ struct iterators_bpf *skel;
+
+ skel = calloc(sizeof(*skel), 1);
+ if (!skel)
+ goto cleanup;
+ skel->ctx.sz = (void *)&skel->links - (void *)skel;
+ skel->rodata =
+ mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (skel->rodata == (void *) -1)
+ goto cleanup;
+ memcpy(skel->rodata, (void *)"\
+\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
+\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\
+\x25\x2d\x31\x36\x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\
+\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\
+\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0", 98);
+ skel->maps.rodata.initial_value = (__u64)(long)skel->rodata;
+ return skel;
+cleanup:
+ iterators_bpf__destroy(skel);
+ return NULL;
+}
+
+static inline int
+iterators_bpf__load(struct iterators_bpf *skel)
+{
+ struct bpf_load_and_run_opts opts = {};
+ int err;
+
+ opts.ctx = (struct bpf_loader_ctx *)skel;
+ opts.data_sz = 6056;
+ opts.data = (void *)"\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x9f\xeb\x01\0\
+\x18\0\0\0\0\0\0\0\x1c\x04\0\0\x1c\x04\0\0\xf9\x04\0\0\0\0\0\0\0\0\0\x02\x02\0\
+\0\0\x01\0\0\0\x02\0\0\x04\x10\0\0\0\x13\0\0\0\x03\0\0\0\0\0\0\0\x18\0\0\0\x04\
+\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x08\0\0\0\0\0\0\0\0\0\0\x02\x0d\0\0\0\0\0\0\
+\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x01\0\0\0\x20\0\0\0\0\0\0\x01\x04\0\0\0\x20\
+\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xa3\0\0\0\x03\0\0\x04\x18\0\0\0\xb1\0\
+\0\0\x09\0\0\0\0\0\0\0\xb5\0\0\0\x0b\0\0\0\x40\0\0\0\xc0\0\0\0\x0b\0\0\0\x80\0\
+\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xc8\0\0\0\0\0\0\x07\0\0\0\0\xd1\0\0\0\0\0\0\
+\x08\x0c\0\0\0\xd7\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\x94\x01\0\0\x03\0\0\x04\
+\x18\0\0\0\x9c\x01\0\0\x0e\0\0\0\0\0\0\0\x9f\x01\0\0\x11\0\0\0\x20\0\0\0\xa4\
+\x01\0\0\x0e\0\0\0\xa0\0\0\0\xb0\x01\0\0\0\0\0\x08\x0f\0\0\0\xb6\x01\0\0\0\0\0\
+\x01\x04\0\0\0\x20\0\0\0\xc3\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\0\0\0\
+\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xc8\x01\0\0\0\0\0\x01\x04\0\0\0\
+\x20\0\0\0\0\0\0\0\0\0\0\x02\x14\0\0\0\x2c\x02\0\0\x02\0\0\x04\x10\0\0\0\x13\0\
+\0\0\x03\0\0\0\0\0\0\0\x3f\x02\0\0\x15\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x18\0\
+\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x13\0\0\0\x44\x02\0\0\x01\0\0\x0c\
+\x16\0\0\0\x90\x02\0\0\x01\0\0\x04\x08\0\0\0\x99\x02\0\0\x19\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\x02\x1a\0\0\0\xea\x02\0\0\x06\0\0\x04\x38\0\0\0\x9c\x01\0\0\x0e\0\0\
+\0\0\0\0\0\x9f\x01\0\0\x11\0\0\0\x20\0\0\0\xf7\x02\0\0\x1b\0\0\0\xc0\0\0\0\x08\
+\x03\0\0\x15\0\0\0\0\x01\0\0\x11\x03\0\0\x1d\0\0\0\x40\x01\0\0\x1b\x03\0\0\x1e\
+\0\0\0\x80\x01\0\0\0\0\0\0\0\0\0\x02\x1c\0\0\0\0\0\0\0\0\0\0\x0a\x10\0\0\0\0\0\
+\0\0\0\0\0\x02\x1f\0\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\x65\x03\0\0\x02\0\0\x04\
+\x08\0\0\0\x73\x03\0\0\x0e\0\0\0\0\0\0\0\x7c\x03\0\0\x0e\0\0\0\x20\0\0\0\x1b\
+\x03\0\0\x03\0\0\x04\x18\0\0\0\x86\x03\0\0\x1b\0\0\0\0\0\0\0\x8e\x03\0\0\x21\0\
+\0\0\x40\0\0\0\x94\x03\0\0\x23\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x22\0\0\0\0\0\
+\0\0\0\0\0\x02\x24\0\0\0\x98\x03\0\0\x01\0\0\x04\x04\0\0\0\xa3\x03\0\0\x0e\0\0\
+\0\0\0\0\0\x0c\x04\0\0\x01\0\0\x04\x04\0\0\0\x15\x04\0\0\x0e\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\0\x8b\x04\0\0\0\0\0\x0e\x25\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x0e\0\0\0\x9f\x04\
+\0\0\0\0\0\x0e\x27\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\
+\x20\0\0\0\xb5\x04\0\0\0\0\0\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\
+\x1c\0\0\0\x12\0\0\0\x11\0\0\0\xca\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\xe1\x04\0\0\0\0\0\x0e\x2d\0\0\
+\0\x01\0\0\0\xe9\x04\0\0\x04\0\0\x0f\x62\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\0\x28\
+\0\0\0\x23\0\0\0\x0e\0\0\0\x2a\0\0\0\x31\0\0\0\x20\0\0\0\x2c\0\0\0\x51\0\0\0\
+\x11\0\0\0\xf1\x04\0\0\x01\0\0\x0f\x04\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\0\0\0\x62\
+\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x6d\x65\x74\
+\x61\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\x74\0\x64\x75\x6d\x70\x5f\x62\x70\
+\x66\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\x30\
+\x3a\x30\0\x2f\x77\x2f\x6e\x65\x74\x2d\x6e\x65\x78\x74\x2f\x6b\x65\x72\x6e\x65\
+\x6c\x2f\x62\x70\x66\x2f\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\x61\
+\x74\x6f\x72\x73\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\
+\x63\0\x09\x73\x74\x72\x75\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\x2a\
+\x73\x65\x71\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\
+\x71\x3b\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\x71\0\
+\x73\x65\x73\x73\x69\x6f\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\x73\
+\x65\x71\x5f\x66\x69\x6c\x65\0\x5f\x5f\x75\x36\x34\0\x75\x6e\x73\x69\x67\x6e\
+\x65\x64\x20\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\0\x30\x3a\x31\0\x09\x73\x74\
+\x72\x75\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\x61\x70\x20\x3d\
+\x20\x63\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\x21\x6d\x61\x70\
+\x29\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x20\x63\
+\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x5f\x6e\x75\x6d\x3b\0\x30\
+\x3a\x32\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x3d\x20\x30\
+\x29\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\
+\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\
+\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\
+\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\x64\0\x6e\x61\x6d\x65\
+\0\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\x75\x33\x32\0\x75\x6e\
+\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\x72\0\x5f\x5f\x41\x52\
+\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\0\x09\x42\x50\x46\
+\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\
+\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x5c\x6e\x22\x2c\x20\x6d\x61\x70\
+\x2d\x3e\x69\x64\x2c\x20\x6d\x61\x70\x2d\x3e\x6e\x61\x6d\x65\x2c\x20\x6d\x61\
+\x70\x2d\x3e\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x29\x3b\0\x7d\0\x62\
+\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x70\x72\
+\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x69\x74\x65\
+\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\
+\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\x72\x6f\x67\x20\x3d\x20\x63\x74\x78\
+\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\x20\x28\x21\x70\x72\x6f\x67\x29\0\
+\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\x78\0\x09\x61\x75\x78\x20\x3d\x20\
+\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\
+\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\
+\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\
+\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x70\x72\x6f\x67\
+\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\x68\x5f\x66\x75\x6e\x63\x5f\x6e\x61\x6d\
+\x65\0\x64\x73\x74\x5f\x70\x72\x6f\x67\0\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\
+\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\
+\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\
+\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\x78\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\
+\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\x62\x74\x66\x29\0\x62\x70\x66\x5f\x66\
+\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\x6e\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\
+\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\x69\x6e\x67\x73\0\x74\x79\x70\x65\x73\
+\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\x61\x64\x65\x72\0\x73\x74\x72\x5f\x6c\
+\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\x3d\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\
+\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\x72\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\
+\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\x2c\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\
+\x29\x2c\x20\x74\x79\x70\x65\x73\x20\x2b\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\
+\x09\x73\x74\x72\x20\x3d\x20\x62\x74\x66\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\
+\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\0\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\
+\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3d\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\
+\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\
+\x30\x3a\x32\x3a\x30\0\x09\x69\x66\x20\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\
+\x3e\x3d\x20\x62\x74\x66\x2d\x3e\x68\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\
+\x29\0\x09\x72\x65\x74\x75\x72\x6e\x20\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\
+\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
+\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
+\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\
+\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\
+\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\
+\x4e\x53\x45\0\x2e\x72\x6f\x64\x61\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x2d\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x02\0\0\
+\0\x04\0\0\0\x62\0\0\0\x01\0\0\0\x80\x04\0\0\0\0\0\0\0\0\0\0\x69\x74\x65\x72\
+\x61\x74\x6f\x72\x2e\x72\x6f\x64\x61\x74\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\x2f\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\
+\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\
+\x73\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x0a\0\x20\x20\x69\
+\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
+\x61\x74\x74\x61\x63\x68\x65\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\
+\x25\x73\x20\x25\x73\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\
+\x79\x12\0\0\0\0\0\0\x79\x26\0\0\0\0\0\0\x79\x17\x08\0\0\0\0\0\x15\x07\x1b\0\0\
+\0\0\0\x79\x11\0\0\0\0\0\0\x79\x11\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\
+\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x23\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\
+\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\xb7\x01\0\0\x04\0\0\0\xbf\x72\0\
+\0\0\0\0\0\x0f\x12\0\0\0\0\0\0\x7b\x2a\xf0\xff\0\0\0\0\x61\x71\x14\0\0\0\0\0\
+\x7b\x1a\xf8\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\
+\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x23\0\0\0\xb7\x03\0\0\x0e\0\0\0\
+\xb7\x05\0\0\x18\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\
+\0\0\0\0\x07\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\x1e\x3c\x01\0\x01\0\0\0\x42\0\0\
+\0\x7b\0\0\0\x24\x3c\x01\0\x02\0\0\0\x42\0\0\0\xee\0\0\0\x1d\x44\x01\0\x03\0\0\
+\0\x42\0\0\0\x0f\x01\0\0\x06\x4c\x01\0\x04\0\0\0\x42\0\0\0\x1a\x01\0\0\x17\x40\
+\x01\0\x05\0\0\0\x42\0\0\0\x1a\x01\0\0\x1d\x40\x01\0\x06\0\0\0\x42\0\0\0\x43\
+\x01\0\0\x06\x58\x01\0\x08\0\0\0\x42\0\0\0\x56\x01\0\0\x03\x5c\x01\0\x0f\0\0\0\
+\x42\0\0\0\xdc\x01\0\0\x02\x64\x01\0\x1f\0\0\0\x42\0\0\0\x2a\x02\0\0\x01\x6c\
+\x01\0\0\0\0\0\x02\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\
+\0\x10\0\0\0\x02\0\0\0\xea\0\0\0\0\0\0\0\x20\0\0\0\x02\0\0\0\x3e\0\0\0\0\0\0\0\
+\x28\0\0\0\x08\0\0\0\x3f\x01\0\0\0\0\0\0\x78\0\0\0\x0d\0\0\0\x3e\0\0\0\0\0\0\0\
+\x88\0\0\0\x0d\0\0\0\xea\0\0\0\0\0\0\0\xa8\0\0\0\x0d\0\0\0\x3f\x01\0\0\0\0\0\0\
+\x1a\0\0\0\x21\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\0\0\0\
+\0\0\0\0\x1c\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\
+\0\0\0\0\0\x0a\0\0\0\x01\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\x10\0\0\0\0\0\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x62\x70\x66\x5f\x6d\
+\x61\x70\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\x79\x12\0\0\0\0\0\0\x79\x26\0\0\
+\0\0\0\0\x79\x12\x08\0\0\0\0\0\x15\x02\x3c\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x79\
+\x27\0\0\0\0\0\0\x79\x11\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\
+\0\x07\x04\0\0\xd0\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\
+\x31\0\0\0\xb7\x03\0\0\x20\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x7b\
+\x6a\xc8\xff\0\0\0\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xb7\x03\0\0\
+\x04\0\0\0\xbf\x79\0\0\0\0\0\0\x0f\x39\0\0\0\0\0\0\x79\x71\x28\0\0\0\0\0\x79\
+\x78\x30\0\0\0\0\0\x15\x08\x18\0\0\0\0\0\xb7\x02\0\0\0\0\0\0\x0f\x21\0\0\0\0\0\
+\0\x61\x11\x04\0\0\0\0\0\x79\x83\x08\0\0\0\0\0\x67\x01\0\0\x03\0\0\0\x0f\x13\0\
+\0\0\0\0\0\x79\x86\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf8\xff\xff\xff\
+\xb7\x02\0\0\x08\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x01\0\0\0\0\0\0\x79\xa3\xf8\xff\
+\0\0\0\0\x0f\x13\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf4\xff\xff\xff\
+\xb7\x02\0\0\x04\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x03\0\0\x04\0\0\0\x61\xa1\xf4\
+\xff\0\0\0\0\x61\x82\x10\0\0\0\0\0\x3d\x21\x02\0\0\0\0\0\x0f\x16\0\0\0\0\0\0\
+\xbf\x69\0\0\0\0\0\0\x7b\x9a\xd8\xff\0\0\0\0\x79\x71\x18\0\0\0\0\0\x7b\x1a\xe0\
+\xff\0\0\0\0\x79\x71\x20\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\x31\0\0\0\0\0\0\x7b\
+\x1a\xe8\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xd0\xff\xff\xff\x79\xa1\
+\xc8\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x51\0\0\0\xb7\x03\0\0\x11\0\0\0\
+\xb7\x05\0\0\x20\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\
+\0\0\0\0\x17\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\x1e\x80\x01\0\x01\0\0\0\x42\0\0\
+\0\x7b\0\0\0\x24\x80\x01\0\x02\0\0\0\x42\0\0\0\x60\x02\0\0\x1f\x88\x01\0\x03\0\
+\0\0\x42\0\0\0\x84\x02\0\0\x06\x94\x01\0\x04\0\0\0\x42\0\0\0\x1a\x01\0\0\x17\
+\x84\x01\0\x05\0\0\0\x42\0\0\0\x9d\x02\0\0\x0e\xa0\x01\0\x06\0\0\0\x42\0\0\0\
+\x1a\x01\0\0\x1d\x84\x01\0\x07\0\0\0\x42\0\0\0\x43\x01\0\0\x06\xa4\x01\0\x09\0\
+\0\0\x42\0\0\0\xaf\x02\0\0\x03\xa8\x01\0\x11\0\0\0\x42\0\0\0\x1f\x03\0\0\x02\
+\xb0\x01\0\x18\0\0\0\x42\0\0\0\x5a\x03\0\0\x06\x04\x01\0\x1b\0\0\0\x42\0\0\0\0\
+\0\0\0\0\0\0\0\x1c\0\0\0\x42\0\0\0\xab\x03\0\0\x0f\x10\x01\0\x1d\0\0\0\x42\0\0\
+\0\xc0\x03\0\0\x2d\x14\x01\0\x1f\0\0\0\x42\0\0\0\xf7\x03\0\0\x0d\x0c\x01\0\x21\
+\0\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x22\0\0\0\x42\0\0\0\xc0\x03\0\0\x02\x14\x01\0\
+\x25\0\0\0\x42\0\0\0\x1e\x04\0\0\x0d\x18\x01\0\x28\0\0\0\x42\0\0\0\0\0\0\0\0\0\
+\0\0\x29\0\0\0\x42\0\0\0\x1e\x04\0\0\x0d\x18\x01\0\x2c\0\0\0\x42\0\0\0\x1e\x04\
+\0\0\x0d\x18\x01\0\x2d\0\0\0\x42\0\0\0\x4c\x04\0\0\x1b\x1c\x01\0\x2e\0\0\0\x42\
+\0\0\0\x4c\x04\0\0\x06\x1c\x01\0\x2f\0\0\0\x42\0\0\0\x6f\x04\0\0\x0d\x24\x01\0\
+\x31\0\0\0\x42\0\0\0\x1f\x03\0\0\x02\xb0\x01\0\x40\0\0\0\x42\0\0\0\x2a\x02\0\0\
+\x01\xc0\x01\0\0\0\0\0\x14\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\
+\0\0\0\0\0\x10\0\0\0\x14\0\0\0\xea\0\0\0\0\0\0\0\x20\0\0\0\x14\0\0\0\x3e\0\0\0\
+\0\0\0\0\x28\0\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x30\0\0\0\x08\0\0\0\x3f\x01\0\0\
+\0\0\0\0\x88\0\0\0\x1a\0\0\0\x3e\0\0\0\0\0\0\0\x98\0\0\0\x1a\0\0\0\xea\0\0\0\0\
+\0\0\0\xb0\0\0\0\x1a\0\0\0\x52\x03\0\0\0\0\0\0\xb8\0\0\0\x1a\0\0\0\x56\x03\0\0\
+\0\0\0\0\xc8\0\0\0\x1f\0\0\0\x84\x03\0\0\0\0\0\0\xe0\0\0\0\x20\0\0\0\xea\0\0\0\
+\0\0\0\0\xf8\0\0\0\x20\0\0\0\x3e\0\0\0\0\0\0\0\x20\x01\0\0\x24\0\0\0\x3e\0\0\0\
+\0\0\0\0\x58\x01\0\0\x1a\0\0\0\xea\0\0\0\0\0\0\0\x68\x01\0\0\x20\0\0\0\x46\x04\
+\0\0\0\0\0\0\x90\x01\0\0\x1a\0\0\0\x3f\x01\0\0\0\0\0\0\xa0\x01\0\0\x1a\0\0\0\
+\x87\x04\0\0\0\0\0\0\xa8\x01\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x1a\0\0\0\x42\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\0\0\0\0\x1c\0\0\
+\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\0\0\0\0\0\x1a\0\
+\0\0\x01\0\0\0\0\0\0\0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x10\0\0\0\0\0\
+\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\
+\0\0\0\0";
+ opts.insns_sz = 2184;
+ opts.insns = (void *)"\
+\xbf\x16\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x78\xff\xff\xff\xb7\x02\0\
+\0\x88\0\0\0\xb7\x03\0\0\0\0\0\0\x85\0\0\0\x71\0\0\0\x05\0\x14\0\0\0\0\0\x61\
+\xa1\x78\xff\0\0\0\0\xd5\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x7c\xff\
+\0\0\0\0\xd5\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x80\xff\0\0\0\0\xd5\
+\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x84\xff\0\0\0\0\xd5\x01\x01\0\0\
+\0\0\0\x85\0\0\0\xa8\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\x01\0\0\0\0\
+\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xbf\x70\0\0\
+\0\0\0\0\x95\0\0\0\0\0\0\0\x61\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\
+\x48\x0e\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\
+\0\0\x44\x0e\0\0\x63\x01\0\0\0\0\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\
+\0\0\0\0\x38\x0e\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x05\0\0\
+\x18\x61\0\0\0\0\0\0\0\0\0\0\x30\x0e\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x12\0\
+\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x30\x0e\0\0\xb7\x03\0\0\x1c\0\0\0\x85\0\0\0\
+\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\xd4\xff\0\0\0\0\x63\x7a\x78\xff\0\0\0\0\
+\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x80\x0e\0\0\x63\x01\0\0\0\
+\0\0\0\x61\x60\x20\0\0\0\0\0\x15\0\x03\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\
+\x5c\x0e\0\0\x63\x01\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\
+\0\x50\x0e\0\0\xb7\x03\0\0\x48\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\
+\xc5\x07\xc3\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x71\0\0\0\0\0\
+\0\x79\x63\x18\0\0\0\0\0\x15\x03\x04\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x98\
+\x0e\0\0\xb7\x02\0\0\x62\0\0\0\x85\0\0\0\x94\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\x63\x01\0\
+\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\
+\x10\x0f\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x98\x0e\0\0\x18\
+\x61\0\0\0\0\0\0\0\0\0\0\x18\x0f\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x02\0\0\0\
+\x18\x62\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\xb7\x03\0\0\x20\0\0\0\x85\0\0\0\xa6\0\
+\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\xa3\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\x63\x01\0\0\
+\0\0\0\0\xb7\x01\0\0\x16\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\xb7\x03\
+\0\0\x04\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x96\xff\0\0\0\0\
+\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x78\x11\0\
+\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x38\x0f\0\0\x18\x61\0\0\0\0\
+\0\0\0\0\0\0\x70\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x40\
+\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb8\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\
+\0\0\0\0\0\0\0\0\0\x48\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xc8\x11\0\0\x7b\x01\
+\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xe8\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\
+\0\xe8\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x18\x61\
+\0\0\0\0\0\0\0\0\0\0\xe0\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\x60\x08\0\0\0\0\0\x18\
+\x61\0\0\0\0\0\0\0\0\0\0\x80\x11\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\0\0\0\0\0\
+\x18\x61\0\0\0\0\0\0\0\0\0\0\x84\x11\0\0\x63\x01\0\0\0\0\0\0\x79\x60\x10\0\0\0\
+\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x88\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\xa0\x78\
+\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb0\x11\0\0\x63\x01\0\0\0\0\0\0\x18\
+\x61\0\0\0\0\0\0\0\0\0\0\xf8\x11\0\0\xb7\x02\0\0\x11\0\0\0\xb7\x03\0\0\x0c\0\0\
+\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x60\xff\
+\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x68\x11\0\0\x63\x70\x6c\0\0\0\0\0\x77\x07\
+\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\0\0\0\x18\x62\0\0\0\0\0\0\
+\0\0\0\0\x68\x11\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\
+\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd8\x11\0\0\x61\x01\0\0\0\0\0\0\xd5\x01\x02\0\
+\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x4e\xff\0\0\0\0\x63\
+\x7a\x80\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x10\x12\0\0\x18\x61\0\0\0\0\0\
+\0\0\0\0\0\x10\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x18\x12\
+\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\
+\0\0\0\0\0\0\0\x28\x14\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x50\x17\0\0\x7b\x01\0\0\
+\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x14\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\
+\x60\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd0\x15\0\0\x18\
+\x61\0\0\0\0\0\0\0\0\0\0\x80\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x78\x17\0\0\x7b\x01\0\0\0\0\0\0\x61\
+\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x18\x17\0\0\x63\x01\0\0\0\0\0\0\
+\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x1c\x17\0\0\x63\x01\0\0\0\0\
+\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x20\x17\0\0\x7b\x01\0\0\
+\0\0\0\0\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x48\x17\0\0\x63\
+\x01\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x90\x17\0\0\xb7\x02\0\0\x12\0\0\0\
+\xb7\x03\0\0\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\
+\0\0\xc5\x07\x17\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\x63\x70\x6c\
+\0\0\0\0\0\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\0\0\0\
+\x18\x62\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\
+\0\xbf\x07\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x70\x17\0\0\x61\x01\0\0\0\0\
+\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x05\
+\xff\0\0\0\0\x63\x7a\x84\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\xd5\x01\x02\0\0\0\
+\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa0\x80\xff\0\0\0\0\x63\x06\
+\x28\0\0\0\0\0\x61\xa0\x84\xff\0\0\0\0\x63\x06\x2c\0\0\0\0\0\x18\x61\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\x61\x10\0\0\0\0\0\0\x63\x06\x18\0\0\0\0\0\xb7\0\0\0\0\0\0\0\
+\x95\0\0\0\0\0\0\0";
+ err = bpf_load_and_run(&opts);
+ if (err < 0)
+ return err;
+ skel->rodata =
+ mmap(skel->rodata, 4096, PROT_READ, MAP_SHARED | MAP_FIXED,
+ skel->maps.rodata.map_fd, 0);
+ return 0;
+}
+
+static inline struct iterators_bpf *
+iterators_bpf__open_and_load(void)
+{
+ struct iterators_bpf *skel;
+
+ skel = iterators_bpf__open();
+ if (!skel)
+ return NULL;
+ if (iterators_bpf__load(skel)) {
+ iterators_bpf__destroy(skel);
+ return NULL;
+ }
+ return skel;
+}
+
+#endif /* __ITERATORS_BPF_SKEL_H__ */
diff --git a/kernel/bpf/preload/iterators/iterators.skel.h b/kernel/bpf/preload/iterators/iterators.skel.h
deleted file mode 100644
index cf9a6a94b3a4..000000000000
--- a/kernel/bpf/preload/iterators/iterators.skel.h
+++ /dev/null
@@ -1,412 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-
-/* THIS FILE IS AUTOGENERATED! */
-#ifndef __ITERATORS_BPF_SKEL_H__
-#define __ITERATORS_BPF_SKEL_H__
-
-#include <stdlib.h>
-#include <bpf/libbpf.h>
-
-struct iterators_bpf {
- struct bpf_object_skeleton *skeleton;
- struct bpf_object *obj;
- struct {
- struct bpf_map *rodata;
- } maps;
- struct {
- struct bpf_program *dump_bpf_map;
- struct bpf_program *dump_bpf_prog;
- } progs;
- struct {
- struct bpf_link *dump_bpf_map;
- struct bpf_link *dump_bpf_prog;
- } links;
- struct iterators_bpf__rodata {
- char dump_bpf_map____fmt[35];
- char dump_bpf_map____fmt_1[14];
- char dump_bpf_prog____fmt[32];
- char dump_bpf_prog____fmt_2[17];
- } *rodata;
-};
-
-static void
-iterators_bpf__destroy(struct iterators_bpf *obj)
-{
- if (!obj)
- return;
- if (obj->skeleton)
- bpf_object__destroy_skeleton(obj->skeleton);
- free(obj);
-}
-
-static inline int
-iterators_bpf__create_skeleton(struct iterators_bpf *obj);
-
-static inline struct iterators_bpf *
-iterators_bpf__open_opts(const struct bpf_object_open_opts *opts)
-{
- struct iterators_bpf *obj;
-
- obj = (struct iterators_bpf *)calloc(1, sizeof(*obj));
- if (!obj)
- return NULL;
- if (iterators_bpf__create_skeleton(obj))
- goto err;
- if (bpf_object__open_skeleton(obj->skeleton, opts))
- goto err;
-
- return obj;
-err:
- iterators_bpf__destroy(obj);
- return NULL;
-}
-
-static inline struct iterators_bpf *
-iterators_bpf__open(void)
-{
- return iterators_bpf__open_opts(NULL);
-}
-
-static inline int
-iterators_bpf__load(struct iterators_bpf *obj)
-{
- return bpf_object__load_skeleton(obj->skeleton);
-}
-
-static inline struct iterators_bpf *
-iterators_bpf__open_and_load(void)
-{
- struct iterators_bpf *obj;
-
- obj = iterators_bpf__open();
- if (!obj)
- return NULL;
- if (iterators_bpf__load(obj)) {
- iterators_bpf__destroy(obj);
- return NULL;
- }
- return obj;
-}
-
-static inline int
-iterators_bpf__attach(struct iterators_bpf *obj)
-{
- return bpf_object__attach_skeleton(obj->skeleton);
-}
-
-static inline void
-iterators_bpf__detach(struct iterators_bpf *obj)
-{
- return bpf_object__detach_skeleton(obj->skeleton);
-}
-
-static inline int
-iterators_bpf__create_skeleton(struct iterators_bpf *obj)
-{
- struct bpf_object_skeleton *s;
-
- s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));
- if (!s)
- return -1;
- obj->skeleton = s;
-
- s->sz = sizeof(*s);
- s->name = "iterators_bpf";
- s->obj = &obj->obj;
-
- /* maps */
- s->map_cnt = 1;
- s->map_skel_sz = sizeof(*s->maps);
- s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);
- if (!s->maps)
- goto err;
-
- s->maps[0].name = "iterator.rodata";
- s->maps[0].map = &obj->maps.rodata;
- s->maps[0].mmaped = (void **)&obj->rodata;
-
- /* programs */
- s->prog_cnt = 2;
- s->prog_skel_sz = sizeof(*s->progs);
- s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);
- if (!s->progs)
- goto err;
-
- s->progs[0].name = "dump_bpf_map";
- s->progs[0].prog = &obj->progs.dump_bpf_map;
- s->progs[0].link = &obj->links.dump_bpf_map;
-
- s->progs[1].name = "dump_bpf_prog";
- s->progs[1].prog = &obj->progs.dump_bpf_prog;
- s->progs[1].link = &obj->links.dump_bpf_prog;
-
- s->data_sz = 7176;
- s->data = (void *)"\
-\x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\x48\x18\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0f\0\
-\x0e\0\x79\x12\0\0\0\0\0\0\x79\x26\0\0\0\0\0\0\x79\x17\x08\0\0\0\0\0\x15\x07\
-\x1a\0\0\0\0\0\x79\x21\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\
-\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\xb7\x03\0\0\x23\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x61\x71\0\
-\0\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\xb7\x01\0\0\x04\0\0\0\xbf\x72\0\0\0\0\0\0\
-\x0f\x12\0\0\0\0\0\0\x7b\x2a\xf0\xff\0\0\0\0\x61\x71\x14\0\0\0\0\0\x7b\x1a\xf8\
-\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\
-\0\x18\x02\0\0\x23\0\0\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x0e\0\0\0\xb7\x05\0\0\x18\
-\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\x79\x12\0\0\0\0\
-\0\0\x79\x26\0\0\0\0\0\0\x79\x11\x08\0\0\0\0\0\x15\x01\x3b\0\0\0\0\0\x79\x17\0\
-\0\0\0\0\0\x79\x21\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\
-\x04\0\0\xd0\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x02\0\0\x31\0\0\0\0\0\0\0\0\0\
-\0\0\xb7\x03\0\0\x20\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x7b\x6a\xc8\
-\xff\0\0\0\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xb7\x03\0\0\x04\0\0\0\
-\xbf\x79\0\0\0\0\0\0\x0f\x39\0\0\0\0\0\0\x79\x71\x28\0\0\0\0\0\x79\x78\x30\0\0\
-\0\0\0\x15\x08\x18\0\0\0\0\0\xb7\x02\0\0\0\0\0\0\x0f\x21\0\0\0\0\0\0\x61\x11\
-\x04\0\0\0\0\0\x79\x83\x08\0\0\0\0\0\x67\x01\0\0\x03\0\0\0\x0f\x13\0\0\0\0\0\0\
-\x79\x86\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf8\xff\xff\xff\xb7\x02\0\
-\0\x08\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x01\0\0\0\0\0\0\x79\xa3\xf8\xff\0\0\0\0\
-\x0f\x13\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf4\xff\xff\xff\xb7\x02\0\
-\0\x04\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x03\0\0\x04\0\0\0\x61\xa1\xf4\xff\0\0\0\0\
-\x61\x82\x10\0\0\0\0\0\x3d\x21\x02\0\0\0\0\0\x0f\x16\0\0\0\0\0\0\xbf\x69\0\0\0\
-\0\0\0\x7b\x9a\xd8\xff\0\0\0\0\x79\x71\x18\0\0\0\0\0\x7b\x1a\xe0\xff\0\0\0\0\
-\x79\x71\x20\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\x31\0\0\0\0\0\0\x7b\x1a\xe8\xff\
-\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xd0\xff\xff\xff\x79\xa1\xc8\xff\0\0\0\
-\0\x18\x02\0\0\x51\0\0\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x11\0\0\0\xb7\x05\0\0\x20\
-\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\x20\x20\x69\x64\
-\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\
-\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\
-\x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\
-\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\x64\x0a\0\x25\x34\
-\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0\x47\x50\x4c\0\x9f\
-\xeb\x01\0\x18\0\0\0\0\0\0\0\x1c\x04\0\0\x1c\x04\0\0\x09\x05\0\0\0\0\0\0\0\0\0\
-\x02\x02\0\0\0\x01\0\0\0\x02\0\0\x04\x10\0\0\0\x13\0\0\0\x03\0\0\0\0\0\0\0\x18\
-\0\0\0\x04\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x08\0\0\0\0\0\0\0\0\0\0\x02\x0d\0\
-\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x01\0\0\0\x20\0\0\0\0\0\0\x01\x04\
-\0\0\0\x20\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xaf\0\0\0\x03\0\0\x04\x18\0\
-\0\0\xbd\0\0\0\x09\0\0\0\0\0\0\0\xc1\0\0\0\x0b\0\0\0\x40\0\0\0\xcc\0\0\0\x0b\0\
-\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xd4\0\0\0\0\0\0\x07\0\0\0\0\xdd\0\0\
-\0\0\0\0\x08\x0c\0\0\0\xe3\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\xa4\x01\0\0\x03\
-\0\0\x04\x18\0\0\0\xac\x01\0\0\x0e\0\0\0\0\0\0\0\xaf\x01\0\0\x11\0\0\0\x20\0\0\
-\0\xb4\x01\0\0\x0e\0\0\0\xa0\0\0\0\xc0\x01\0\0\0\0\0\x08\x0f\0\0\0\xc6\x01\0\0\
-\0\0\0\x01\x04\0\0\0\x20\0\0\0\xd3\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\
-\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xd8\x01\0\0\0\0\0\x01\x04\
-\0\0\0\x20\0\0\0\0\0\0\0\0\0\0\x02\x14\0\0\0\x3c\x02\0\0\x02\0\0\x04\x10\0\0\0\
-\x13\0\0\0\x03\0\0\0\0\0\0\0\x4f\x02\0\0\x15\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\
-\x18\0\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x13\0\0\0\x54\x02\0\0\x01\0\
-\0\x0c\x16\0\0\0\xa0\x02\0\0\x01\0\0\x04\x08\0\0\0\xa9\x02\0\0\x19\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\x02\x1a\0\0\0\xfa\x02\0\0\x06\0\0\x04\x38\0\0\0\xac\x01\0\0\
-\x0e\0\0\0\0\0\0\0\xaf\x01\0\0\x11\0\0\0\x20\0\0\0\x07\x03\0\0\x1b\0\0\0\xc0\0\
-\0\0\x18\x03\0\0\x15\0\0\0\0\x01\0\0\x21\x03\0\0\x1d\0\0\0\x40\x01\0\0\x2b\x03\
-\0\0\x1e\0\0\0\x80\x01\0\0\0\0\0\0\0\0\0\x02\x1c\0\0\0\0\0\0\0\0\0\0\x0a\x10\0\
-\0\0\0\0\0\0\0\0\0\x02\x1f\0\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\x75\x03\0\0\x02\0\
-\0\x04\x08\0\0\0\x83\x03\0\0\x0e\0\0\0\0\0\0\0\x8c\x03\0\0\x0e\0\0\0\x20\0\0\0\
-\x2b\x03\0\0\x03\0\0\x04\x18\0\0\0\x96\x03\0\0\x1b\0\0\0\0\0\0\0\x9e\x03\0\0\
-\x21\0\0\0\x40\0\0\0\xa4\x03\0\0\x23\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x22\0\0\
-\0\0\0\0\0\0\0\0\x02\x24\0\0\0\xa8\x03\0\0\x01\0\0\x04\x04\0\0\0\xb3\x03\0\0\
-\x0e\0\0\0\0\0\0\0\x1c\x04\0\0\x01\0\0\x04\x04\0\0\0\x25\x04\0\0\x0e\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\0\x9b\x04\0\0\0\0\0\
-\x0e\x25\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x0e\0\0\0\
-\xaf\x04\0\0\0\0\0\x0e\x27\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\
-\x12\0\0\0\x20\0\0\0\xc5\x04\0\0\0\0\0\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\
-\0\0\0\0\x1c\0\0\0\x12\0\0\0\x11\0\0\0\xda\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\xf1\x04\0\0\0\0\0\x0e\
-\x2d\0\0\0\x01\0\0\0\xf9\x04\0\0\x04\0\0\x0f\0\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\
-\0\x28\0\0\0\x23\0\0\0\x0e\0\0\0\x2a\0\0\0\x31\0\0\0\x20\0\0\0\x2c\0\0\0\x51\0\
-\0\0\x11\0\0\0\x01\x05\0\0\x01\0\0\x0f\0\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\0\0\0\
-\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x6d\x65\
-\x74\x61\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\x74\0\x64\x75\x6d\x70\x5f\x62\
-\x70\x66\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\
-\x30\x3a\x30\0\x2f\x68\x6f\x6d\x65\x2f\x61\x6c\x72\x75\x61\x2f\x62\x75\x69\x6c\
-\x64\x2f\x6c\x69\x6e\x75\x78\x2f\x6b\x65\x72\x6e\x65\x6c\x2f\x62\x70\x66\x2f\
-\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2f\x69\
-\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\x63\0\x09\x73\x74\x72\x75\
-\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\x2a\x73\x65\x71\x20\x3d\x20\
-\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x3b\0\x62\x70\x66\x5f\
-\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\x71\0\x73\x65\x73\x73\x69\x6f\
-\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\x73\x65\x71\x5f\x66\x69\x6c\
-\x65\0\x5f\x5f\x75\x36\x34\0\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\x20\x75\x6e\
-\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x30\x3a\x31\0\x09\x73\x74\x72\x75\
-\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\x61\x70\x20\x3d\x20\x63\
-\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\x21\x6d\x61\x70\x29\0\
-\x30\x3a\x32\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\
-\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x5f\x6e\x75\x6d\
-\x3b\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x3d\x20\x30\x29\
-\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\
-\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\
-\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x5c\
-\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\x64\0\x6e\x61\x6d\x65\0\
-\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\x75\x33\x32\0\x75\x6e\
-\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\x72\0\x5f\x5f\x41\x52\
-\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\0\x09\x42\x50\x46\
-\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\
-\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x5c\x6e\x22\x2c\x20\x6d\x61\x70\
-\x2d\x3e\x69\x64\x2c\x20\x6d\x61\x70\x2d\x3e\x6e\x61\x6d\x65\x2c\x20\x6d\x61\
-\x70\x2d\x3e\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x29\x3b\0\x7d\0\x62\
-\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x70\x72\
-\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x69\x74\x65\
-\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\
-\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\x72\x6f\x67\x20\x3d\x20\x63\x74\x78\
-\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\x20\x28\x21\x70\x72\x6f\x67\x29\0\
-\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\x78\0\x09\x61\x75\x78\x20\x3d\x20\
-\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\
-\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\
-\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\
-\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x70\x72\x6f\x67\
-\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\x68\x5f\x66\x75\x6e\x63\x5f\x6e\x61\x6d\
-\x65\0\x64\x73\x74\x5f\x70\x72\x6f\x67\0\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\
-\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\
-\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\
-\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\x78\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\
-\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\x62\x74\x66\x29\0\x62\x70\x66\x5f\x66\
-\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\x6e\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\
-\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\x69\x6e\x67\x73\0\x74\x79\x70\x65\x73\
-\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\x61\x64\x65\x72\0\x73\x74\x72\x5f\x6c\
-\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\x3d\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\
-\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\x72\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\
-\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\x2c\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\
-\x29\x2c\x20\x74\x79\x70\x65\x73\x20\x2b\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\
-\x09\x73\x74\x72\x20\x3d\x20\x62\x74\x66\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\
-\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\0\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\
-\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3d\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\
-\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\
-\x30\x3a\x32\x3a\x30\0\x09\x69\x66\x20\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\
-\x3e\x3d\x20\x62\x74\x66\x2d\x3e\x68\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\
-\x29\0\x09\x72\x65\x74\x75\x72\x6e\x20\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\
-\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
-\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
-\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\
-\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\
-\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\
-\x4e\x53\x45\0\x2e\x72\x6f\x64\x61\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\x9f\
-\xeb\x01\0\x20\0\0\0\0\0\0\0\x24\0\0\0\x24\0\0\0\x44\x02\0\0\x68\x02\0\0\xa4\
-\x01\0\0\x08\0\0\0\x31\0\0\0\x01\0\0\0\0\0\0\0\x07\0\0\0\x62\x02\0\0\x01\0\0\0\
-\0\0\0\0\x17\0\0\0\x10\0\0\0\x31\0\0\0\x09\0\0\0\0\0\0\0\x42\0\0\0\x87\0\0\0\
-\x1e\x40\x01\0\x08\0\0\0\x42\0\0\0\x87\0\0\0\x24\x40\x01\0\x10\0\0\0\x42\0\0\0\
-\xfe\0\0\0\x1d\x48\x01\0\x18\0\0\0\x42\0\0\0\x1f\x01\0\0\x06\x50\x01\0\x20\0\0\
-\0\x42\0\0\0\x2e\x01\0\0\x1d\x44\x01\0\x28\0\0\0\x42\0\0\0\x53\x01\0\0\x06\x5c\
-\x01\0\x38\0\0\0\x42\0\0\0\x66\x01\0\0\x03\x60\x01\0\x70\0\0\0\x42\0\0\0\xec\
-\x01\0\0\x02\x68\x01\0\xf0\0\0\0\x42\0\0\0\x3a\x02\0\0\x01\x70\x01\0\x62\x02\0\
-\0\x1a\0\0\0\0\0\0\0\x42\0\0\0\x87\0\0\0\x1e\x84\x01\0\x08\0\0\0\x42\0\0\0\x87\
-\0\0\0\x24\x84\x01\0\x10\0\0\0\x42\0\0\0\x70\x02\0\0\x1f\x8c\x01\0\x18\0\0\0\
-\x42\0\0\0\x94\x02\0\0\x06\x98\x01\0\x20\0\0\0\x42\0\0\0\xad\x02\0\0\x0e\xa4\
-\x01\0\x28\0\0\0\x42\0\0\0\x2e\x01\0\0\x1d\x88\x01\0\x30\0\0\0\x42\0\0\0\x53\
-\x01\0\0\x06\xa8\x01\0\x40\0\0\0\x42\0\0\0\xbf\x02\0\0\x03\xac\x01\0\x80\0\0\0\
-\x42\0\0\0\x2f\x03\0\0\x02\xb4\x01\0\xb8\0\0\0\x42\0\0\0\x6a\x03\0\0\x06\x08\
-\x01\0\xd0\0\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\xd8\0\0\0\x42\0\0\0\xbb\x03\0\0\x0f\
-\x14\x01\0\xe0\0\0\0\x42\0\0\0\xd0\x03\0\0\x2d\x18\x01\0\xf0\0\0\0\x42\0\0\0\
-\x07\x04\0\0\x0d\x10\x01\0\0\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x08\x01\0\0\x42\
-\0\0\0\xd0\x03\0\0\x02\x18\x01\0\x20\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\x1c\x01\
-\0\x38\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x40\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\
-\x1c\x01\0\x58\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\x1c\x01\0\x60\x01\0\0\x42\0\0\
-\0\x5c\x04\0\0\x1b\x20\x01\0\x68\x01\0\0\x42\0\0\0\x5c\x04\0\0\x06\x20\x01\0\
-\x70\x01\0\0\x42\0\0\0\x7f\x04\0\0\x0d\x28\x01\0\x78\x01\0\0\x42\0\0\0\0\0\0\0\
-\0\0\0\0\x80\x01\0\0\x42\0\0\0\x2f\x03\0\0\x02\xb4\x01\0\xf8\x01\0\0\x42\0\0\0\
-\x3a\x02\0\0\x01\xc4\x01\0\x10\0\0\0\x31\0\0\0\x07\0\0\0\0\0\0\0\x02\0\0\0\x3e\
-\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x02\0\0\0\xfa\0\
-\0\0\0\0\0\0\x20\0\0\0\x08\0\0\0\x2a\x01\0\0\0\0\0\0\x70\0\0\0\x0d\0\0\0\x3e\0\
-\0\0\0\0\0\0\x80\0\0\0\x0d\0\0\0\xfa\0\0\0\0\0\0\0\xa0\0\0\0\x0d\0\0\0\x2a\x01\
-\0\0\0\0\0\0\x62\x02\0\0\x12\0\0\0\0\0\0\0\x14\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\
-\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x14\0\0\0\xfa\0\0\0\0\0\0\0\x20\0\0\0\
-\x18\0\0\0\x3e\0\0\0\0\0\0\0\x28\0\0\0\x08\0\0\0\x2a\x01\0\0\0\0\0\0\x80\0\0\0\
-\x1a\0\0\0\x3e\0\0\0\0\0\0\0\x90\0\0\0\x1a\0\0\0\xfa\0\0\0\0\0\0\0\xa8\0\0\0\
-\x1a\0\0\0\x62\x03\0\0\0\0\0\0\xb0\0\0\0\x1a\0\0\0\x66\x03\0\0\0\0\0\0\xc0\0\0\
-\0\x1f\0\0\0\x94\x03\0\0\0\0\0\0\xd8\0\0\0\x20\0\0\0\xfa\0\0\0\0\0\0\0\xf0\0\0\
-\0\x20\0\0\0\x3e\0\0\0\0\0\0\0\x18\x01\0\0\x24\0\0\0\x3e\0\0\0\0\0\0\0\x50\x01\
-\0\0\x1a\0\0\0\xfa\0\0\0\0\0\0\0\x60\x01\0\0\x20\0\0\0\x56\x04\0\0\0\0\0\0\x88\
-\x01\0\0\x1a\0\0\0\x2a\x01\0\0\0\0\0\0\x98\x01\0\0\x1a\0\0\0\x97\x04\0\0\0\0\0\
-\0\xa0\x01\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\x91\0\0\0\x04\0\xf1\xff\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xe6\0\0\
-\0\0\0\x02\0\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd8\0\0\0\0\0\x02\0\xf0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\xdf\0\0\0\0\0\x03\0\x78\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\xd1\0\0\0\0\0\x03\0\x80\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xca\0\0\0\0\0\x03\0\
-\xf8\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x14\0\0\0\x01\0\x04\0\0\0\0\0\0\0\0\0\x23\
-\0\0\0\0\0\0\0\x04\x01\0\0\x01\0\x04\0\x23\0\0\0\0\0\0\0\x0e\0\0\0\0\0\0\0\x28\
-\0\0\0\x01\0\x04\0\x31\0\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\xed\0\0\0\x01\0\x04\0\
-\x51\0\0\0\0\0\0\0\x11\0\0\0\0\0\0\0\0\0\0\0\x03\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\x03\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\
-\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xc2\0\0\0\x11\0\x05\0\0\0\0\0\0\0\0\0\
-\x04\0\0\0\0\0\0\0\x3d\0\0\0\x12\0\x02\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\x5b\
-\0\0\0\x12\0\x03\0\0\0\0\0\0\0\0\0\x08\x02\0\0\0\0\0\0\x48\0\0\0\0\0\0\0\x01\0\
-\0\0\x0d\0\0\0\xc8\0\0\0\0\0\0\0\x01\0\0\0\x0d\0\0\0\x50\0\0\0\0\0\0\0\x01\0\0\
-\0\x0d\0\0\0\xd0\x01\0\0\0\0\0\0\x01\0\0\0\x0d\0\0\0\xf0\x03\0\0\0\0\0\0\x0a\0\
-\0\0\x0d\0\0\0\xfc\x03\0\0\0\0\0\0\x0a\0\0\0\x0d\0\0\0\x08\x04\0\0\0\0\0\0\x0a\
-\0\0\0\x0d\0\0\0\x14\x04\0\0\0\0\0\0\x0a\0\0\0\x0d\0\0\0\x2c\x04\0\0\0\0\0\0\0\
-\0\0\0\x0e\0\0\0\x2c\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x3c\0\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x50\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x60\0\0\0\0\0\0\0\0\0\0\0\x0b\0\
-\0\0\x70\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\
-\x90\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xa0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xb0\0\
-\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xc0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xd0\0\0\0\0\
-\0\0\0\0\0\0\0\x0b\0\0\0\xe8\0\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xf8\0\0\0\0\0\0\0\
-\0\0\0\0\x0c\0\0\0\x08\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x18\x01\0\0\0\0\0\0\0\
-\0\0\0\x0c\0\0\0\x28\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x38\x01\0\0\0\0\0\0\0\0\
-\0\0\x0c\0\0\0\x48\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x58\x01\0\0\0\0\0\0\0\0\0\
-\0\x0c\0\0\0\x68\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x78\x01\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x88\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x98\x01\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\xa8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xb8\x01\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\xc8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xd8\x01\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\xe8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xf8\x01\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x08\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x28\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x38\x02\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x48\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x58\x02\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x68\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x78\x02\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x94\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xa4\x02\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\xb4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xc4\x02\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\xd4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xe4\x02\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\xf4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x0c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x1c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x2c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x3c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x4c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x5c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x6c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x7c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x8c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x9c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xac\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\xbc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xcc\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\xdc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xec\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\xfc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x0c\x04\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x1c\x04\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x4d\x4e\x40\x41\x42\x43\x4c\0\
-\x2e\x74\x65\x78\x74\0\x2e\x72\x65\x6c\x2e\x42\x54\x46\x2e\x65\x78\x74\0\x64\
-\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\
-\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\
-\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x2e\x72\x65\x6c\x69\x74\x65\
-\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\
-\x72\x6f\x67\0\x2e\x72\x65\x6c\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\
-\x67\0\x2e\x6c\x6c\x76\x6d\x5f\x61\x64\x64\x72\x73\x69\x67\0\x6c\x69\x63\x65\
-\x6e\x73\x65\0\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\x63\0\
-\x2e\x73\x74\x72\x74\x61\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x2e\x72\x6f\x64\
-\x61\x74\x61\0\x2e\x72\x65\x6c\x2e\x42\x54\x46\0\x4c\x49\x43\x45\x4e\x53\x45\0\
-\x4c\x42\x42\x31\x5f\x37\0\x4c\x42\x42\x31\x5f\x36\0\x4c\x42\x42\x30\x5f\x34\0\
-\x4c\x42\x42\x31\x5f\x33\0\x4c\x42\x42\x30\x5f\x33\0\x64\x75\x6d\x70\x5f\x62\
-\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x64\x75\x6d\
-\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\
-\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x4e\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\x6d\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\x40\x01\0\0\0\0\0\0\x08\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\xb1\0\0\0\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x48\x03\0\
-\0\0\0\0\0\x62\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\x89\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xaa\x03\0\0\0\0\0\0\x04\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xbd\0\0\0\x01\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xae\x03\0\0\0\0\0\0\x3d\x09\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x01\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\xeb\x0c\0\0\0\0\0\0\x2c\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa9\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\x18\x11\0\0\0\0\0\0\x98\x01\0\0\0\0\0\0\x0e\0\0\0\x0e\0\0\0\x08\0\0\
-\0\0\0\0\0\x18\0\0\0\0\0\0\0\x4a\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\xb0\x12\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\x08\0\0\0\x02\0\0\0\x08\0\0\0\0\0\0\0\
-\x10\0\0\0\0\0\0\0\x69\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd0\x12\
-\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\x08\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\
-\0\0\0\0\xb9\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xf0\x12\0\0\0\0\0\
-\0\x50\0\0\0\0\0\0\0\x08\0\0\0\x06\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\
-\x07\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\x13\0\0\0\0\0\0\xe0\
-\x03\0\0\0\0\0\0\x08\0\0\0\x07\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x7b\0\
-\0\0\x03\x4c\xff\x6f\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\0\0\x20\x17\0\0\0\0\0\0\x07\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa1\0\0\0\x03\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x27\x17\0\0\0\0\0\0\x1a\x01\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
-
- return 0;
-err:
- bpf_object__destroy_skeleton(s);
- return -1;
-}
-
-#endif /* __ITERATORS_BPF_SKEL_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fa4505f9b611..72ce1edde950 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -556,16 +556,14 @@ static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
- const struct bpf_map *map = filp->private_data;
- const struct bpf_array *array;
+ struct bpf_map *map = filp->private_data;
u32 type = 0, jited = 0;
- if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
- array = container_of(map, struct bpf_array, map);
- spin_lock(&array->aux->owner.lock);
- type = array->aux->owner.type;
- jited = array->aux->owner.jited;
- spin_unlock(&array->aux->owner.lock);
+ if (map_type_contains_progs(map)) {
+ spin_lock(&map->owner.lock);
+ type = map->owner.type;
+ jited = map->owner.jited;
+ spin_unlock(&map->owner.lock);
}
seq_printf(m,
@@ -874,6 +872,7 @@ static int map_create(union bpf_attr *attr)
atomic64_set(&map->refcnt, 1);
atomic64_set(&map->usercnt, 1);
mutex_init(&map->freeze_mutex);
+ spin_lock_init(&map->owner.lock);
map->spin_lock_off = -EINVAL;
map->timer_off = -EINVAL;
@@ -2217,7 +2216,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
BPF_F_ANY_ALIGNMENT |
BPF_F_TEST_STATE_FREQ |
BPF_F_SLEEPABLE |
- BPF_F_TEST_RND_HI32))
+ BPF_F_TEST_RND_HI32 |
+ BPF_F_XDP_HAS_FRAGS))
return -EINVAL;
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -2303,6 +2303,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
prog->aux->dst_prog = dst_prog;
prog->aux->offload_requested = !!attr->prog_ifindex;
prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
+ prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
err = security_bpf_prog_alloc(prog->aux);
if (err)
@@ -3318,6 +3319,11 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_FLOW_DISSECTOR:
case BPF_SK_LOOKUP:
return netns_bpf_prog_query(attr, uattr);
+ case BPF_SK_SKB_STREAM_PARSER:
+ case BPF_SK_SKB_STREAM_VERDICT:
+ case BPF_SK_MSG_VERDICT:
+ case BPF_SK_SKB_VERDICT:
+ return sock_map_bpf_prog_query(attr, uattr);
default:
return -EINVAL;
}
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 5e7edf913060..7224691df2ec 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -213,7 +213,7 @@ static void __bpf_tramp_image_put_deferred(struct work_struct *work)
im = container_of(work, struct bpf_tramp_image, work);
bpf_image_ksym_del(&im->ksym);
bpf_jit_free_exec(im->image);
- bpf_jit_uncharge_modmem(1);
+ bpf_jit_uncharge_modmem(PAGE_SIZE);
percpu_ref_exit(&im->pcref);
kfree_rcu(im, rcu);
}
@@ -310,7 +310,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
if (!im)
goto out;
- err = bpf_jit_charge_modmem(1);
+ err = bpf_jit_charge_modmem(PAGE_SIZE);
if (err)
goto out_free_im;
@@ -332,7 +332,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
out_free_image:
bpf_jit_free_exec(im->image);
out_uncharge:
- bpf_jit_uncharge_modmem(1);
+ bpf_jit_uncharge_modmem(PAGE_SIZE);
out_free_im:
kfree(im);
out:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a39eedecc93a..bbef86cb4e72 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -452,7 +452,8 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
return base_type(type) == PTR_TO_SOCKET ||
base_type(type) == PTR_TO_TCP_SOCK ||
- base_type(type) == PTR_TO_MEM;
+ base_type(type) == PTR_TO_MEM ||
+ base_type(type) == PTR_TO_BTF_ID;
}
static bool type_is_rdonly_mem(u32 type)
@@ -535,7 +536,7 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn)
static const char *reg_type_str(struct bpf_verifier_env *env,
enum bpf_reg_type type)
{
- char postfix[16] = {0}, prefix[16] = {0};
+ char postfix[16] = {0}, prefix[32] = {0};
static const char * const str[] = {
[NOT_INIT] = "?",
[SCALAR_VALUE] = "inv",
@@ -569,9 +570,11 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
}
if (type & MEM_RDONLY)
- strncpy(prefix, "rdonly_", 16);
+ strncpy(prefix, "rdonly_", 32);
if (type & MEM_ALLOC)
- strncpy(prefix, "alloc_", 16);
+ strncpy(prefix, "alloc_", 32);
+ if (type & MEM_USER)
+ strncpy(prefix, "user_", 32);
snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
prefix, str[base_type(type)], postfix);
@@ -1546,14 +1549,15 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
static void mark_btf_ld_reg(struct bpf_verifier_env *env,
struct bpf_reg_state *regs, u32 regno,
enum bpf_reg_type reg_type,
- struct btf *btf, u32 btf_id)
+ struct btf *btf, u32 btf_id,
+ enum bpf_type_flag flag)
{
if (reg_type == SCALAR_VALUE) {
mark_reg_unknown(env, regs, regno);
return;
}
mark_reg_known_zero(env, regs, regno);
- regs[regno].type = PTR_TO_BTF_ID;
+ regs[regno].type = PTR_TO_BTF_ID | flag;
regs[regno].btf = btf;
regs[regno].btf_id = btf_id;
}
@@ -1743,7 +1747,7 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
}
static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
- s16 offset, struct module **btf_modp)
+ s16 offset)
{
struct bpf_kfunc_btf kf_btf = { .offset = offset };
struct bpf_kfunc_btf_tab *tab;
@@ -1797,8 +1801,6 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
kfunc_btf_cmp_by_off, NULL);
}
- if (btf_modp)
- *btf_modp = b->module;
return b->btf;
}
@@ -1815,8 +1817,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
}
static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
- u32 func_id, s16 offset,
- struct module **btf_modp)
+ u32 func_id, s16 offset)
{
if (offset) {
if (offset < 0) {
@@ -1827,7 +1828,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
return ERR_PTR(-EINVAL);
}
- return __find_kfunc_desc_btf(env, offset, btf_modp);
+ return __find_kfunc_desc_btf(env, offset);
}
return btf_vmlinux ?: ERR_PTR(-ENOENT);
}
@@ -1890,7 +1891,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
prog_aux->kfunc_btf_tab = btf_tab;
}
- desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
+ desc_btf = find_kfunc_desc_btf(env, func_id, offset);
if (IS_ERR(desc_btf)) {
verbose(env, "failed to find BTF for kernel function\n");
return PTR_ERR(desc_btf);
@@ -2351,7 +2352,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
return NULL;
- desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
+ desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off);
if (IS_ERR(desc_btf))
return "<error>";
@@ -3498,11 +3499,6 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
#define MAX_PACKET_OFF 0xffff
-static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
-{
- return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
-}
-
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta,
enum bpf_access_type t)
@@ -4159,6 +4155,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
struct bpf_reg_state *reg = regs + regno;
const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
const char *tname = btf_name_by_offset(reg->btf, t->name_off);
+ enum bpf_type_flag flag = 0;
u32 btf_id;
int ret;
@@ -4178,9 +4175,16 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EACCES;
}
+ if (reg->type & MEM_USER) {
+ verbose(env,
+ "R%d is ptr_%s access user memory: off=%d\n",
+ regno, tname, off);
+ return -EACCES;
+ }
+
if (env->ops->btf_struct_access) {
ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
- off, size, atype, &btf_id);
+ off, size, atype, &btf_id, &flag);
} else {
if (atype != BPF_READ) {
verbose(env, "only read is supported\n");
@@ -4188,14 +4192,14 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
}
ret = btf_struct_access(&env->log, reg->btf, t, off, size,
- atype, &btf_id);
+ atype, &btf_id, &flag);
}
if (ret < 0)
return ret;
if (atype == BPF_READ && value_regno >= 0)
- mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id);
+ mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
return 0;
}
@@ -4208,6 +4212,7 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
{
struct bpf_reg_state *reg = regs + regno;
struct bpf_map *map = reg->map_ptr;
+ enum bpf_type_flag flag = 0;
const struct btf_type *t;
const char *tname;
u32 btf_id;
@@ -4245,12 +4250,12 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
return -EACCES;
}
- ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id);
+ ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag);
if (ret < 0)
return ret;
if (value_regno >= 0)
- mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id);
+ mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
return 0;
}
@@ -4451,7 +4456,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err < 0)
return err;
- err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, &btf_id);
+ err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
+ &btf_id);
if (err)
verbose_linfo(env, insn_idx, "; ");
if (!err && t == BPF_READ && value_regno >= 0) {
@@ -4877,6 +4883,62 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
}
}
+static int check_mem_size_reg(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ bool zero_size_allowed,
+ struct bpf_call_arg_meta *meta)
+{
+ int err;
+
+ /* This is used to refine r0 return value bounds for helpers
+ * that enforce this value as an upper bound on return values.
+ * See do_refine_retval_range() for helpers that can refine
+ * the return value. C type of helper is u32 so we pull register
+ * bound from umax_value however, if negative verifier errors
+ * out. Only upper bounds can be learned because retval is an
+ * int type and negative retvals are allowed.
+ */
+ if (meta)
+ meta->msize_max_value = reg->umax_value;
+
+ /* The register is SCALAR_VALUE; the access check
+ * happens using its boundaries.
+ */
+ if (!tnum_is_const(reg->var_off))
+ /* For unprivileged variable accesses, disable raw
+ * mode so that the program is required to
+ * initialize all the memory that the helper could
+ * just partially fill up.
+ */
+ meta = NULL;
+
+ if (reg->smin_value < 0) {
+ verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
+ regno);
+ return -EACCES;
+ }
+
+ if (reg->umin_value == 0) {
+ err = check_helper_mem_access(env, regno - 1, 0,
+ zero_size_allowed,
+ meta);
+ if (err)
+ return err;
+ }
+
+ if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
+ verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+ regno);
+ return -EACCES;
+ }
+ err = check_helper_mem_access(env, regno - 1,
+ reg->umax_value,
+ zero_size_allowed, meta);
+ if (!err)
+ err = mark_chain_precision(env, regno);
+ return err;
+}
+
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size)
{
@@ -4900,6 +4962,28 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
return check_helper_mem_access(env, regno, mem_size, true, NULL);
}
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ u32 regno)
+{
+ struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
+ bool may_be_null = type_may_be_null(mem_reg->type);
+ struct bpf_reg_state saved_reg;
+ int err;
+
+ WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
+
+ if (may_be_null) {
+ saved_reg = *mem_reg;
+ mark_ptr_not_null_reg(mem_reg);
+ }
+
+ err = check_mem_size_reg(env, reg, regno, true, NULL);
+
+ if (may_be_null)
+ *mem_reg = saved_reg;
+ return err;
+}
+
/* Implementation details:
* bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
* Two bpf_map_lookups (even with the same key) will have different reg->id.
@@ -5439,51 +5523,7 @@ skip_type_check:
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
- /* This is used to refine r0 return value bounds for helpers
- * that enforce this value as an upper bound on return values.
- * See do_refine_retval_range() for helpers that can refine
- * the return value. C type of helper is u32 so we pull register
- * bound from umax_value however, if negative verifier errors
- * out. Only upper bounds can be learned because retval is an
- * int type and negative retvals are allowed.
- */
- meta->msize_max_value = reg->umax_value;
-
- /* The register is SCALAR_VALUE; the access check
- * happens using its boundaries.
- */
- if (!tnum_is_const(reg->var_off))
- /* For unprivileged variable accesses, disable raw
- * mode so that the program is required to
- * initialize all the memory that the helper could
- * just partially fill up.
- */
- meta = NULL;
-
- if (reg->smin_value < 0) {
- verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
- regno);
- return -EACCES;
- }
-
- if (reg->umin_value == 0) {
- err = check_helper_mem_access(env, regno - 1, 0,
- zero_size_allowed,
- meta);
- if (err)
- return err;
- }
-
- if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
- verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
- regno);
- return -EACCES;
- }
- err = check_helper_mem_access(env, regno - 1,
- reg->umax_value,
- zero_size_allowed, meta);
- if (!err)
- err = mark_chain_precision(env, regno);
+ err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
} else if (arg_type_is_alloc_size(arg_type)) {
if (!tnum_is_const(reg->var_off)) {
verbose(env, "R%d is not a known constant'\n",
@@ -6842,22 +6882,23 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
}
}
-static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ int *insn_idx_p)
{
const struct btf_type *t, *func, *func_proto, *ptr_type;
struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name;
u32 i, nargs, func_id, ptr_type_id;
- struct module *btf_mod = NULL;
+ int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
struct btf *desc_btf;
- int err;
+ bool acq;
/* skip for now, but return error when we find this in fixup_kfunc_call */
if (!insn->imm)
return 0;
- desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
+ desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off);
if (IS_ERR(desc_btf))
return PTR_ERR(desc_btf);
@@ -6866,23 +6907,43 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
func_name = btf_name_by_offset(desc_btf, func->name_off);
func_proto = btf_type_by_id(desc_btf, func->type);
- if (!env->ops->check_kfunc_call ||
- !env->ops->check_kfunc_call(func_id, btf_mod)) {
+ if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_CHECK, func_id)) {
verbose(env, "calling kernel function %s is not allowed\n",
func_name);
return -EACCES;
}
+ acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_ACQUIRE, func_id);
+
/* Check the arguments */
err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
- if (err)
+ if (err < 0)
return err;
+ /* In case of release function, we get register number of refcounted
+ * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now
+ */
+ if (err) {
+ err = release_reference(env, regs[err].ref_obj_id);
+ if (err) {
+ verbose(env, "kfunc %s#%d reference has not been acquired before\n",
+ func_name, func_id);
+ return err;
+ }
+ }
for (i = 0; i < CALLER_SAVED_REGS; i++)
mark_reg_not_init(env, regs, caller_saved[i]);
/* Check return type */
t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
+
+ if (acq && !btf_type_is_ptr(t)) {
+ verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
+ return -EINVAL;
+ }
+
if (btf_type_is_scalar(t)) {
mark_reg_unknown(env, regs, BPF_REG_0);
mark_btf_func_reg_size(env, BPF_REG_0, t->size);
@@ -6901,7 +6962,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].type = PTR_TO_BTF_ID;
regs[BPF_REG_0].btf_id = ptr_type_id;
+ if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_RET_NULL, func_id)) {
+ regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
+ /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
+ regs[BPF_REG_0].id = ++env->id_gen;
+ }
mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
+ if (acq) {
+ int id = acquire_reference_state(env, insn_idx);
+
+ if (id < 0)
+ return id;
+ regs[BPF_REG_0].id = id;
+ regs[BPF_REG_0].ref_obj_id = id;
+ }
} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
nargs = btf_type_vlen(func_proto);
@@ -11549,7 +11624,7 @@ static int do_check(struct bpf_verifier_env *env)
if (insn->src_reg == BPF_PSEUDO_CALL)
err = check_func_call(env, insn, &env->insn_idx);
else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
- err = check_kfunc_call(env, insn);
+ err = check_kfunc_call(env, insn, &env->insn_idx);
else
err = check_helper_call(env, insn, &env->insn_idx);
if (err)
@@ -12992,6 +13067,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
prog->jited = 1;
prog->bpf_func = func[0]->bpf_func;
+ prog->jited_len = func[0]->jited_len;
prog->aux->func = func;
prog->aux->func_cnt = env->subprog_cnt;
bpf_prog_jit_attempt_done(prog);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 21aa30644219..a2024ba32a20 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1235,6 +1235,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_task_stack_proto;
case BPF_FUNC_copy_from_user:
return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
+ case BPF_FUNC_copy_from_user_task:
+ return prog->aux->sleepable ? &bpf_copy_from_user_task_proto : NULL;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
case BPF_FUNC_per_cpu_ptr:
@@ -1562,6 +1564,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
extern const struct bpf_func_proto bpf_skb_output_proto;
extern const struct bpf_func_proto bpf_xdp_output_proto;
+extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
@@ -1661,6 +1664,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sock_from_file_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_ptr_cookie_proto;
+ case BPF_FUNC_xdp_get_buff_len:
+ return &bpf_xdp_get_buff_len_trace_proto;
#endif
case BPF_FUNC_seq_printf:
return prog->expected_attach_type == BPF_TRACE_ITER ?
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 14b89aa37c5c..1555da672275 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -296,7 +296,7 @@ config DEBUG_INFO_DWARF4
config DEBUG_INFO_DWARF5
bool "Generate DWARF Version 5 debuginfo"
depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502)))
- depends on !DEBUG_INFO_BTF
+ depends on !DEBUG_INFO_BTF || PAHOLE_VERSION >= 121
help
Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc
5.0+ accepts the -gdwarf-5 flag but only had partial support for some
@@ -323,7 +323,15 @@ config DEBUG_INFO_BTF
DWARF type info into equivalent deduplicated BTF type info.
config PAHOLE_HAS_SPLIT_BTF
- def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119")
+ def_bool PAHOLE_VERSION >= 119
+
+config PAHOLE_HAS_BTF_TAG
+ def_bool PAHOLE_VERSION >= 123
+ depends on CC_IS_CLANG
+ help
+ Decide whether pahole emits btf_tag attributes (btf_type_tag and
+ btf_decl_tag) or not. Currently only clang compiler implements
+ these attributes, so make the config depend on CC_IS_CLANG.
config DEBUG_INFO_BTF_MODULES
def_bool y
diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
index a6789c0c626b..dc7b14aa3431 100644
--- a/lib/ref_tracker.c
+++ b/lib/ref_tracker.c
@@ -20,6 +20,7 @@ void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
unsigned long flags;
bool leak = false;
+ dir->dead = true;
spin_lock_irqsave(&dir->lock, flags);
list_for_each_entry_safe(tracker, n, &dir->quarantine, head) {
list_del(&tracker->head);
@@ -37,6 +38,7 @@ void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
spin_unlock_irqrestore(&dir->lock, flags);
WARN_ON_ONCE(leak);
WARN_ON_ONCE(refcount_read(&dir->untracked) != 1);
+ WARN_ON_ONCE(refcount_read(&dir->no_tracker) != 1);
}
EXPORT_SYMBOL(ref_tracker_dir_exit);
@@ -72,6 +74,12 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir,
gfp_t gfp_mask = gfp;
unsigned long flags;
+ WARN_ON_ONCE(dir->dead);
+
+ if (!trackerp) {
+ refcount_inc(&dir->no_tracker);
+ return 0;
+ }
if (gfp & __GFP_DIRECT_RECLAIM)
gfp_mask |= __GFP_NOFAIL;
*trackerp = tracker = kzalloc(sizeof(*tracker), gfp_mask);
@@ -81,7 +89,6 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir,
return -ENOMEM;
}
nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
- nr_entries = filter_irq_stacks(entries, nr_entries);
tracker->alloc_stack_handle = stack_depot_save(entries, nr_entries, gfp);
spin_lock_irqsave(&dir->lock, flags);
@@ -95,17 +102,23 @@ int ref_tracker_free(struct ref_tracker_dir *dir,
struct ref_tracker **trackerp)
{
unsigned long entries[REF_TRACKER_STACK_ENTRIES];
- struct ref_tracker *tracker = *trackerp;
depot_stack_handle_t stack_handle;
+ struct ref_tracker *tracker;
unsigned int nr_entries;
unsigned long flags;
+ WARN_ON_ONCE(dir->dead);
+
+ if (!trackerp) {
+ refcount_dec(&dir->no_tracker);
+ return 0;
+ }
+ tracker = *trackerp;
if (!tracker) {
refcount_dec(&dir->untracked);
return -EEXIST;
}
nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
- nr_entries = filter_irq_stacks(entries, nr_entries);
stack_handle = stack_depot_save(entries, nr_entries, GFP_ATOMIC);
spin_lock_irqsave(&dir->lock, flags);
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index a068757eabaf..7b3341cef926 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -5,6 +5,7 @@
* (C) 2015 Pengutronix, Alexander Aring <aar@pengutronix.de>
*/
+#include <linux/if_arp.h>
#include <linux/module.h>
#include <net/6lowpan.h>
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 9751207f7757..b7c4d656a94b 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -116,7 +116,6 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
return -ENOMEM;
}
- refcount_set(&ax25_rt->refcount, 1);
ax25_rt->callsign = route->dest_addr;
ax25_rt->dev = ax25_dev->dev;
ax25_rt->digipeat = NULL;
@@ -167,12 +166,12 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
ax25cmp(&route->dest_addr, &s->callsign) == 0) {
if (ax25_route_list == s) {
ax25_route_list = s->next;
- ax25_put_route(s);
+ __ax25_put_route(s);
} else {
for (t = ax25_route_list; t != NULL; t = t->next) {
if (t->next == s) {
t->next = s->next;
- ax25_put_route(s);
+ __ax25_put_route(s);
break;
}
}
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index f4004cf0ff6f..9f311fddfaf9 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -134,7 +134,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev)
{
struct inet6_dev *in6_dev = __in6_dev_get(dev);
- if (in6_dev && in6_dev->cnf.mc_forwarding)
+ if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding))
return BATADV_NO_FLAGS;
else
return BATADV_MCAST_WANT_NO_RTR6;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 04ebe901e86f..d10651108033 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -689,6 +689,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
bacpy(&conn->dst, dst);
bacpy(&conn->src, &hdev->bdaddr);
+ conn->handle = HCI_CONN_HANDLE_UNSET;
conn->hdev = hdev;
conn->type = type;
conn->role = role;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2b7bd3655b07..5bde0ec41177 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2503,6 +2503,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
INIT_LIST_HEAD(&hdev->conn_hash.list);
INIT_LIST_HEAD(&hdev->adv_instances);
INIT_LIST_HEAD(&hdev->blocked_keys);
+ INIT_LIST_HEAD(&hdev->monitored_devices);
INIT_LIST_HEAD(&hdev->local_codecs);
INIT_WORK(&hdev->rx_work, hci_rx_work);
@@ -3666,8 +3667,8 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
sco_recv_scodata(conn, skb);
return;
} else {
- bt_dev_err(hdev, "SCO packet for unknown connection handle %d",
- handle);
+ bt_dev_err_ratelimited(hdev, "SCO packet for unknown connection handle %d",
+ handle);
}
kfree_skb(skb);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index fc30f4c03d29..63b925921c87 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3068,6 +3068,11 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
struct hci_ev_conn_complete *ev = data;
struct hci_conn *conn;
+ if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for invalid handle");
+ return;
+ }
+
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
hci_dev_lock(hdev);
@@ -3106,6 +3111,17 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
}
}
+ /* The HCI_Connection_Complete event is only sent once per connection.
+ * Processing it more than once per connection can corrupt kernel memory.
+ *
+ * As the connection handle is set here for the first time, it indicates
+ * whether the connection is already set up.
+ */
+ if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
+ goto unlock;
+ }
+
if (!ev->status) {
conn->handle = __le16_to_cpu(ev->handle);
@@ -4534,7 +4550,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
if (!info) {
bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x",
HCI_EV_INQUIRY_RESULT_WITH_RSSI);
- return;
+ goto unlock;
}
bacpy(&data.bdaddr, &info->bdaddr);
@@ -4565,7 +4581,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
if (!info) {
bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x",
HCI_EV_INQUIRY_RESULT_WITH_RSSI);
- return;
+ goto unlock;
}
bacpy(&data.bdaddr, &info->bdaddr);
@@ -4587,7 +4603,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x",
HCI_EV_INQUIRY_RESULT_WITH_RSSI);
}
-
+unlock:
hci_dev_unlock(hdev);
}
@@ -4661,6 +4677,24 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
struct hci_ev_sync_conn_complete *ev = data;
struct hci_conn *conn;
+ switch (ev->link_type) {
+ case SCO_LINK:
+ case ESCO_LINK:
+ break;
+ default:
+ /* As per Core 5.3 Vol 4 Part E 7.7.35 (p.2219), Link_Type
+ * for HCI_Synchronous_Connection_Complete is limited to
+ * either SCO or eSCO
+ */
+ bt_dev_err(hdev, "Ignoring connect complete event for invalid link type");
+ return;
+ }
+
+ if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete for invalid handle");
+ return;
+ }
+
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
hci_dev_lock(hdev);
@@ -4684,23 +4718,19 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
+ /* The HCI_Synchronous_Connection_Complete event is only sent once per connection.
+ * Processing it more than once per connection can corrupt kernel memory.
+ *
+ * As the connection handle is set here for the first time, it indicates
+ * whether the connection is already set up.
+ */
+ if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete event for existing connection");
+ goto unlock;
+ }
+
switch (ev->status) {
case 0x00:
- /* The synchronous connection complete event should only be
- * sent once per new connection. Receiving a successful
- * complete event when the connection status is already
- * BT_CONNECTED means that the device is misbehaving and sent
- * multiple complete event packets for the same new connection.
- *
- * Registering the device more than once can corrupt kernel
- * memory, hence upon detecting this invalid event, we report
- * an error and ignore the packet.
- */
- if (conn->state == BT_CONNECTED) {
- bt_dev_err(hdev, "Ignoring connect complete event for existing connection");
- goto unlock;
- }
-
conn->handle = __le16_to_cpu(ev->handle);
conn->state = BT_CONNECTED;
conn->type = ev->link_type;
@@ -5496,6 +5526,11 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
struct smp_irk *irk;
u8 addr_type;
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Ignoring HCI_LE_Connection_Complete for invalid handle");
+ return;
+ }
+
hci_dev_lock(hdev);
/* All controllers implicitly stop advertising in the event of a
@@ -5537,6 +5572,17 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
cancel_delayed_work(&conn->le_conn_timeout);
}
+ /* The HCI_LE_Connection_Complete event is only sent once per connection.
+ * Processing it more than once per connection can corrupt kernel memory.
+ *
+ * As the connection handle is set here for the first time, it indicates
+ * whether the connection is already set up.
+ */
+ if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
+ goto unlock;
+ }
+
le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa);
/* Lookup the identity address from the stored connection
@@ -6798,7 +6844,7 @@ static const struct hci_ev {
HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt,
sizeof(struct hci_ev_num_comp_blocks)),
/* [0xff = HCI_EV_VENDOR] */
- HCI_EV(HCI_EV_VENDOR, msft_vendor_evt, 0),
+ HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE),
};
static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb,
@@ -6823,8 +6869,9 @@ static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb,
* decide if that is acceptable.
*/
if (skb->len > ev->max_len)
- bt_dev_warn(hdev, "unexpected event 0x%2.2x length: %u > %u",
- event, skb->len, ev->max_len);
+ bt_dev_warn_ratelimited(hdev,
+ "unexpected event 0x%2.2x length: %u > %u",
+ event, skb->len, ev->max_len);
data = hci_ev_skb_pull(hdev, skb, event, ev->min_len);
if (!data)
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 0feb68f12545..6e71aa6b6fea 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -382,6 +382,9 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
{
struct hci_cmd_sync_work_entry *entry;
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ return -ENODEV;
+
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
@@ -5140,8 +5143,8 @@ static void set_ext_conn_params(struct hci_conn *conn,
p->max_ce_len = cpu_to_le16(0x0000);
}
-int hci_le_ext_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
- u8 own_addr_type)
+static int hci_le_ext_create_conn_sync(struct hci_dev *hdev,
+ struct hci_conn *conn, u8 own_addr_type)
{
struct hci_cp_le_ext_create_conn *cp;
struct hci_cp_le_ext_conn_param *p;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 37087cf7dc5a..5dd684e0b259 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -42,7 +42,7 @@
#include "aosp.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 21
+#define MGMT_REVISION 22
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -174,6 +174,8 @@ static const u16 mgmt_events[] = {
MGMT_EV_ADV_MONITOR_REMOVED,
MGMT_EV_CONTROLLER_SUSPEND,
MGMT_EV_CONTROLLER_RESUME,
+ MGMT_EV_ADV_MONITOR_DEVICE_FOUND,
+ MGMT_EV_ADV_MONITOR_DEVICE_LOST,
};
static const u16 mgmt_untrusted_commands[] = {
@@ -9589,12 +9591,116 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir,
return true;
}
+void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle,
+ bdaddr_t *bdaddr, u8 addr_type)
+{
+ struct mgmt_ev_adv_monitor_device_lost ev;
+
+ ev.monitor_handle = cpu_to_le16(handle);
+ bacpy(&ev.addr.bdaddr, bdaddr);
+ ev.addr.type = addr_type;
+
+ mgmt_event(MGMT_EV_ADV_MONITOR_DEVICE_LOST, hdev, &ev, sizeof(ev),
+ NULL);
+}
+
+static void mgmt_adv_monitor_device_found(struct hci_dev *hdev,
+ bdaddr_t *bdaddr, bool report_device,
+ struct sk_buff *skb,
+ struct sock *skip_sk)
+{
+ struct sk_buff *advmon_skb;
+ size_t advmon_skb_len;
+ __le16 *monitor_handle;
+ struct monitored_device *dev, *tmp;
+ bool matched = false;
+ bool notify = false;
+
+ /* We have received the Advertisement Report because:
+ * 1. the kernel has initiated active discovery
+ * 2. if not, we have pend_le_reports > 0 in which case we are doing
+ * passive scanning
+ * 3. if none of the above is true, we have one or more active
+ * Advertisement Monitor
+ *
+ * For case 1 and 2, report all advertisements via MGMT_EV_DEVICE_FOUND
+ * and report ONLY one advertisement per device for the matched Monitor
+ * via MGMT_EV_ADV_MONITOR_DEVICE_FOUND event.
+ *
+ * For case 3, since we are not active scanning and all advertisements
+ * received are due to a matched Advertisement Monitor, report all
+ * advertisements ONLY via MGMT_EV_ADV_MONITOR_DEVICE_FOUND event.
+ */
+ if (report_device && !hdev->advmon_pend_notify) {
+ mgmt_event_skb(skb, skip_sk);
+ return;
+ }
+
+ advmon_skb_len = (sizeof(struct mgmt_ev_adv_monitor_device_found) -
+ sizeof(struct mgmt_ev_device_found)) + skb->len;
+ advmon_skb = mgmt_alloc_skb(hdev, MGMT_EV_ADV_MONITOR_DEVICE_FOUND,
+ advmon_skb_len);
+ if (!advmon_skb) {
+ if (report_device)
+ mgmt_event_skb(skb, skip_sk);
+ else
+ kfree_skb(skb);
+ return;
+ }
+
+ /* ADV_MONITOR_DEVICE_FOUND is similar to DEVICE_FOUND event except
+ * that it also has 'monitor_handle'. Make a copy of DEVICE_FOUND and
+ * store monitor_handle of the matched monitor.
+ */
+ monitor_handle = skb_put(advmon_skb, sizeof(*monitor_handle));
+ skb_put_data(advmon_skb, skb->data, skb->len);
+
+ hdev->advmon_pend_notify = false;
+
+ list_for_each_entry_safe(dev, tmp, &hdev->monitored_devices, list) {
+ if (!bacmp(&dev->bdaddr, bdaddr)) {
+ matched = true;
+
+ if (!dev->notified) {
+ *monitor_handle = cpu_to_le16(dev->handle);
+ notify = true;
+ dev->notified = true;
+ }
+ }
+
+ if (!dev->notified)
+ hdev->advmon_pend_notify = true;
+ }
+
+ if (!report_device &&
+ ((matched && !notify) || !msft_monitor_supported(hdev))) {
+ /* Handle 0 indicates that we are not active scanning and this
+ * is a subsequent advertisement report for an already matched
+ * Advertisement Monitor or the controller offloading support
+ * is not available.
+ */
+ *monitor_handle = 0;
+ notify = true;
+ }
+
+ if (report_device)
+ mgmt_event_skb(skb, skip_sk);
+ else
+ kfree_skb(skb);
+
+ if (notify)
+ mgmt_event_skb(advmon_skb, skip_sk);
+ else
+ kfree_skb(advmon_skb);
+}
+
void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
u8 addr_type, u8 *dev_class, s8 rssi, u32 flags,
u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len)
{
struct sk_buff *skb;
struct mgmt_ev_device_found *ev;
+ bool report_device = hci_discovery_active(hdev);
/* Don't send events for a non-kernel initiated discovery. With
* LE one exception is if we have pend_le_reports > 0 in which
@@ -9603,11 +9709,10 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
if (!hci_discovery_active(hdev)) {
if (link_type == ACL_LINK)
return;
- if (link_type == LE_LINK &&
- list_empty(&hdev->pend_le_reports) &&
- !hci_is_adv_monitoring(hdev)) {
+ if (link_type == LE_LINK && !list_empty(&hdev->pend_le_reports))
+ report_device = true;
+ else if (!hci_is_adv_monitoring(hdev))
return;
- }
}
if (hdev->discovery.result_filtering) {
@@ -9672,7 +9777,7 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len);
- mgmt_event_skb(skb, NULL);
+ mgmt_adv_monitor_device_found(hdev, bdaddr, report_device, skb, NULL);
}
void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index 6a943634b31a..9a3d77d3ca86 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -80,6 +80,14 @@ struct msft_rp_le_set_advertisement_filter_enable {
__u8 sub_opcode;
} __packed;
+#define MSFT_EV_LE_MONITOR_DEVICE 0x02
+struct msft_ev_le_monitor_device {
+ __u8 addr_type;
+ bdaddr_t bdaddr;
+ __u8 monitor_handle;
+ __u8 monitor_state;
+} __packed;
+
struct msft_monitor_advertisement_handle_data {
__u8 msft_handle;
__u16 mgmt_handle;
@@ -204,6 +212,37 @@ static struct msft_monitor_advertisement_handle_data *msft_find_handle_data
return NULL;
}
+/* This function requires the caller holds hdev->lock */
+static int msft_monitor_device_del(struct hci_dev *hdev, __u16 mgmt_handle,
+ bdaddr_t *bdaddr, __u8 addr_type,
+ bool notify)
+{
+ struct monitored_device *dev, *tmp;
+ int count = 0;
+
+ list_for_each_entry_safe(dev, tmp, &hdev->monitored_devices, list) {
+ /* mgmt_handle == 0 indicates remove all devices, whereas,
+ * bdaddr == NULL indicates remove all devices matching the
+ * mgmt_handle.
+ */
+ if ((!mgmt_handle || dev->handle == mgmt_handle) &&
+ (!bdaddr || (!bacmp(bdaddr, &dev->bdaddr) &&
+ addr_type == dev->addr_type))) {
+ if (notify && dev->notified) {
+ mgmt_adv_monitor_device_lost(hdev, dev->handle,
+ &dev->bdaddr,
+ dev->addr_type);
+ }
+
+ list_del(&dev->list);
+ kfree(dev);
+ count++;
+ }
+ }
+
+ return count;
+}
+
static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev,
u8 status, u16 opcode,
struct sk_buff *skb)
@@ -294,6 +333,10 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
if (monitor && !msft->suspending)
hci_free_adv_monitor(hdev, monitor);
+ /* Clear any monitored devices by this Adv Monitor */
+ msft_monitor_device_del(hdev, handle_data->mgmt_handle, NULL,
+ 0, false);
+
list_del(&handle_data->list);
kfree(handle_data);
}
@@ -557,6 +600,14 @@ void msft_do_close(struct hci_dev *hdev)
list_del(&handle_data->list);
kfree(handle_data);
}
+
+ hci_dev_lock(hdev);
+
+ /* Clear any devices that are being monitored and notify device lost */
+ hdev->advmon_pend_notify = false;
+ msft_monitor_device_del(hdev, 0, NULL, 0, true);
+
+ hci_dev_unlock(hdev);
}
void msft_register(struct hci_dev *hdev)
@@ -590,10 +641,101 @@ void msft_unregister(struct hci_dev *hdev)
kfree(msft);
}
+/* This function requires the caller holds hdev->lock */
+static void msft_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ __u8 addr_type, __u16 mgmt_handle)
+{
+ struct monitored_device *dev;
+
+ dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev) {
+ bt_dev_err(hdev, "MSFT vendor event %u: no memory",
+ MSFT_EV_LE_MONITOR_DEVICE);
+ return;
+ }
+
+ bacpy(&dev->bdaddr, bdaddr);
+ dev->addr_type = addr_type;
+ dev->handle = mgmt_handle;
+ dev->notified = false;
+
+ INIT_LIST_HEAD(&dev->list);
+ list_add(&dev->list, &hdev->monitored_devices);
+ hdev->advmon_pend_notify = true;
+}
+
+/* This function requires the caller holds hdev->lock */
+static void msft_device_lost(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ __u8 addr_type, __u16 mgmt_handle)
+{
+ if (!msft_monitor_device_del(hdev, mgmt_handle, bdaddr, addr_type,
+ true)) {
+ bt_dev_err(hdev, "MSFT vendor event %u: dev %pMR not in list",
+ MSFT_EV_LE_MONITOR_DEVICE, bdaddr);
+ }
+}
+
+static void *msft_skb_pull(struct hci_dev *hdev, struct sk_buff *skb,
+ u8 ev, size_t len)
+{
+ void *data;
+
+ data = skb_pull_data(skb, len);
+ if (!data)
+ bt_dev_err(hdev, "Malformed MSFT vendor event: 0x%02x", ev);
+
+ return data;
+}
+
+/* This function requires the caller holds hdev->lock */
+static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct msft_ev_le_monitor_device *ev;
+ struct msft_monitor_advertisement_handle_data *handle_data;
+ u8 addr_type;
+
+ ev = msft_skb_pull(hdev, skb, MSFT_EV_LE_MONITOR_DEVICE, sizeof(*ev));
+ if (!ev)
+ return;
+
+ bt_dev_dbg(hdev,
+ "MSFT vendor event 0x%02x: handle 0x%04x state %d addr %pMR",
+ MSFT_EV_LE_MONITOR_DEVICE, ev->monitor_handle,
+ ev->monitor_state, &ev->bdaddr);
+
+ handle_data = msft_find_handle_data(hdev, ev->monitor_handle, false);
+ if (!handle_data)
+ return;
+
+ switch (ev->addr_type) {
+ case ADDR_LE_DEV_PUBLIC:
+ addr_type = BDADDR_LE_PUBLIC;
+ break;
+
+ case ADDR_LE_DEV_RANDOM:
+ addr_type = BDADDR_LE_RANDOM;
+ break;
+
+ default:
+ bt_dev_err(hdev,
+ "MSFT vendor event 0x%02x: unknown addr type 0x%02x",
+ MSFT_EV_LE_MONITOR_DEVICE, ev->addr_type);
+ return;
+ }
+
+ if (ev->monitor_state)
+ msft_device_found(hdev, &ev->bdaddr, addr_type,
+ handle_data->mgmt_handle);
+ else
+ msft_device_lost(hdev, &ev->bdaddr, addr_type,
+ handle_data->mgmt_handle);
+}
+
void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
{
struct msft_data *msft = hdev->msft_data;
- u8 event;
+ u8 *evt_prefix;
+ u8 *evt;
if (!msft)
return;
@@ -602,13 +744,12 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
* matches, and otherwise just return.
*/
if (msft->evt_prefix_len > 0) {
- if (skb->len < msft->evt_prefix_len)
+ evt_prefix = msft_skb_pull(hdev, skb, 0, msft->evt_prefix_len);
+ if (!evt_prefix)
return;
- if (memcmp(skb->data, msft->evt_prefix, msft->evt_prefix_len))
+ if (memcmp(evt_prefix, msft->evt_prefix, msft->evt_prefix_len))
return;
-
- skb_pull(skb, msft->evt_prefix_len);
}
/* Every event starts at least with an event code and the rest of
@@ -617,10 +758,23 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
if (skb->len < 1)
return;
- event = *skb->data;
- skb_pull(skb, 1);
+ evt = msft_skb_pull(hdev, skb, 0, sizeof(*evt));
+ if (!evt)
+ return;
+
+ hci_dev_lock(hdev);
+
+ switch (*evt) {
+ case MSFT_EV_LE_MONITOR_DEVICE:
+ msft_monitor_device_evt(hdev, skb);
+ break;
- bt_dev_dbg(hdev, "MSFT vendor event %u", event);
+ default:
+ bt_dev_dbg(hdev, "MSFT vendor event 0x%02x", *evt);
+ break;
+ }
+
+ hci_dev_unlock(hdev);
}
__u64 msft_get_features(struct hci_dev *hdev)
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index fbc896323bec..d0e54e30658a 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -145,7 +145,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
const struct btf *btf,
const struct btf_type *t, int off,
int size, enum bpf_access_type atype,
- u32 *next_btf_id)
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
{
const struct btf_type *state;
s32 type_id;
@@ -162,7 +163,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
return -EACCES;
}
- err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id);
+ err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+ flag);
if (err < 0)
return err;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 46dd95755967..f08034500813 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -5,6 +5,7 @@
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/slab.h>
+#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
@@ -130,7 +131,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
static int bpf_test_finish(const union bpf_attr *kattr,
union bpf_attr __user *uattr, const void *data,
- u32 size, u32 retval, u32 duration)
+ struct skb_shared_info *sinfo, u32 size,
+ u32 retval, u32 duration)
{
void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
int err = -EFAULT;
@@ -145,8 +147,37 @@ static int bpf_test_finish(const union bpf_attr *kattr,
err = -ENOSPC;
}
- if (data_out && copy_to_user(data_out, data, copy_size))
- goto out;
+ if (data_out) {
+ int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
+
+ if (copy_to_user(data_out, data, len))
+ goto out;
+
+ if (sinfo) {
+ int i, offset = len;
+ u32 data_len;
+
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ if (offset >= copy_size) {
+ err = -ENOSPC;
+ break;
+ }
+
+ data_len = min_t(u32, copy_size - offset,
+ skb_frag_size(frag));
+
+ if (copy_to_user(data_out + offset,
+ skb_frag_address(frag),
+ data_len))
+ goto out;
+
+ offset += data_len;
+ }
+ }
+ }
+
if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
goto out;
if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
@@ -171,6 +202,8 @@ int noinline bpf_fentry_test1(int a)
{
return a + 1;
}
+EXPORT_SYMBOL_GPL(bpf_fentry_test1);
+ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
int noinline bpf_fentry_test2(int a, u64 b)
{
@@ -232,28 +265,142 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
return sk;
}
+struct prog_test_ref_kfunc {
+ int a;
+ int b;
+ struct prog_test_ref_kfunc *next;
+};
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+ .a = 42,
+ .b = 108,
+ .next = &prog_test_struct,
+};
+
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+ /* randomly return NULL */
+ if (get_jiffies_64() % 2)
+ return NULL;
+ return &prog_test_struct;
+}
+
+noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
+{
+}
+
+struct prog_test_pass1 {
+ int x0;
+ struct {
+ int x1;
+ struct {
+ int x2;
+ struct {
+ int x3;
+ };
+ };
+ };
+};
+
+struct prog_test_pass2 {
+ int len;
+ short arr1[4];
+ struct {
+ char arr2[4];
+ unsigned long arr3[8];
+ } x;
+};
+
+struct prog_test_fail1 {
+ void *p;
+ int x;
+};
+
+struct prog_test_fail2 {
+ int x8;
+ struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+ int len;
+ char arr1[2];
+ char arr2[];
+};
+
+noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
__diag_pop();
ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
-BTF_SET_START(test_sk_kfunc_ids)
+BTF_SET_START(test_sk_check_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test1)
BTF_ID(func, bpf_kfunc_call_test2)
BTF_ID(func, bpf_kfunc_call_test3)
-BTF_SET_END(test_sk_kfunc_ids)
-
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
-{
- if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
- return true;
- return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
-}
-
-static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
- u32 headroom, u32 tailroom)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID(func, bpf_kfunc_call_test_pass1)
+BTF_ID(func, bpf_kfunc_call_test_pass2)
+BTF_ID(func, bpf_kfunc_call_test_fail1)
+BTF_ID(func, bpf_kfunc_call_test_fail2)
+BTF_ID(func, bpf_kfunc_call_test_fail3)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_SET_END(test_sk_check_kfunc_ids)
+
+BTF_SET_START(test_sk_acquire_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_acquire_kfunc_ids)
+
+BTF_SET_START(test_sk_release_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_SET_END(test_sk_release_kfunc_ids)
+
+BTF_SET_START(test_sk_ret_null_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_ret_null_kfunc_ids)
+
+static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
+ u32 size, u32 headroom, u32 tailroom)
{
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
- u32 user_size = kattr->test.data_size_in;
void *data;
if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
@@ -581,7 +728,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
if (kattr->test.flags || kattr->test.cpu)
return -EINVAL;
- data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
+ data = bpf_test_init(kattr, kattr->test.data_size_in,
+ size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
if (IS_ERR(data))
return PTR_ERR(data);
@@ -683,7 +831,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
/* bpf program can never convert linear skb to non-linear */
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
size = skb_headlen(skb);
- ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
+ ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
+ duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct __sk_buff));
@@ -758,16 +907,16 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- u32 headroom = XDP_PACKET_HEADROOM;
u32 size = kattr->test.data_size_in;
+ u32 headroom = XDP_PACKET_HEADROOM;
+ u32 retval, duration, max_data_sz;
u32 repeat = kattr->test.repeat;
struct netdev_rx_queue *rxqueue;
+ struct skb_shared_info *sinfo;
struct xdp_buff xdp = {};
- u32 retval, duration;
+ int i, ret = -EINVAL;
struct xdp_md *ctx;
- u32 max_data_sz;
void *data;
- int ret = -EINVAL;
if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
prog->expected_attach_type == BPF_XDP_CPUMAP)
@@ -787,26 +936,65 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
headroom -= ctx->data;
}
- /* XDP have extra tailroom as (most) drivers use full page */
max_data_sz = 4096 - headroom - tailroom;
+ size = min_t(u32, size, max_data_sz);
- data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
+ data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
goto free_ctx;
}
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
- xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
- &rxqueue->xdp_rxq);
+ rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
+ xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
xdp_prepare_buff(&xdp, data, headroom, size, true);
+ sinfo = xdp_get_shared_info_from_buff(&xdp);
ret = xdp_convert_md_to_buff(ctx, &xdp);
if (ret)
goto free_data;
+ if (unlikely(kattr->test.data_size_in > size)) {
+ void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+
+ while (size < kattr->test.data_size_in) {
+ struct page *page;
+ skb_frag_t *frag;
+ u32 data_len;
+
+ if (sinfo->nr_frags == MAX_SKB_FRAGS) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ frag = &sinfo->frags[sinfo->nr_frags++];
+ __skb_frag_set_page(frag, page);
+
+ data_len = min_t(u32, kattr->test.data_size_in - size,
+ PAGE_SIZE);
+ skb_frag_size_set(frag, data_len);
+
+ if (copy_from_user(page_address(page), data_in + size,
+ data_len)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ sinfo->xdp_frags_size += data_len;
+ size += data_len;
+ }
+ xdp_buff_set_frags_flag(&xdp);
+ }
+
if (repeat > 1)
bpf_prog_change_xdp(NULL, prog);
+
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
/* We convert the xdp_buff back to an xdp_md before checking the return
* code so the reference count of any held netdevice will be decremented
@@ -816,12 +1004,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (ret)
goto out;
- if (xdp.data_meta != data + headroom ||
- xdp.data_end != xdp.data_meta + size)
- size = xdp.data_end - xdp.data_meta;
-
- ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
- duration);
+ size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
+ ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
+ retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct xdp_md));
@@ -830,6 +1015,8 @@ out:
if (repeat > 1)
bpf_prog_change_xdp(prog, NULL);
free_data:
+ for (i = 0; i < sinfo->nr_frags; i++)
+ __free_page(skb_frag_page(&sinfo->frags[i]));
kfree(data);
free_ctx:
kfree(ctx);
@@ -876,7 +1063,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
if (size < ETH_HLEN)
return -EINVAL;
- data = bpf_test_init(kattr, size, 0, 0);
+ data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0);
if (IS_ERR(data))
return PTR_ERR(data);
@@ -911,8 +1098,8 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
if (ret < 0)
goto out;
- ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
- retval, duration);
+ ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
+ sizeof(flow_keys), retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx,
sizeof(struct bpf_flow_keys));
@@ -960,7 +1147,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
goto out;
- if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
+ if (user_ctx->local_port > U16_MAX) {
ret = -ERANGE;
goto out;
}
@@ -968,7 +1155,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
ctx.family = (u16)user_ctx->family;
ctx.protocol = (u16)user_ctx->protocol;
ctx.dport = (u16)user_ctx->local_port;
- ctx.sport = (__force __be16)user_ctx->remote_port;
+ ctx.sport = user_ctx->remote_port;
switch (ctx.family) {
case AF_INET:
@@ -1016,7 +1203,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
}
- ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
+ ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
@@ -1067,3 +1254,17 @@ out:
kfree(ctx);
return err;
}
+
+static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &test_sk_check_kfunc_ids,
+ .acquire_set = &test_sk_acquire_kfunc_ids,
+ .release_set = &test_sk_release_kfunc_ids,
+ .ret_null_set = &test_sk_ret_null_kfunc_ids,
+};
+
+static int __init bpf_prog_test_run_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+}
+late_initcall(bpf_prog_test_run_init);
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 440139706130..52dd0b6835bc 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -268,7 +268,7 @@ static int receive(struct sk_buff *skb, struct net_device *dev,
err = caifd->layer.up->receive(caifd->layer.up, pkt);
- /* For -EILSEQ the packet is not freed so so it now */
+ /* For -EILSEQ the packet is not freed so free it now */
if (err == -EILSEQ)
cfpkt_destroy(pkt);
diff --git a/net/can/gw.c b/net/can/gw.c
index d8861e862f15..24221352e059 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1239,16 +1239,19 @@ static int __net_init cangw_pernet_init(struct net *net)
return 0;
}
-static void __net_exit cangw_pernet_exit(struct net *net)
+static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
{
+ struct net *net;
+
rtnl_lock();
- cgw_remove_all_jobs(net);
+ list_for_each_entry(net, net_list, exit_list)
+ cgw_remove_all_jobs(net);
rtnl_unlock();
}
static struct pernet_operations cangw_pernet_ops = {
.init = cangw_pernet_init,
- .exit = cangw_pernet_exit,
+ .exit_batch = cangw_pernet_exit_batch,
};
static __init int cgw_module_init(void)
diff --git a/net/core/dev.c b/net/core/dev.c
index 1baab07820f6..2c3b8744e00c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1037,7 +1037,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
/* avoid cases where sscanf is not exact inverse of printf */
snprintf(buf, IFNAMSIZ, name, i);
if (!strncmp(buf, name_node->name, IFNAMSIZ))
- set_bit(i, inuse);
+ __set_bit(i, inuse);
}
if (!sscanf(d->name, name, &i))
continue;
@@ -1047,7 +1047,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
/* avoid cases where sscanf is not exact inverse of printf */
snprintf(buf, IFNAMSIZ, name, i);
if (!strncmp(buf, d->name, IFNAMSIZ))
- set_bit(i, inuse);
+ __set_bit(i, inuse);
}
i = find_first_zero_bit(inuse, max_netdevices);
@@ -9143,7 +9143,7 @@ DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
{
list_add_tail(&dev->todo_list, &net_todo_list);
- dev_net(dev)->dev_unreg_count++;
+ atomic_inc(&dev_net(dev)->dev_unreg_count);
}
static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
@@ -9683,8 +9683,10 @@ int register_netdevice(struct net_device *dev)
linkwatch_init_dev(dev);
dev_init_scheduler(dev);
- dev_hold(dev);
+
+ dev_hold_track(dev, &dev->dev_registered_tracker, GFP_KERNEL);
list_netdevice(dev);
+
add_device_randomness(dev->dev_addr, dev->addr_len);
/* If the device has permanent device address, driver should
@@ -9963,11 +9965,8 @@ void netdev_run_todo(void)
if (dev->needs_free_netdev)
free_netdev(dev);
- /* Report a network device has been unregistered */
- rtnl_lock();
- dev_net(dev)->dev_unreg_count--;
- __rtnl_unlock();
- wake_up(&netdev_unregistering_wq);
+ if (atomic_dec_and_test(&dev_net(dev)->dev_unreg_count))
+ wake_up(&netdev_unregistering_wq);
/* Free network device */
kobject_put(&dev->dev.kobj);
@@ -10172,7 +10171,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
goto free_dev;
- dev_hold(dev);
+ __dev_hold(dev);
#else
refcount_set(&dev->dev_refcnt, 1);
#endif
@@ -10449,7 +10448,7 @@ void unregister_netdevice_many(struct list_head *head)
synchronize_net();
list_for_each_entry(dev, head, unreg_list) {
- dev_put(dev);
+ dev_put_track(dev, &dev->dev_registered_tracker);
net_set_todo(dev);
}
@@ -10732,8 +10731,7 @@ static int __net_init netdev_init(struct net *net)
BUILD_BUG_ON(GRO_HASH_BUCKETS >
8 * sizeof_field(struct napi_struct, gro_bitmask));
- if (net != &init_net)
- INIT_LIST_HEAD(&net->dev_base_head);
+ INIT_LIST_HEAD(&net->dev_base_head);
net->dev_name_head = netdev_create_hash();
if (net->dev_name_head == NULL)
@@ -10849,14 +10847,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
.exit = netdev_exit,
};
-static void __net_exit default_device_exit(struct net *net)
+static void __net_exit default_device_exit_net(struct net *net)
{
struct net_device *dev, *aux;
/*
* Push all migratable network devices back to the
* initial network namespace
*/
- rtnl_lock();
+ ASSERT_RTNL();
for_each_netdev_safe(net, dev, aux) {
int err;
char fb_name[IFNAMSIZ];
@@ -10880,24 +10878,24 @@ static void __net_exit default_device_exit(struct net *net)
BUG();
}
}
- rtnl_unlock();
}
static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
{
- /* Return with the rtnl_lock held when there are no network
+ /* Return (with the rtnl_lock held) when there are no network
* devices unregistering in any network namespace in net_list.
*/
- struct net *net;
- bool unregistering;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ bool unregistering;
+ struct net *net;
+ ASSERT_RTNL();
add_wait_queue(&netdev_unregistering_wq, &wait);
for (;;) {
unregistering = false;
- rtnl_lock();
+
list_for_each_entry(net, net_list, exit_list) {
- if (net->dev_unreg_count > 0) {
+ if (atomic_read(&net->dev_unreg_count) > 0) {
unregistering = true;
break;
}
@@ -10907,6 +10905,7 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
__rtnl_unlock();
wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+ rtnl_lock();
}
remove_wait_queue(&netdev_unregistering_wq, &wait);
}
@@ -10922,6 +10921,11 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
struct net *net;
LIST_HEAD(dev_kill_list);
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list) {
+ default_device_exit_net(net);
+ cond_resched();
+ }
/* To prevent network device cleanup code from dereferencing
* loopback devices or network devices that have been freed
* wait here for all pending unregistrations to complete,
@@ -10934,6 +10938,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
* default_device_exit_batch.
*/
rtnl_lock_unregistering(net_list);
+
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
@@ -10947,7 +10952,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}
static struct pernet_operations __net_initdata default_device_ops = {
- .exit = default_device_exit,
.exit_batch = default_device_exit_batch,
};
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 7b288a121a41..4641126b8a20 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -48,6 +48,19 @@
static int trace_state = TRACE_OFF;
static bool monitor_hw;
+#undef EM
+#undef EMe
+
+#define EM(a, b) [a] = #b,
+#define EMe(a, b) [a] = #b
+
+/* drop_reasons is used to translate 'enum skb_drop_reason' to string,
+ * which is reported to user space.
+ */
+static const char * const drop_reasons[] = {
+ TRACE_SKB_DROP_REASON
+};
+
/* net_dm_mutex
*
* An overall lock guarding every operation coming from userspace.
@@ -126,6 +139,7 @@ struct net_dm_skb_cb {
struct devlink_trap_metadata *hw_metadata;
void *pc;
};
+ enum skb_drop_reason reason;
};
#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
@@ -498,6 +512,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
{
ktime_t tstamp = ktime_get_real();
struct per_cpu_dm_data *data;
+ struct net_dm_skb_cb *cb;
struct sk_buff *nskb;
unsigned long flags;
@@ -508,7 +523,11 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
if (!nskb)
return;
- NET_DM_SKB_CB(nskb)->pc = location;
+ if ((unsigned int)reason >= SKB_DROP_REASON_MAX)
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ cb = NET_DM_SKB_CB(nskb);
+ cb->reason = reason;
+ cb->pc = location;
/* Override the timestamp because we care about the time when the
* packet was dropped.
*/
@@ -553,7 +572,8 @@ static size_t net_dm_in_port_size(void)
#define NET_DM_MAX_SYMBOL_LEN 40
-static size_t net_dm_packet_report_size(size_t payload_len)
+static size_t net_dm_packet_report_size(size_t payload_len,
+ enum skb_drop_reason reason)
{
size_t size;
@@ -574,6 +594,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
nla_total_size(sizeof(u32)) +
/* NET_DM_ATTR_PROTO */
nla_total_size(sizeof(u16)) +
+ /* NET_DM_ATTR_REASON */
+ nla_total_size(strlen(drop_reasons[reason]) + 1) +
/* NET_DM_ATTR_PAYLOAD */
nla_total_size(payload_len);
}
@@ -606,7 +628,7 @@ nla_put_failure:
static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
size_t payload_len)
{
- u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
+ struct net_dm_skb_cb *cb = NET_DM_SKB_CB(skb);
char buf[NET_DM_MAX_SYMBOL_LEN];
struct nlattr *attr;
void *hdr;
@@ -620,10 +642,15 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, (u64)(uintptr_t)cb->pc,
+ NET_DM_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, NET_DM_ATTR_REASON,
+ drop_reasons[cb->reason]))
goto nla_put_failure;
- snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc);
+ snprintf(buf, sizeof(buf), "%pS", cb->pc);
if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
goto nla_put_failure;
@@ -679,7 +706,9 @@ static void net_dm_packet_report(struct sk_buff *skb)
if (net_dm_trunc_len)
payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
- msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
+ msg = nlmsg_new(net_dm_packet_report_size(payload_len,
+ NET_DM_SKB_CB(skb)->reason),
+ GFP_KERNEL);
if (!msg)
goto out;
diff --git a/net/core/filter.c b/net/core/filter.c
index 4603b7cd3cd1..818244068c2d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2603,7 +2603,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
* account for the headroom.
*/
bytes_sg_total = start - offset + bytes;
- if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
+ if (!test_bit(i, msg->sg.copy) && bytes_sg_total <= len)
goto out;
/* At this point we need to linearize multiple scatterlist
@@ -2809,7 +2809,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
/* Place newly allocated data buffer */
sk_mem_charge(msg->sk, len);
msg->sg.size += len;
- __clear_bit(new, &msg->sg.copy);
+ __clear_bit(new, msg->sg.copy);
sg_set_page(&msg->sg.data[new], page, len + copy, 0);
if (rsge.length) {
get_page(sg_page(&rsge));
@@ -3783,6 +3783,28 @@ static const struct bpf_func_proto sk_skb_change_head_proto = {
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
};
+
+BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
+{
+ return xdp_get_buff_len(xdp);
+}
+
+static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
+ .func = bpf_xdp_get_buff_len,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
+
+const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
+ .func = bpf_xdp_get_buff_len,
+ .gpl_only = false,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0],
+};
+
static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
{
return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -3817,11 +3839,208 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
+static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+ void *buf, unsigned long len, bool flush)
+{
+ unsigned long ptr_len, ptr_off = 0;
+ skb_frag_t *next_frag, *end_frag;
+ struct skb_shared_info *sinfo;
+ void *src, *dst;
+ u8 *ptr_buf;
+
+ if (likely(xdp->data_end - xdp->data >= off + len)) {
+ src = flush ? buf : xdp->data + off;
+ dst = flush ? xdp->data + off : buf;
+ memcpy(dst, src, len);
+ return;
+ }
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ end_frag = &sinfo->frags[sinfo->nr_frags];
+ next_frag = &sinfo->frags[0];
+
+ ptr_len = xdp->data_end - xdp->data;
+ ptr_buf = xdp->data;
+
+ while (true) {
+ if (off < ptr_off + ptr_len) {
+ unsigned long copy_off = off - ptr_off;
+ unsigned long copy_len = min(len, ptr_len - copy_off);
+
+ src = flush ? buf : ptr_buf + copy_off;
+ dst = flush ? ptr_buf + copy_off : buf;
+ memcpy(dst, src, copy_len);
+
+ off += copy_len;
+ len -= copy_len;
+ buf += copy_len;
+ }
+
+ if (!len || next_frag == end_frag)
+ break;
+
+ ptr_off += ptr_len;
+ ptr_buf = skb_frag_address(next_frag);
+ ptr_len = skb_frag_size(next_frag);
+ next_frag++;
+ }
+}
+
+static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ u32 size = xdp->data_end - xdp->data;
+ void *addr = xdp->data;
+ int i;
+
+ if (unlikely(offset > 0xffff || len > 0xffff))
+ return ERR_PTR(-EFAULT);
+
+ if (offset + len > xdp_get_buff_len(xdp))
+ return ERR_PTR(-EINVAL);
+
+ if (offset < size) /* linear area */
+ goto out;
+
+ offset -= size;
+ for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */
+ u32 frag_size = skb_frag_size(&sinfo->frags[i]);
+
+ if (offset < frag_size) {
+ addr = skb_frag_address(&sinfo->frags[i]);
+ size = frag_size;
+ break;
+ }
+ offset -= frag_size;
+ }
+out:
+ return offset + len < size ? addr + offset : NULL;
+}
+
+BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
+ void *, buf, u32, len)
+{
+ void *ptr;
+
+ ptr = bpf_xdp_pointer(xdp, offset, len);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
+ if (!ptr)
+ bpf_xdp_copy_buf(xdp, offset, buf, len, false);
+ else
+ memcpy(buf, ptr, len);
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
+ .func = bpf_xdp_load_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
+ void *, buf, u32, len)
+{
+ void *ptr;
+
+ ptr = bpf_xdp_pointer(xdp, offset, len);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
+ if (!ptr)
+ bpf_xdp_copy_buf(xdp, offset, buf, len, true);
+ else
+ memcpy(ptr, buf, len);
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
+ .func = bpf_xdp_store_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
+
+static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
+ struct xdp_rxq_info *rxq = xdp->rxq;
+ unsigned int tailroom;
+
+ if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
+ return -EOPNOTSUPP;
+
+ tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+ if (unlikely(offset > tailroom))
+ return -EINVAL;
+
+ memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
+ skb_frag_size_add(frag, offset);
+ sinfo->xdp_frags_size += offset;
+
+ return 0;
+}
+
+static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ int i, n_frags_free = 0, len_free = 0;
+
+ if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
+ return -EINVAL;
+
+ for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ int shrink = min_t(int, offset, skb_frag_size(frag));
+
+ len_free += shrink;
+ offset -= shrink;
+
+ if (skb_frag_size(frag) == shrink) {
+ struct page *page = skb_frag_page(frag);
+
+ __xdp_return(page_address(page), &xdp->rxq->mem,
+ false, NULL);
+ n_frags_free++;
+ } else {
+ skb_frag_size_sub(frag, shrink);
+ break;
+ }
+ }
+ sinfo->nr_frags -= n_frags_free;
+ sinfo->xdp_frags_size -= len_free;
+
+ if (unlikely(!sinfo->nr_frags)) {
+ xdp_buff_clear_frags_flag(xdp);
+ xdp->data_end -= offset;
+ }
+
+ return 0;
+}
+
BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
{
void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
void *data_end = xdp->data_end + offset;
+ if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */
+ if (offset < 0)
+ return bpf_xdp_frags_shrink_tail(xdp, -offset);
+
+ return bpf_xdp_frags_increase_tail(xdp, offset);
+ }
+
/* Notice that xdp_data_hard_end have reserved some tailroom */
if (unlikely(data_end > data_hard_end))
return -EINVAL;
@@ -4047,6 +4266,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
+ /* XDP_REDIRECT is not fully supported yet for xdp frags since
+ * not all XDP capable drivers can map non-linear xdp_frame in
+ * ndo_xdp_xmit.
+ */
+ if (unlikely(xdp_buff_has_frags(xdp) &&
+ map_type != BPF_MAP_TYPE_CPUMAP))
+ return -EOPNOTSUPP;
+
if (map_type == BPF_MAP_TYPE_XSKMAP)
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
@@ -4590,10 +4817,12 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
};
#endif
-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+static unsigned long bpf_xdp_copy(void *dst, const void *ctx,
unsigned long off, unsigned long len)
{
- memcpy(dst_buff, src_buff + off, len);
+ struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+
+ bpf_xdp_copy_buf(xdp, off, dst, len, false);
return 0;
}
@@ -4604,11 +4833,11 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
- if (unlikely(!xdp ||
- xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+
+ if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp)))
return -EFAULT;
- return bpf_event_output(map, flags, meta, meta_size, xdp->data,
+ return bpf_event_output(map, flags, meta, meta_size, xdp,
xdp_size, bpf_xdp_copy);
}
@@ -4862,6 +5091,13 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
case SO_REUSEPORT:
sk->sk_reuseport = valbool;
break;
+ case SO_TXREHASH:
+ if (val < -1 || val > 1) {
+ ret = -EINVAL;
+ break;
+ }
+ sk->sk_txrehash = (u8)val;
+ break;
default:
ret = -EINVAL;
}
@@ -5040,6 +5276,9 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
case SO_REUSEPORT:
*((int *)optval) = sk->sk_reuseport;
break;
+ case SO_TXREHASH:
+ *((int *)optval) = sk->sk_txrehash;
+ break;
default:
goto err_clear;
}
@@ -7533,6 +7772,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_redirect_map_proto;
case BPF_FUNC_xdp_adjust_tail:
return &bpf_xdp_adjust_tail_proto;
+ case BPF_FUNC_xdp_get_buff_len:
+ return &bpf_xdp_get_buff_len_proto;
+ case BPF_FUNC_xdp_load_bytes:
+ return &bpf_xdp_load_bytes_proto;
+ case BPF_FUNC_xdp_store_bytes:
+ return &bpf_xdp_store_bytes_proto;
case BPF_FUNC_fib_lookup:
return &bpf_xdp_fib_lookup_proto;
case BPF_FUNC_check_mtu:
@@ -8030,6 +8275,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
+ int field_size;
if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
@@ -8041,7 +8287,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case offsetof(struct bpf_sock, family):
case offsetof(struct bpf_sock, type):
case offsetof(struct bpf_sock, protocol):
- case offsetof(struct bpf_sock, dst_port):
case offsetof(struct bpf_sock, src_port):
case offsetof(struct bpf_sock, rx_queue_mapping):
case bpf_ctx_range(struct bpf_sock, src_ip4):
@@ -8050,6 +8295,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
+ case bpf_ctx_range(struct bpf_sock, dst_port):
+ field_size = size == size_default ?
+ size_default : sizeof_field(struct bpf_sock, dst_port);
+ bpf_ctx_record_field_size(info, field_size);
+ return bpf_ctx_narrow_access_ok(off, size, field_size);
+ case offsetofend(struct bpf_sock, dst_port) ...
+ offsetof(struct bpf_sock, dst_ip4) - 1:
+ return false;
}
return size == size_default;
@@ -10062,7 +10315,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
.gen_ld_abs = bpf_gen_ld_abs,
- .check_kfunc_call = bpf_prog_test_check_kfunc_call,
};
const struct bpf_prog_ops tc_cls_act_prog_ops = {
@@ -10601,7 +10853,8 @@ static bool sk_lookup_is_valid_access(int off, int size,
case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
- case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
+ case offsetof(struct bpf_sk_lookup, remote_port) ...
+ offsetof(struct bpf_sk_lookup, local_ip4) - 1:
case bpf_ctx_range(struct bpf_sk_lookup, local_port):
case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex):
bpf_ctx_record_field_size(info, sizeof(__u32));
diff --git a/net/core/gro.c b/net/core/gro.c
index a11b286d1495..ee5e7e889d8b 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -459,29 +459,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
skb_set_network_header(skb, skb_gro_offset(skb));
skb_reset_mac_len(skb);
- NAPI_GRO_CB(skb)->same_flow = 0;
+ BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32));
+ BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
+ sizeof(u32))); /* Avoid slow unaligned acc */
+ *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
- NAPI_GRO_CB(skb)->free = 0;
- NAPI_GRO_CB(skb)->encap_mark = 0;
- NAPI_GRO_CB(skb)->recursion_counter = 0;
- NAPI_GRO_CB(skb)->is_fou = 0;
NAPI_GRO_CB(skb)->is_atomic = 1;
- NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
/* Setup for GRO checksum validation */
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
NAPI_GRO_CB(skb)->csum = skb->csum;
NAPI_GRO_CB(skb)->csum_valid = 1;
- NAPI_GRO_CB(skb)->csum_cnt = 0;
break;
case CHECKSUM_UNNECESSARY:
NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
- NAPI_GRO_CB(skb)->csum_valid = 0;
break;
- default:
- NAPI_GRO_CB(skb)->csum_cnt = 0;
- NAPI_GRO_CB(skb)->csum_valid = 0;
}
pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
@@ -634,7 +627,6 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
- skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
if (unlikely(skb->slow_gro)) {
skb_orphan(skb);
skb_ext_reset(skb);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index b0f5344d1185..95098d1a49bd 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -166,10 +166,10 @@ static void linkwatch_do_dev(struct net_device *dev)
netdev_state_change(dev);
}
- /* Note: our callers are responsible for
- * calling netdev_tracker_free().
+ /* Note: our callers are responsible for calling netdev_tracker_free().
+ * This is the reason we use __dev_put() instead of dev_put().
*/
- dev_put(dev);
+ __dev_put(dev);
}
static void __linkwatch_run_queue(int urgent_only)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a5b5bb99c644..0ec2f5906a27 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -44,13 +44,7 @@ EXPORT_SYMBOL_GPL(net_rwsem);
static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
#endif
-struct net init_net = {
- .ns.count = REFCOUNT_INIT(1),
- .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
-#ifdef CONFIG_KEYS
- .key_domain = &init_net_key_domain,
-#endif
-};
+struct net init_net;
EXPORT_SYMBOL(init_net);
static bool init_net_initialized;
@@ -301,6 +295,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
return peer;
}
+EXPORT_SYMBOL_GPL(get_net_ns_by_id);
/*
* setup_net runs the initializers for the network namespace object.
@@ -363,6 +358,8 @@ out_undo:
static int __net_init net_defaults_init_net(struct net *net)
{
net->core.sysctl_somaxconn = SOMAXCONN;
+ net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
+
return 0;
}
@@ -1084,7 +1081,7 @@ out:
rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}
-static int __init net_ns_init(void)
+void __init net_ns_init(void)
{
struct net_generic *ng;
@@ -1105,6 +1102,9 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
+#ifdef CONFIG_KEYS
+ init_net.key_domain = &init_net_key_domain;
+#endif
down_write(&pernet_ops_rwsem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
@@ -1119,12 +1119,8 @@ static int __init net_ns_init(void)
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
RTNL_FLAG_DOIT_UNLOCKED);
-
- return 0;
}
-pure_initcall(net_ns_init);
-
static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
{
ops_pre_exit_list(ops, net_exit_list);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index bd62c01a2ec3..e25d359d84d9 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -423,11 +423,6 @@ static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size, bool allow_direct)
{
- /* It is not the last user for the page frag case */
- if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
- page_pool_atomic_sub_frag_count_return(page, 1))
- return NULL;
-
/* This allocator is optimized for the XDP mode that uses
* one-frame-per-page, but have fallbacks that act like the
* regular page allocator APIs.
@@ -471,8 +466,8 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
return NULL;
}
-void page_pool_put_page(struct page_pool *pool, struct page *page,
- unsigned int dma_sync_size, bool allow_direct)
+void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size, bool allow_direct)
{
page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
if (page && !page_pool_recycle_in_ring(pool, page)) {
@@ -480,7 +475,7 @@ void page_pool_put_page(struct page_pool *pool, struct page *page,
page_pool_return_page(pool, page);
}
}
-EXPORT_SYMBOL(page_pool_put_page);
+EXPORT_SYMBOL(page_pool_put_defragged_page);
/* Caller must not use data area after call, as this function overwrites it */
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
@@ -491,6 +486,10 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
for (i = 0; i < count; i++) {
struct page *page = virt_to_head_page(data[i]);
+ /* It is not the last user for the page frag case */
+ if (!page_pool_is_last_frag(pool, page))
+ continue;
+
page = __page_pool_put_page(pool, page, -1, false);
/* Approved for bulk recycling in ptr_ring cache */
if (page)
@@ -526,8 +525,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
long drain_count = BIAS_MAX - pool->frag_users;
/* Some user is still using the page frag */
- if (likely(page_pool_atomic_sub_frag_count_return(page,
- drain_count)))
+ if (likely(page_pool_defrag_page(page, drain_count)))
return NULL;
if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
@@ -548,8 +546,7 @@ static void page_pool_free_frag(struct page_pool *pool)
pool->frag_page = NULL;
- if (!page ||
- page_pool_atomic_sub_frag_count_return(page, drain_count))
+ if (!page || page_pool_defrag_page(page, drain_count))
return;
page_pool_return_page(pool, page);
@@ -588,7 +585,7 @@ frag_reset:
pool->frag_users = 1;
*offset = 0;
pool->frag_offset = size;
- page_pool_set_frag_count(page, BIAS_MAX);
+ page_pool_fragment_page(page, BIAS_MAX);
return page;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 710da8a36729..a6fad3df42a8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -459,7 +459,7 @@ static void rtnl_lock_unregistering_all(void)
* setup_net() and cleanup_net() are not possible.
*/
for_each_net(net) {
- if (net->dev_unreg_count > 0) {
+ if (atomic_read(&net->dev_unreg_count) > 0) {
unregistering = true;
break;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 4ff806d71921..09d31a7dc68f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1447,6 +1447,15 @@ set_sndbuf:
break;
}
+ case SO_TXREHASH:
+ if (val < -1 || val > 1) {
+ ret = -EINVAL;
+ break;
+ }
+ /* Paired with READ_ONCE() in tcp_rtx_synack() */
+ WRITE_ONCE(sk->sk_txrehash, (u8)val);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1834,6 +1843,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_reserved_mem;
break;
+ case SO_TXREHASH:
+ v.val = sk->sk_txrehash;
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -2266,6 +2279,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
/* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
+ sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
}
@@ -2611,7 +2625,8 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
switch (cmsg->cmsg_type) {
case SO_MARK:
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
return -EINVAL;
@@ -3278,6 +3293,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_pacing_rate = ~0UL;
WRITE_ONCE(sk->sk_pacing_shift, 10);
sk->sk_incoming_cpu = -1;
+ sk->sk_txrehash = SOCK_TXREHASH_DEFAULT;
sk_rx_queue_clear(sk);
/*
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 1827669eedd6..2d213c4011db 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1416,38 +1416,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
return NULL;
}
-static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which)
+static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+ u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
- struct bpf_prog **pprog;
if (!progs)
return -EOPNOTSUPP;
switch (which) {
case BPF_SK_MSG_VERDICT:
- pprog = &progs->msg_parser;
+ *pprog = &progs->msg_parser;
break;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
case BPF_SK_SKB_STREAM_PARSER:
- pprog = &progs->stream_parser;
+ *pprog = &progs->stream_parser;
break;
#endif
case BPF_SK_SKB_STREAM_VERDICT:
if (progs->skb_verdict)
return -EBUSY;
- pprog = &progs->stream_verdict;
+ *pprog = &progs->stream_verdict;
break;
case BPF_SK_SKB_VERDICT:
if (progs->stream_verdict)
return -EBUSY;
- pprog = &progs->skb_verdict;
+ *pprog = &progs->skb_verdict;
break;
default:
return -EOPNOTSUPP;
}
+ return 0;
+}
+
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+ struct bpf_prog *old, u32 which)
+{
+ struct bpf_prog **pprog;
+ int ret;
+
+ ret = sock_map_prog_lookup(map, &pprog, which);
+ if (ret)
+ return ret;
+
if (old)
return psock_replace_prog(pprog, prog, old);
@@ -1455,6 +1467,57 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
return 0;
}
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+ u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+ struct bpf_prog **pprog;
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ struct fd f;
+ u32 id = 0;
+ int ret;
+
+ if (attr->query.query_flags)
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ rcu_read_lock();
+
+ ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+ if (ret)
+ goto end;
+
+ prog = *pprog;
+ prog_cnt = !prog ? 0 : 1;
+
+ if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+ goto end;
+
+ /* we do not hold the refcnt, the bpf prog may be released
+ * asynchronously and the id would be set to 0.
+ */
+ id = data_race(prog->aux->id);
+ if (id == 0)
+ prog_cnt = 0;
+
+end:
+ rcu_read_unlock();
+
+ if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
+ (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
+ copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+ ret = -EFAULT;
+
+ fdput(f);
+ return ret;
+}
+
static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
{
switch (link->map->map_type) {
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7b4d485aac7a..dbeb8ecbcd98 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -593,6 +593,15 @@ static struct ctl_table netns_core_table[] = {
.extra1 = SYSCTL_ZERO,
.proc_handler = proc_dointvec_minmax
},
+ {
+ .procname = "txrehash",
+ .data = &init_net.core.sysctl_txrehash,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ .proc_handler = proc_dou8vec_minmax,
+ },
{ }
};
@@ -611,7 +620,7 @@ __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);
static __net_init int sysctl_core_net_init(struct net *net)
{
- struct ctl_table *tbl;
+ struct ctl_table *tbl, *tmp;
tbl = netns_core_table;
if (!net_eq(net, &init_net)) {
@@ -619,7 +628,8 @@ static __net_init int sysctl_core_net_init(struct net *net)
if (tbl == NULL)
goto err_dup;
- tbl[0].data = &net->core.sysctl_somaxconn;
+ for (tmp = tbl; tmp->procname; tmp++)
+ tmp->data += (char *)net - (char *)&init_net;
/* Don't export any sysctls to unprivileged users */
if (net->user_ns != &init_user_ns) {
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 7aba35504986..361df312ee7f 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -162,8 +162,9 @@ static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
}
/* Returns 0 on success, negative on failure */
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
- struct net_device *dev, u32 queue_index, unsigned int napi_id)
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index,
+ unsigned int napi_id, u32 frag_size)
{
if (!dev) {
WARN(1, "Missing net_device from driver");
@@ -185,11 +186,12 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
xdp_rxq->dev = dev;
xdp_rxq->queue_index = queue_index;
xdp_rxq->napi_id = napi_id;
+ xdp_rxq->frag_size = frag_size;
xdp_rxq->reg_state = REG_STATE_REGISTERED;
return 0;
}
-EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
+EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
@@ -369,8 +371,8 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
* is used for those calls sites. Thus, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
-static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
- struct xdp_buff *xdp)
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+ struct xdp_buff *xdp)
{
struct xdp_mem_allocator *xa;
struct page *page;
@@ -406,12 +408,38 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
void xdp_return_frame(struct xdp_frame *xdpf)
{
+ struct skb_shared_info *sinfo;
+ int i;
+
+ if (likely(!xdp_frame_has_frags(xdpf)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+ __xdp_return(page_address(page), &xdpf->mem, false, NULL);
+ }
+out:
__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
+ struct skb_shared_info *sinfo;
+ int i;
+
+ if (likely(!xdp_frame_has_frags(xdpf)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+ __xdp_return(page_address(page), &xdpf->mem, true, NULL);
+ }
+out:
__xdp_return(xdpf->data, &xdpf->mem, true, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
@@ -447,7 +475,7 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_mem_allocator *xa;
if (mem->type != MEM_TYPE_PAGE_POOL) {
- __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
+ xdp_return_frame(xdpf);
return;
}
@@ -466,12 +494,38 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
}
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ struct skb_shared_info *sinfo;
+ int i;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ bq->q[bq->count++] = skb_frag_address(frag);
+ if (bq->count == XDP_BULK_QUEUE_SIZE)
+ xdp_flush_frame_bulk(bq);
+ }
+ }
bq->q[bq->count++] = xdpf->data;
}
EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
void xdp_return_buff(struct xdp_buff *xdp)
{
+ struct skb_shared_info *sinfo;
+ int i;
+
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+ __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp);
+ }
+out:
__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
}
@@ -561,8 +615,14 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct sk_buff *skb,
struct net_device *dev)
{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
unsigned int headroom, frame_size;
void *hard_start;
+ u8 nr_frags;
+
+ /* xdp frags frame */
+ if (unlikely(xdp_frame_has_frags(xdpf)))
+ nr_frags = sinfo->nr_frags;
/* Part of headroom was reserved to xdpf */
headroom = sizeof(*xdpf) + xdpf->headroom;
@@ -582,6 +642,12 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
if (xdpf->metasize)
skb_metadata_set(skb, xdpf->metasize);
+ if (unlikely(xdp_frame_has_frags(xdpf)))
+ xdp_update_skb_shared_info(skb, nr_frags,
+ sinfo->xdp_frags_size,
+ nr_frags * xdpf->frame_sz,
+ xdp_frame_is_frag_pfmemalloc(xdpf));
+
/* Essential SKB info: protocol and skb->dev */
skb->protocol = eth_type_trans(skb, dev);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 5183e627468d..671c377f0889 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -136,11 +136,6 @@ static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
}
-static inline u64 max48(const u64 seq1, const u64 seq2)
-{
- return after48(seq1, seq2) ? seq1 : seq2;
-}
-
/**
* dccp_loss_count - Approximate the number of lost data packets in a burst loss
* @s1: last known sequence number before the loss ('hole')
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 0ea29270d7e5..ae662567a6cb 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1030,15 +1030,9 @@ static void __net_exit dccp_v4_exit_net(struct net *net)
inet_ctl_sock_destroy(pn->v4_ctl_sk);
}
-static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
-{
- inet_twsk_purge(&dccp_hashinfo, AF_INET);
-}
-
static struct pernet_operations dccp_v4_ops = {
.init = dccp_v4_init_net,
.exit = dccp_v4_exit_net,
- .exit_batch = dccp_v4_exit_batch,
.id = &dccp_v4_pernet_id,
.size = sizeof(struct dccp_v4_pernet),
};
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index fa663518fa0e..eab3bd1ee9a0 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1115,15 +1115,9 @@ static void __net_exit dccp_v6_exit_net(struct net *net)
inet_ctl_sock_destroy(pn->v6_ctl_sk);
}
-static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
-{
- inet_twsk_purge(&dccp_hashinfo, AF_INET6);
-}
-
static struct pernet_operations dccp_v6_ops = {
.init = dccp_v6_init_net,
.exit = dccp_v6_exit_net,
- .exit_batch = dccp_v6_exit_batch,
.id = &dccp_v6_pernet_id,
.size = sizeof(struct dccp_v6_pernet),
};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 91e7a2202697..64d805b27add 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -22,6 +22,7 @@
#include "feat.h"
struct inet_timewait_death_row dccp_death_row = {
+ .tw_refcount = REFCOUNT_INIT(1),
.sysctl_max_tw_buckets = NR_FILE * 2,
.hashinfo = &dccp_hashinfo,
};
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index dcad3100b164..e498c927c3d0 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -15,6 +15,7 @@
#include <linux/of.h>
#include <linux/of_net.h>
#include <net/devlink.h>
+#include <net/sch_generic.h>
#include "dsa_priv.h"
@@ -1064,9 +1065,18 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
list_for_each_entry(dp, &dst->ports, list) {
if (dsa_port_is_cpu(dp)) {
- err = dsa_master_setup(dp->master, dp);
+ struct net_device *master = dp->master;
+ bool admin_up = (master->flags & IFF_UP) &&
+ !qdisc_tx_is_noop(master);
+
+ err = dsa_master_setup(master, dp);
if (err)
return err;
+
+ /* Replay master state event */
+ dsa_tree_master_admin_state_change(dst, master, admin_up);
+ dsa_tree_master_oper_state_change(dst, master,
+ netif_oper_up(master));
}
}
@@ -1081,9 +1091,19 @@ static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
rtnl_lock();
- list_for_each_entry(dp, &dst->ports, list)
- if (dsa_port_is_cpu(dp))
- dsa_master_teardown(dp->master);
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dsa_port_is_cpu(dp)) {
+ struct net_device *master = dp->master;
+
+ /* Synthesizing an "admin down" state is sufficient for
+ * the switches to get a notification if the master is
+ * currently up and running.
+ */
+ dsa_tree_master_admin_state_change(dst, master, false);
+
+ dsa_master_teardown(master);
+ }
+ }
rtnl_unlock();
}
@@ -1279,6 +1299,52 @@ out_unlock:
return err;
}
+static void dsa_tree_master_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master)
+{
+ struct dsa_notifier_master_state_info info;
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+
+ info.master = master;
+ info.operational = dsa_port_master_is_operational(cpu_dp);
+
+ dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info);
+}
+
+void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ bool notify = false;
+
+ if ((dsa_port_master_is_operational(cpu_dp)) !=
+ (up && cpu_dp->master_oper_up))
+ notify = true;
+
+ cpu_dp->master_admin_up = up;
+
+ if (notify)
+ dsa_tree_master_state_change(dst, master);
+}
+
+void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ bool notify = false;
+
+ if ((dsa_port_master_is_operational(cpu_dp)) !=
+ (cpu_dp->master_admin_up && up))
+ notify = true;
+
+ cpu_dp->master_oper_up = up;
+
+ if (notify)
+ dsa_tree_master_state_change(dst, master);
+}
+
static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
{
struct dsa_switch_tree *dst = ds->dst;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 760306f0012f..2bbfa9efe9f8 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -40,6 +40,7 @@ enum {
DSA_NOTIFIER_TAG_PROTO_DISCONNECT,
DSA_NOTIFIER_TAG_8021Q_VLAN_ADD,
DSA_NOTIFIER_TAG_8021Q_VLAN_DEL,
+ DSA_NOTIFIER_MASTER_STATE_CHANGE,
};
/* DSA_NOTIFIER_AGEING_TIME */
@@ -109,6 +110,12 @@ struct dsa_notifier_tag_8021q_vlan_info {
u16 vid;
};
+/* DSA_NOTIFIER_MASTER_STATE_CHANGE */
+struct dsa_notifier_master_state_info {
+ const struct net_device *master;
+ bool operational;
+};
+
struct dsa_switchdev_event_work {
struct dsa_switch *ds;
int port;
@@ -482,6 +489,12 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
struct net_device *master,
const struct dsa_device_ops *tag_ops,
const struct dsa_device_ops *old_tag_ops);
+void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up);
+void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up);
unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max);
void dsa_bridge_num_put(const struct net_device *bridge_dev,
unsigned int bridge_num);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 22241afcac81..2b5b0f294233 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2346,6 +2346,36 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
err = dsa_port_lag_change(dp, info->lower_state_info);
return notifier_from_errno(err);
}
+ case NETDEV_CHANGE:
+ case NETDEV_UP: {
+ /* Track state of master port.
+ * DSA driver may require the master port (and indirectly
+ * the tagger) to be available for some special operation.
+ */
+ if (netdev_uses_dsa(dev)) {
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->ds->dst;
+
+ /* Track when the master port is UP */
+ dsa_tree_master_oper_state_change(dst, dev,
+ netif_oper_up(dev));
+
+ /* Track when the master port is ready and can accept
+ * packet.
+ * NETDEV_UP event is not enough to flag a port as ready.
+ * We also have to wait for linkwatch_do_dev to dev_activate
+ * and emit a NETDEV_CHANGE event.
+ * We check if a master port is ready by checking if the dev
+ * have a qdisc assigned and is not noop.
+ */
+ dsa_tree_master_admin_state_change(dst, dev,
+ !qdisc_tx_is_noop(dev));
+
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE;
+ }
case NETDEV_GOING_DOWN: {
struct dsa_port *dp, *cpu_dp;
struct dsa_switch_tree *dst;
@@ -2357,6 +2387,8 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
cpu_dp = dev->dsa_ptr;
dst = cpu_dp->ds->dst;
+ dsa_tree_master_admin_state_change(dst, dev, false);
+
list_for_each_entry(dp, &dst->ports, list) {
if (!dsa_port_is_user(dp))
continue;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index e3c7d2627a61..4866b58649e4 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -113,26 +113,15 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
return dsa_tag_8021q_bridge_join(ds, info);
}
-static int dsa_switch_bridge_leave(struct dsa_switch *ds,
- struct dsa_notifier_bridge_info *info)
+static int dsa_switch_sync_vlan_filtering(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info)
{
- struct dsa_switch_tree *dst = ds->dst;
struct netlink_ext_ack extack = {0};
bool change_vlan_filtering = false;
bool vlan_filtering;
struct dsa_port *dp;
int err;
- if (dst->index == info->tree_index && ds->index == info->sw_index &&
- ds->ops->port_bridge_leave)
- ds->ops->port_bridge_leave(ds, info->port, info->bridge);
-
- if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
- ds->ops->crosschip_bridge_leave)
- ds->ops->crosschip_bridge_leave(ds, info->tree_index,
- info->sw_index, info->port,
- info->bridge);
-
if (ds->needs_standalone_vlan_filtering &&
!br_vlan_enabled(info->bridge.dev)) {
change_vlan_filtering = true;
@@ -172,6 +161,31 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
return err;
}
+ return 0;
+}
+
+static int dsa_switch_bridge_leave(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info)
+{
+ struct dsa_switch_tree *dst = ds->dst;
+ int err;
+
+ if (dst->index == info->tree_index && ds->index == info->sw_index &&
+ ds->ops->port_bridge_leave)
+ ds->ops->port_bridge_leave(ds, info->port, info->bridge);
+
+ if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
+ ds->ops->crosschip_bridge_leave)
+ ds->ops->crosschip_bridge_leave(ds, info->tree_index,
+ info->sw_index, info->port,
+ info->bridge);
+
+ if (ds->dst->index == info->tree_index && ds->index == info->sw_index) {
+ err = dsa_switch_sync_vlan_filtering(ds, info);
+ if (err)
+ return err;
+ }
+
return dsa_tag_8021q_bridge_leave(ds, info);
}
@@ -683,6 +697,18 @@ dsa_switch_disconnect_tag_proto(struct dsa_switch *ds,
return 0;
}
+static int
+dsa_switch_master_state_change(struct dsa_switch *ds,
+ struct dsa_notifier_master_state_info *info)
+{
+ if (!ds->ops->master_state_change)
+ return 0;
+
+ ds->ops->master_state_change(ds, info->master, info->operational);
+
+ return 0;
+}
+
static int dsa_switch_event(struct notifier_block *nb,
unsigned long event, void *info)
{
@@ -756,6 +782,9 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL:
err = dsa_switch_tag_8021q_vlan_del(ds, info);
break;
+ case DSA_NOTIFIER_MASTER_STATE_CHANGE:
+ err = dsa_switch_master_state_change(ds, info);
+ break;
default:
err = -EOPNOTSUPP;
break;
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 1ea9401b8ace..57d2e00f1e5d 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -4,30 +4,12 @@
*/
#include <linux/etherdevice.h>
+#include <linux/bitfield.h>
+#include <net/dsa.h>
+#include <linux/dsa/tag_qca.h>
#include "dsa_priv.h"
-#define QCA_HDR_LEN 2
-#define QCA_HDR_VERSION 0x2
-
-#define QCA_HDR_RECV_VERSION_MASK GENMASK(15, 14)
-#define QCA_HDR_RECV_VERSION_S 14
-#define QCA_HDR_RECV_PRIORITY_MASK GENMASK(13, 11)
-#define QCA_HDR_RECV_PRIORITY_S 11
-#define QCA_HDR_RECV_TYPE_MASK GENMASK(10, 6)
-#define QCA_HDR_RECV_TYPE_S 6
-#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3)
-#define QCA_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
-
-#define QCA_HDR_XMIT_VERSION_MASK GENMASK(15, 14)
-#define QCA_HDR_XMIT_VERSION_S 14
-#define QCA_HDR_XMIT_PRIORITY_MASK GENMASK(13, 11)
-#define QCA_HDR_XMIT_PRIORITY_S 11
-#define QCA_HDR_XMIT_CONTROL_MASK GENMASK(10, 8)
-#define QCA_HDR_XMIT_CONTROL_S 8
-#define QCA_HDR_XMIT_FROM_CPU BIT(7)
-#define QCA_HDR_XMIT_DP_BIT_MASK GENMASK(6, 0)
-
static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -40,8 +22,9 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
phdr = dsa_etype_header_pos_tx(skb);
/* Set the version field, and set destination port information */
- hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
- QCA_HDR_XMIT_FROM_CPU | BIT(dp->index);
+ hdr = FIELD_PREP(QCA_HDR_XMIT_VERSION, QCA_HDR_VERSION);
+ hdr |= QCA_HDR_XMIT_FROM_CPU;
+ hdr |= FIELD_PREP(QCA_HDR_XMIT_DP_BIT, BIT(dp->index));
*phdr = htons(hdr);
@@ -50,10 +33,17 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
{
- u8 ver;
- u16 hdr;
- int port;
+ struct qca_tagger_data *tagger_data;
+ struct dsa_port *dp = dev->dsa_ptr;
+ struct dsa_switch *ds = dp->ds;
+ u8 ver, pk_type;
__be16 *phdr;
+ int port;
+ u16 hdr;
+
+ BUILD_BUG_ON(sizeof(struct qca_mgmt_ethhdr) != QCA_HDR_MGMT_HEADER_LEN + QCA_HDR_LEN);
+
+ tagger_data = ds->tagger_data;
if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
return NULL;
@@ -62,16 +52,33 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
hdr = ntohs(*phdr);
/* Make sure the version is correct */
- ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S;
+ ver = FIELD_GET(QCA_HDR_RECV_VERSION, hdr);
if (unlikely(ver != QCA_HDR_VERSION))
return NULL;
+ /* Get pk type */
+ pk_type = FIELD_GET(QCA_HDR_RECV_TYPE, hdr);
+
+ /* Ethernet mgmt read/write packet */
+ if (pk_type == QCA_HDR_RECV_TYPE_RW_REG_ACK) {
+ if (likely(tagger_data->rw_reg_ack_handler))
+ tagger_data->rw_reg_ack_handler(ds, skb);
+ return NULL;
+ }
+
+ /* Ethernet MIB counter packet */
+ if (pk_type == QCA_HDR_RECV_TYPE_MIB) {
+ if (likely(tagger_data->mib_autocast_handler))
+ tagger_data->mib_autocast_handler(ds, skb);
+ return NULL;
+ }
+
/* Remove QCA tag and recalculate checksum */
skb_pull_rcsum(skb, QCA_HDR_LEN);
dsa_strip_etype_header(skb, QCA_HDR_LEN);
/* Get source port information */
- port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
+ port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, hdr);
skb->dev = dsa_master_find_slave(dev, 0, port);
if (!skb->dev)
@@ -80,12 +87,34 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
return skb;
}
+static int qca_tag_connect(struct dsa_switch *ds)
+{
+ struct qca_tagger_data *tagger_data;
+
+ tagger_data = kzalloc(sizeof(*tagger_data), GFP_KERNEL);
+ if (!tagger_data)
+ return -ENOMEM;
+
+ ds->tagger_data = tagger_data;
+
+ return 0;
+}
+
+static void qca_tag_disconnect(struct dsa_switch *ds)
+{
+ kfree(ds->tagger_data);
+ ds->tagger_data = NULL;
+}
+
static const struct dsa_device_ops qca_netdev_ops = {
.name = "qca",
.proto = DSA_TAG_PROTO_QCA,
+ .connect = qca_tag_connect,
+ .disconnect = qca_tag_disconnect,
.xmit = qca_tag_xmit,
.rcv = qca_tag_rcv,
.needed_headroom = QCA_HDR_LEN,
+ .promisc_on_master = true,
};
MODULE_LICENSE("GPL");
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index c1d5f5e0fdc9..18a5035d3bee 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -53,7 +53,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI */
nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */
nla_total_size(sizeof(u32)) + /* _RINGS_TX */
- nla_total_size(sizeof(u32)); /* _RINGS_RX_BUF_LEN */
+ nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */
+ nla_total_size(sizeof(u8)); /* _RINGS_TCP_DATA_SPLIT */
}
static int rings_fill_reply(struct sk_buff *skb,
@@ -61,9 +62,11 @@ static int rings_fill_reply(struct sk_buff *skb,
const struct ethnl_reply_data *reply_base)
{
const struct rings_reply_data *data = RINGS_REPDATA(reply_base);
- const struct kernel_ethtool_ringparam *kernel_ringparam = &data->kernel_ringparam;
+ const struct kernel_ethtool_ringparam *kr = &data->kernel_ringparam;
const struct ethtool_ringparam *ringparam = &data->ringparam;
+ WARN_ON(kr->tcp_data_split > ETHTOOL_TCP_DATA_SPLIT_ENABLED);
+
if ((ringparam->rx_max_pending &&
(nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MAX,
ringparam->rx_max_pending) ||
@@ -84,9 +87,11 @@ static int rings_fill_reply(struct sk_buff *skb,
ringparam->tx_max_pending) ||
nla_put_u32(skb, ETHTOOL_A_RINGS_TX,
ringparam->tx_pending))) ||
- (kernel_ringparam->rx_buf_len &&
- (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN,
- kernel_ringparam->rx_buf_len))))
+ (kr->rx_buf_len &&
+ (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN, kr->rx_buf_len))) ||
+ (kr->tcp_data_split &&
+ (nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT,
+ kr->tcp_data_split))))
return -EMSGSIZE;
return 0;
diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
index 99f3af1a9d4d..fe6094e9a2db 100644
--- a/net/hsr/hsr_debugfs.c
+++ b/net/hsr/hsr_debugfs.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/debugfs.h>
+#include <linux/jhash.h>
#include "hsr_main.h"
#include "hsr_framereg.h"
@@ -28,6 +29,7 @@ hsr_node_table_show(struct seq_file *sfp, void *data)
{
struct hsr_priv *priv = (struct hsr_priv *)sfp->private;
struct hsr_node *node;
+ int i;
seq_printf(sfp, "Node Table entries for (%s) device\n",
(priv->prot_version == PRP_V1 ? "PRP" : "HSR"));
@@ -39,22 +41,28 @@ hsr_node_table_show(struct seq_file *sfp, void *data)
seq_puts(sfp, "DAN-H\n");
rcu_read_lock();
- list_for_each_entry_rcu(node, &priv->node_db, mac_list) {
- /* skip self node */
- if (hsr_addr_is_self(priv, node->macaddress_A))
- continue;
- seq_printf(sfp, "%pM ", &node->macaddress_A[0]);
- seq_printf(sfp, "%pM ", &node->macaddress_B[0]);
- seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_A]);
- seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_B]);
- seq_printf(sfp, "%14x, ", node->addr_B_port);
-
- if (priv->prot_version == PRP_V1)
- seq_printf(sfp, "%5x, %5x, %5x\n",
- node->san_a, node->san_b,
- (node->san_a == 0 && node->san_b == 0));
- else
- seq_printf(sfp, "%5x\n", 1);
+
+ for (i = 0 ; i < priv->hash_buckets; i++) {
+ hlist_for_each_entry_rcu(node, &priv->node_db[i], mac_list) {
+ /* skip self node */
+ if (hsr_addr_is_self(priv, node->macaddress_A))
+ continue;
+ seq_printf(sfp, "%pM ", &node->macaddress_A[0]);
+ seq_printf(sfp, "%pM ", &node->macaddress_B[0]);
+ seq_printf(sfp, "%10lx, ",
+ node->time_in[HSR_PT_SLAVE_A]);
+ seq_printf(sfp, "%10lx, ",
+ node->time_in[HSR_PT_SLAVE_B]);
+ seq_printf(sfp, "%14x, ", node->addr_B_port);
+
+ if (priv->prot_version == PRP_V1)
+ seq_printf(sfp, "%5x, %5x, %5x\n",
+ node->san_a, node->san_b,
+ (node->san_a == 0 &&
+ node->san_b == 0));
+ else
+ seq_printf(sfp, "%5x\n", 1);
+ }
}
rcu_read_unlock();
return 0;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index e57fdad9ef94..7f250216433d 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -485,12 +485,16 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
{
bool unregister = false;
struct hsr_priv *hsr;
- int res;
+ int res, i;
hsr = netdev_priv(hsr_dev);
INIT_LIST_HEAD(&hsr->ports);
- INIT_LIST_HEAD(&hsr->node_db);
- INIT_LIST_HEAD(&hsr->self_node_db);
+ INIT_HLIST_HEAD(&hsr->self_node_db);
+ hsr->hash_buckets = HSR_HSIZE;
+ get_random_bytes(&hsr->hash_seed, sizeof(hsr->hash_seed));
+ for (i = 0; i < hsr->hash_buckets; i++)
+ INIT_HLIST_HEAD(&hsr->node_db[i]);
+
spin_lock_init(&hsr->list_lock);
eth_hw_addr_set(hsr_dev, slave[0]->dev_addr);
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index e59cbb4f0cd1..5bf357734b11 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -570,20 +570,23 @@ static int fill_frame_info(struct hsr_frame_info *frame,
struct ethhdr *ethhdr;
__be16 proto;
int ret;
+ u32 hash;
/* Check if skb contains ethhdr */
if (skb->mac_len < sizeof(struct ethhdr))
return -EINVAL;
memset(frame, 0, sizeof(*frame));
+
+ ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ hash = hsr_mac_hash(port->hsr, ethhdr->h_source);
frame->is_supervision = is_supervision_frame(port->hsr, skb);
- frame->node_src = hsr_get_node(port, &hsr->node_db, skb,
+ frame->node_src = hsr_get_node(port, &hsr->node_db[hash], skb,
frame->is_supervision,
port->type);
if (!frame->node_src)
return -1; /* Unknown node and !is_supervision, or no mem */
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
frame->is_vlan = false;
proto = ethhdr->h_proto;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 0775f0f95dbf..b3c6ffa1894d 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -15,11 +15,28 @@
#include <linux/etherdevice.h>
#include <linux/slab.h>
#include <linux/rculist.h>
+#include <linux/jhash.h>
#include "hsr_main.h"
#include "hsr_framereg.h"
#include "hsr_netlink.h"
-/* TODO: use hash lists for mac addresses (linux/jhash.h)? */
+u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr)
+{
+ u32 hash = jhash(addr, ETH_ALEN, hsr->hash_seed);
+
+ return reciprocal_scale(hash, hsr->hash_buckets);
+}
+
+struct hsr_node *hsr_node_get_first(struct hlist_head *head)
+{
+ struct hlist_node *first;
+
+ first = rcu_dereference(hlist_first_rcu(head));
+ if (first)
+ return hlist_entry(first, struct hsr_node, mac_list);
+
+ return NULL;
+}
/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b,
* false otherwise.
@@ -42,8 +59,7 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
{
struct hsr_node *node;
- node = list_first_or_null_rcu(&hsr->self_node_db, struct hsr_node,
- mac_list);
+ node = hsr_node_get_first(&hsr->self_node_db);
if (!node) {
WARN_ONCE(1, "HSR: No self node\n");
return false;
@@ -59,12 +75,12 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
/* Search for mac entry. Caller must hold rcu read lock.
*/
-static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
+static struct hsr_node *find_node_by_addr_A(struct hlist_head *node_db,
const unsigned char addr[ETH_ALEN])
{
struct hsr_node *node;
- list_for_each_entry_rcu(node, node_db, mac_list) {
+ hlist_for_each_entry_rcu(node, node_db, mac_list) {
if (ether_addr_equal(node->macaddress_A, addr))
return node;
}
@@ -79,7 +95,7 @@ int hsr_create_self_node(struct hsr_priv *hsr,
const unsigned char addr_a[ETH_ALEN],
const unsigned char addr_b[ETH_ALEN])
{
- struct list_head *self_node_db = &hsr->self_node_db;
+ struct hlist_head *self_node_db = &hsr->self_node_db;
struct hsr_node *node, *oldnode;
node = kmalloc(sizeof(*node), GFP_KERNEL);
@@ -90,14 +106,13 @@ int hsr_create_self_node(struct hsr_priv *hsr,
ether_addr_copy(node->macaddress_B, addr_b);
spin_lock_bh(&hsr->list_lock);
- oldnode = list_first_or_null_rcu(self_node_db,
- struct hsr_node, mac_list);
+ oldnode = hsr_node_get_first(self_node_db);
if (oldnode) {
- list_replace_rcu(&oldnode->mac_list, &node->mac_list);
+ hlist_replace_rcu(&oldnode->mac_list, &node->mac_list);
spin_unlock_bh(&hsr->list_lock);
kfree_rcu(oldnode, rcu_head);
} else {
- list_add_tail_rcu(&node->mac_list, self_node_db);
+ hlist_add_tail_rcu(&node->mac_list, self_node_db);
spin_unlock_bh(&hsr->list_lock);
}
@@ -106,25 +121,25 @@ int hsr_create_self_node(struct hsr_priv *hsr,
void hsr_del_self_node(struct hsr_priv *hsr)
{
- struct list_head *self_node_db = &hsr->self_node_db;
+ struct hlist_head *self_node_db = &hsr->self_node_db;
struct hsr_node *node;
spin_lock_bh(&hsr->list_lock);
- node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
+ node = hsr_node_get_first(self_node_db);
if (node) {
- list_del_rcu(&node->mac_list);
+ hlist_del_rcu(&node->mac_list);
kfree_rcu(node, rcu_head);
}
spin_unlock_bh(&hsr->list_lock);
}
-void hsr_del_nodes(struct list_head *node_db)
+void hsr_del_nodes(struct hlist_head *node_db)
{
struct hsr_node *node;
- struct hsr_node *tmp;
+ struct hlist_node *tmp;
- list_for_each_entry_safe(node, tmp, node_db, mac_list)
- kfree(node);
+ hlist_for_each_entry_safe(node, tmp, node_db, mac_list)
+ kfree_rcu(node, rcu_head);
}
void prp_handle_san_frame(bool san, enum hsr_port_type port,
@@ -145,7 +160,7 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port,
* originating from the newly added node.
*/
static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
- struct list_head *node_db,
+ struct hlist_head *node_db,
unsigned char addr[],
u16 seq_out, bool san,
enum hsr_port_type rx_port)
@@ -175,14 +190,14 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
hsr->proto_ops->handle_san_frame(san, rx_port, new_node);
spin_lock_bh(&hsr->list_lock);
- list_for_each_entry_rcu(node, node_db, mac_list,
- lockdep_is_held(&hsr->list_lock)) {
+ hlist_for_each_entry_rcu(node, node_db, mac_list,
+ lockdep_is_held(&hsr->list_lock)) {
if (ether_addr_equal(node->macaddress_A, addr))
goto out;
if (ether_addr_equal(node->macaddress_B, addr))
goto out;
}
- list_add_tail_rcu(&new_node->mac_list, node_db);
+ hlist_add_tail_rcu(&new_node->mac_list, node_db);
spin_unlock_bh(&hsr->list_lock);
return new_node;
out:
@@ -202,7 +217,7 @@ void prp_update_san_info(struct hsr_node *node, bool is_sup)
/* Get the hsr_node from which 'skb' was sent.
*/
-struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db,
struct sk_buff *skb, bool is_sup,
enum hsr_port_type rx_port)
{
@@ -218,7 +233,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
ethhdr = (struct ethhdr *)skb_mac_header(skb);
- list_for_each_entry_rcu(node, node_db, mac_list) {
+ hlist_for_each_entry_rcu(node, node_db, mac_list) {
if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) {
if (hsr->proto_ops->update_san_info)
hsr->proto_ops->update_san_info(node, is_sup);
@@ -268,11 +283,12 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
struct hsr_sup_tlv *hsr_sup_tlv;
struct hsr_node *node_real;
struct sk_buff *skb = NULL;
- struct list_head *node_db;
+ struct hlist_head *node_db;
struct ethhdr *ethhdr;
int i;
unsigned int pull_size = 0;
unsigned int total_pull_size = 0;
+ u32 hash;
/* Here either frame->skb_hsr or frame->skb_prp should be
* valid as supervision frame always will have protocol
@@ -310,11 +326,13 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
hsr_sp = (struct hsr_sup_payload *)skb->data;
/* Merge node_curr (registered on macaddress_B) into node_real */
- node_db = &port_rcv->hsr->node_db;
- node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A);
+ node_db = port_rcv->hsr->node_db;
+ hash = hsr_mac_hash(hsr, hsr_sp->macaddress_A);
+ node_real = find_node_by_addr_A(&node_db[hash], hsr_sp->macaddress_A);
if (!node_real)
/* No frame received from AddrA of this node yet */
- node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A,
+ node_real = hsr_add_node(hsr, &node_db[hash],
+ hsr_sp->macaddress_A,
HSR_SEQNR_START - 1, true,
port_rcv->type);
if (!node_real)
@@ -348,7 +366,8 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
hsr_sp = (struct hsr_sup_payload *)skb->data;
/* Check if redbox mac and node mac are equal. */
- if (!ether_addr_equal(node_real->macaddress_A, hsr_sp->macaddress_A)) {
+ if (!ether_addr_equal(node_real->macaddress_A,
+ hsr_sp->macaddress_A)) {
/* This is a redbox supervision frame for a VDAN! */
goto done;
}
@@ -368,7 +387,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
node_real->addr_B_port = port_rcv->type;
spin_lock_bh(&hsr->list_lock);
- list_del_rcu(&node_curr->mac_list);
+ hlist_del_rcu(&node_curr->mac_list);
spin_unlock_bh(&hsr->list_lock);
kfree_rcu(node_curr, rcu_head);
@@ -406,6 +425,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
struct hsr_port *port)
{
struct hsr_node *node_dst;
+ u32 hash;
if (!skb_mac_header_was_set(skb)) {
WARN_ONCE(1, "%s: Mac header not set\n", __func__);
@@ -415,7 +435,8 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest))
return;
- node_dst = find_node_by_addr_A(&port->hsr->node_db,
+ hash = hsr_mac_hash(port->hsr, eth_hdr(skb)->h_dest);
+ node_dst = find_node_by_addr_A(&port->hsr->node_db[hash],
eth_hdr(skb)->h_dest);
if (!node_dst) {
if (net_ratelimit())
@@ -491,59 +512,73 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr,
void hsr_prune_nodes(struct timer_list *t)
{
struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
+ struct hlist_node *tmp;
struct hsr_node *node;
- struct hsr_node *tmp;
struct hsr_port *port;
unsigned long timestamp;
unsigned long time_a, time_b;
+ int i;
spin_lock_bh(&hsr->list_lock);
- list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) {
- /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A]
- * nor time_in[HSR_PT_SLAVE_B], will ever be updated for
- * the master port. Thus the master node will be repeatedly
- * pruned leading to packet loss.
- */
- if (hsr_addr_is_self(hsr, node->macaddress_A))
- continue;
-
- /* Shorthand */
- time_a = node->time_in[HSR_PT_SLAVE_A];
- time_b = node->time_in[HSR_PT_SLAVE_B];
-
- /* Check for timestamps old enough to risk wrap-around */
- if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2))
- node->time_in_stale[HSR_PT_SLAVE_A] = true;
- if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2))
- node->time_in_stale[HSR_PT_SLAVE_B] = true;
-
- /* Get age of newest frame from node.
- * At least one time_in is OK here; nodes get pruned long
- * before both time_ins can get stale
- */
- timestamp = time_a;
- if (node->time_in_stale[HSR_PT_SLAVE_A] ||
- (!node->time_in_stale[HSR_PT_SLAVE_B] &&
- time_after(time_b, time_a)))
- timestamp = time_b;
-
- /* Warn of ring error only as long as we get frames at all */
- if (time_is_after_jiffies(timestamp +
- msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) {
- rcu_read_lock();
- port = get_late_port(hsr, node);
- if (port)
- hsr_nl_ringerror(hsr, node->macaddress_A, port);
- rcu_read_unlock();
- }
- /* Prune old entries */
- if (time_is_before_jiffies(timestamp +
- msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
- hsr_nl_nodedown(hsr, node->macaddress_A);
- list_del_rcu(&node->mac_list);
- /* Note that we need to free this entry later: */
- kfree_rcu(node, rcu_head);
+ for (i = 0; i < hsr->hash_buckets; i++) {
+ hlist_for_each_entry_safe(node, tmp, &hsr->node_db[i],
+ mac_list) {
+ /* Don't prune own node.
+ * Neither time_in[HSR_PT_SLAVE_A]
+ * nor time_in[HSR_PT_SLAVE_B], will ever be updated
+ * for the master port. Thus the master node will be
+ * repeatedly pruned leading to packet loss.
+ */
+ if (hsr_addr_is_self(hsr, node->macaddress_A))
+ continue;
+
+ /* Shorthand */
+ time_a = node->time_in[HSR_PT_SLAVE_A];
+ time_b = node->time_in[HSR_PT_SLAVE_B];
+
+ /* Check for timestamps old enough to
+ * risk wrap-around
+ */
+ if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2))
+ node->time_in_stale[HSR_PT_SLAVE_A] = true;
+ if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2))
+ node->time_in_stale[HSR_PT_SLAVE_B] = true;
+
+ /* Get age of newest frame from node.
+ * At least one time_in is OK here; nodes get pruned
+ * long before both time_ins can get stale
+ */
+ timestamp = time_a;
+ if (node->time_in_stale[HSR_PT_SLAVE_A] ||
+ (!node->time_in_stale[HSR_PT_SLAVE_B] &&
+ time_after(time_b, time_a)))
+ timestamp = time_b;
+
+ /* Warn of ring error only as long as we get
+ * frames at all
+ */
+ if (time_is_after_jiffies(timestamp +
+ msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) {
+ rcu_read_lock();
+ port = get_late_port(hsr, node);
+ if (port)
+ hsr_nl_ringerror(hsr,
+ node->macaddress_A,
+ port);
+ rcu_read_unlock();
+ }
+
+ /* Prune old entries */
+ if (time_is_before_jiffies(timestamp +
+ msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
+ hsr_nl_nodedown(hsr, node->macaddress_A);
+ hlist_del_rcu(&node->mac_list);
+ /* Note that we need to free this
+ * entry later:
+ */
+ kfree_rcu(node, rcu_head);
+ }
}
}
spin_unlock_bh(&hsr->list_lock);
@@ -557,17 +592,19 @@ void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
unsigned char addr[ETH_ALEN])
{
struct hsr_node *node;
+ u32 hash;
+
+ hash = hsr_mac_hash(hsr, addr);
if (!_pos) {
- node = list_first_or_null_rcu(&hsr->node_db,
- struct hsr_node, mac_list);
+ node = hsr_node_get_first(&hsr->node_db[hash]);
if (node)
ether_addr_copy(addr, node->macaddress_A);
return node;
}
node = _pos;
- list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) {
+ hlist_for_each_entry_continue_rcu(node, mac_list) {
ether_addr_copy(addr, node->macaddress_A);
return node;
}
@@ -587,8 +624,11 @@ int hsr_get_node_data(struct hsr_priv *hsr,
struct hsr_node *node;
struct hsr_port *port;
unsigned long tdiff;
+ u32 hash;
+
+ hash = hsr_mac_hash(hsr, addr);
- node = find_node_by_addr_A(&hsr->node_db, addr);
+ node = find_node_by_addr_A(&hsr->node_db[hash], addr);
if (!node)
return -ENOENT;
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index bdbb8c822ba1..d7cce6b161e3 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -28,9 +28,11 @@ struct hsr_frame_info {
bool is_from_san;
};
+u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr);
+struct hsr_node *hsr_node_get_first(struct hlist_head *head);
void hsr_del_self_node(struct hsr_priv *hsr);
-void hsr_del_nodes(struct list_head *node_db);
-struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
+void hsr_del_nodes(struct hlist_head *node_db);
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db,
struct sk_buff *skb, bool is_sup,
enum hsr_port_type rx_port);
void hsr_handle_sup_frame(struct hsr_frame_info *frame);
@@ -68,7 +70,7 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port,
void prp_update_san_info(struct hsr_node *node, bool is_sup);
struct hsr_node {
- struct list_head mac_list;
+ struct hlist_node mac_list;
unsigned char macaddress_A[ETH_ALEN];
unsigned char macaddress_B[ETH_ALEN];
/* Local slave through which AddrB frames are received from this node */
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 043e4e9a1694..ca556bda3467 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -63,6 +63,9 @@ struct hsr_tag {
#define HSR_V1_SUP_LSDUSIZE 52
+#define HSR_HSIZE_SHIFT 8
+#define HSR_HSIZE BIT(HSR_HSIZE_SHIFT)
+
/* The helper functions below assumes that 'path' occupies the 4 most
* significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or
* equivalently, the 4 most significant bits of HSR tag byte 14).
@@ -201,8 +204,8 @@ struct hsr_proto_ops {
struct hsr_priv {
struct rcu_head rcu_head;
struct list_head ports;
- struct list_head node_db; /* Known HSR nodes */
- struct list_head self_node_db; /* MACs of slaves */
+ struct hlist_head node_db[HSR_HSIZE]; /* Known HSR nodes */
+ struct hlist_head self_node_db; /* MACs of slaves */
struct timer_list announce_timer; /* Supervision frame dispatch */
struct timer_list prune_timer;
int announce_count;
@@ -212,6 +215,8 @@ struct hsr_priv {
spinlock_t seqnr_lock; /* locking for sequence_nr */
spinlock_t list_lock; /* locking for node list */
struct hsr_proto_ops *proto_ops;
+ u32 hash_buckets;
+ u32 hash_seed;
#define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set
* based on SLAVE_A or SLAVE_B
*/
@@ -259,11 +264,6 @@ static inline u16 prp_get_skb_sequence_nr(struct prp_rct *rct)
return ntohs(rct->sequence_nr);
}
-static inline u16 get_prp_lan_id(struct prp_rct *rct)
-{
- return ntohs(rct->lan_id_and_LSDU_size) >> 12;
-}
-
/* assume there is a valid rct */
static inline bool prp_check_lsdu_size(struct sk_buff *skb,
struct prp_rct *rct,
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index f3c8f91dbe2c..1405c037cf7a 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -105,6 +105,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
static void hsr_dellink(struct net_device *dev, struct list_head *head)
{
struct hsr_priv *hsr = netdev_priv(dev);
+ int i;
del_timer_sync(&hsr->prune_timer);
del_timer_sync(&hsr->announce_timer);
@@ -113,7 +114,8 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
hsr_del_ports(hsr);
hsr_del_self_node(hsr);
- hsr_del_nodes(&hsr->node_db);
+ for (i = 0; i < hsr->hash_buckets; i++)
+ hsr_del_nodes(&hsr->node_db[i]);
unregister_netdevice_queue(dev, head);
}
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 2cf62718a282..2c087b7f17c5 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -47,6 +47,7 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/ieee802154.h>
+#include <linux/if_arp.h>
#include <net/ipv6.h>
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index dd5a45f8a78a..359249ab77bf 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -30,7 +30,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
{
void *hdr;
int i, pages = 0;
- uint32_t *buf = kcalloc(32, sizeof(uint32_t), GFP_KERNEL);
+ u32 *buf = kcalloc(IEEE802154_MAX_PAGE + 1, sizeof(u32), GFP_KERNEL);
pr_debug("%s\n", __func__);
@@ -47,7 +47,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
nla_put_u8(msg, IEEE802154_ATTR_PAGE, phy->current_page) ||
nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel))
goto nla_put_failure;
- for (i = 0; i < 32; i++) {
+ for (i = 0; i <= IEEE802154_MAX_PAGE; i++) {
if (phy->supported.channels[i])
buf[pages++] = phy->supported.channels[i] | (i << 27);
}
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index de610cb83694..f79ab942f03b 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
+#include <linux/init.h>
#include <linux/types.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
@@ -95,12 +96,14 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
const struct btf *btf,
const struct btf_type *t, int off,
int size, enum bpf_access_type atype,
- u32 *next_btf_id)
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
{
size_t end;
if (atype == BPF_READ)
- return btf_struct_access(log, btf, t, off, size, atype, next_btf_id);
+ return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+ flag);
if (t != tcp_sock_type) {
bpf_log(log, "only read is supported\n");
@@ -212,26 +215,23 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
}
}
-BTF_SET_START(bpf_tcp_ca_kfunc_ids)
+BTF_SET_START(bpf_tcp_ca_check_kfunc_ids)
BTF_ID(func, tcp_reno_ssthresh)
BTF_ID(func, tcp_reno_cong_avoid)
BTF_ID(func, tcp_reno_undo_cwnd)
BTF_ID(func, tcp_slow_start)
BTF_ID(func, tcp_cong_avoid_ai)
-BTF_SET_END(bpf_tcp_ca_kfunc_ids)
+BTF_SET_END(bpf_tcp_ca_check_kfunc_ids)
-static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
-{
- if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id))
- return true;
- return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner);
-}
+static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &bpf_tcp_ca_check_kfunc_ids,
+};
static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
.get_func_proto = bpf_tcp_ca_get_func_proto,
.is_valid_access = bpf_tcp_ca_is_valid_access,
.btf_struct_access = bpf_tcp_ca_btf_struct_access,
- .check_kfunc_call = bpf_tcp_ca_check_kfunc_call,
};
static int bpf_tcp_ca_init_member(const struct btf_type *t,
@@ -300,3 +300,9 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
.init = bpf_tcp_ca_init,
.name = "tcp_congestion_ops",
};
+
+static int __init bpf_tcp_ca_kfunc_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+}
+late_initcall(bpf_tcp_ca_kfunc_init);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4d61ddd8a0ec..54811728d906 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -32,6 +32,7 @@
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
@@ -735,8 +736,16 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
memset(cfg, 0, sizeof(*cfg));
rtm = nlmsg_data(nlh);
+
+ if (!inet_validate_dscp(rtm->rtm_tos)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid dsfield (tos): ECN bits must be 0");
+ err = -EINVAL;
+ goto errout;
+ }
+ cfg->fc_dscp = inet_dsfield_to_dscp(rtm->rtm_tos);
+
cfg->fc_dst_len = rtm->rtm_dst_len;
- cfg->fc_tos = rtm->rtm_tos;
cfg->fc_table = rtm->rtm_table;
cfg->fc_protocol = rtm->rtm_protocol;
cfg->fc_scope = rtm->rtm_scope;
@@ -1547,7 +1556,7 @@ static void ip_fib_net_exit(struct net *net)
{
int i;
- rtnl_lock();
+ ASSERT_RTNL();
#ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
@@ -1572,7 +1581,7 @@ static void ip_fib_net_exit(struct net *net)
#ifdef CONFIG_IP_MULTIPLE_TABLES
fib4_rules_exit(net);
#endif
- rtnl_unlock();
+
kfree(net->ipv4.fib_table_hash);
fib4_notifier_exit(net);
}
@@ -1599,7 +1608,9 @@ out:
out_proc:
nl_fib_lookup_exit(net);
out_nlfl:
+ rtnl_lock();
ip_fib_net_exit(net);
+ rtnl_unlock();
goto out;
}
@@ -1607,12 +1618,23 @@ static void __net_exit fib_net_exit(struct net *net)
{
fib_proc_exit(net);
nl_fib_lookup_exit(net);
- ip_fib_net_exit(net);
+}
+
+static void __net_exit fib_net_exit_batch(struct list_head *net_list)
+{
+ struct net *net;
+
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list)
+ ip_fib_net_exit(net);
+
+ rtnl_unlock();
}
static struct pernet_operations fib_net_ops = {
.init = fib_net_init,
.exit = fib_net_exit,
+ .exit_batch = fib_net_exit_batch,
};
void __init ip_fib_init(void)
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index e184bcb19943..a63014b54809 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -4,13 +4,14 @@
#include <linux/types.h>
#include <linux/list.h>
+#include <net/inet_dscp.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
struct fib_alias {
struct hlist_node fa_list;
struct fib_info *fa_info;
- u8 fa_tos;
+ dscp_t fa_dscp;
u8 fa_type;
u8 fa_state;
u8 fa_slen;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index e0b6c8b6de57..001fea394bde 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -23,6 +23,7 @@
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp.h>
@@ -35,7 +36,7 @@ struct fib4_rule {
struct fib_rule common;
u8 dst_len;
u8 src_len;
- u8 tos;
+ dscp_t dscp;
__be32 src;
__be32 srcmask;
__be32 dst;
@@ -49,7 +50,7 @@ static bool fib4_rule_matchall(const struct fib_rule *rule)
{
struct fib4_rule *r = container_of(rule, struct fib4_rule, common);
- if (r->dst_len || r->src_len || r->tos)
+ if (r->dst_len || r->src_len || r->dscp)
return false;
return fib_rule_matchall(rule);
}
@@ -185,7 +186,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
((daddr ^ r->dst) & r->dstmask))
return 0;
- if (r->tos && (r->tos != fl4->flowi4_tos))
+ if (r->dscp && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
return 0;
if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
@@ -225,10 +226,17 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
int err = -EINVAL;
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
+ if (!inet_validate_dscp(frh->tos)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid dsfield (tos): ECN bits must be 0");
+ goto errout;
+ }
+ /* IPv4 currently doesn't handle high order DSCP bits correctly */
if (frh->tos & ~IPTOS_TOS_MASK) {
NL_SET_ERR_MSG(extack, "Invalid tos");
goto errout;
}
+ rule4->dscp = inet_dsfield_to_dscp(frh->tos);
/* split local/main if they are not already split */
err = fib_unmerge(net);
@@ -270,7 +278,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->srcmask = inet_make_mask(rule4->src_len);
rule4->dst_len = frh->dst_len;
rule4->dstmask = inet_make_mask(rule4->dst_len);
- rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
@@ -313,7 +320,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule4->dst_len != frh->dst_len))
return 0;
- if (frh->tos && (rule4->tos != frh->tos))
+ if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos)
return 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -337,7 +344,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule4->dst_len;
frh->src_len = rule4->src_len;
- frh->tos = rule4->tos;
+ frh->tos = inet_dscp_to_dsfield(rule4->dscp);
if ((rule4->dst_len &&
nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b4589861b84c..c9c4f2f66b38 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -32,6 +32,7 @@
#include <linux/hash.h>
#include <net/arp.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
@@ -523,7 +524,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.tb_id = tb_id;
fri.dst = key;
fri.dst_len = dst_len;
- fri.tos = fa->fa_tos;
+ fri.tos = inet_dscp_to_dsfield(fa->fa_dscp);
fri.type = fa->fa_type;
fri.offload = fa->offload;
fri.trap = fa->trap;
@@ -1257,34 +1258,13 @@ fib_info_laddrhash_bucket(const struct net *net, __be32 val)
return &fib_info_laddrhash[slot];
}
-static struct hlist_head *fib_info_hash_alloc(int bytes)
-{
- if (bytes <= PAGE_SIZE)
- return kzalloc(bytes, GFP_KERNEL);
- else
- return (struct hlist_head *)
- __get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(bytes));
-}
-
-static void fib_info_hash_free(struct hlist_head *hash, int bytes)
-{
- if (!hash)
- return;
-
- if (bytes <= PAGE_SIZE)
- kfree(hash);
- else
- free_pages((unsigned long) hash, get_order(bytes));
-}
-
static void fib_info_hash_move(struct hlist_head *new_info_hash,
struct hlist_head *new_laddrhash,
unsigned int new_size)
{
struct hlist_head *old_info_hash, *old_laddrhash;
unsigned int old_size = fib_info_hash_size;
- unsigned int i, bytes;
+ unsigned int i;
spin_lock_bh(&fib_info_lock);
old_info_hash = fib_info_hash;
@@ -1325,9 +1305,8 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
spin_unlock_bh(&fib_info_lock);
- bytes = old_size * sizeof(struct hlist_head *);
- fib_info_hash_free(old_info_hash, bytes);
- fib_info_hash_free(old_laddrhash, bytes);
+ kvfree(old_info_hash);
+ kvfree(old_laddrhash);
}
__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
@@ -1444,19 +1423,19 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
unsigned int new_size = fib_info_hash_size << 1;
struct hlist_head *new_info_hash;
struct hlist_head *new_laddrhash;
- unsigned int bytes;
+ size_t bytes;
if (!new_size)
new_size = 16;
- bytes = new_size * sizeof(struct hlist_head *);
- new_info_hash = fib_info_hash_alloc(bytes);
- new_laddrhash = fib_info_hash_alloc(bytes);
+ bytes = (size_t)new_size * sizeof(struct hlist_head *);
+ new_info_hash = kvzalloc(bytes, GFP_KERNEL);
+ new_laddrhash = kvzalloc(bytes, GFP_KERNEL);
if (!new_info_hash || !new_laddrhash) {
- fib_info_hash_free(new_info_hash, bytes);
- fib_info_hash_free(new_laddrhash, bytes);
- } else
+ kvfree(new_info_hash);
+ kvfree(new_laddrhash);
+ } else {
fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
-
+ }
if (!fib_info_hash_size)
goto failure;
}
@@ -2061,7 +2040,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
int order = -1, last_idx = -1;
struct fib_alias *fa, *fa1 = NULL;
u32 last_prio = res->fi->fib_priority;
- u8 last_tos = 0;
+ dscp_t last_dscp = 0;
hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
@@ -2069,19 +2048,20 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
if (fa->fa_slen != slen)
continue;
- if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+ if (fa->fa_dscp &&
+ fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos))
continue;
if (fa->tb_id != tb->tb_id)
continue;
if (next_fi->fib_priority > last_prio &&
- fa->fa_tos == last_tos) {
- if (last_tos)
+ fa->fa_dscp == last_dscp) {
+ if (last_dscp)
continue;
break;
}
if (next_fi->fib_flags & RTNH_F_DEAD)
continue;
- last_tos = fa->fa_tos;
+ last_dscp = fa->fa_dscp;
last_prio = next_fi->fib_priority;
if (next_fi->fib_scope != res->scope ||
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8060524f4256..c05cd105e95e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -61,6 +61,7 @@
#include <linux/vmalloc.h>
#include <linux/notifier.h>
#include <net/net_namespace.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
@@ -81,7 +82,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
- .tos = fa->fa_tos,
+ .tos = inet_dscp_to_dsfield(fa->fa_dscp),
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
@@ -98,7 +99,7 @@ static int call_fib_entry_notifiers(struct net *net,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
- .tos = fa->fa_tos,
+ .tos = inet_dscp_to_dsfield(fa->fa_dscp),
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
@@ -973,13 +974,13 @@ static struct key_vector *fib_find_node(struct trie *t,
return n;
}
-/* Return the first fib alias matching TOS with
+/* Return the first fib alias matching DSCP with
* priority less than or equal to PRIO.
* If 'find_first' is set, return the first matching
- * fib alias, regardless of TOS and priority.
+ * fib alias, regardless of DSCP and priority.
*/
static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
- u8 tos, u32 prio, u32 tb_id,
+ dscp_t dscp, u32 prio, u32 tb_id,
bool find_first)
{
struct fib_alias *fa;
@@ -988,6 +989,10 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
return NULL;
hlist_for_each_entry(fa, fah, fa_list) {
+ /* Avoid Sparse warning when using dscp_t in inequalities */
+ u8 __fa_dscp = inet_dscp_to_dsfield(fa->fa_dscp);
+ u8 __dscp = inet_dscp_to_dsfield(dscp);
+
if (fa->fa_slen < slen)
continue;
if (fa->fa_slen != slen)
@@ -998,9 +1003,9 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
break;
if (find_first)
return fa;
- if (fa->fa_tos > tos)
+ if (__fa_dscp > __dscp)
continue;
- if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
+ if (fa->fa_info->fib_priority >= prio || __fa_dscp < __dscp)
return fa;
}
@@ -1027,8 +1032,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
- fa->fa_tos == fri->tos && fa->fa_info == fri->fi &&
- fa->fa_type == fri->type)
+ fa->fa_dscp == inet_dsfield_to_dscp(fri->tos) &&
+ fa->fa_info == fri->fi && fa->fa_type == fri->type)
return fa;
}
@@ -1210,7 +1215,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
struct fib_info *fi;
u8 plen = cfg->fc_dst_len;
u8 slen = KEYLENGTH - plen;
- u8 tos = cfg->fc_tos;
+ dscp_t dscp;
u32 key;
int err;
@@ -1227,12 +1232,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
goto err;
}
+ dscp = cfg->fc_dscp;
l = fib_find_node(t, &tp, key);
- fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
+ fa = l ? fib_find_alias(&l->leaf, slen, dscp, fi->fib_priority,
tb->tb_id, false) : NULL;
/* Now fa, if non-NULL, points to the first fib alias
- * with the same keys [prefix,tos,priority], if such key already
+ * with the same keys [prefix,dscp,priority], if such key already
* exists or to the node before which we will insert new one.
*
* If fa is NULL, we will need to allocate a new one and
@@ -1240,7 +1246,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
* of the new alias.
*/
- if (fa && fa->fa_tos == tos &&
+ if (fa && fa->fa_dscp == dscp &&
fa->fa_info->fib_priority == fi->fib_priority) {
struct fib_alias *fa_first, *fa_match;
@@ -1260,7 +1266,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
hlist_for_each_entry_from(fa, fa_list) {
if ((fa->fa_slen != slen) ||
(fa->tb_id != tb->tb_id) ||
- (fa->fa_tos != tos))
+ (fa->fa_dscp != dscp))
break;
if (fa->fa_info->fib_priority != fi->fib_priority)
break;
@@ -1288,7 +1294,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
goto out;
fi_drop = fa->fa_info;
- new_fa->fa_tos = fa->fa_tos;
+ new_fa->fa_dscp = fa->fa_dscp;
new_fa->fa_info = fi;
new_fa->fa_type = cfg->fc_type;
state = fa->fa_state;
@@ -1351,7 +1357,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
goto out;
new_fa->fa_info = fi;
- new_fa->fa_tos = tos;
+ new_fa->fa_dscp = dscp;
new_fa->fa_type = cfg->fc_type;
new_fa->fa_state = 0;
new_fa->fa_slen = slen;
@@ -1567,7 +1573,8 @@ found:
if (index >= (1ul << fa->fa_slen))
continue;
}
- if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+ if (fa->fa_dscp &&
+ inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
continue;
if (fi->fib_dead)
continue;
@@ -1703,7 +1710,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
struct key_vector *l, *tp;
u8 plen = cfg->fc_dst_len;
u8 slen = KEYLENGTH - plen;
- u8 tos = cfg->fc_tos;
+ dscp_t dscp;
u32 key;
key = ntohl(cfg->fc_dst);
@@ -1715,11 +1722,13 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
if (!l)
return -ESRCH;
- fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false);
+ dscp = cfg->fc_dscp;
+ fa = fib_find_alias(&l->leaf, slen, dscp, 0, tb->tb_id, false);
if (!fa)
return -ESRCH;
- pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
+ pr_debug("Deleting %08x/%d dsfield=0x%02x t=%p\n", key, plen,
+ inet_dscp_to_dsfield(dscp), t);
fa_to_delete = NULL;
hlist_for_each_entry_from(fa, fa_list) {
@@ -1727,7 +1736,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
if ((fa->fa_slen != slen) ||
(fa->tb_id != tb->tb_id) ||
- (fa->fa_tos != tos))
+ (fa->fa_dscp != dscp))
break;
if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
@@ -2295,7 +2304,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.tb_id = tb->tb_id;
fri.dst = xkey;
fri.dst_len = KEYLENGTH - fa->fa_slen;
- fri.tos = fa->fa_tos;
+ fri.tos = inet_dscp_to_dsfield(fa->fa_dscp);
fri.type = fa->fa_type;
fri.offload = fa->offload;
fri.trap = fa->trap;
@@ -2807,8 +2816,9 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
fa->fa_info->fib_scope),
rtn_type(buf2, sizeof(buf2),
fa->fa_type));
- if (fa->fa_tos)
- seq_printf(seq, " tos=%d", fa->fa_tos);
+ if (fa->fa_dscp)
+ seq_printf(seq, " tos=%d",
+ inet_dscp_to_dsfield(fa->fa_dscp));
seq_putc(seq, '\n');
}
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index b7e277d8a84d..72a375c7f417 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -192,24 +192,14 @@ struct icmp_control {
static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
-/*
- * The ICMP socket(s). This is the most convenient way to flow control
- * our ICMP output as well as maintain a clean interface throughout
- * all layers. All Socketless IP sends will soon be gone.
- *
- * On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmp_sk(struct net *net)
-{
- return this_cpu_read(*net->ipv4.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv4_icmp_sk);
/* Called with BH disabled */
static inline struct sock *icmp_xmit_lock(struct net *net)
{
struct sock *sk;
- sk = icmp_sk(net);
+ sk = this_cpu_read(ipv4_icmp_sk);
if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
/* This can happen if the output path signals a
@@ -217,11 +207,13 @@ static inline struct sock *icmp_xmit_lock(struct net *net)
*/
return NULL;
}
+ sock_net_set(sk, net);
return sk;
}
static inline void icmp_xmit_unlock(struct sock *sk)
{
+ sock_net_set(sk, &init_net);
spin_unlock(&sk->sk_lock.slock);
}
@@ -363,14 +355,13 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
return 0;
}
-static void icmp_push_reply(struct icmp_bxm *icmp_param,
+static void icmp_push_reply(struct sock *sk,
+ struct icmp_bxm *icmp_param,
struct flowi4 *fl4,
struct ipcm_cookie *ipc, struct rtable **rt)
{
- struct sock *sk;
struct sk_buff *skb;
- sk = icmp_sk(dev_net((*rt)->dst.dev));
if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
icmp_param->data_len+icmp_param->head_len,
icmp_param->head_len,
@@ -452,7 +443,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
if (IS_ERR(rt))
goto out_unlock;
if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
- icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
+ icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
ip_rt_put(rt);
out_unlock:
icmp_xmit_unlock(sk);
@@ -766,7 +757,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
if (!fl4.saddr)
fl4.saddr = htonl(INADDR_DUMMY);
- icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
+ icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt);
ende:
ip_rt_put(rt);
out_unlock:
@@ -1434,46 +1425,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
},
};
-static void __net_exit icmp_sk_exit(struct net *net)
-{
- int i;
-
- for_each_possible_cpu(i)
- inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
- free_percpu(net->ipv4.icmp_sk);
- net->ipv4.icmp_sk = NULL;
-}
-
static int __net_init icmp_sk_init(struct net *net)
{
- int i, err;
-
- net->ipv4.icmp_sk = alloc_percpu(struct sock *);
- if (!net->ipv4.icmp_sk)
- return -ENOMEM;
-
- for_each_possible_cpu(i) {
- struct sock *sk;
-
- err = inet_ctl_sock_create(&sk, PF_INET,
- SOCK_RAW, IPPROTO_ICMP, net);
- if (err < 0)
- goto fail;
-
- *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk;
-
- /* Enough space for 2 64K ICMP packets, including
- * sk_buff/skb_shared_info struct overhead.
- */
- sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
-
- /*
- * Speedup sock_wfree()
- */
- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
- inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
- }
-
/* Control parameters for ECHO replies. */
net->ipv4.sysctl_icmp_echo_ignore_all = 0;
net->ipv4.sysctl_icmp_echo_enable_probe = 0;
@@ -1499,18 +1452,36 @@ static int __net_init icmp_sk_init(struct net *net)
net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
return 0;
-
-fail:
- icmp_sk_exit(net);
- return err;
}
static struct pernet_operations __net_initdata icmp_sk_ops = {
.init = icmp_sk_init,
- .exit = icmp_sk_exit,
};
int __init icmp_init(void)
{
+ int err, i;
+
+ for_each_possible_cpu(i) {
+ struct sock *sk;
+
+ err = inet_ctl_sock_create(&sk, PF_INET,
+ SOCK_RAW, IPPROTO_ICMP, &init_net);
+ if (err < 0)
+ return err;
+
+ per_cpu(ipv4_icmp_sk, i) = sk;
+
+ /* Enough space for 2 64K ICMP packets, including
+ * sk_buff/skb_shared_info struct overhead.
+ */
+ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
+
+ /*
+ * Speedup sock_wfree()
+ */
+ sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
+ }
return register_pernet_subsys(&icmp_sk_ops);
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fc2a985f6064..1e5b53c2bb26 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -866,12 +866,9 @@ static void reqsk_timer_handler(struct timer_list *t)
(!resend ||
!inet_rtx_syn_ack(sk_listener, req) ||
inet_rsk(req)->acked)) {
- unsigned long timeo;
-
if (req->num_timeout++ == 0)
atomic_dec(&queue->young);
- timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
- mod_timer(&req->rsk_timer, jiffies + timeo);
+ mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX));
if (!nreq)
return;
@@ -1046,6 +1043,9 @@ int inet_csk_listen_start(struct sock *sk)
sk->sk_ack_backlog = 0;
inet_csk_delack_init(sk);
+ if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
+ sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
+
/* There is race window here: we announce ourselves listening,
* but this transition is still not validated by get_port().
* It is OK, because this socket enters to hash table only
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 30ab717ff1b8..17440840a791 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk)
int err = 0;
if (sk->sk_state != TCP_LISTEN) {
+ local_bh_disable();
inet_ehash_nolisten(sk, osk, NULL);
+ local_bh_enable();
return 0;
}
WARN_ON(!sk_unhashed(sk));
@@ -669,45 +671,54 @@ int inet_hash(struct sock *sk)
{
int err = 0;
- if (sk->sk_state != TCP_CLOSE) {
- local_bh_disable();
+ if (sk->sk_state != TCP_CLOSE)
err = __inet_hash(sk, NULL);
- local_bh_enable();
- }
return err;
}
EXPORT_SYMBOL_GPL(inet_hash);
-void inet_unhash(struct sock *sk)
+static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- struct inet_listen_hashbucket *ilb = NULL;
- spinlock_t *lock;
-
if (sk_unhashed(sk))
return;
- if (sk->sk_state == TCP_LISTEN) {
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
- lock = &ilb->lock;
- } else {
- lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
- }
- spin_lock_bh(lock);
- if (sk_unhashed(sk))
- goto unlock;
-
if (rcu_access_pointer(sk->sk_reuseport_cb))
reuseport_stop_listen_sock(sk);
if (ilb) {
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
inet_unhash2(hashinfo, sk);
ilb->count--;
}
__sk_nulls_del_node_init_rcu(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-unlock:
- spin_unlock_bh(lock);
+}
+
+void inet_unhash(struct sock *sk)
+{
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
+ if (sk_unhashed(sk))
+ return;
+
+ if (sk->sk_state == TCP_LISTEN) {
+ struct inet_listen_hashbucket *ilb;
+
+ ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ /* Don't disable bottom halves while acquiring the lock to
+ * avoid circular locking dependency on PREEMPT_RT.
+ */
+ spin_lock(&ilb->lock);
+ __inet_unhash(sk, ilb);
+ spin_unlock(&ilb->lock);
+ } else {
+ spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+
+ spin_lock_bh(lock);
+ __inet_unhash(sk, NULL);
+ spin_unlock_bh(lock);
+ }
}
EXPORT_SYMBOL_GPL(inet_unhash);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 437afe392e66..9e0bbd026560 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -52,14 +52,15 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
spin_unlock(lock);
/* Disassociate with bind bucket. */
- bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
- hashinfo->bhash_size)];
+ bhead = &hashinfo->bhash[tw->tw_bslot];
spin_lock(&bhead->lock);
inet_twsk_bind_unhash(tw, hashinfo);
spin_unlock(&bhead->lock);
- atomic_dec(&tw->tw_dr->tw_count);
+ if (refcount_dec_and_test(&tw->tw_dr->tw_refcount))
+ kfree(tw->tw_dr);
+
inet_twsk_put(tw);
}
@@ -110,8 +111,12 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
- bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
- hashinfo->bhash_size)];
+ /* Cache inet_bhashfn(), because 'struct net' might be no longer
+ * available later in inet_twsk_kill().
+ */
+ tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
+ hashinfo->bhash_size);
+ bhead = &hashinfo->bhash[tw->tw_bslot];
spin_lock(&bhead->lock);
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
@@ -145,10 +150,6 @@ static void tw_timer_handler(struct timer_list *t)
{
struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
- if (tw->tw_kill)
- __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
- else
- __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED);
inet_twsk_kill(tw);
}
@@ -158,7 +159,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
{
struct inet_timewait_sock *tw;
- if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets)
+ if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets)
return NULL;
tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
@@ -244,59 +245,15 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
* of PAWS.
*/
- tw->tw_kill = timeo <= 4*HZ;
if (!rearm) {
+ bool kill = timeo <= 4*HZ;
+
+ __NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED :
+ LINUX_MIB_TIMEWAITED);
BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo));
- atomic_inc(&tw->tw_dr->tw_count);
+ refcount_inc(&tw->tw_dr->tw_refcount);
} else {
mod_timer_pending(&tw->tw_timer, jiffies + timeo);
}
}
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
-
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
-{
- struct inet_timewait_sock *tw;
- struct sock *sk;
- struct hlist_nulls_node *node;
- unsigned int slot;
-
- for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
- struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
-restart_rcu:
- cond_resched();
- rcu_read_lock();
-restart:
- sk_nulls_for_each_rcu(sk, node, &head->chain) {
- if (sk->sk_state != TCP_TIME_WAIT)
- continue;
- tw = inet_twsk(sk);
- if ((tw->tw_family != family) ||
- refcount_read(&twsk_net(tw)->ns.count))
- continue;
-
- if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
- continue;
-
- if (unlikely((tw->tw_family != family) ||
- refcount_read(&twsk_net(tw)->ns.count))) {
- inet_twsk_put(tw);
- goto restart;
- }
-
- rcu_read_unlock();
- local_bh_disable();
- inet_twsk_deschedule_put(tw);
- local_bh_enable();
- goto restart_rcu;
- }
- /* If the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto restart;
- rcu_read_unlock();
- }
-}
-EXPORT_SYMBOL_GPL(inet_twsk_purge);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 3a025c011971..d94f9f7e60c3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -196,7 +196,8 @@ resubmit:
if (ipprot) {
if (!ipprot->no_policy) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- kfree_skb(skb);
+ kfree_skb_reason(skb,
+ SKB_DROP_REASON_XFRM_POLICY);
return;
}
nf_reset_ct(skb);
@@ -215,7 +216,7 @@ resubmit:
icmp_send(skb, ICMP_DEST_UNREACH,
ICMP_PROT_UNREACH, 0);
}
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_NOPROTO);
} else {
__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
consume_skb(skb);
@@ -318,8 +319,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
{
const struct iphdr *iph = ip_hdr(skb);
int (*edemux)(struct sk_buff *skb);
+ int err, drop_reason;
struct rtable *rt;
- int err;
+
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (ip_can_use_hint(skb, iph, hint)) {
err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
@@ -396,19 +399,23 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
* so-called "hole-196" attack) so do it for both.
*/
if (in_dev &&
- IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
+ IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) {
+ drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST;
goto drop;
+ }
}
return NET_RX_SUCCESS;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return NET_RX_DROP;
drop_error:
- if (err == -EXDEV)
+ if (err == -EXDEV) {
+ drop_reason = SKB_DROP_REASON_IP_RPFILTER;
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
+ }
goto drop;
}
@@ -436,13 +443,16 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
{
const struct iphdr *iph;
+ int drop_reason;
u32 len;
/* When the interface is in promisc. mode, drop all the crap
* that it receives, do not try to analyse it.
*/
- if (skb->pkt_type == PACKET_OTHERHOST)
+ if (skb->pkt_type == PACKET_OTHERHOST) {
+ drop_reason = SKB_DROP_REASON_OTHERHOST;
goto drop;
+ }
__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
@@ -452,6 +462,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
goto out;
}
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto inhdr_error;
@@ -488,6 +499,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
len = ntohs(iph->tot_len);
if (skb->len < len) {
+ drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
@@ -516,11 +528,14 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
return skb;
csum_error:
+ drop_reason = SKB_DROP_REASON_IP_CSUM;
__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
+ if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED)
+ drop_reason = SKB_DROP_REASON_IP_INHDR;
__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
out:
return NULL;
}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index da1b5038bdfd..a9e22a098872 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -42,7 +42,7 @@
*/
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
- __be32 daddr, struct rtable *rt, int is_frag)
+ __be32 daddr, struct rtable *rt)
{
unsigned char *iph = skb_network_header(skb);
@@ -53,28 +53,15 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
if (opt->srr)
memcpy(iph + opt->srr + iph[opt->srr + 1] - 4, &daddr, 4);
- if (!is_frag) {
- if (opt->rr_needaddr)
- ip_rt_get_source(iph + opt->rr + iph[opt->rr + 2] - 5, skb, rt);
- if (opt->ts_needaddr)
- ip_rt_get_source(iph + opt->ts + iph[opt->ts + 2] - 9, skb, rt);
- if (opt->ts_needtime) {
- __be32 midtime;
+ if (opt->rr_needaddr)
+ ip_rt_get_source(iph + opt->rr + iph[opt->rr + 2] - 5, skb, rt);
+ if (opt->ts_needaddr)
+ ip_rt_get_source(iph + opt->ts + iph[opt->ts + 2] - 9, skb, rt);
+ if (opt->ts_needtime) {
+ __be32 midtime;
- midtime = inet_current_timestamp();
- memcpy(iph + opt->ts + iph[opt->ts + 2] - 5, &midtime, 4);
- }
- return;
- }
- if (opt->rr) {
- memset(iph + opt->rr, IPOPT_NOP, iph[opt->rr + 1]);
- opt->rr = 0;
- opt->rr_needaddr = 0;
- }
- if (opt->ts) {
- memset(iph + opt->ts, IPOPT_NOP, iph[opt->ts + 1]);
- opt->ts = 0;
- opt->ts_needaddr = opt->ts_needtime = 0;
+ midtime = inet_current_timestamp();
+ memcpy(iph + opt->ts + iph[opt->ts + 2] - 5, &midtime, 4);
}
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 139cec29ed06..0c0574eb5f5b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -179,7 +179,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
if (opt && opt->opt.optlen) {
iph->ihl += opt->opt.optlen>>2;
- ip_options_build(skb, &opt->opt, daddr, rt, 0);
+ ip_options_build(skb, &opt->opt, daddr, rt);
}
skb->priority = sk->sk_priority;
@@ -519,7 +519,7 @@ packet_routed:
if (inet_opt && inet_opt->opt.optlen) {
iph->ihl += inet_opt->opt.optlen >> 2;
- ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
+ ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt);
}
ip_select_ident_segs(net, skb, sk,
@@ -1541,7 +1541,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
if (opt) {
iph->ihl += opt->optlen >> 2;
- ip_options_build(skb, opt, cork->addr, rt, 0);
+ ip_options_build(skb, opt, cork->addr, rt);
}
skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 29bbe2b08ae9..c860519d57ee 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -268,13 +268,12 @@ static void __net_exit ipmr_rules_exit(struct net *net)
{
struct mr_table *mrt, *next;
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
list_del(&mrt->list);
ipmr_free_table(mrt);
}
fib_rules_unregister(net->ipv4.mr_rules_ops);
- rtnl_unlock();
}
static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -330,10 +329,9 @@ static int __net_init ipmr_rules_init(struct net *net)
static void __net_exit ipmr_rules_exit(struct net *net)
{
- rtnl_lock();
+ ASSERT_RTNL();
ipmr_free_table(net->ipv4.mrt);
net->ipv4.mrt = NULL;
- rtnl_unlock();
}
static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -3077,7 +3075,9 @@ static int __net_init ipmr_net_init(struct net *net)
proc_cache_fail:
remove_proc_entry("ip_mr_vif", net->proc_net);
proc_vif_fail:
+ rtnl_lock();
ipmr_rules_exit(net);
+ rtnl_unlock();
#endif
ipmr_rules_fail:
ipmr_notifier_exit(net);
@@ -3092,12 +3092,22 @@ static void __net_exit ipmr_net_exit(struct net *net)
remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
ipmr_notifier_exit(net);
- ipmr_rules_exit(net);
+}
+
+static void __net_exit ipmr_net_exit_batch(struct list_head *net_list)
+{
+ struct net *net;
+
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list)
+ ipmr_rules_exit(net);
+ rtnl_unlock();
}
static struct pernet_operations ipmr_net_ops = {
.init = ipmr_net_init,
.exit = ipmr_net_exit,
+ .exit_batch = ipmr_net_exit_batch,
};
int __init ip_mr_init(void)
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 3f248a19faa3..fab357cc8559 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -295,28 +295,24 @@ pptp_inbound_pkt(struct sk_buff *skb,
return NF_ACCEPT;
}
+static const struct nf_nat_pptp_hook pptp_hooks = {
+ .outbound = pptp_outbound_pkt,
+ .inbound = pptp_inbound_pkt,
+ .exp_gre = pptp_exp_gre,
+ .expectfn = pptp_nat_expected,
+};
+
static int __init nf_nat_helper_pptp_init(void)
{
- BUG_ON(nf_nat_pptp_hook_outbound != NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
-
- BUG_ON(nf_nat_pptp_hook_inbound != NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
-
- BUG_ON(nf_nat_pptp_hook_exp_gre != NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
+ WARN_ON(nf_nat_pptp_hook != NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook, &pptp_hooks);
- BUG_ON(nf_nat_pptp_hook_expectfn != NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
return 0;
}
static void __exit nf_nat_helper_pptp_fini(void)
{
- RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook, NULL);
synchronize_rcu();
}
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index eeafeccebb8d..e459a391e607 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3733,12 +3733,16 @@ out:
}
EXPORT_SYMBOL(nexthop_res_grp_activity_update);
-static void __net_exit nexthop_net_exit(struct net *net)
+static void __net_exit nexthop_net_exit_batch(struct list_head *net_list)
{
+ struct net *net;
+
rtnl_lock();
- flush_all_nexthops(net);
+ list_for_each_entry(net, net_list, exit_list) {
+ flush_all_nexthops(net);
+ kfree(net->nexthop.devhash);
+ }
rtnl_unlock();
- kfree(net->nexthop.devhash);
}
static int __net_init nexthop_net_init(struct net *net)
@@ -3756,7 +3760,7 @@ static int __net_init nexthop_net_init(struct net *net)
static struct pernet_operations nexthop_net_ops = {
.init = nexthop_net_init,
- .exit = nexthop_net_exit,
+ .exit_batch = nexthop_net_exit_batch,
};
static int __init nexthop_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f30273afb539..28836071f0a6 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,8 +59,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
- atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets,
- proto_memory_allocated(&tcp_prot));
+ refcount_read(&net->ipv4.tcp_death_row->tw_refcount) - 1,
+ sockets, proto_memory_allocated(&tcp_prot));
seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
proto_memory_allocated(&udp_prot));
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ff6f91cdb6c4..634766e6c7cc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -84,6 +84,7 @@
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
+#include <net/inet_dscp.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/route.h>
@@ -112,14 +113,13 @@
#define DEFAULT_MIN_PMTU (512 + 20 + 20)
#define DEFAULT_MTU_EXPIRES (10 * 60 * HZ)
-
+#define DEFAULT_MIN_ADVMSS 256
static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
-static int ip_rt_min_advmss __read_mostly = 256;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
@@ -458,7 +458,7 @@ static u32 *ip_tstamps __read_mostly;
* if one generator is seldom used. This makes hard for an attacker
* to infer how many packets were sent between two points in time.
*/
-u32 ip_idents_reserve(u32 hash, int segs)
+static u32 ip_idents_reserve(u32 hash, int segs)
{
u32 bucket, old, now = (u32)jiffies;
atomic_t *p_id;
@@ -479,7 +479,6 @@ u32 ip_idents_reserve(u32 hash, int segs)
*/
return atomic_add_return(segs + delta, p_id) - segs;
}
-EXPORT_SYMBOL(ip_idents_reserve);
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
@@ -1298,9 +1297,10 @@ static void set_class_tag(struct rtable *rt, u32 tag)
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
+ struct net *net = dev_net(dst->dev);
unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
- ip_rt_min_advmss);
+ net->ipv4.ip_rt_min_advmss);
return min(advmss, IPV4_MAX_PMTU - header_size);
}
@@ -3392,7 +3392,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (fa->fa_slen == slen &&
fa->tb_id == fri.tb_id &&
- fa->fa_tos == fri.tos &&
+ fa->fa_dscp == inet_dsfield_to_dscp(fri.tos) &&
fa->fa_info == res.fi &&
fa->fa_type == fri.type) {
fri.offload = fa->offload;
@@ -3535,13 +3535,6 @@ static struct ctl_table ipv4_route_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "min_adv_mss",
- .data = &ip_rt_min_advmss,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{ }
};
@@ -3569,6 +3562,13 @@ static struct ctl_table ipv4_route_netns_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "min_adv_mss",
+ .data = &init_net.ipv4.ip_rt_min_advmss,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ },
};
@@ -3631,6 +3631,7 @@ static __net_init int netns_ip_rt_init(struct net *net)
/* Set default value for namespaceified sysctls */
net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU;
net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES;
+ net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS;
return 0;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 97eb54774924..1cae27b5dcd8 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -589,6 +589,14 @@ static struct ctl_table ipv4_table[] = {
};
static struct ctl_table ipv4_net_table[] = {
+ /* tcp_max_tw_buckets must be first in this table. */
+ {
+ .procname = "tcp_max_tw_buckets",
+/* .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, */
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{
.procname = "icmp_echo_ignore_all",
.data = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
@@ -1001,13 +1009,6 @@ static struct ctl_table ipv4_net_table[] = {
.extra2 = &two,
},
{
- .procname = "tcp_max_tw_buckets",
- .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_max_syn_backlog",
.data = &init_net.ipv4.sysctl_max_syn_backlog,
.maxlen = sizeof(int),
@@ -1400,7 +1401,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
if (!table)
goto err_alloc;
- for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
+ /* skip first entry (sysctl_max_tw_buckets) */
+ for (i = 1; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
if (table[i].data) {
/* Update the variables to point into
* the current struct net
@@ -1415,6 +1417,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
}
}
+ table[0].data = &net->ipv4.tcp_death_row->sysctl_max_tw_buckets;
+
net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
if (!net->ipv4.ipv4_hdr)
goto err_reg;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 02cb275e5487..760e8221d321 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -894,8 +894,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
return mss_now;
/* Note : tcp_tso_autosize() will eventually split this later */
- new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
- new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
+ new_size_goal = tcp_bound_to_half_wnd(tp, sk->sk_gso_max_size);
/* We try hard to avoid divides here */
size_goal = tp->gso_segs * mss_now;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index ec5550089b4d..02e8626ccb27 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -1154,7 +1154,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.set_state = bbr_set_state,
};
-BTF_SET_START(tcp_bbr_kfunc_ids)
+BTF_SET_START(tcp_bbr_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID(func, bbr_init)
@@ -1167,25 +1167,27 @@ BTF_ID(func, bbr_min_tso_segs)
BTF_ID(func, bbr_set_state)
#endif
#endif
-BTF_SET_END(tcp_bbr_kfunc_ids)
+BTF_SET_END(tcp_bbr_check_kfunc_ids)
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &tcp_bbr_check_kfunc_ids,
+};
static int __init bbr_register(void)
{
int ret;
BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
- ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);
- if (ret)
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set);
+ if (ret < 0)
return ret;
- register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
- return 0;
+ return tcp_register_congestion_control(&tcp_bbr_cong_ops);
}
static void __exit bbr_unregister(void)
{
- unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index e07837e23b3f..24d562dd6225 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
.name = "cubic",
};
-BTF_SET_START(tcp_cubic_kfunc_ids)
+BTF_SET_START(tcp_cubic_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID(func, cubictcp_init)
@@ -496,9 +496,12 @@ BTF_ID(func, cubictcp_cwnd_event)
BTF_ID(func, cubictcp_acked)
#endif
#endif
-BTF_SET_END(tcp_cubic_kfunc_ids)
+BTF_SET_END(tcp_cubic_check_kfunc_ids)
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &tcp_cubic_check_kfunc_ids,
+};
static int __init cubictcp_register(void)
{
@@ -534,16 +537,14 @@ static int __init cubictcp_register(void)
/* divide by bic_scale and by constant Srtt (100ms) */
do_div(cube_factor, bic_scale * 10);
- ret = tcp_register_congestion_control(&cubictcp);
- if (ret)
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set);
+ if (ret < 0)
return ret;
- register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
- return 0;
+ return tcp_register_congestion_control(&cubictcp);
}
static void __exit cubictcp_unregister(void)
{
- unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
tcp_unregister_congestion_control(&cubictcp);
}
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 0d7ab3cc7b61..1943a6630341 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -238,7 +238,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
.name = "dctcp-reno",
};
-BTF_SET_START(tcp_dctcp_kfunc_ids)
+BTF_SET_START(tcp_dctcp_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID(func, dctcp_init)
@@ -249,25 +249,27 @@ BTF_ID(func, dctcp_cwnd_undo)
BTF_ID(func, dctcp_state)
#endif
#endif
-BTF_SET_END(tcp_dctcp_kfunc_ids)
+BTF_SET_END(tcp_dctcp_check_kfunc_ids)
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &tcp_dctcp_check_kfunc_ids,
+};
static int __init dctcp_register(void)
{
int ret;
BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
- ret = tcp_register_congestion_control(&dctcp);
- if (ret)
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set);
+ if (ret < 0)
return ret;
- register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
- return 0;
+ return tcp_register_congestion_control(&dctcp);
}
static void __exit dctcp_unregister(void)
{
- unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
tcp_unregister_congestion_control(&dctcp);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bfe4112e000c..af94a6d22a9d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6725,6 +6725,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
ireq->ireq_state = TCP_NEW_SYN_RECV;
write_pnet(&ireq->ireq_net, sock_net(sk_listener));
ireq->ireq_family = sk_listener->sk_family;
+ req->timeout = TCP_TIMEOUT_INIT;
}
return req;
@@ -6941,9 +6942,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
sock_put(fastopen_sk);
} else {
tcp_rsk(req)->tfo_listener = false;
- if (!want_cookie)
- inet_csk_reqsk_queue_hash_add(sk, req,
- tcp_timeout_init((struct sock *)req));
+ if (!want_cookie) {
+ req->timeout = tcp_timeout_init((struct sock *)req);
+ inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
+ }
af_ops->send_synack(sk, dst, &fl, req, &foc,
!want_cookie ? TCP_SYNACK_NORMAL :
TCP_SYNACK_COOKIE,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fec656f5a39e..6873f46fc8ba 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -91,6 +91,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
+static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk);
+
static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
return secure_tcp_seq(ip_hdr(skb)->daddr,
@@ -206,7 +208,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct rtable *rt;
int err;
struct ip_options_rcu *inet_opt;
- struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+ struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
@@ -810,7 +812,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
arg.tos = ip_hdr(skb)->tos;
arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
- ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+ ctl_sk = this_cpu_read(ipv4_tcp_sk);
+ sock_net_set(ctl_sk, net);
if (sk) {
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
@@ -825,6 +828,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
transmit_time);
ctl_sk->sk_mark = 0;
+ sock_net_set(ctl_sk, &init_net);
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
local_bh_enable();
@@ -908,7 +912,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
arg.tos = tos;
arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
- ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+ ctl_sk = this_cpu_read(ipv4_tcp_sk);
+ sock_net_set(ctl_sk, net);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
@@ -921,6 +926,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
transmit_time);
ctl_sk->sk_mark = 0;
+ sock_net_set(ctl_sk, &init_net);
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
local_bh_enable();
}
@@ -3111,41 +3117,18 @@ EXPORT_SYMBOL(tcp_prot);
static void __net_exit tcp_sk_exit(struct net *net)
{
- int cpu;
+ struct inet_timewait_death_row *tcp_death_row = net->ipv4.tcp_death_row;
if (net->ipv4.tcp_congestion_control)
bpf_module_put(net->ipv4.tcp_congestion_control,
net->ipv4.tcp_congestion_control->owner);
-
- for_each_possible_cpu(cpu)
- inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
- free_percpu(net->ipv4.tcp_sk);
+ if (refcount_dec_and_test(&tcp_death_row->tw_refcount))
+ kfree(tcp_death_row);
}
static int __net_init tcp_sk_init(struct net *net)
{
- int res, cpu, cnt;
-
- net->ipv4.tcp_sk = alloc_percpu(struct sock *);
- if (!net->ipv4.tcp_sk)
- return -ENOMEM;
-
- for_each_possible_cpu(cpu) {
- struct sock *sk;
-
- res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
- IPPROTO_TCP, net);
- if (res)
- goto fail;
- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
-
- /* Please enforce IP_DF and IPID==0 for RST and
- * ACK sent in SYN-RECV and TIME-WAIT state.
- */
- inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
-
- *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
- }
+ int cnt;
net->ipv4.sysctl_tcp_ecn = 2;
net->ipv4.sysctl_tcp_ecn_fallback = 1;
@@ -3172,9 +3155,13 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_tw_reuse = 2;
net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
+ net->ipv4.tcp_death_row = kzalloc(sizeof(struct inet_timewait_death_row), GFP_KERNEL);
+ if (!net->ipv4.tcp_death_row)
+ return -ENOMEM;
+ refcount_set(&net->ipv4.tcp_death_row->tw_refcount, 1);
cnt = tcp_hashinfo.ehash_mask + 1;
- net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
- net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
+ net->ipv4.tcp_death_row->sysctl_max_tw_buckets = cnt / 2;
+ net->ipv4.tcp_death_row->hashinfo = &tcp_hashinfo;
net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
net->ipv4.sysctl_tcp_sack = 1;
@@ -3229,18 +3216,12 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.tcp_congestion_control = &tcp_reno;
return 0;
-fail:
- tcp_sk_exit(net);
-
- return res;
}
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
- inet_twsk_purge(&tcp_hashinfo, AF_INET);
-
list_for_each_entry(net, net_exit_list, exit_list)
tcp_fastopen_ctx_destroy(net);
}
@@ -3326,6 +3307,24 @@ static void __init bpf_iter_register(void)
void __init tcp_v4_init(void)
{
+ int cpu, res;
+
+ for_each_possible_cpu(cpu) {
+ struct sock *sk;
+
+ res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+ IPPROTO_TCP, &init_net);
+ if (res)
+ panic("Failed to create the TCP control socket.\n");
+ sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+ /* Please enforce IP_DF and IPID==0 for RST and
+ * ACK sent in SYN-RECV and TIME-WAIT state.
+ */
+ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
+
+ per_cpu(ipv4_tcp_sk, cpu) = sk;
+ }
if (register_pernet_subsys(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7c2d3ac2363a..6366df7aaf2a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -248,7 +248,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct inet_timewait_sock *tw;
- struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+ struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
tw = inet_twsk_alloc(sk, tcp_death_row, state);
@@ -583,7 +583,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* it can be estimated (approximately)
* from another data.
*/
- tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
+ tmp_opt.ts_recent_stamp = ktime_get_seconds() - reqsk_timeout(req, TCP_RTO_MAX) / HZ;
paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
}
}
@@ -622,8 +622,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
!inet_rtx_syn_ack(sk, req)) {
unsigned long expires = jiffies;
- expires += min(TCP_TIMEOUT_INIT << req->num_timeout,
- TCP_RTO_MAX);
+ expires += reqsk_timeout(req, TCP_RTO_MAX);
if (!fastopen)
mod_timer_pending(&req->rsk_timer, expires);
else
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5079832af5c1..e76bf1e9251e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1960,7 +1960,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
bytes = min_t(unsigned long,
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
- sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
+ sk->sk_gso_max_size);
/* Goal is to send at least one packet per ms,
* not one big TSO packet every 100 ms.
@@ -4092,7 +4092,9 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
struct flowi fl;
int res;
- tcp_rsk(req)->txhash = net_tx_rndhash();
+ /* Paired with WRITE_ONCE() in sock_setsockopt() */
+ if (READ_ONCE(sk->sk_txrehash) == SOCK_TXREHASH_ENABLED)
+ tcp_rsk(req)->txhash = net_tx_rndhash();
res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
NULL);
if (!res) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 090360939401..6b4d8361560f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2093,16 +2093,20 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
+ int drop_reason;
/* Note that an ENOMEM error is charged twice */
- if (rc == -ENOMEM)
+ if (rc == -ENOMEM) {
UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
is_udplite);
- else
+ drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
+ } else {
UDP_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS,
is_udplite);
+ drop_reason = SKB_DROP_REASON_PROTO_MEM;
+ }
UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
}
@@ -2120,14 +2124,17 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
*/
static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
{
+ int drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct udp_sock *up = udp_sk(sk);
int is_udplite = IS_UDPLITE(sk);
/*
* Charge it to the socket, dropping if the queue is full.
*/
- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
nf_reset_ct(skb);
if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
@@ -2204,8 +2211,10 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
udp_lib_checksum_complete(skb))
goto csum_error;
- if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
+ if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto drop;
+ }
udp_csum_pull_header(skb);
@@ -2213,11 +2222,12 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
return __udp_queue_rcv_skb(sk, skb);
csum_error:
+ drop_reason = SKB_DROP_REASON_UDP_CSUM;
__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return -1;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f927c199a93c..4f402bc38f05 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -146,18 +146,11 @@ static int ipv6_generate_stable_address(struct in6_addr *addr,
#define IN6_ADDR_HSIZE_SHIFT 8
#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT)
-/*
- * Configured unicast address hash table
- */
-static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
-static DEFINE_SPINLOCK(addrconf_hash_lock);
-static void addrconf_verify(void);
-static void addrconf_verify_rtnl(void);
-static void addrconf_verify_work(struct work_struct *);
+static void addrconf_verify(struct net *net);
+static void addrconf_verify_rtnl(struct net *net);
static struct workqueue_struct *addrconf_wq;
-static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
@@ -554,7 +547,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
#ifdef CONFIG_IPV6_MROUTE
if ((all || type == NETCONFA_MC_FORWARDING) &&
nla_put_s32(skb, NETCONFA_MC_FORWARDING,
- devconf->mc_forwarding) < 0)
+ atomic_read(&devconf->mc_forwarding)) < 0)
goto nla_put_failure;
#endif
if ((all || type == NETCONFA_PROXY_NEIGH) &&
@@ -1011,9 +1004,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
{
struct inet6_ifaddr *ifp;
- hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
+ hlist_for_each_entry(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev)
return true;
@@ -1024,20 +1015,21 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
{
- unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr);
+ struct net *net = dev_net(dev);
+ unsigned int hash = inet6_addr_hash(net, &ifa->addr);
int err = 0;
- spin_lock(&addrconf_hash_lock);
+ spin_lock(&net->ipv6.addrconf_hash_lock);
/* Ignore adding duplicate addresses on an interface */
- if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
+ if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) {
netdev_dbg(dev, "ipv6_add_addr: already assigned\n");
err = -EEXIST;
} else {
- hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
+ hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]);
}
- spin_unlock(&addrconf_hash_lock);
+ spin_unlock(&net->ipv6.addrconf_hash_lock);
return err;
}
@@ -1261,9 +1253,10 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
static void ipv6_del_addr(struct inet6_ifaddr *ifp)
{
- int state;
enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP;
+ struct net *net = dev_net(ifp->idev->dev);
unsigned long expires;
+ int state;
ASSERT_RTNL();
@@ -1275,9 +1268,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (state == INET6_IFADDR_STATE_DEAD)
goto out;
- spin_lock_bh(&addrconf_hash_lock);
+ spin_lock_bh(&net->ipv6.addrconf_hash_lock);
hlist_del_init_rcu(&ifp->addr_lst);
- spin_unlock_bh(&addrconf_hash_lock);
+ spin_unlock_bh(&net->ipv6.addrconf_hash_lock);
write_lock_bh(&ifp->idev->lock);
@@ -1920,10 +1913,8 @@ __ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
if (skip_dev_check)
dev = NULL;
- hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
+ hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
ndev = ifp->idev->dev;
- if (!net_eq(dev_net(ndev), net))
- continue;
if (l3mdev_master_dev_rcu(ndev) != l3mdev)
continue;
@@ -2027,9 +2018,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
struct inet6_ifaddr *ifp, *result = NULL;
rcu_read_lock();
- hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
+ hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
@@ -2096,7 +2085,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
- struct net *net = dev_net(ifp->idev->dev);
+ struct net *net = dev_net(idev->dev);
if (addrconf_dad_end(ifp)) {
in6_ifa_put(ifp);
@@ -2675,7 +2664,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
create, now);
in6_ifa_put(ifp);
- addrconf_verify();
+ addrconf_verify(net);
}
return 0;
@@ -2987,7 +2976,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
manage_tempaddrs(idev, ifp, cfg->valid_lft,
cfg->preferred_lft, true, jiffies);
in6_ifa_put(ifp);
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(net);
return 0;
} else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk, false,
@@ -3027,7 +3016,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
manage_tempaddrs(idev, ifp, 0, 0, false,
jiffies);
ipv6_del_addr(ifp);
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(net);
if (ipv6_addr_is_multicast(pfx)) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk,
false, pfx, dev->ifindex);
@@ -3772,9 +3761,9 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
/* Step 2: clear hash table */
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
- struct hlist_head *h = &inet6_addr_lst[i];
+ struct hlist_head *h = &net->ipv6.inet6_addr_lst[i];
- spin_lock_bh(&addrconf_hash_lock);
+ spin_lock_bh(&net->ipv6.addrconf_hash_lock);
restart:
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
@@ -3790,7 +3779,7 @@ restart:
}
}
}
- spin_unlock_bh(&addrconf_hash_lock);
+ spin_unlock_bh(&net->ipv6.addrconf_hash_lock);
}
write_lock_bh(&idev->lock);
@@ -4246,7 +4235,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
* before this temporary address becomes deprecated.
*/
if (ifp->flags & IFA_F_TEMPORARY)
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(dev_net(dev));
}
static void addrconf_dad_run(struct inet6_dev *idev, bool restart)
@@ -4288,10 +4277,8 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
}
for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
- hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket],
+ hlist_for_each_entry_rcu(ifa, &net->ipv6.inet6_addr_lst[state->bucket],
addr_lst) {
- if (!net_eq(dev_net(ifa->idev->dev), net))
- continue;
/* sync with offset */
if (p < state->offset) {
p++;
@@ -4314,8 +4301,6 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
struct net *net = seq_file_net(seq);
hlist_for_each_entry_continue_rcu(ifa, addr_lst) {
- if (!net_eq(dev_net(ifa->idev->dev), net))
- continue;
state->offset++;
return ifa;
}
@@ -4323,9 +4308,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
state->offset = 0;
while (++state->bucket < IN6_ADDR_HSIZE) {
hlist_for_each_entry_rcu(ifa,
- &inet6_addr_lst[state->bucket], addr_lst) {
- if (!net_eq(dev_net(ifa->idev->dev), net))
- continue;
+ &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) {
return ifa;
}
}
@@ -4413,9 +4396,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
int ret = 0;
rcu_read_lock();
- hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
+ hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr) &&
(ifp->flags & IFA_F_HOMEADDRESS)) {
ret = 1;
@@ -4453,9 +4434,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs,
hash = inet6_addr_hash(net, addr);
hash_found = false;
- hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
+ hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr)) {
hash_found = true;
@@ -4484,7 +4463,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs,
* Periodic address status verification
*/
-static void addrconf_verify_rtnl(void)
+static void addrconf_verify_rtnl(struct net *net)
{
unsigned long now, next, next_sec, next_sched;
struct inet6_ifaddr *ifp;
@@ -4496,11 +4475,11 @@ static void addrconf_verify_rtnl(void)
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
- cancel_delayed_work(&addr_chk_work);
+ cancel_delayed_work(&net->ipv6.addr_chk_work);
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
restart:
- hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {
+ hlist_for_each_entry_rcu_bh(ifp, &net->ipv6.inet6_addr_lst[i], addr_lst) {
unsigned long age;
/* When setting preferred_lft to a value not zero or
@@ -4599,20 +4578,23 @@ restart:
pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
now, next, next_sec, next_sched);
- mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
+ mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
static void addrconf_verify_work(struct work_struct *w)
{
+ struct net *net = container_of(to_delayed_work(w), struct net,
+ ipv6.addr_chk_work);
+
rtnl_lock();
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(net);
rtnl_unlock();
}
-static void addrconf_verify(void)
+static void addrconf_verify(struct net *net)
{
- mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+ mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, 0);
}
static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
@@ -4708,7 +4690,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
return 0;
}
-static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
+static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
+ struct ifa6_config *cfg)
{
u32 flags;
clock_t expires;
@@ -4822,7 +4805,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
jiffies);
}
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(net);
return 0;
}
@@ -4909,7 +4892,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
!(nlh->nlmsg_flags & NLM_F_REPLACE))
err = -EEXIST;
else
- err = inet6_addr_modify(ifa, &cfg);
+ err = inet6_addr_modify(net, ifa, &cfg);
in6_ifa_put(ifa);
@@ -5533,7 +5516,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
#endif
#ifdef CONFIG_IPV6_MROUTE
- array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
+ array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding);
#endif
array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
@@ -5794,7 +5777,7 @@ update_lft:
write_unlock_bh(&idev->lock);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(dev_net(dev));
return 0;
}
@@ -7111,6 +7094,14 @@ static int __net_init addrconf_init_net(struct net *net)
int err = -ENOMEM;
struct ipv6_devconf *all, *dflt;
+ spin_lock_init(&net->ipv6.addrconf_hash_lock);
+ INIT_DEFERRABLE_WORK(&net->ipv6.addr_chk_work, addrconf_verify_work);
+ net->ipv6.inet6_addr_lst = kcalloc(IN6_ADDR_HSIZE,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!net->ipv6.inet6_addr_lst)
+ goto err_alloc_addr;
+
all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL);
if (!all)
goto err_alloc_all;
@@ -7172,11 +7163,15 @@ err_reg_all:
err_alloc_dflt:
kfree(all);
err_alloc_all:
+ kfree(net->ipv6.inet6_addr_lst);
+err_alloc_addr:
return err;
}
static void __net_exit addrconf_exit_net(struct net *net)
{
+ int i;
+
#ifdef CONFIG_SYSCTL
__addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt,
NETCONFA_IFINDEX_DEFAULT);
@@ -7184,7 +7179,19 @@ static void __net_exit addrconf_exit_net(struct net *net)
NETCONFA_IFINDEX_ALL);
#endif
kfree(net->ipv6.devconf_dflt);
+ net->ipv6.devconf_dflt = NULL;
kfree(net->ipv6.devconf_all);
+ net->ipv6.devconf_all = NULL;
+
+ cancel_delayed_work(&net->ipv6.addr_chk_work);
+ /*
+ * Check hash table, then free it.
+ */
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ WARN_ON_ONCE(!hlist_empty(&net->ipv6.inet6_addr_lst[i]));
+
+ kfree(net->ipv6.inet6_addr_lst);
+ net->ipv6.inet6_addr_lst = NULL;
}
static struct pernet_operations addrconf_ops = {
@@ -7207,7 +7214,7 @@ static struct rtnl_af_ops inet6_ops __read_mostly = {
int __init addrconf_init(void)
{
struct inet6_dev *idev;
- int i, err;
+ int err;
err = ipv6_addr_label_init();
if (err < 0) {
@@ -7254,12 +7261,9 @@ int __init addrconf_init(void)
ip6_route_init_special_entries();
- for (i = 0; i < IN6_ADDR_HSIZE; i++)
- INIT_HLIST_HEAD(&inet6_addr_lst[i]);
-
register_netdevice_notifier(&ipv6_dev_notf);
- addrconf_verify();
+ addrconf_verify(&init_net);
rtnl_af_register(&inet6_ops);
@@ -7317,7 +7321,6 @@ out:
void addrconf_cleanup(void)
{
struct net_device *dev;
- int i;
unregister_netdevice_notifier(&ipv6_dev_notf);
unregister_pernet_subsys(&addrconf_ops);
@@ -7335,14 +7338,6 @@ void addrconf_cleanup(void)
}
addrconf_ifdown(init_net.loopback_dev, true);
- /*
- * Check hash table.
- */
- spin_lock_bh(&addrconf_hash_lock);
- for (i = 0; i < IN6_ADDR_HSIZE; i++)
- WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
- spin_unlock_bh(&addrconf_hash_lock);
- cancel_delayed_work(&addr_chk_work);
rtnl_unlock();
destroy_workqueue(addrconf_wq);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 77e34aec7e82..658d5eabaf7e 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -1344,14 +1344,14 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
return opt2;
}
-struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
- struct ipv6_txoptions *opt)
+struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space,
+ struct ipv6_txoptions *opt)
{
/*
* ignore the dest before srcrt unless srcrt is being included.
* --yoshfuji
*/
- if (opt && opt->dst0opt && !opt->srcrt) {
+ if (opt->dst0opt && !opt->srcrt) {
if (opt_space != opt) {
memcpy(opt_space, opt, sizeof(*opt_space));
opt = opt_space;
@@ -1362,7 +1362,7 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
return opt;
}
-EXPORT_SYMBOL_GPL(ipv6_fixup_options);
+EXPORT_SYMBOL_GPL(__ipv6_fixup_options);
/**
* fl6_update_dst - update flowi destination address with info given
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec029c86ae06..7c2003833010 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -16,6 +16,7 @@
#include <linux/indirect_call_wrapper.h>
#include <net/fib_rules.h>
+#include <net/inet_dscp.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
@@ -25,14 +26,14 @@ struct fib6_rule {
struct fib_rule common;
struct rt6key src;
struct rt6key dst;
- u8 tclass;
+ dscp_t dscp;
};
static bool fib6_rule_matchall(const struct fib_rule *rule)
{
struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
- if (r->dst.plen || r->src.plen || r->tclass)
+ if (r->dst.plen || r->src.plen || r->dscp)
return false;
return fib_rule_matchall(rule);
}
@@ -323,7 +324,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
return 0;
}
- if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
+ if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
return 0;
if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
@@ -349,6 +350,13 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+ if (!inet_validate_dscp(frh->tos)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid dsfield (tos): ECN bits must be 0");
+ goto errout;
+ }
+ rule6->dscp = inet_dsfield_to_dscp(frh->tos);
+
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid table");
@@ -369,7 +377,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->src.plen = frh->src_len;
rule6->dst.plen = frh->dst_len;
- rule6->tclass = frh->tos;
if (fib_rule_requires_fldissect(rule))
net->ipv6.fib6_rules_require_fldissect++;
@@ -402,7 +409,7 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
return 0;
- if (frh->tos && (rule6->tclass != frh->tos))
+ if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos)
return 0;
if (frh->src_len &&
@@ -423,7 +430,7 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule6->dst.plen;
frh->src_len = rule6->src.plen;
- frh->tos = rule6->tclass;
+ frh->tos = inet_dscp_to_dsfield(rule6->dscp);
if ((rule6->dst.plen &&
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
@@ -486,16 +493,21 @@ out_fib6_rules_ops:
goto out;
}
-static void __net_exit fib6_rules_net_exit(struct net *net)
+static void __net_exit fib6_rules_net_exit_batch(struct list_head *net_list)
{
+ struct net *net;
+
rtnl_lock();
- fib_rules_unregister(net->ipv6.fib6_rules_ops);
+ list_for_each_entry(net, net_list, exit_list) {
+ fib_rules_unregister(net->ipv6.fib6_rules_ops);
+ cond_resched();
+ }
rtnl_unlock();
}
static struct pernet_operations fib6_rules_net_ops = {
.init = fib6_rules_net_init,
- .exit = fib6_rules_net_exit,
+ .exit_batch = fib6_rules_net_exit_batch,
};
int __init fib6_rules_init(void)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 96c5cc0f30ce..e6b978ea0e87 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -69,17 +69,7 @@
#include <linux/uaccess.h>
-/*
- * The ICMP socket(s). This is the most convenient way to flow control
- * our ICMP output as well as maintain a clean interface throughout
- * all layers. All Socketless IP sends will soon be gone.
- *
- * On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmpv6_sk(struct net *net)
-{
- return this_cpu_read(*net->ipv6.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
@@ -110,11 +100,11 @@ static const struct inet6_protocol icmpv6_protocol = {
};
/* Called with BH disabled */
-static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
+static struct sock *icmpv6_xmit_lock(struct net *net)
{
struct sock *sk;
- sk = icmpv6_sk(net);
+ sk = this_cpu_read(ipv6_icmp_sk);
if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
/* This can happen if the output path (f.e. SIT or
* ip6ip6 tunnel) signals dst_link_failure() for an
@@ -122,11 +112,13 @@ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
*/
return NULL;
}
+ sock_net_set(sk, net);
return sk;
}
-static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
+static void icmpv6_xmit_unlock(struct sock *sk)
{
+ sock_net_set(sk, &init_net);
spin_unlock(&sk->sk_lock.slock);
}
@@ -1034,59 +1026,27 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}
-static void __net_exit icmpv6_sk_exit(struct net *net)
-{
- int i;
-
- for_each_possible_cpu(i)
- inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
- free_percpu(net->ipv6.icmp_sk);
-}
-
-static int __net_init icmpv6_sk_init(struct net *net)
+int __init icmpv6_init(void)
{
struct sock *sk;
int err, i;
- net->ipv6.icmp_sk = alloc_percpu(struct sock *);
- if (!net->ipv6.icmp_sk)
- return -ENOMEM;
-
for_each_possible_cpu(i) {
err = inet_ctl_sock_create(&sk, PF_INET6,
- SOCK_RAW, IPPROTO_ICMPV6, net);
+ SOCK_RAW, IPPROTO_ICMPV6, &init_net);
if (err < 0) {
pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
err);
- goto fail;
+ return err;
}
- *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
+ per_cpu(ipv6_icmp_sk, i) = sk;
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
*/
sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
}
- return 0;
-
- fail:
- icmpv6_sk_exit(net);
- return err;
-}
-
-static struct pernet_operations icmpv6_sk_ops = {
- .init = icmpv6_sk_init,
- .exit = icmpv6_sk_exit,
-};
-
-int __init icmpv6_init(void)
-{
- int err;
-
- err = register_pernet_subsys(&icmpv6_sk_ops);
- if (err < 0)
- return err;
err = -EAGAIN;
if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
@@ -1101,14 +1061,12 @@ sender_reg_err:
inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
pr_err("Failed to register ICMP6 protocol\n");
- unregister_pernet_subsys(&icmpv6_sk_ops);
return err;
}
void icmpv6_cleanup(void)
{
inet6_unregister_icmp_sender(icmp6_send);
- unregister_pernet_subsys(&icmpv6_sk_ops);
inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 4514444e96c8..4740afecf7c6 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk)
{
int err = 0;
- if (sk->sk_state != TCP_CLOSE) {
- local_bh_disable();
+ if (sk->sk_state != TCP_CLOSE)
err = __inet_hash(sk, NULL);
- local_bh_enable();
- }
return err;
}
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index f90a87389fcc..f6f5b83dd954 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -32,13 +32,25 @@ struct ioam6_lwt_encap {
struct ioam6_trace_hdr traceh;
} __packed;
+struct ioam6_lwt_freq {
+ u32 k;
+ u32 n;
+};
+
struct ioam6_lwt {
struct dst_cache cache;
+ struct ioam6_lwt_freq freq;
+ atomic_t pkt_cnt;
u8 mode;
struct in6_addr tundst;
struct ioam6_lwt_encap tuninfo;
};
+static struct netlink_range_validation freq_range = {
+ .min = IOAM6_IPTUNNEL_FREQ_MIN,
+ .max = IOAM6_IPTUNNEL_FREQ_MAX,
+};
+
static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
{
return (struct ioam6_lwt *)lwt->data;
@@ -55,6 +67,8 @@ static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt)
}
static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
+ [IOAM6_IPTUNNEL_FREQ_K] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range),
+ [IOAM6_IPTUNNEL_FREQ_N] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range),
[IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8,
IOAM6_IPTUNNEL_MODE_MIN,
IOAM6_IPTUNNEL_MODE_MAX),
@@ -96,6 +110,7 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
struct lwtunnel_state *lwt;
struct ioam6_lwt *ilwt;
int len_aligned, err;
+ u32 freq_k, freq_n;
u8 mode;
if (family != AF_INET6)
@@ -106,6 +121,23 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
if (err < 0)
return err;
+ if ((!tb[IOAM6_IPTUNNEL_FREQ_K] && tb[IOAM6_IPTUNNEL_FREQ_N]) ||
+ (tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N])) {
+ NL_SET_ERR_MSG(extack, "freq: missing parameter");
+ return -EINVAL;
+ } else if (!tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N]) {
+ freq_k = IOAM6_IPTUNNEL_FREQ_MIN;
+ freq_n = IOAM6_IPTUNNEL_FREQ_MIN;
+ } else {
+ freq_k = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_K]);
+ freq_n = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_N]);
+
+ if (freq_k > freq_n) {
+ NL_SET_ERR_MSG(extack, "freq: k > n is forbidden");
+ return -EINVAL;
+ }
+ }
+
if (!tb[IOAM6_IPTUNNEL_MODE])
mode = IOAM6_IPTUNNEL_MODE_INLINE;
else
@@ -140,6 +172,10 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
return err;
}
+ atomic_set(&ilwt->pkt_cnt, 0);
+ ilwt->freq.k = freq_k;
+ ilwt->freq.n = freq_n;
+
ilwt->mode = mode;
if (tb[IOAM6_IPTUNNEL_DST])
ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);
@@ -263,11 +299,18 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
struct in6_addr orig_daddr;
struct ioam6_lwt *ilwt;
int err = -EINVAL;
+ u32 pkt_cnt;
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
ilwt = ioam6_lwt_state(dst->lwtstate);
+
+ /* Check for insertion frequency (i.e., "k over n" insertions) */
+ pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt);
+ if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k)
+ goto out;
+
orig_daddr = ipv6_hdr(skb)->daddr;
switch (ilwt->mode) {
@@ -358,6 +401,14 @@ static int ioam6_fill_encap_info(struct sk_buff *skb,
struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
int err;
+ err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_K, ilwt->freq.k);
+ if (err)
+ goto ret;
+
+ err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_N, ilwt->freq.n);
+ if (err)
+ goto ret;
+
err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode);
if (err)
goto ret;
@@ -379,7 +430,9 @@ static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
int nlsize;
- nlsize = nla_total_size(sizeof(ilwt->mode)) +
+ nlsize = nla_total_size(sizeof(ilwt->freq.k)) +
+ nla_total_size(sizeof(ilwt->freq.n)) +
+ nla_total_size(sizeof(ilwt->mode)) +
nla_total_size(sizeof(ilwt->tuninfo.traceh));
if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
@@ -395,7 +448,9 @@ static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a);
struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b);
- return (ilwt_a->mode != ilwt_b->mode ||
+ return (ilwt_a->freq.k != ilwt_b->freq.k ||
+ ilwt_a->freq.n != ilwt_b->freq.n ||
+ ilwt_a->mode != ilwt_b->mode ||
(ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
!ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
trace_a->namespace_id != trace_b->namespace_id);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 80256717868e..d4b1e2c5aa76 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -508,7 +508,7 @@ int ip6_mc_input(struct sk_buff *skb)
/*
* IPv6 multicast router mode is now supported ;)
*/
- if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
+ if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) &&
!(ipv6_addr_type(&hdr->daddr) &
(IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index b29e9ba5e113..d37a79a8554e 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -249,7 +249,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
if ((first_word & htonl(0xF00FFFFF)) ||
!ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
!ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
- *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
+ iph->nexthdr != iph2->nexthdr) {
not_same_flow:
NAPI_GRO_CB(p)->same_flow = 0;
continue;
@@ -260,7 +260,8 @@ not_same_flow:
goto not_same_flow;
}
/* flush if Traffic Class fields are different */
- NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+ NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
+ (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
NAPI_GRO_CB(p)->flush |= flush;
/* If the previous IP ID value was based on an atomic
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2995f8d89e7e..0c6c971ce0a5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1350,11 +1350,16 @@ static void ip6_append_data_mtu(unsigned int *mtu,
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
- struct rt6_info *rt, struct flowi6 *fl6)
+ struct rt6_info *rt)
{
struct ipv6_pinfo *np = inet6_sk(sk);
unsigned int mtu;
- struct ipv6_txoptions *opt = ipc6->opt;
+ struct ipv6_txoptions *nopt, *opt = ipc6->opt;
+
+ /* callers pass dst together with a reference, set it first so
+ * ip6_cork_release() can put it down even in case of an error.
+ */
+ cork->base.dst = &rt->dst;
/*
* setup for corking
@@ -1363,39 +1368,32 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (WARN_ON(v6_cork->opt))
return -EINVAL;
- v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
- if (unlikely(!v6_cork->opt))
+ nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
+ if (unlikely(!nopt))
return -ENOBUFS;
- v6_cork->opt->tot_len = sizeof(*opt);
- v6_cork->opt->opt_flen = opt->opt_flen;
- v6_cork->opt->opt_nflen = opt->opt_nflen;
+ nopt->tot_len = sizeof(*opt);
+ nopt->opt_flen = opt->opt_flen;
+ nopt->opt_nflen = opt->opt_nflen;
- v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
- sk->sk_allocation);
- if (opt->dst0opt && !v6_cork->opt->dst0opt)
+ nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
+ if (opt->dst0opt && !nopt->dst0opt)
return -ENOBUFS;
- v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
- sk->sk_allocation);
- if (opt->dst1opt && !v6_cork->opt->dst1opt)
+ nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
+ if (opt->dst1opt && !nopt->dst1opt)
return -ENOBUFS;
- v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
- sk->sk_allocation);
- if (opt->hopopt && !v6_cork->opt->hopopt)
+ nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
+ if (opt->hopopt && !nopt->hopopt)
return -ENOBUFS;
- v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
- sk->sk_allocation);
- if (opt->srcrt && !v6_cork->opt->srcrt)
+ nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
+ if (opt->srcrt && !nopt->srcrt)
return -ENOBUFS;
/* need source address above miyazawa*/
}
- dst_hold(&rt->dst);
- cork->base.dst = &rt->dst;
- cork->fl.u.ip6 = *fl6;
v6_cork->hop_limit = ipc6->hlimit;
v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
@@ -1426,9 +1424,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
}
static int __ip6_append_data(struct sock *sk,
- struct flowi6 *fl6,
struct sk_buff_head *queue,
- struct inet_cork *cork,
+ struct inet_cork_full *cork_full,
struct inet6_cork *v6_cork,
struct page_frag *pfrag,
int getfrag(void *from, char *to, int offset,
@@ -1437,6 +1434,8 @@ static int __ip6_append_data(struct sock *sk,
unsigned int flags, struct ipcm6_cookie *ipc6)
{
struct sk_buff *skb, *skb_prev = NULL;
+ struct inet_cork *cork = &cork_full->base;
+ struct flowi6 *fl6 = &cork_full->fl.u.ip6;
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
struct ubuf_info *uarg = NULL;
int exthdrlen = 0;
@@ -1788,34 +1787,46 @@ int ip6_append_data(struct sock *sk,
/*
* setup for corking
*/
+ dst_hold(&rt->dst);
err = ip6_setup_cork(sk, &inet->cork, &np->cork,
- ipc6, rt, fl6);
+ ipc6, rt);
if (err)
return err;
+ inet->cork.fl.u.ip6 = *fl6;
exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
length += exthdrlen;
transhdrlen += exthdrlen;
} else {
- fl6 = &inet->cork.fl.u.ip6;
transhdrlen = 0;
}
- return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
+ return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
&np->cork, sk_page_frag(sk), getfrag,
from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
+static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
+{
+ struct dst_entry *dst = cork->base.dst;
+
+ cork->base.dst = NULL;
+ cork->base.flags &= ~IPCORK_ALLFRAG;
+ skb_dst_set(skb, dst);
+}
+
static void ip6_cork_release(struct inet_cork_full *cork,
struct inet6_cork *v6_cork)
{
if (v6_cork->opt) {
- kfree(v6_cork->opt->dst0opt);
- kfree(v6_cork->opt->dst1opt);
- kfree(v6_cork->opt->hopopt);
- kfree(v6_cork->opt->srcrt);
- kfree(v6_cork->opt);
+ struct ipv6_txoptions *opt = v6_cork->opt;
+
+ kfree(opt->dst0opt);
+ kfree(opt->dst1opt);
+ kfree(opt->hopopt);
+ kfree(opt->srcrt);
+ kfree(opt);
v6_cork->opt = NULL;
}
@@ -1824,7 +1835,6 @@ static void ip6_cork_release(struct inet_cork_full *cork,
cork->base.dst = NULL;
cork->base.flags &= ~IPCORK_ALLFRAG;
}
- memset(&cork->fl, 0, sizeof(cork->fl));
}
struct sk_buff *__ip6_make_skb(struct sock *sk,
@@ -1834,7 +1844,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
{
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
- struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
+ struct in6_addr *final_dst;
struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
struct ipv6hdr *hdr;
@@ -1864,9 +1874,9 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
/* Allow local fragmentation. */
skb->ignore_df = ip6_sk_ignore_df(sk);
-
- *final_dst = fl6->daddr;
__skb_pull(skb, skb_network_header_len(skb));
+
+ final_dst = &fl6->daddr;
if (opt && opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
if (opt && opt->opt_nflen)
@@ -1886,10 +1896,9 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
skb->priority = sk->sk_priority;
skb->mark = cork->base.mark;
-
skb->tstamp = cork->base.transmit_time;
- skb_dst_set(skb, dst_clone(&rt->dst));
+ ip6_cork_steal_dst(skb, cork);
IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
@@ -1961,26 +1970,26 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
- struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
- struct rt6_info *rt, unsigned int flags,
- struct inet_cork_full *cork)
+ struct ipcm6_cookie *ipc6, struct rt6_info *rt,
+ unsigned int flags, struct inet_cork_full *cork)
{
struct inet6_cork v6_cork;
struct sk_buff_head queue;
int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
int err;
- if (flags & MSG_PROBE)
+ if (flags & MSG_PROBE) {
+ dst_release(&rt->dst);
return NULL;
+ }
__skb_queue_head_init(&queue);
cork->base.flags = 0;
cork->base.addr = 0;
cork->base.opt = NULL;
- cork->base.dst = NULL;
v6_cork.opt = NULL;
- err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
+ err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
if (err) {
ip6_cork_release(cork, &v6_cork);
return ERR_PTR(err);
@@ -1988,7 +1997,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
if (ipc6->dontfrag < 0)
ipc6->dontfrag = inet6_sk(sk)->dontfrag;
- err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
+ err = __ip6_append_data(sk, &queue, cork, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
flags, ipc6);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 97ade833f58c..53f632a560ec 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1121,6 +1121,14 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
+ } else if (skb->protocol == htons(ETH_P_IP)) {
+ const struct rtable *rt = skb_rtable(skb);
+
+ if (!rt)
+ goto tx_err_link_failure;
+
+ if (rt->rt_gw_family == AF_INET6)
+ memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr));
}
} else if (t->parms.proto != 0 && !(t->parms.flags &
(IP6_TNL_F_USE_ORIG_TCLASS |
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 8a2db926b5eb..0ebaaec3faf9 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -255,13 +255,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
{
struct mr_table *mrt, *next;
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
list_del(&mrt->list);
ip6mr_free_table(mrt);
}
fib_rules_unregister(net->ipv6.mr6_rules_ops);
- rtnl_unlock();
}
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -318,10 +317,9 @@ static int __net_init ip6mr_rules_init(struct net *net)
static void __net_exit ip6mr_rules_exit(struct net *net)
{
- rtnl_lock();
+ ASSERT_RTNL();
ip6mr_free_table(net->ipv6.mrt6);
net->ipv6.mrt6 = NULL;
- rtnl_unlock();
}
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -734,7 +732,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
- in6_dev->cnf.mc_forwarding--;
+ atomic_dec(&in6_dev->cnf.mc_forwarding);
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
@@ -902,7 +900,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt,
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
- in6_dev->cnf.mc_forwarding++;
+ atomic_inc(&in6_dev->cnf.mc_forwarding);
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
@@ -1325,7 +1323,9 @@ static int __net_init ip6mr_net_init(struct net *net)
proc_cache_fail:
remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
+ rtnl_lock();
ip6mr_rules_exit(net);
+ rtnl_unlock();
#endif
ip6mr_rules_fail:
ip6mr_notifier_exit(net);
@@ -1338,13 +1338,23 @@ static void __net_exit ip6mr_net_exit(struct net *net)
remove_proc_entry("ip6_mr_cache", net->proc_net);
remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
- ip6mr_rules_exit(net);
ip6mr_notifier_exit(net);
}
+static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
+{
+ struct net *net;
+
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list)
+ ip6mr_rules_exit(net);
+ rtnl_unlock();
+}
+
static struct pernet_operations ip6mr_net_ops = {
.init = ip6mr_net_init,
.exit = ip6mr_net_exit,
+ .exit_batch = ip6mr_net_exit_batch,
};
int __init ip6_mr_init(void)
@@ -1553,7 +1563,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
} else {
rcu_assign_pointer(mrt->mroute_sk, sk);
sock_set_flag(sk, SOCK_RCU_FREE);
- net->ipv6.devconf_all->mc_forwarding++;
+ atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
}
write_unlock_bh(&mrt_lock);
@@ -1569,14 +1579,19 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
int ip6mr_sk_done(struct sock *sk)
{
- int err = -EACCES;
struct net *net = sock_net(sk);
+ struct ipv6_devconf *devconf;
struct mr_table *mrt;
+ int err = -EACCES;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return err;
+ devconf = net->ipv6.devconf_all;
+ if (!devconf || !atomic_read(&devconf->mc_forwarding))
+ return err;
+
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
@@ -1586,7 +1601,7 @@ int ip6mr_sk_done(struct sock *sk)
* so the RCU grace period before sk freeing
* is guaranteed by sk_destruct()
*/
- net->ipv6.devconf_all->mc_forwarding--;
+ atomic_dec(&devconf->mc_forwarding);
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 9256f6ba87ef..d5544cf67ffe 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -59,8 +59,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct pingfakehdr pfh;
struct ipcm6_cookie ipc6;
- pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
-
err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph,
sizeof(user_icmph));
if (err)
@@ -99,6 +97,14 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
(oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
return -EINVAL;
+ ipcm6_init_sk(&ipc6, np);
+ ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.mark = sk->sk_mark;
+
+ err = sock_cmsg_send(sk, msg, &ipc6.sockc);
+ if (err)
+ return err;
+
/* TODO: use ip6_datagram_send_ctl to get options from cmsg */
memset(&fl6, 0, sizeof(fl6));
@@ -107,14 +113,12 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.saddr = np->saddr;
fl6.daddr = *daddr;
fl6.flowi6_oif = oif;
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = ipc6.sockc.mark;
fl6.flowi6_uid = sk->sk_uid;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
- ipcm6_init_sk(&ipc6, np);
- ipc6.sockc.mark = sk->sk_mark;
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 075ee8a2df3b..0c648bf07f39 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -148,6 +148,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_timewait_death_row *tcp_death_row;
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
@@ -156,7 +157,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct dst_entry *dst;
int addr_type;
int err;
- struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -308,6 +308,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
inet->inet_dport = usin->sin6_port;
tcp_set_state(sk, TCP_SYN_SENT);
+ tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
err = inet6_hash_connect(tcp_death_row, sk);
if (err)
goto late_failure;
@@ -2237,15 +2238,9 @@ static void __net_exit tcpv6_net_exit(struct net *net)
inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
-static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
-{
- inet_twsk_purge(&tcp_hashinfo, AF_INET6);
-}
-
static struct pernet_operations tcpv6_net_ops = {
.init = tcpv6_net_init,
.exit = tcpv6_net_exit,
- .exit_batch = tcpv6_net_exit_batch,
};
int __init tcpv6_init(void)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 528b81ef19c9..c6872596b408 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1266,23 +1266,17 @@ static int udp_v6_push_pending_frames(struct sock *sk)
{
struct sk_buff *skb;
struct udp_sock *up = udp_sk(sk);
- struct flowi6 fl6;
int err = 0;
if (up->pending == AF_INET)
return udp_push_pending_frames(sk);
- /* ip6_finish_skb will release the cork, so make a copy of
- * fl6 here.
- */
- fl6 = inet_sk(sk)->cork.fl.u.ip6;
-
skb = ip6_finish_skb(sk);
if (!skb)
goto out;
- err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base);
-
+ err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6,
+ &inet_sk(sk)->cork.base);
out:
up->len = 0;
up->pending = 0;
@@ -1300,7 +1294,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ipv6_txoptions *opt = NULL;
struct ipv6_txoptions *opt_to_free = NULL;
struct ip6_flowlabel *flowlabel = NULL;
- struct flowi6 fl6;
+ struct inet_cork_full cork;
+ struct flowi6 *fl6 = &cork.fl.u.ip6;
struct dst_entry *dst;
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
@@ -1363,9 +1358,6 @@ do_udp_sendmsg:
}
}
- if (up->pending == AF_INET)
- return udp_sendmsg(sk, msg, len);
-
/* Rough check on arithmetic overflow,
better check is made in ip6_append_data().
*/
@@ -1374,6 +1366,8 @@ do_udp_sendmsg:
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
if (up->pending) {
+ if (up->pending == AF_INET)
+ return udp_sendmsg(sk, msg, len);
/*
* There are pending frames.
* The socket lock must be held while it's corked.
@@ -1391,19 +1385,19 @@ do_udp_sendmsg:
}
ulen += sizeof(struct udphdr);
- memset(&fl6, 0, sizeof(fl6));
+ memset(fl6, 0, sizeof(*fl6));
if (sin6) {
if (sin6->sin6_port == 0)
return -EINVAL;
- fl6.fl6_dport = sin6->sin6_port;
+ fl6->fl6_dport = sin6->sin6_port;
daddr = &sin6->sin6_addr;
if (np->sndflow) {
- fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
- if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
- flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+ fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
+ flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
if (IS_ERR(flowlabel))
return -EINVAL;
}
@@ -1420,24 +1414,24 @@ do_udp_sendmsg:
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
__ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
- fl6.flowi6_oif = sin6->sin6_scope_id;
+ fl6->flowi6_oif = sin6->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
- fl6.fl6_dport = inet->inet_dport;
+ fl6->fl6_dport = inet->inet_dport;
daddr = &sk->sk_v6_daddr;
- fl6.flowlabel = np->flow_label;
+ fl6->flowlabel = np->flow_label;
connected = true;
}
- if (!fl6.flowi6_oif)
- fl6.flowi6_oif = sk->sk_bound_dev_if;
+ if (!fl6->flowi6_oif)
+ fl6->flowi6_oif = sk->sk_bound_dev_if;
- if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+ if (!fl6->flowi6_oif)
+ fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
- fl6.flowi6_uid = sk->sk_uid;
+ fl6->flowi6_uid = sk->sk_uid;
if (msg->msg_controllen) {
opt = &opt_space;
@@ -1447,14 +1441,14 @@ do_udp_sendmsg:
err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
if (err > 0)
- err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6,
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6,
&ipc6);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
}
- if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
- flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+ if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+ flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
if (IS_ERR(flowlabel))
return -EINVAL;
}
@@ -1471,16 +1465,17 @@ do_udp_sendmsg:
opt = ipv6_fixup_options(&opt_space, opt);
ipc6.opt = opt;
- fl6.flowi6_proto = sk->sk_protocol;
- fl6.flowi6_mark = ipc6.sockc.mark;
- fl6.daddr = *daddr;
- if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
- fl6.saddr = np->saddr;
- fl6.fl6_sport = inet->inet_sport;
+ fl6->flowi6_proto = sk->sk_protocol;
+ fl6->flowi6_mark = ipc6.sockc.mark;
+ fl6->daddr = *daddr;
+ if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr))
+ fl6->saddr = np->saddr;
+ fl6->fl6_sport = inet->inet_sport;
if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
- (struct sockaddr *)sin6, &fl6.saddr);
+ (struct sockaddr *)sin6,
+ &fl6->saddr);
if (err)
goto out_no_dst;
if (sin6) {
@@ -1496,32 +1491,32 @@ do_udp_sendmsg:
err = -EINVAL;
goto out_no_dst;
}
- fl6.fl6_dport = sin6->sin6_port;
- fl6.daddr = sin6->sin6_addr;
+ fl6->fl6_dport = sin6->sin6_port;
+ fl6->daddr = sin6->sin6_addr;
}
}
- if (ipv6_addr_any(&fl6.daddr))
- fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+ if (ipv6_addr_any(&fl6->daddr))
+ fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
- final_p = fl6_update_dst(&fl6, opt, &final);
+ final_p = fl6_update_dst(fl6, opt, &final);
if (final_p)
connected = false;
- if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) {
- fl6.flowi6_oif = np->mcast_oif;
+ if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) {
+ fl6->flowi6_oif = np->mcast_oif;
connected = false;
- } else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ } else if (!fl6->flowi6_oif)
+ fl6->flowi6_oif = np->ucast_oif;
- security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
if (ipc6.tclass < 0)
ipc6.tclass = np->tclass;
- fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+ fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel);
- dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, connected);
+ dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
dst = NULL;
@@ -1529,7 +1524,7 @@ do_udp_sendmsg:
}
if (ipc6.hlimit < 0)
- ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst);
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
@@ -1537,17 +1532,17 @@ back_from_confirm:
/* Lockless fast path for the non-corking case */
if (!corkreq) {
- struct inet_cork_full cork;
struct sk_buff *skb;
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc6,
- &fl6, (struct rt6_info *)dst,
+ (struct rt6_info *)dst,
msg->msg_flags, &cork);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
- err = udp_v6_send_skb(skb, &fl6, &cork.base);
- goto out;
+ err = udp_v6_send_skb(skb, fl6, &cork.base);
+ /* ip6_make_skb steals dst reference */
+ goto out_no_dst;
}
lock_sock(sk);
@@ -1568,7 +1563,7 @@ do_append_data:
ipc6.dontfrag = np->dontfrag;
up->len += ulen;
err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
- &ipc6, &fl6, (struct rt6_info *)dst,
+ &ipc6, fl6, (struct rt6_info *)dst,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_v6_flush_pending_frames(sk);
@@ -1603,7 +1598,7 @@ out_no_dst:
do_confirm:
if (msg->msg_flags & MSG_PROBE)
- dst_confirm_neigh(dst, &fl6.daddr);
+ dst_confirm_neigh(dst, &fl6->daddr);
if (!(msg->msg_flags&MSG_PROBE) || len)
goto back_from_confirm;
err = 0;
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index c921de63b494..f0702d920d8d 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -6,6 +6,7 @@
* Copyright (c) 2021 Google
*/
+#include <linux/compat.h>
#include <linux/if_arp.h>
#include <linux/net.h>
#include <linux/mctp.h>
@@ -21,6 +22,8 @@
/* socket implementation */
+static void mctp_sk_expire_keys(struct timer_list *timer);
+
static int mctp_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -99,13 +102,20 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
struct sk_buff *skb;
if (addr) {
+ const u8 tagbits = MCTP_TAG_MASK | MCTP_TAG_OWNER |
+ MCTP_TAG_PREALLOC;
+
if (addrlen < sizeof(struct sockaddr_mctp))
return -EINVAL;
if (addr->smctp_family != AF_MCTP)
return -EINVAL;
if (!mctp_sockaddr_is_ok(addr))
return -EINVAL;
- if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
+ if (addr->smctp_tag & ~tagbits)
+ return -EINVAL;
+ /* can't preallocate a non-owned tag */
+ if (addr->smctp_tag & MCTP_TAG_PREALLOC &&
+ !(addr->smctp_tag & MCTP_TAG_OWNER))
return -EINVAL;
} else {
@@ -248,6 +258,32 @@ out_free:
return rc;
}
+/* We're done with the key; invalidate, stop reassembly, and remove from lists.
+ */
+static void __mctp_key_remove(struct mctp_sk_key *key, struct net *net,
+ unsigned long flags, unsigned long reason)
+__releases(&key->lock)
+__must_hold(&net->mctp.keys_lock)
+{
+ struct sk_buff *skb;
+
+ trace_mctp_key_release(key, reason);
+ skb = key->reasm_head;
+ key->reasm_head = NULL;
+ key->reasm_dead = true;
+ key->valid = false;
+ mctp_dev_release_key(key->dev, key);
+ spin_unlock_irqrestore(&key->lock, flags);
+
+ hlist_del(&key->hlist);
+ hlist_del(&key->sklist);
+
+ /* unref for the lists */
+ mctp_key_unref(key);
+
+ kfree_skb(skb);
+}
+
static int mctp_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -293,6 +329,115 @@ static int mctp_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
}
+static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct mctp_sk_key *key = NULL;
+ struct mctp_ioc_tag_ctl ctl;
+ unsigned long flags;
+ u8 tag;
+
+ if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl)))
+ return -EFAULT;
+
+ if (ctl.tag)
+ return -EINVAL;
+
+ if (ctl.flags)
+ return -EINVAL;
+
+ key = mctp_alloc_local_tag(msk, ctl.peer_addr, MCTP_ADDR_ANY,
+ true, &tag);
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+
+ ctl.tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC;
+ if (copy_to_user((void __user *)arg, &ctl, sizeof(ctl))) {
+ spin_lock_irqsave(&key->lock, flags);
+ __mctp_key_remove(key, net, flags, MCTP_TRACE_KEY_DROPPED);
+ mctp_key_unref(key);
+ return -EFAULT;
+ }
+
+ mctp_key_unref(key);
+ return 0;
+}
+
+static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct mctp_ioc_tag_ctl ctl;
+ unsigned long flags, fl2;
+ struct mctp_sk_key *key;
+ struct hlist_node *tmp;
+ int rc;
+ u8 tag;
+
+ if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl)))
+ return -EFAULT;
+
+ if (ctl.flags)
+ return -EINVAL;
+
+ /* Must be a local tag, TO set, preallocated */
+ if ((ctl.tag & ~MCTP_TAG_MASK) != (MCTP_TAG_OWNER | MCTP_TAG_PREALLOC))
+ return -EINVAL;
+
+ tag = ctl.tag & MCTP_TAG_MASK;
+ rc = -EINVAL;
+
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
+ /* we do an irqsave here, even though we know the irq state,
+ * so we have the flags to pass to __mctp_key_remove
+ */
+ spin_lock_irqsave(&key->lock, fl2);
+ if (key->manual_alloc &&
+ ctl.peer_addr == key->peer_addr &&
+ tag == key->tag) {
+ __mctp_key_remove(key, net, fl2,
+ MCTP_TRACE_KEY_DROPPED);
+ rc = 0;
+ } else {
+ spin_unlock_irqrestore(&key->lock, fl2);
+ }
+ }
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ return rc;
+}
+
+static int mctp_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk);
+
+ switch (cmd) {
+ case SIOCMCTPALLOCTAG:
+ return mctp_ioctl_alloctag(msk, arg);
+ case SIOCMCTPDROPTAG:
+ return mctp_ioctl_droptag(msk, arg);
+ }
+
+ return -EINVAL;
+}
+
+#ifdef CONFIG_COMPAT
+static int mctp_compat_ioctl(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ void __user *argp = compat_ptr(arg);
+
+ switch (cmd) {
+ /* These have compatible ptr layouts */
+ case SIOCMCTPALLOCTAG:
+ case SIOCMCTPDROPTAG:
+ return mctp_ioctl(sock, cmd, (unsigned long)argp);
+ }
+
+ return -ENOIOCTLCMD;
+}
+#endif
+
static const struct proto_ops mctp_dgram_ops = {
.family = PF_MCTP,
.release = mctp_release,
@@ -302,7 +447,7 @@ static const struct proto_ops mctp_dgram_ops = {
.accept = sock_no_accept,
.getname = sock_no_getname,
.poll = datagram_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = mctp_ioctl,
.gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -312,6 +457,9 @@ static const struct proto_ops mctp_dgram_ops = {
.recvmsg = mctp_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = mctp_compat_ioctl,
+#endif
};
static void mctp_sk_expire_keys(struct timer_list *timer)
@@ -319,7 +467,7 @@ static void mctp_sk_expire_keys(struct timer_list *timer)
struct mctp_sock *msk = container_of(timer, struct mctp_sock,
key_expiry);
struct net *net = sock_net(&msk->sk);
- unsigned long next_expiry, flags;
+ unsigned long next_expiry, flags, fl2;
struct mctp_sk_key *key;
struct hlist_node *tmp;
bool next_expiry_valid = false;
@@ -327,15 +475,16 @@ static void mctp_sk_expire_keys(struct timer_list *timer)
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
- spin_lock(&key->lock);
+ /* don't expire. manual_alloc is immutable, no locking
+ * required.
+ */
+ if (key->manual_alloc)
+ continue;
+ spin_lock_irqsave(&key->lock, fl2);
if (!time_after_eq(key->expiry, jiffies)) {
- trace_mctp_key_release(key, MCTP_TRACE_KEY_TIMEOUT);
- key->valid = false;
- hlist_del_rcu(&key->hlist);
- hlist_del_rcu(&key->sklist);
- spin_unlock(&key->lock);
- mctp_key_unref(key);
+ __mctp_key_remove(key, net, fl2,
+ MCTP_TRACE_KEY_TIMEOUT);
continue;
}
@@ -346,7 +495,7 @@ static void mctp_sk_expire_keys(struct timer_list *timer)
next_expiry = key->expiry;
next_expiry_valid = true;
}
- spin_unlock(&key->lock);
+ spin_unlock_irqrestore(&key->lock, fl2);
}
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
@@ -387,9 +536,9 @@ static void mctp_sk_unhash(struct sock *sk)
{
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct net *net = sock_net(sk);
+ unsigned long flags, fl2;
struct mctp_sk_key *key;
struct hlist_node *tmp;
- unsigned long flags;
/* remove from any type-based binds */
mutex_lock(&net->mctp.bind_lock);
@@ -399,20 +548,8 @@ static void mctp_sk_unhash(struct sock *sk)
/* remove tag allocations */
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
- hlist_del(&key->sklist);
- hlist_del(&key->hlist);
-
- trace_mctp_key_release(key, MCTP_TRACE_KEY_CLOSED);
-
- spin_lock(&key->lock);
- kfree_skb(key->reasm_head);
- key->reasm_head = NULL;
- key->reasm_dead = true;
- key->valid = false;
- spin_unlock(&key->lock);
-
- /* key is no longer on the lookup lists, unref */
- mctp_key_unref(key);
+ spin_lock_irqsave(&key->lock, fl2);
+ __mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_CLOSED);
}
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
}
diff --git a/net/mctp/device.c b/net/mctp/device.c
index ef2755f82f87..02ddc0f1bd3e 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -6,6 +6,7 @@
* Copyright (c) 2021 Google
*/
+#include <linux/if_arp.h>
#include <linux/if_link.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 8d9f4ff3e285..17e3482aa770 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -64,8 +64,7 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
if (msk->bind_type != type)
continue;
- if (msk->bind_addr != MCTP_ADDR_ANY &&
- msk->bind_addr != mh->dest)
+ if (!mctp_address_matches(msk->bind_addr, mh->dest))
continue;
return msk;
@@ -77,7 +76,7 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
mctp_eid_t peer, u8 tag)
{
- if (key->local_addr != local)
+ if (!mctp_address_matches(key->local_addr, local))
return false;
if (key->peer_addr != peer)
@@ -204,29 +203,38 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
return rc;
}
-/* We're done with the key; unset valid and remove from lists. There may still
- * be outstanding refs on the key though...
+/* Helper for mctp_route_input().
+ * We're done with the key; unlock and unref the key.
+ * For the usual case of automatic expiry we remove the key from lists.
+ * In the case that manual allocation is set on a key we release the lock
+ * and local ref, reset reassembly, but don't remove from lists.
*/
-static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
- unsigned long flags)
- __releases(&key->lock)
+static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net,
+ unsigned long flags, unsigned long reason)
+__releases(&key->lock)
{
struct sk_buff *skb;
+ trace_mctp_key_release(key, reason);
skb = key->reasm_head;
key->reasm_head = NULL;
- key->reasm_dead = true;
- key->valid = false;
- mctp_dev_release_key(key->dev, key);
+
+ if (!key->manual_alloc) {
+ key->reasm_dead = true;
+ key->valid = false;
+ mctp_dev_release_key(key->dev, key);
+ }
spin_unlock_irqrestore(&key->lock, flags);
- spin_lock_irqsave(&net->mctp.keys_lock, flags);
- hlist_del(&key->hlist);
- hlist_del(&key->sklist);
- spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+ if (!key->manual_alloc) {
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ hlist_del(&key->hlist);
+ hlist_del(&key->sklist);
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
- /* one unref for the lists */
- mctp_key_unref(key);
+ /* unref for the lists */
+ mctp_key_unref(key);
+ }
/* and one for the local reference */
mctp_key_unref(key);
@@ -380,9 +388,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
/* we've hit a pending reassembly; not much we
* can do but drop it
*/
- trace_mctp_key_release(key,
- MCTP_TRACE_KEY_REPLIED);
- __mctp_key_unlock_drop(key, net, f);
+ __mctp_key_done_in(key, net, f,
+ MCTP_TRACE_KEY_REPLIED);
key = NULL;
}
rc = 0;
@@ -424,9 +431,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
} else {
if (key->reasm_head || key->reasm_dead) {
/* duplicate start? drop everything */
- trace_mctp_key_release(key,
- MCTP_TRACE_KEY_INVALIDATED);
- __mctp_key_unlock_drop(key, net, f);
+ __mctp_key_done_in(key, net, f,
+ MCTP_TRACE_KEY_INVALIDATED);
rc = -EEXIST;
key = NULL;
} else {
@@ -449,10 +455,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* the reassembly/response key
*/
if (!rc && flags & MCTP_HDR_FLAG_EOM) {
+ msk = container_of(key->sk, struct mctp_sock, sk);
sock_queue_rcv_skb(key->sk, key->reasm_head);
key->reasm_head = NULL;
- trace_mctp_key_release(key, MCTP_TRACE_KEY_REPLIED);
- __mctp_key_unlock_drop(key, net, f);
+ __mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
key = NULL;
}
@@ -580,9 +586,9 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
* it for the socket msk
*/
-static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
- mctp_eid_t saddr,
- mctp_eid_t daddr, u8 *tagp)
+struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
+ mctp_eid_t daddr, mctp_eid_t saddr,
+ bool manual, u8 *tagp)
{
struct net *net = sock_net(&msk->sk);
struct netns_mctp *mns = &net->mctp;
@@ -616,9 +622,8 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
if (tmp->tag & MCTP_HDR_FLAG_TO)
continue;
- if (!((tmp->peer_addr == daddr ||
- tmp->peer_addr == MCTP_ADDR_ANY) &&
- tmp->local_addr == saddr))
+ if (!(mctp_address_matches(tmp->peer_addr, daddr) &&
+ mctp_address_matches(tmp->local_addr, saddr)))
continue;
spin_lock(&tmp->lock);
@@ -638,6 +643,7 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
mctp_reserve_tag(net, key, msk);
trace_mctp_key_acquire(key);
+ key->manual_alloc = manual;
*tagp = key->tag;
}
@@ -651,6 +657,50 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
return key;
}
+static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
+ mctp_eid_t daddr,
+ u8 req_tag, u8 *tagp)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct netns_mctp *mns = &net->mctp;
+ struct mctp_sk_key *key, *tmp;
+ unsigned long flags;
+
+ req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER);
+ key = NULL;
+
+ spin_lock_irqsave(&mns->keys_lock, flags);
+
+ hlist_for_each_entry(tmp, &mns->keys, hlist) {
+ if (tmp->tag != req_tag)
+ continue;
+
+ if (!mctp_address_matches(tmp->peer_addr, daddr))
+ continue;
+
+ if (!tmp->manual_alloc)
+ continue;
+
+ spin_lock(&tmp->lock);
+ if (tmp->valid) {
+ key = tmp;
+ refcount_inc(&key->refs);
+ spin_unlock(&tmp->lock);
+ break;
+ }
+ spin_unlock(&tmp->lock);
+ }
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ if (!key)
+ return ERR_PTR(-ENOENT);
+
+ if (tagp)
+ *tagp = key->tag;
+
+ return key;
+}
+
/* routing lookups */
static bool mctp_rt_match_eid(struct mctp_route *rt,
unsigned int net, mctp_eid_t eid)
@@ -845,8 +895,14 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
if (rc)
goto out_release;
- if (req_tag & MCTP_HDR_FLAG_TO) {
- key = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
+ if (req_tag & MCTP_TAG_OWNER) {
+ if (req_tag & MCTP_TAG_PREALLOC)
+ key = mctp_lookup_prealloc_tag(msk, daddr,
+ req_tag, &tag);
+ else
+ key = mctp_alloc_local_tag(msk, daddr, saddr,
+ false, &tag);
+
if (IS_ERR(key)) {
rc = PTR_ERR(key);
goto out_release;
@@ -857,7 +913,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
tag |= MCTP_HDR_FLAG_TO;
} else {
key = NULL;
- tag = req_tag;
+ tag = req_tag & MCTP_TAG_MASK;
}
skb->protocol = htons(ETH_P_MCTP);
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 750f9f9b4daf..61205cf40074 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -369,14 +369,15 @@ static void mctp_test_route_input_sk(struct kunit *test)
#define FL_S (MCTP_HDR_FLAG_SOM)
#define FL_E (MCTP_HDR_FLAG_EOM)
-#define FL_T (MCTP_HDR_FLAG_TO)
+#define FL_TO (MCTP_HDR_FLAG_TO)
+#define FL_T(t) ((t) & MCTP_HDR_TAG_MASK)
static const struct mctp_route_input_sk_test mctp_route_input_sk_tests[] = {
- { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 0, .deliver = true },
- { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 1, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO), .type = 0, .deliver = true },
+ { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO), .type = 1, .deliver = false },
{ .hdr = RX_HDR(1, 10, 8, FL_S | FL_E), .type = 0, .deliver = false },
- { .hdr = RX_HDR(1, 10, 8, FL_E | FL_T), .type = 0, .deliver = false },
- { .hdr = RX_HDR(1, 10, 8, FL_T), .type = 0, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, FL_E | FL_TO), .type = 0, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, FL_TO), .type = 0, .deliver = false },
{ .hdr = RX_HDR(1, 10, 8, 0), .type = 0, .deliver = false },
};
@@ -436,7 +437,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test)
__mctp_route_test_fini(test, dev, rt, sock);
}
-#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_T | (f) | ((s) << MCTP_HDR_SEQ_SHIFT))
+#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_TO | (f) | ((s) << MCTP_HDR_SEQ_SHIFT))
static const struct mctp_route_input_sk_reasm_test mctp_route_input_sk_reasm_tests[] = {
{
@@ -522,12 +523,156 @@ static void mctp_route_input_sk_reasm_to_desc(
KUNIT_ARRAY_PARAM(mctp_route_input_sk_reasm, mctp_route_input_sk_reasm_tests,
mctp_route_input_sk_reasm_to_desc);
+struct mctp_route_input_sk_keys_test {
+ const char *name;
+ mctp_eid_t key_peer_addr;
+ mctp_eid_t key_local_addr;
+ u8 key_tag;
+ struct mctp_hdr hdr;
+ bool deliver;
+};
+
+/* test packet rx in the presence of various key configurations */
+static void mctp_test_route_input_sk_keys(struct kunit *test)
+{
+ const struct mctp_route_input_sk_keys_test *params;
+ struct mctp_test_route *rt;
+ struct sk_buff *skb, *skb2;
+ struct mctp_test_dev *dev;
+ struct mctp_sk_key *key;
+ struct netns_mctp *mns;
+ struct mctp_sock *msk;
+ struct socket *sock;
+ unsigned long flags;
+ int rc;
+ u8 c;
+
+ params = test->param_value;
+
+ dev = mctp_test_create_dev();
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+ rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
+
+ rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ msk = container_of(sock->sk, struct mctp_sock, sk);
+ mns = &sock_net(sock->sk)->mctp;
+
+ /* set the incoming tag according to test params */
+ key = mctp_key_alloc(msk, params->key_local_addr, params->key_peer_addr,
+ params->key_tag, GFP_KERNEL);
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, key);
+
+ spin_lock_irqsave(&mns->keys_lock, flags);
+ mctp_reserve_tag(&init_net, key, msk);
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ /* create packet and route */
+ c = 0;
+ skb = mctp_test_create_skb_data(&params->hdr, &c);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
+
+ skb->dev = dev->ndev;
+ __mctp_cb(skb);
+
+ rc = mctp_route_input(&rt->rt, skb);
+
+ /* (potentially) receive message */
+ skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+
+ if (params->deliver)
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
+ else
+ KUNIT_EXPECT_PTR_EQ(test, skb2, NULL);
+
+ if (skb2)
+ skb_free_datagram(sock->sk, skb2);
+
+ mctp_key_unref(key);
+ __mctp_route_test_fini(test, dev, rt, sock);
+}
+
+static const struct mctp_route_input_sk_keys_test mctp_route_input_sk_keys_tests[] = {
+ {
+ .name = "direct match",
+ .key_peer_addr = 9,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = true,
+ },
+ {
+ .name = "flipped src/dest",
+ .key_peer_addr = 8,
+ .key_local_addr = 9,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = false,
+ },
+ {
+ .name = "peer addr mismatch",
+ .key_peer_addr = 9,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = false,
+ },
+ {
+ .name = "tag value mismatch",
+ .key_peer_addr = 9,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(2)),
+ .deliver = false,
+ },
+ {
+ .name = "TO mismatch",
+ .key_peer_addr = 9,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1) | FL_TO),
+ .deliver = false,
+ },
+ {
+ .name = "broadcast response",
+ .key_peer_addr = MCTP_ADDR_ANY,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 11, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = true,
+ },
+ {
+ .name = "any local match",
+ .key_peer_addr = 12,
+ .key_local_addr = MCTP_ADDR_ANY,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 12, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = true,
+ },
+};
+
+static void mctp_route_input_sk_keys_to_desc(
+ const struct mctp_route_input_sk_keys_test *t,
+ char *desc)
+{
+ sprintf(desc, "%s", t->name);
+}
+
+KUNIT_ARRAY_PARAM(mctp_route_input_sk_keys, mctp_route_input_sk_keys_tests,
+ mctp_route_input_sk_keys_to_desc);
+
static struct kunit_case mctp_test_cases[] = {
KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params),
KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params),
KUNIT_CASE_PARAM(mctp_test_route_input_sk, mctp_route_input_sk_gen_params),
KUNIT_CASE_PARAM(mctp_test_route_input_sk_reasm,
mctp_route_input_sk_reasm_gen_params),
+ KUNIT_CASE_PARAM(mctp_test_route_input_sk_keys,
+ mctp_route_input_sk_keys_gen_params),
{}
};
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 645dd984fef0..3e82ac24d548 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -336,6 +336,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
flags = *ptr++;
mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
mp_opt->reset_reason = *ptr;
+ pr_debug("MP_RST: transient=%u reason=%u",
+ mp_opt->reset_transient, mp_opt->reset_reason);
break;
case MPTCPOPT_MP_FAIL:
@@ -1264,22 +1266,30 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
- if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) {
- const struct sock *ssk = (const struct sock *)tp;
- struct mptcp_subflow_context *subflow;
-
- subflow = mptcp_subflow_ctx(ssk);
- subflow->send_mp_fail = 0;
-
- *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
- TCPOLEN_MPTCP_FAIL,
- 0, 0);
- put_unaligned_be64(opts->fail_seq, ptr);
- ptr += 2;
- }
-
- /* DSS, MPC, MPJ, ADD_ADDR, FASTCLOSE and RST are mutually exclusive,
- * see mptcp_established_options*()
+ const struct sock *ssk = (const struct sock *)tp;
+ struct mptcp_subflow_context *subflow;
+
+ /* Which options can be used together?
+ *
+ * X: mutually exclusive
+ * O: often used together
+ * C: can be used together in some cases
+ * P: could be used together but we prefer not to (optimisations)
+ *
+ * Opt: | MPC | MPJ | DSS | ADD | RM | PRIO | FAIL | FC |
+ * ------|------|------|------|------|------|------|------|------|
+ * MPC |------|------|------|------|------|------|------|------|
+ * MPJ | X |------|------|------|------|------|------|------|
+ * DSS | X | X |------|------|------|------|------|------|
+ * ADD | X | X | P |------|------|------|------|------|
+ * RM | C | C | C | P |------|------|------|------|
+ * PRIO | X | C | C | C | C |------|------|------|
+ * FAIL | X | X | C | X | X | X |------|------|
+ * FC | X | X | X | X | X | X | X |------|
+ * RST | X | X | X | X | X | X | O | O |
+ * ------|------|------|------|------|------|------|------|------|
+ *
+ * The same applies in mptcp_established_options() function.
*/
if (likely(OPTION_MPTCP_DSS & opts->suboptions)) {
struct mptcp_ext *mpext = &opts->ext_copy;
@@ -1336,6 +1346,10 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
}
ptr += 1;
}
+
+ /* We might need to add MP_FAIL options in rare cases */
+ if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions))
+ goto mp_fail;
} else if (OPTIONS_MPTCP_MPC & opts->suboptions) {
u8 len, flag = MPTCP_CAP_HMAC_SHA256;
@@ -1479,6 +1493,21 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
if (OPTION_MPTCP_RST & opts->suboptions)
goto mp_rst;
return;
+ } else if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) {
+mp_fail:
+ /* MP_FAIL is mutually exclusive with others except RST */
+ subflow = mptcp_subflow_ctx(ssk);
+ subflow->send_mp_fail = 0;
+
+ *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+ TCPOLEN_MPTCP_FAIL,
+ 0, 0);
+ put_unaligned_be64(opts->fail_seq, ptr);
+ ptr += 2;
+
+ if (OPTION_MPTCP_RST & opts->suboptions)
+ goto mp_rst;
+ return;
} else if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
mp_rst:
*ptr++ = mptcp_option(MPTCPOPT_RST,
@@ -1489,9 +1518,6 @@ mp_rst:
}
if (OPTION_MPTCP_PRIO & opts->suboptions) {
- const struct sock *ssk = (const struct sock *)tp;
- struct mptcp_subflow_context *subflow;
-
subflow = mptcp_subflow_ctx(ssk);
subflow->send_mp_prio = 0;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 356f596e2032..e4fd54fff1d2 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1178,14 +1178,8 @@ skip_family:
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
- if (tb[MPTCP_PM_ADDR_ATTR_PORT]) {
- if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
- NL_SET_ERR_MSG_ATTR(info->extack, attr,
- "flags must have signal when using port");
- return -EINVAL;
- }
+ if (tb[MPTCP_PM_ADDR_ATTR_PORT])
entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
- }
return 0;
}
@@ -1231,6 +1225,11 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
+ if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ GENL_SET_ERR_MSG(info, "flags must have signal when using port");
+ return -EINVAL;
+ }
+
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry) {
GENL_SET_ERR_MSG(info, "can't allocate addr");
@@ -1732,9 +1731,20 @@ fail:
return -EMSGSIZE;
}
-static int mptcp_nl_addr_backup(struct net *net,
- struct mptcp_addr_info *addr,
- u8 bkup)
+static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr)
+{
+ struct mptcp_rm_list list = { .nr = 0 };
+
+ list.ids[list.nr++] = addr->id;
+
+ mptcp_pm_nl_rm_subflow_received(msk, &list);
+ mptcp_pm_create_subflow_or_signal_addr(msk);
+}
+
+static int mptcp_nl_set_flags(struct net *net,
+ struct mptcp_addr_info *addr,
+ u8 bkup, u8 changed)
{
long s_slot = 0, s_num = 0;
struct mptcp_sock *msk;
@@ -1748,7 +1758,10 @@ static int mptcp_nl_addr_backup(struct net *net,
lock_sock(sk);
spin_lock_bh(&msk->pm.lock);
- ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup);
+ if (changed & MPTCP_PM_ADDR_FLAG_BACKUP)
+ ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup);
+ if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)
+ mptcp_pm_nl_fullmesh(msk, addr);
spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
@@ -1765,6 +1778,8 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
+ u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
+ MPTCP_PM_ADDR_FLAG_FULLMESH;
struct net *net = sock_net(skb->sk);
u8 bkup = 0, lookup_by_id = 0;
int ret;
@@ -1787,15 +1802,18 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
spin_unlock_bh(&pernet->lock);
return -EINVAL;
}
+ if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
+ (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ spin_unlock_bh(&pernet->lock);
+ return -EINVAL;
+ }
- if (bkup)
- entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
- else
- entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+ changed = (addr.flags ^ entry->flags) & mask;
+ entry->flags = (entry->flags & ~mask) | (addr.flags & mask);
addr = *entry;
spin_unlock_bh(&pernet->lock);
- mptcp_nl_addr_backup(net, &addr.addr, bkup);
+ mptcp_nl_set_flags(net, &addr.addr, bkup, changed);
return 0;
}
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a135b1a46014..238b6a620e88 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
+ifeq ($(CONFIG_NF_CONNTRACK),m)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o
+else ifeq ($(CONFIG_NF_CONNTRACK),y)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
+endif
obj-$(CONFIG_NETFILTER) = netfilter.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 354cb472f386..d1c9dfbb11fa 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -621,7 +621,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
case NF_ACCEPT:
break;
case NF_DROP:
- kfree_skb(skb);
+ kfree_skb_reason(skb,
+ SKB_DROP_REASON_NETFILTER_DROP);
ret = NF_DROP_GETERR(verdict);
if (ret == 0)
ret = -EPERM;
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 91bc8df3e4b0..385a5f458aba 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -22,26 +22,7 @@ static bool nf_ct_acct __read_mostly;
module_param_named(acct, nf_ct_acct, bool, 0644);
MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
-static const struct nf_ct_ext_type acct_extend = {
- .len = sizeof(struct nf_conn_acct),
- .align = __alignof__(struct nf_conn_acct),
- .id = NF_CT_EXT_ACCT,
-};
-
void nf_conntrack_acct_pernet_init(struct net *net)
{
net->ct.sysctl_acct = nf_ct_acct;
}
-
-int nf_conntrack_acct_init(void)
-{
- int ret = nf_ct_extend_register(&acct_extend);
- if (ret < 0)
- pr_err("Unable to register extension\n");
- return ret;
-}
-
-void nf_conntrack_acct_fini(void)
-{
- nf_ct_extend_unregister(&acct_extend);
-}
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
new file mode 100644
index 000000000000..8ad3f52579f3
--- /dev/null
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable Conntrack Helpers for XDP and TC-BPF hook
+ *
+ * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/types.h>
+#include <linux/btf_ids.h>
+#include <linux/net_namespace.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+/* bpf_ct_opts - Options for CT lookup helpers
+ *
+ * Members:
+ * @netns_id - Specify the network namespace for lookup
+ * Values:
+ * BPF_F_CURRENT_NETNS (-1)
+ * Use namespace associated with ctx (xdp_md, __sk_buff)
+ * [0, S32_MAX]
+ * Network Namespace ID
+ * @error - Out parameter, set for any errors encountered
+ * Values:
+ * -EINVAL - Passed NULL for bpf_tuple pointer
+ * -EINVAL - opts->reserved is not 0
+ * -EINVAL - netns_id is less than -1
+ * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
+ * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
+ * -ENONET - No network namespace found for netns_id
+ * -ENOENT - Conntrack lookup could not find entry for tuple
+ * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
+ * or sizeof(tuple->ipv6)
+ * @l4proto - Layer 4 protocol
+ * Values:
+ * IPPROTO_TCP, IPPROTO_UDP
+ * @reserved - Reserved member, will be reused for more options in future
+ * Values:
+ * 0
+ */
+struct bpf_ct_opts {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 reserved[3];
+};
+
+enum {
+ NF_BPF_CT_OPTS_SZ = 12,
+};
+
+static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
+ struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple_len, u8 protonum,
+ s32 netns_id)
+{
+ struct nf_conntrack_tuple_hash *hash;
+ struct nf_conntrack_tuple tuple;
+
+ if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
+ return ERR_PTR(-EPROTO);
+ if (unlikely(netns_id < BPF_F_CURRENT_NETNS))
+ return ERR_PTR(-EINVAL);
+
+ memset(&tuple, 0, sizeof(tuple));
+ switch (tuple_len) {
+ case sizeof(bpf_tuple->ipv4):
+ tuple.src.l3num = AF_INET;
+ tuple.src.u3.ip = bpf_tuple->ipv4.saddr;
+ tuple.src.u.tcp.port = bpf_tuple->ipv4.sport;
+ tuple.dst.u3.ip = bpf_tuple->ipv4.daddr;
+ tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport;
+ break;
+ case sizeof(bpf_tuple->ipv6):
+ tuple.src.l3num = AF_INET6;
+ memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
+ tuple.src.u.tcp.port = bpf_tuple->ipv6.sport;
+ memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
+ tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport;
+ break;
+ default:
+ return ERR_PTR(-EAFNOSUPPORT);
+ }
+
+ tuple.dst.protonum = protonum;
+
+ if (netns_id >= 0) {
+ net = get_net_ns_by_id(net, netns_id);
+ if (unlikely(!net))
+ return ERR_PTR(-ENONET);
+ }
+
+ hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+ if (netns_id >= 0)
+ put_net(net);
+ if (!hash)
+ return ERR_PTR(-ENOENT);
+ return nf_ct_tuplehash_to_ctrack(hash);
+}
+
+__diag_push();
+__diag_ignore(GCC, 8, "-Wmissing-prototypes",
+ "Global functions as their definitions will be in nf_conntrack BTF");
+
+/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ * reference to it
+ *
+ * Parameters:
+ * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for lookup (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+ struct net *caller_net;
+ struct nf_conn *nfct;
+
+ BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+ if (!opts)
+ return NULL;
+ if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+ opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+ caller_net = dev_net(ctx->rxq->dev);
+ nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
+ opts->netns_id);
+ if (IS_ERR(nfct)) {
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ * reference to it
+ *
+ * Parameters:
+ * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for lookup (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct net *caller_net;
+ struct nf_conn *nfct;
+
+ BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+ if (!opts)
+ return NULL;
+ if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+ opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+ caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+ nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
+ opts->netns_id);
+ if (IS_ERR(nfct)) {
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_ct_release - Release acquired nf_conn object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
+ * the program if any references remain in the program in all of the explored
+ * states.
+ *
+ * Parameters:
+ * @nf_conn - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ */
+void bpf_ct_release(struct nf_conn *nfct)
+{
+ if (!nfct)
+ return;
+ nf_ct_put(nfct);
+}
+
+__diag_pop()
+
+BTF_SET_START(nf_ct_xdp_check_kfunc_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_xdp_check_kfunc_ids)
+
+BTF_SET_START(nf_ct_tc_check_kfunc_ids)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_tc_check_kfunc_ids)
+
+BTF_SET_START(nf_ct_acquire_kfunc_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_SET_END(nf_ct_acquire_kfunc_ids)
+
+BTF_SET_START(nf_ct_release_kfunc_ids)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_release_kfunc_ids)
+
+/* Both sets are identical */
+#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids
+
+static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &nf_ct_xdp_check_kfunc_ids,
+ .acquire_set = &nf_ct_acquire_kfunc_ids,
+ .release_set = &nf_ct_release_kfunc_ids,
+ .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+};
+
+static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &nf_ct_tc_check_kfunc_ids,
+ .acquire_set = &nf_ct_acquire_kfunc_ids,
+ .release_set = &nf_ct_release_kfunc_ids,
+ .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+};
+
+int register_nf_conntrack_bpf(void)
+{
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set);
+ return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set);
+}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d6aa5b47031e..9b7f9c966f73 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -34,10 +34,10 @@
#include <linux/rculist_nulls.h>
#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
@@ -47,7 +47,6 @@
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
-#include <net/netfilter/nf_conntrack_act_ct.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
@@ -594,7 +593,7 @@ EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
- nf_ct_ext_destroy(tmpl);
+ kfree(tmpl->ext);
if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK)
kfree((char *)tmpl - tmpl->proto.tmpl_padto);
@@ -1597,7 +1596,17 @@ void nf_conntrack_free(struct nf_conn *ct)
*/
WARN_ON(refcount_read(&ct->ct_general.use) != 0);
- nf_ct_ext_destroy(ct);
+ if (ct->status & IPS_SRC_NAT_DONE) {
+ const struct nf_nat_hook *nat_hook;
+
+ rcu_read_lock();
+ nat_hook = rcu_dereference(nf_nat_hook);
+ if (nat_hook)
+ nat_hook->remove_nat_bysrc(ct);
+ rcu_read_unlock();
+ }
+
+ kfree(ct->ext);
kmem_cache_free(nf_conntrack_cachep, ct);
cnet = nf_ct_pernet(net);
@@ -2467,13 +2476,7 @@ void nf_conntrack_cleanup_end(void)
kvfree(nf_conntrack_hash);
nf_conntrack_proto_fini();
- nf_conntrack_seqadj_fini();
- nf_conntrack_labels_fini();
nf_conntrack_helper_fini();
- nf_conntrack_timeout_fini();
- nf_conntrack_ecache_fini();
- nf_conntrack_tstamp_fini();
- nf_conntrack_acct_fini();
nf_conntrack_expect_fini();
kmem_cache_destroy(nf_conntrack_cachep);
@@ -2628,39 +2631,6 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
return nf_conntrack_hash_resize(hashsize);
}
-static __always_inline unsigned int total_extension_size(void)
-{
- /* remember to add new extensions below */
- BUILD_BUG_ON(NF_CT_EXT_NUM > 10);
-
- return sizeof(struct nf_ct_ext) +
- sizeof(struct nf_conn_help)
-#if IS_ENABLED(CONFIG_NF_NAT)
- + sizeof(struct nf_conn_nat)
-#endif
- + sizeof(struct nf_conn_seqadj)
- + sizeof(struct nf_conn_acct)
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
- + sizeof(struct nf_conntrack_ecache)
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
- + sizeof(struct nf_conn_tstamp)
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- + sizeof(struct nf_conn_timeout)
-#endif
-#ifdef CONFIG_NF_CONNTRACK_LABELS
- + sizeof(struct nf_conn_labels)
-#endif
-#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
- + sizeof(struct nf_conn_synproxy)
-#endif
-#if IS_ENABLED(CONFIG_NET_ACT_CT)
- + sizeof(struct nf_conn_act_ct_ext)
-#endif
- ;
-};
-
int nf_conntrack_init_start(void)
{
unsigned long nr_pages = totalram_pages();
@@ -2668,9 +2638,6 @@ int nf_conntrack_init_start(void)
int ret = -ENOMEM;
int i;
- /* struct nf_ct_ext uses u8 to store offsets/size */
- BUILD_BUG_ON(total_extension_size() > 255u);
-
seqcount_spinlock_init(&nf_conntrack_generation,
&nf_conntrack_locks_all_lock);
@@ -2715,34 +2682,10 @@ int nf_conntrack_init_start(void)
if (ret < 0)
goto err_expect;
- ret = nf_conntrack_acct_init();
- if (ret < 0)
- goto err_acct;
-
- ret = nf_conntrack_tstamp_init();
- if (ret < 0)
- goto err_tstamp;
-
- ret = nf_conntrack_ecache_init();
- if (ret < 0)
- goto err_ecache;
-
- ret = nf_conntrack_timeout_init();
- if (ret < 0)
- goto err_timeout;
-
ret = nf_conntrack_helper_init();
if (ret < 0)
goto err_helper;
- ret = nf_conntrack_labels_init();
- if (ret < 0)
- goto err_labels;
-
- ret = nf_conntrack_seqadj_init();
- if (ret < 0)
- goto err_seqadj;
-
ret = nf_conntrack_proto_init();
if (ret < 0)
goto err_proto;
@@ -2750,23 +2693,18 @@ int nf_conntrack_init_start(void)
conntrack_gc_work_init(&conntrack_gc_work);
queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
+ ret = register_nf_conntrack_bpf();
+ if (ret < 0)
+ goto err_kfunc;
+
return 0;
+err_kfunc:
+ cancel_delayed_work_sync(&conntrack_gc_work.dwork);
+ nf_conntrack_proto_fini();
err_proto:
- nf_conntrack_seqadj_fini();
-err_seqadj:
- nf_conntrack_labels_fini();
-err_labels:
nf_conntrack_helper_fini();
err_helper:
- nf_conntrack_timeout_fini();
-err_timeout:
- nf_conntrack_ecache_fini();
-err_ecache:
- nf_conntrack_tstamp_fini();
-err_tstamp:
- nf_conntrack_acct_fini();
-err_acct:
nf_conntrack_expect_fini();
err_expect:
kmem_cache_destroy(nf_conntrack_cachep);
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 41768ff19464..07e65b4e92f8 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -131,13 +131,13 @@ static void ecache_work(struct work_struct *work)
}
static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
- const unsigned int events,
- const unsigned long missed,
+ const u32 events,
+ const u32 missed,
const struct nf_ct_event *item)
{
- struct nf_conn *ct = item->ct;
struct net *net = nf_ct_net(item->ct);
struct nf_ct_event_notifier *notify;
+ u32 old, want;
int ret;
if (!((events | missed) & e->ctmask))
@@ -157,12 +157,13 @@ static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
if (likely(ret >= 0 && missed == 0))
return 0;
- spin_lock_bh(&ct->lock);
- if (ret < 0)
- e->missed |= events;
- else
- e->missed &= ~missed;
- spin_unlock_bh(&ct->lock);
+ do {
+ old = READ_ONCE(e->missed);
+ if (ret < 0)
+ want = old | events;
+ else
+ want = old & ~missed;
+ } while (cmpxchg(&e->missed, old, want) != old);
return ret;
}
@@ -172,7 +173,7 @@ int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
{
struct nf_conntrack_ecache *e;
struct nf_ct_event item;
- unsigned long missed;
+ unsigned int missed;
int ret;
if (!nf_ct_is_confirmed(ct))
@@ -211,7 +212,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
struct nf_conntrack_ecache *e;
struct nf_ct_event item;
- unsigned long events;
+ unsigned int events;
if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
return;
@@ -304,12 +305,6 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
#define NF_CT_EVENTS_DEFAULT 1
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
-static const struct nf_ct_ext_type event_extend = {
- .len = sizeof(struct nf_conntrack_ecache),
- .align = __alignof__(struct nf_conntrack_ecache),
- .id = NF_CT_EXT_ECACHE,
-};
-
void nf_conntrack_ecache_pernet_init(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
@@ -317,6 +312,8 @@ void nf_conntrack_ecache_pernet_init(struct net *net)
net->ct.sysctl_events = nf_ct_events;
cnet->ct_net = &net->ct;
INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work);
+
+ BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */
}
void nf_conntrack_ecache_pernet_fini(struct net *net)
@@ -325,19 +322,3 @@ void nf_conntrack_ecache_pernet_fini(struct net *net)
cancel_delayed_work_sync(&cnet->ecache_dwork);
}
-
-int nf_conntrack_ecache_init(void)
-{
- int ret = nf_ct_extend_register(&event_extend);
- if (ret < 0)
- pr_err("Unable to register event extension\n");
-
- BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */
-
- return ret;
-}
-
-void nf_conntrack_ecache_fini(void)
-{
- nf_ct_extend_unregister(&event_extend);
-}
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 3dbe2329c3f1..1296fda54ac6 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -13,40 +13,90 @@
#include <linux/skbuff.h>
#include <net/netfilter/nf_conntrack_extend.h>
-static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM];
-static DEFINE_MUTEX(nf_ct_ext_type_mutex);
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_act_ct.h>
+#include <net/netfilter/nf_nat.h>
+
#define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */
-void nf_ct_ext_destroy(struct nf_conn *ct)
+static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = {
+ [NF_CT_EXT_HELPER] = sizeof(struct nf_conn_help),
+#if IS_ENABLED(CONFIG_NF_NAT)
+ [NF_CT_EXT_NAT] = sizeof(struct nf_conn_nat),
+#endif
+ [NF_CT_EXT_SEQADJ] = sizeof(struct nf_conn_seqadj),
+ [NF_CT_EXT_ACCT] = sizeof(struct nf_conn_acct),
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ [NF_CT_EXT_ECACHE] = sizeof(struct nf_conntrack_ecache),
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_acct),
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_tstamp),
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ [NF_CT_EXT_LABELS] = sizeof(struct nf_conn_labels),
+#endif
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ [NF_CT_EXT_SYNPROXY] = sizeof(struct nf_conn_synproxy),
+#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ [NF_CT_EXT_ACT_CT] = sizeof(struct nf_conn_act_ct_ext),
+#endif
+};
+
+static __always_inline unsigned int total_extension_size(void)
{
- unsigned int i;
- struct nf_ct_ext_type *t;
-
- for (i = 0; i < NF_CT_EXT_NUM; i++) {
- rcu_read_lock();
- t = rcu_dereference(nf_ct_ext_types[i]);
-
- /* Here the nf_ct_ext_type might have been unregisterd.
- * I.e., it has responsible to cleanup private
- * area in all conntracks when it is unregisterd.
- */
- if (t && t->destroy)
- t->destroy(ct);
- rcu_read_unlock();
- }
-
- kfree(ct->ext);
+ /* remember to add new extensions below */
+ BUILD_BUG_ON(NF_CT_EXT_NUM > 10);
+
+ return sizeof(struct nf_ct_ext) +
+ sizeof(struct nf_conn_help)
+#if IS_ENABLED(CONFIG_NF_NAT)
+ + sizeof(struct nf_conn_nat)
+#endif
+ + sizeof(struct nf_conn_seqadj)
+ + sizeof(struct nf_conn_acct)
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ + sizeof(struct nf_conntrack_ecache)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ + sizeof(struct nf_conn_tstamp)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ + sizeof(struct nf_conn_timeout)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ + sizeof(struct nf_conn_labels)
+#endif
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ + sizeof(struct nf_conn_synproxy)
+#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ + sizeof(struct nf_conn_act_ct_ext)
+#endif
+ ;
}
void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
{
unsigned int newlen, newoff, oldlen, alloc;
- struct nf_ct_ext_type *t;
struct nf_ct_ext *new;
/* Conntrack must not be confirmed to avoid races on reallocation. */
WARN_ON(nf_ct_is_confirmed(ct));
+ /* struct nf_ct_ext uses u8 to store offsets/size */
+ BUILD_BUG_ON(total_extension_size() > 255u);
if (ct->ext) {
const struct nf_ct_ext *old = ct->ext;
@@ -58,16 +108,8 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
oldlen = sizeof(*new);
}
- rcu_read_lock();
- t = rcu_dereference(nf_ct_ext_types[id]);
- if (!t) {
- rcu_read_unlock();
- return NULL;
- }
-
- newoff = ALIGN(oldlen, t->align);
- newlen = newoff + t->len;
- rcu_read_unlock();
+ newoff = ALIGN(oldlen, __alignof__(struct nf_ct_ext));
+ newlen = newoff + nf_ct_ext_type_len[id];
alloc = max(newlen, NF_CT_EXT_PREALLOC);
new = krealloc(ct->ext, alloc, gfp);
@@ -85,31 +127,3 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
return (void *)new + newoff;
}
EXPORT_SYMBOL(nf_ct_ext_add);
-
-/* This MUST be called in process context. */
-int nf_ct_extend_register(const struct nf_ct_ext_type *type)
-{
- int ret = 0;
-
- mutex_lock(&nf_ct_ext_type_mutex);
- if (nf_ct_ext_types[type->id]) {
- ret = -EBUSY;
- goto out;
- }
-
- rcu_assign_pointer(nf_ct_ext_types[type->id], type);
-out:
- mutex_unlock(&nf_ct_ext_type_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_extend_register);
-
-/* This MUST be called in process context. */
-void nf_ct_extend_unregister(const struct nf_ct_ext_type *type)
-{
- mutex_lock(&nf_ct_ext_type_mutex);
- RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL);
- mutex_unlock(&nf_ct_ext_type_mutex);
- synchronize_rcu();
-}
-EXPORT_SYMBOL_GPL(nf_ct_extend_unregister);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index ae4488a13c70..a97ddb1497aa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -550,12 +550,6 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat)
}
EXPORT_SYMBOL_GPL(nf_nat_helper_unregister);
-static const struct nf_ct_ext_type helper_extend = {
- .len = sizeof(struct nf_conn_help),
- .align = __alignof__(struct nf_conn_help),
- .id = NF_CT_EXT_HELPER,
-};
-
void nf_conntrack_helper_pernet_init(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
@@ -565,28 +559,17 @@ void nf_conntrack_helper_pernet_init(struct net *net)
int nf_conntrack_helper_init(void)
{
- int ret;
nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
nf_ct_helper_hash =
nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
if (!nf_ct_helper_hash)
return -ENOMEM;
- ret = nf_ct_extend_register(&helper_extend);
- if (ret < 0) {
- pr_err("nf_ct_helper: Unable to register helper extension.\n");
- goto out_extend;
- }
-
INIT_LIST_HEAD(&nf_ct_nat_helpers);
return 0;
-out_extend:
- kvfree(nf_ct_helper_hash);
- return ret;
}
void nf_conntrack_helper_fini(void)
{
- nf_ct_extend_unregister(&helper_extend);
kvfree(nf_ct_helper_hash);
}
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 522792556632..6e70e137a0a6 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -67,6 +67,8 @@ int nf_connlabels_get(struct net *net, unsigned int bits)
net->ct.labels_used++;
spin_unlock(&nf_connlabels_lock);
+ BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX);
+
return 0;
}
EXPORT_SYMBOL_GPL(nf_connlabels_get);
@@ -78,21 +80,3 @@ void nf_connlabels_put(struct net *net)
spin_unlock(&nf_connlabels_lock);
}
EXPORT_SYMBOL_GPL(nf_connlabels_put);
-
-static const struct nf_ct_ext_type labels_extend = {
- .len = sizeof(struct nf_conn_labels),
- .align = __alignof__(struct nf_conn_labels),
- .id = NF_CT_EXT_LABELS,
-};
-
-int nf_conntrack_labels_init(void)
-{
- BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX);
-
- return nf_ct_extend_register(&labels_extend);
-}
-
-void nf_conntrack_labels_fini(void)
-{
- nf_ct_extend_unregister(&labels_extend);
-}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7032402ffd33..1ea2ad732d57 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -58,6 +58,12 @@
MODULE_LICENSE("GPL");
+struct ctnetlink_list_dump_ctx {
+ struct nf_conn *last;
+ unsigned int cpu;
+ bool done;
+};
+
static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l4proto *l4proto)
@@ -1694,14 +1700,18 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb,
static int ctnetlink_done_list(struct netlink_callback *cb)
{
- if (cb->args[1])
- nf_ct_put((struct nf_conn *)cb->args[1]);
+ struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
+
+ if (ctx->last)
+ nf_ct_put(ctx->last);
+
return 0;
}
static int
ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
{
+ struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
@@ -1712,12 +1722,12 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
struct hlist_nulls_head *list;
struct net *net = sock_net(skb->sk);
- if (cb->args[2])
+ if (ctx->done)
return 0;
- last = (struct nf_conn *)cb->args[1];
+ last = ctx->last;
- for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
+ for (cpu = ctx->cpu; cpu < nr_cpu_ids; cpu++) {
struct ct_pcpu *pcpu;
if (!cpu_possible(cpu))
@@ -1731,10 +1741,10 @@ restart:
ct = nf_ct_tuplehash_to_ctrack(h);
if (l3proto && nf_ct_l3num(ct) != l3proto)
continue;
- if (cb->args[1]) {
+ if (ctx->last) {
if (ct != last)
continue;
- cb->args[1] = 0;
+ ctx->last = NULL;
}
/* We can't dump extension info for the unconfirmed
@@ -1751,19 +1761,19 @@ restart:
if (res < 0) {
if (!refcount_inc_not_zero(&ct->ct_general.use))
continue;
- cb->args[0] = cpu;
- cb->args[1] = (unsigned long)ct;
+ ctx->cpu = cpu;
+ ctx->last = ct;
spin_unlock_bh(&pcpu->lock);
goto out;
}
}
- if (cb->args[1]) {
- cb->args[1] = 0;
+ if (ctx->last) {
+ ctx->last = NULL;
goto restart;
}
spin_unlock_bh(&pcpu->lock);
}
- cb->args[2] = 1;
+ ctx->done = true;
out:
if (last)
nf_ct_put(last);
@@ -3878,6 +3888,8 @@ static int __init ctnetlink_init(void)
{
int ret;
+ BUILD_BUG_ON(sizeof(struct ctnetlink_list_dump_ctx) > sizeof_field(struct netlink_callback, ctx));
+
ret = nfnetlink_subsys_register(&ctnl_subsys);
if (ret < 0) {
pr_err("ctnetlink_init: cannot register with nfnetlink.\n");
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 7d5708b92138..f3fa367b455f 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -45,30 +45,8 @@ MODULE_ALIAS_NFCT_HELPER("pptp");
static DEFINE_SPINLOCK(nf_pptp_lock);
-int
-(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
- struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- unsigned int protoff, struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound);
-
-int
-(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
- struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- unsigned int protoff, struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_inbound);
-
-void
-(*nf_nat_pptp_hook_exp_gre)(struct nf_conntrack_expect *expect_orig,
- struct nf_conntrack_expect *expect_reply)
- __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_exp_gre);
-
-void
-(*nf_nat_pptp_hook_expectfn)(struct nf_conn *ct,
- struct nf_conntrack_expect *exp) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
+const struct nf_nat_pptp_hook *nf_nat_pptp_hook;
+EXPORT_SYMBOL_GPL(nf_nat_pptp_hook);
#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
/* PptpControlMessageType names */
@@ -111,8 +89,8 @@ EXPORT_SYMBOL(pptp_msg_name);
static void pptp_expectfn(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
+ const struct nf_nat_pptp_hook *hook;
struct net *net = nf_ct_net(ct);
- typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn;
pr_debug("increasing timeouts\n");
/* increase timeout of GRE data channel conntrack entry */
@@ -122,9 +100,9 @@ static void pptp_expectfn(struct nf_conn *ct,
/* Can you see how rusty this code is, compared with the pre-2.6.11
* one? That's what happened to my shiny newnat of 2002 ;( -HW */
- nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn);
- if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK)
- nf_nat_pptp_expectfn(ct, exp);
+ hook = rcu_dereference(nf_nat_pptp_hook);
+ if (hook && ct->master->status & IPS_NAT_MASK)
+ hook->expectfn(ct, exp);
else {
struct nf_conntrack_tuple inv_t;
struct nf_conntrack_expect *exp_other;
@@ -209,9 +187,9 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
{
struct nf_conntrack_expect *exp_orig, *exp_reply;
+ const struct nf_nat_pptp_hook *hook;
enum ip_conntrack_dir dir;
int ret = 1;
- typeof(nf_nat_pptp_hook_exp_gre) nf_nat_pptp_exp_gre;
exp_orig = nf_ct_expect_alloc(ct);
if (exp_orig == NULL)
@@ -239,9 +217,9 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
IPPROTO_GRE, &callid, &peer_callid);
exp_reply->expectfn = pptp_expectfn;
- nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre);
- if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK)
- nf_nat_pptp_exp_gre(exp_orig, exp_reply);
+ hook = rcu_dereference(nf_nat_pptp_hook);
+ if (hook && ct->status & IPS_NAT_MASK)
+ hook->exp_gre(exp_orig, exp_reply);
if (nf_ct_expect_related(exp_orig, 0) != 0)
goto out_put_both;
if (nf_ct_expect_related(exp_reply, 0) != 0)
@@ -279,9 +257,9 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
enum ip_conntrack_info ctinfo)
{
struct nf_ct_pptp_master *info = nfct_help_data(ct);
+ const struct nf_nat_pptp_hook *hook;
u_int16_t msg;
__be16 cid = 0, pcid = 0;
- typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound;
msg = ntohs(ctlh->messageType);
pr_debug("inbound control message %s\n", pptp_msg_name(msg));
@@ -383,10 +361,9 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
goto invalid;
}
- nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound);
- if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK)
- return nf_nat_pptp_inbound(skb, ct, ctinfo,
- protoff, ctlh, pptpReq);
+ hook = rcu_dereference(nf_nat_pptp_hook);
+ if (hook && ct->status & IPS_NAT_MASK)
+ return hook->inbound(skb, ct, ctinfo, protoff, ctlh, pptpReq);
return NF_ACCEPT;
invalid:
@@ -407,9 +384,9 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
enum ip_conntrack_info ctinfo)
{
struct nf_ct_pptp_master *info = nfct_help_data(ct);
+ const struct nf_nat_pptp_hook *hook;
u_int16_t msg;
__be16 cid = 0, pcid = 0;
- typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound;
msg = ntohs(ctlh->messageType);
pr_debug("outbound control message %s\n", pptp_msg_name(msg));
@@ -479,10 +456,9 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
goto invalid;
}
- nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound);
- if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK)
- return nf_nat_pptp_outbound(skb, ct, ctinfo,
- protoff, ctlh, pptpReq);
+ hook = rcu_dereference(nf_nat_pptp_hook);
+ if (hook && ct->status & IPS_NAT_MASK)
+ return hook->outbound(skb, ct, ctinfo, protoff, ctlh, pptpReq);
return NF_ACCEPT;
invalid:
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3b516cffc779..12f793d8fe0c 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -63,8 +63,10 @@ static bool udp_error(struct sk_buff *skb,
}
/* Packet with no checksum */
- if (!hdr->check)
+ if (!hdr->check) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
return false;
+ }
/* Checksum invalid? Ignore.
* We skip checking packets on the outgoing path
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index 3066449f8bd8..7ab2b25b57bc 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -232,19 +232,3 @@ s32 nf_ct_seq_offset(const struct nf_conn *ct,
this_way->offset_after : this_way->offset_before;
}
EXPORT_SYMBOL_GPL(nf_ct_seq_offset);
-
-static const struct nf_ct_ext_type nf_ct_seqadj_extend = {
- .len = sizeof(struct nf_conn_seqadj),
- .align = __alignof__(struct nf_conn_seqadj),
- .id = NF_CT_EXT_SEQADJ,
-};
-
-int nf_conntrack_seqadj_init(void)
-{
- return nf_ct_extend_register(&nf_ct_seqadj_extend);
-}
-
-void nf_conntrack_seqadj_fini(void)
-{
- nf_ct_extend_unregister(&nf_ct_seqadj_extend);
-}
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index 14387e0b8008..cec166ecba77 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -22,12 +22,8 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_timeout.h>
-struct nf_ct_timeout *
-(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook);
-
-void (*nf_ct_timeout_put_hook)(struct nf_ct_timeout *timeout) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_timeout_put_hook);
+const struct nf_ct_timeout_hooks *nf_ct_timeout_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_timeout_hook);
static int untimeout(struct nf_conn *ct, void *timeout)
{
@@ -48,31 +44,30 @@ EXPORT_SYMBOL_GPL(nf_ct_untimeout);
static void __nf_ct_timeout_put(struct nf_ct_timeout *timeout)
{
- typeof(nf_ct_timeout_put_hook) timeout_put;
+ const struct nf_ct_timeout_hooks *h = rcu_dereference(nf_ct_timeout_hook);
- timeout_put = rcu_dereference(nf_ct_timeout_put_hook);
- if (timeout_put)
- timeout_put(timeout);
+ if (h)
+ h->timeout_put(timeout);
}
int nf_ct_set_timeout(struct net *net, struct nf_conn *ct,
u8 l3num, u8 l4num, const char *timeout_name)
{
- typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
+ const struct nf_ct_timeout_hooks *h;
struct nf_ct_timeout *timeout;
struct nf_conn_timeout *timeout_ext;
const char *errmsg = NULL;
int ret = 0;
rcu_read_lock();
- timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook);
- if (!timeout_find_get) {
+ h = rcu_dereference(nf_ct_timeout_hook);
+ if (!h) {
ret = -ENOENT;
errmsg = "Timeout policy base is empty";
goto out;
}
- timeout = timeout_find_get(net, timeout_name);
+ timeout = h->timeout_find_get(net, timeout_name);
if (!timeout) {
ret = -ENOENT;
pr_info_ratelimited("No such timeout policy \"%s\"\n",
@@ -119,37 +114,18 @@ EXPORT_SYMBOL_GPL(nf_ct_set_timeout);
void nf_ct_destroy_timeout(struct nf_conn *ct)
{
struct nf_conn_timeout *timeout_ext;
- typeof(nf_ct_timeout_put_hook) timeout_put;
+ const struct nf_ct_timeout_hooks *h;
rcu_read_lock();
- timeout_put = rcu_dereference(nf_ct_timeout_put_hook);
+ h = rcu_dereference(nf_ct_timeout_hook);
- if (timeout_put) {
+ if (h) {
timeout_ext = nf_ct_timeout_find(ct);
if (timeout_ext) {
- timeout_put(timeout_ext->timeout);
+ h->timeout_put(timeout_ext->timeout);
RCU_INIT_POINTER(timeout_ext->timeout, NULL);
}
}
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_ct_destroy_timeout);
-
-static const struct nf_ct_ext_type timeout_extend = {
- .len = sizeof(struct nf_conn_timeout),
- .align = __alignof__(struct nf_conn_timeout),
- .id = NF_CT_EXT_TIMEOUT,
-};
-
-int nf_conntrack_timeout_init(void)
-{
- int ret = nf_ct_extend_register(&timeout_extend);
- if (ret < 0)
- pr_err("nf_ct_timeout: Unable to register timeout extension.\n");
- return ret;
-}
-
-void nf_conntrack_timeout_fini(void)
-{
- nf_ct_extend_unregister(&timeout_extend);
-}
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
index f656d393fa92..9e43a0a59e73 100644
--- a/net/netfilter/nf_conntrack_timestamp.c
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -19,27 +19,7 @@ static bool nf_ct_tstamp __read_mostly;
module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
-static const struct nf_ct_ext_type tstamp_extend = {
- .len = sizeof(struct nf_conn_tstamp),
- .align = __alignof__(struct nf_conn_tstamp),
- .id = NF_CT_EXT_TSTAMP,
-};
-
void nf_conntrack_tstamp_pernet_init(struct net *net)
{
net->ct.sysctl_tstamp = nf_ct_tstamp;
}
-
-int nf_conntrack_tstamp_init(void)
-{
- int ret;
- ret = nf_ct_extend_register(&tstamp_extend);
- if (ret < 0)
- pr_err("Unable to register extension\n");
- return ret;
-}
-
-void nf_conntrack_tstamp_fini(void)
-{
- nf_ct_extend_unregister(&tstamp_extend);
-}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 2d06a66899b2..58c06ac10179 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -838,7 +838,7 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
return i->status & IPS_NAT_MASK ? 1 : 0;
}
-static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
+static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
unsigned int h;
@@ -860,7 +860,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
* will delete entry from already-freed table.
*/
if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status))
- __nf_nat_cleanup_conntrack(ct);
+ nf_nat_cleanup_conntrack(ct);
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -868,20 +868,6 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
return 0;
}
-/* No one using conntrack by the time this called. */
-static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
-{
- if (ct->status & IPS_SRC_NAT_DONE)
- __nf_nat_cleanup_conntrack(ct);
-}
-
-static struct nf_ct_ext_type nat_extend __read_mostly = {
- .len = sizeof(struct nf_conn_nat),
- .align = __alignof__(struct nf_conn_nat),
- .destroy = nf_nat_cleanup_conntrack,
- .id = NF_CT_EXT_NAT,
-};
-
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
@@ -1173,6 +1159,7 @@ static const struct nf_nat_hook nat_hook = {
.decode_session = __nf_nat_decode_session,
#endif
.manip_pkt = nf_nat_manip_pkt,
+ .remove_nat_bysrc = nf_nat_cleanup_conntrack,
};
static int __init nf_nat_init(void)
@@ -1188,19 +1175,11 @@ static int __init nf_nat_init(void)
if (!nf_nat_bysource)
return -ENOMEM;
- ret = nf_ct_extend_register(&nat_extend);
- if (ret < 0) {
- kvfree(nf_nat_bysource);
- pr_err("Unable to register extension\n");
- return ret;
- }
-
for (i = 0; i < CONNTRACK_LOCKS; i++)
spin_lock_init(&nf_nat_locks[i]);
ret = register_pernet_subsys(&nat_net_ops);
if (ret < 0) {
- nf_ct_extend_unregister(&nat_extend);
kvfree(nf_nat_bysource);
return ret;
}
@@ -1219,7 +1198,6 @@ static void __exit nf_nat_cleanup(void)
nf_ct_iterate_destroy(nf_nat_proto_clean, &clean);
- nf_ct_extend_unregister(&nat_extend);
nf_ct_helper_expectfn_unregister(&follow_master_nat);
RCU_INIT_POINTER(nf_nat_hook, NULL);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 2dfc5dae0656..e479dd0561c5 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -236,12 +236,6 @@ synproxy_tstamp_adjust(struct sk_buff *skb, unsigned int protoff,
return 1;
}
-static struct nf_ct_ext_type nf_ct_synproxy_extend __read_mostly = {
- .len = sizeof(struct nf_conn_synproxy),
- .align = __alignof__(struct nf_conn_synproxy),
- .id = NF_CT_EXT_SYNPROXY,
-};
-
#ifdef CONFIG_PROC_FS
static void *synproxy_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
@@ -387,28 +381,12 @@ static struct pernet_operations synproxy_net_ops = {
static int __init synproxy_core_init(void)
{
- int err;
-
- err = nf_ct_extend_register(&nf_ct_synproxy_extend);
- if (err < 0)
- goto err1;
-
- err = register_pernet_subsys(&synproxy_net_ops);
- if (err < 0)
- goto err2;
-
- return 0;
-
-err2:
- nf_ct_extend_unregister(&nf_ct_synproxy_extend);
-err1:
- return err;
+ return register_pernet_subsys(&synproxy_net_ops);
}
static void __exit synproxy_core_exit(void)
{
unregister_pernet_subsys(&synproxy_net_ops);
- nf_ct_extend_unregister(&nf_ct_synproxy_extend);
}
module_init(synproxy_core_init);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 36e73f9828c5..c6c05b2412c4 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -67,6 +67,20 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
+static void nft_cmp16_fast_eval(const struct nft_expr *expr,
+ struct nft_regs *regs)
+{
+ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ const u64 *reg_data = (const u64 *)&regs->data[priv->sreg];
+ const u64 *mask = (const u64 *)&priv->mask;
+ const u64 *data = (const u64 *)&priv->data;
+
+ if (((reg_data[0] & mask[0]) == data[0] &&
+ ((reg_data[1] & mask[1]) == data[1])) ^ priv->inv)
+ return;
+ regs->verdict.code = NFT_BREAK;
+}
+
static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
const struct nft_chain *chain,
const struct nft_regs *regs)
@@ -225,6 +239,8 @@ next_rule:
nft_rule_dp_for_each_expr(expr, last, rule) {
if (expr->ops == &nft_cmp_fast_ops)
nft_cmp_fast_eval(expr, &regs);
+ else if (expr->ops == &nft_cmp16_fast_ops)
+ nft_cmp16_fast_eval(expr, &regs);
else if (expr->ops == &nft_bitwise_fast_ops)
nft_bitwise_fast_eval(expr, &regs);
else if (expr->ops != &nft_payload_fast_ops ||
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index c57673d499be..b0d8888a539b 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -605,6 +605,11 @@ static struct pernet_operations cttimeout_ops = {
.size = sizeof(struct nfct_timeout_pernet),
};
+static const struct nf_ct_timeout_hooks hooks = {
+ .timeout_find_get = ctnl_timeout_find_get,
+ .timeout_put = ctnl_timeout_put,
+};
+
static int __init cttimeout_init(void)
{
int ret;
@@ -619,8 +624,7 @@ static int __init cttimeout_init(void)
"nfnetlink.\n");
goto err_out;
}
- RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, ctnl_timeout_find_get);
- RCU_INIT_POINTER(nf_ct_timeout_put_hook, ctnl_timeout_put);
+ RCU_INIT_POINTER(nf_ct_timeout_hook, &hooks);
return 0;
err_out:
@@ -633,8 +637,7 @@ static void __exit cttimeout_exit(void)
nfnetlink_subsys_unregister(&cttimeout_subsys);
unregister_pernet_subsys(&cttimeout_ops);
- RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
- RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
+ RCU_INIT_POINTER(nf_ct_timeout_hook, NULL);
synchronize_rcu();
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index ea2d9c2a44cf..8c15978d9258 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -402,6 +402,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
#endif
+ nla_total_size(sizeof(u_int32_t)) /* mark */
+ + nla_total_size(sizeof(u_int32_t)) /* priority */
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
+ nla_total_size(sizeof(u_int32_t)) /* skbinfo */
+ nla_total_size(sizeof(u_int32_t)); /* cap_len */
@@ -559,6 +560,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark)))
goto nla_put_failure;
+ if (entskb->priority &&
+ nla_put_be32(skb, NFQA_PRIORITY, htonl(entskb->priority)))
+ goto nla_put_failure;
+
if (indev && entskb->dev &&
skb_mac_header_was_set(entskb) &&
skb_mac_header_len(entskb) != 0) {
@@ -1014,11 +1019,13 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
[NFQA_CT] = { .type = NLA_UNSPEC },
[NFQA_EXP] = { .type = NLA_UNSPEC },
[NFQA_VLAN] = { .type = NLA_NESTED },
+ [NFQA_PRIORITY] = { .type = NLA_U32 },
};
static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
[NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
[NFQA_MARK] = { .type = NLA_U32 },
+ [NFQA_PRIORITY] = { .type = NLA_U32 },
};
static struct nfqnl_instance *
@@ -1099,6 +1106,9 @@ static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
if (nfqa[NFQA_MARK])
entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
+ if (nfqa[NFQA_PRIORITY])
+ entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY]));
+
nfqnl_reinject(entry, verdict);
}
return 0;
@@ -1225,6 +1235,9 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
if (nfqa[NFQA_MARK])
entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
+ if (nfqa[NFQA_PRIORITY])
+ entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY]));
+
nfqnl_reinject(entry, verdict);
return 0;
}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 47b6d05f1ae6..917072af09df 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -272,12 +272,103 @@ const struct nft_expr_ops nft_cmp_fast_ops = {
.offload = nft_cmp_fast_offload,
};
+static u32 nft_cmp_mask(u32 bitlen)
+{
+ return (__force u32)cpu_to_le32(~0U >> (sizeof(u32) * BITS_PER_BYTE - bitlen));
+}
+
+static void nft_cmp16_fast_mask(struct nft_data *data, unsigned int bitlen)
+{
+ int len = bitlen / BITS_PER_BYTE;
+ int i, words = len / sizeof(u32);
+
+ for (i = 0; i < words; i++) {
+ data->data[i] = 0xffffffff;
+ bitlen -= sizeof(u32) * BITS_PER_BYTE;
+ }
+
+ if (len % sizeof(u32))
+ data->data[i++] = nft_cmp_mask(bitlen);
+
+ for (; i < 4; i++)
+ data->data[i] = 0;
+}
+
+static int nft_cmp16_fast_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_data_desc desc;
+ int err;
+
+ err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
+ tb[NFTA_CMP_DATA]);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+
+ nft_cmp16_fast_mask(&priv->mask, desc.len * BITS_PER_BYTE);
+ priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ;
+ priv->len = desc.len;
+
+ return 0;
+}
+
+static int nft_cmp16_fast_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_cmp_expr cmp = {
+ .data = priv->data,
+ .sreg = priv->sreg,
+ .len = priv->len,
+ .op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ,
+ };
+
+ return __nft_cmp_offload(ctx, flow, &cmp);
+}
+
+static int nft_cmp16_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ;
+
+ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op)))
+ goto nla_put_failure;
+
+ if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data,
+ NFT_DATA_VALUE, priv->len) < 0)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+
+const struct nft_expr_ops nft_cmp16_fast_ops = {
+ .type = &nft_cmp_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp16_fast_expr)),
+ .eval = NULL, /* inlined */
+ .init = nft_cmp16_fast_init,
+ .dump = nft_cmp16_fast_dump,
+ .offload = nft_cmp16_fast_offload,
+};
+
static const struct nft_expr_ops *
nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
{
struct nft_data_desc desc;
struct nft_data data;
enum nft_cmp_ops op;
+ u8 sreg;
int err;
if (tb[NFTA_CMP_SREG] == NULL ||
@@ -306,9 +397,16 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
if (desc.type != NFT_DATA_VALUE)
goto err1;
- if (desc.len <= sizeof(u32) && (op == NFT_CMP_EQ || op == NFT_CMP_NEQ))
- return &nft_cmp_fast_ops;
+ sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+ if (op == NFT_CMP_EQ || op == NFT_CMP_NEQ) {
+ if (desc.len <= sizeof(u32))
+ return &nft_cmp_fast_ops;
+ else if (desc.len <= sizeof(data) &&
+ ((sreg >= NFT_REG_1 && sreg <= NFT_REG_4) ||
+ (sreg >= NFT_REG32_00 && sreg <= NFT_REG32_12 && sreg % 2 == 0)))
+ return &nft_cmp16_fast_ops;
+ }
return &nft_cmp_ops;
err1:
nft_data_release(&data, desc.type);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index f69cc73c5813..5a46d8289d1d 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -731,6 +731,14 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = {
static struct nft_expr_type nft_match_type;
+static bool nft_match_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct xt_match *match = expr->ops->data;
+
+ return strcmp(match->name, "comment") == 0;
+}
+
static const struct nft_expr_ops *
nft_match_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -773,6 +781,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
ops->dump = nft_match_dump;
ops->validate = nft_match_validate;
ops->data = match;
+ ops->reduce = nft_match_reduce;
matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
if (matchsize > NFT_MATCH_LARGE_THRESH) {
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 9e927ab4df15..d2b9378164bb 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -308,6 +308,63 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int i, tcphdr_len, optl;
+ struct tcphdr *tcph;
+ u8 *opt;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+ if (!tcph)
+ goto err;
+
+ if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
+ goto drop;
+
+ opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+ if (!opt)
+ goto err;
+ for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+ unsigned int j;
+
+ optl = optlen(opt, i);
+ if (priv->type != opt[i])
+ continue;
+
+ if (i + optl > tcphdr_len)
+ goto drop;
+
+ for (j = 0; j < optl; ++j) {
+ u16 n = TCPOPT_NOP;
+ u16 o = opt[i+j];
+
+ if ((i + j) % 2 == 0) {
+ o <<= 8;
+ n <<= 8;
+ }
+ inet_proto_csum_replace2(&tcph->check, pkt->skb, htons(o),
+ htons(n), false);
+ }
+ memset(opt + i, TCPOPT_NOP, optl);
+ return;
+ }
+
+ /* option not found, continue. This allows to do multiple
+ * option removals per rule.
+ */
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+ return;
+drop:
+ /* can't remove, no choice but to drop */
+ regs->verdict.code = NF_DROP;
+}
+
static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -457,6 +514,28 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
priv->len);
}
+static int nft_exthdr_tcp_strip_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_EXTHDR_SREG] ||
+ tb[NFTA_EXTHDR_DREG] ||
+ tb[NFTA_EXTHDR_FLAGS] ||
+ tb[NFTA_EXTHDR_OFFSET] ||
+ tb[NFTA_EXTHDR_LEN])
+ return -EINVAL;
+
+ if (!tb[NFTA_EXTHDR_TYPE])
+ return -EINVAL;
+
+ priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+ priv->op = NFT_EXTHDR_OP_TCPOPT;
+
+ return 0;
+}
+
static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -517,6 +596,13 @@ static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
return nft_exthdr_dump_common(skb, priv);
}
+static int nft_exthdr_dump_strip(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.type = &nft_exthdr_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
@@ -549,6 +635,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
.dump = nft_exthdr_dump_set,
};
+static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_tcp_strip_eval,
+ .init = nft_exthdr_tcp_strip_init,
+ .dump = nft_exthdr_dump_strip,
+};
+
static const struct nft_expr_ops nft_exthdr_sctp_ops = {
.type = &nft_exthdr_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
@@ -576,7 +670,7 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
return &nft_exthdr_tcp_set_ops;
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_tcp_ops;
- break;
+ return &nft_exthdr_tcp_strip_ops;
case NFT_EXTHDR_OP_IPV6:
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_ipv6_ops;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 67ad08320886..7e8a39a35627 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -37,6 +37,7 @@
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/pkt_cls.h>
#include "datapath.h"
#include "flow.h"
@@ -1601,8 +1602,6 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
dp->user_features = 0;
}
-DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
static int ovs_dp_set_upcall_portids(struct datapath *dp,
const struct nlattr *ids)
{
@@ -1657,7 +1656,7 @@ u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
- u32 user_features = 0;
+ u32 user_features = 0, old_features = dp->user_features;
int err;
if (a[OVS_DP_ATTR_USER_FEATURES]) {
@@ -1696,10 +1695,12 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
return err;
}
- if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
- static_branch_enable(&tc_recirc_sharing_support);
- else
- static_branch_disable(&tc_recirc_sharing_support);
+ if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+ !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
+ tc_skb_ext_tc_enable();
+ else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+ (old_features & OVS_DP_F_TC_RECIRC_SHARING))
+ tc_skb_ext_tc_disable();
return 0;
}
@@ -1839,6 +1840,9 @@ static void __dp_destroy(struct datapath *dp)
struct flow_table *table = &dp->table;
int i;
+ if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+ tc_skb_ext_tc_disable();
+
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
struct vport *vport;
struct hlist_node *n;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index fcfe6cb46441..0cd29971a907 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -253,8 +253,6 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_family dp_vport_genl_family;
-DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 02096f2ec678..f6cd24fd530c 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -34,6 +34,7 @@
#include <net/mpls.h>
#include <net/ndisc.h>
#include <net/nsh.h>
+#include <net/pkt_cls.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include "conntrack.h"
@@ -895,7 +896,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->mac_proto = res;
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- if (static_branch_unlikely(&tc_recirc_sharing_support)) {
+ if (tc_skb_ext_tc_enabled()) {
tc_ext = skb_ext_find(skb, TC_SKB_EXT);
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f99247fc6468..7108e71ce4db 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -57,12 +57,6 @@ static const struct rhashtable_params zones_params = {
.automatic_shrinking = true,
};
-static struct nf_ct_ext_type act_ct_extend __read_mostly = {
- .len = sizeof(struct nf_conn_act_ct_ext),
- .align = __alignof__(struct nf_conn_act_ct_ext),
- .id = NF_CT_EXT_ACT_CT,
-};
-
static struct flow_action_entry *
tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action)
{
@@ -1608,16 +1602,10 @@ static int __init ct_init_module(void)
if (err)
goto err_register;
- err = nf_ct_extend_register(&act_ct_extend);
- if (err)
- goto err_register_extend;
-
static_branch_inc(&tcf_frag_xmit_count);
return 0;
-err_register_extend:
- tcf_unregister_action(&act_ct_ops, &ct_net_ops);
err_register:
tcf_ct_flow_tables_uninit();
err_tbl_init:
@@ -1628,7 +1616,6 @@ err_tbl_init:
static void __exit ct_cleanup_module(void)
{
static_branch_dec(&tcf_frag_xmit_count);
- nf_ct_extend_unregister(&act_ct_extend);
tcf_unregister_action(&act_ct_ops, &ct_net_ops);
tcf_ct_flow_tables_uninit();
destroy_workqueue(act_ct_wq);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5f0f346b576f..ff1e6b474fef 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -49,6 +49,23 @@ static LIST_HEAD(tcf_proto_base);
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);
+#ifdef CONFIG_NET_CLS_ACT
+DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc);
+EXPORT_SYMBOL(tc_skb_ext_tc);
+
+void tc_skb_ext_tc_enable(void)
+{
+ static_branch_inc(&tc_skb_ext_tc);
+}
+EXPORT_SYMBOL(tc_skb_ext_tc_enable);
+
+void tc_skb_ext_tc_disable(void)
+{
+ static_branch_dec(&tc_skb_ext_tc);
+}
+EXPORT_SYMBOL(tc_skb_ext_tc_disable);
+#endif
+
static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
{
return jhash_3words(tp->chain->index, tp->prio,
@@ -1615,19 +1632,21 @@ int tcf_classify(struct sk_buff *skb,
ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
&last_executed_chain);
- /* If we missed on some chain */
- if (ret == TC_ACT_UNSPEC && last_executed_chain) {
- struct tc_skb_cb *cb = tc_skb_cb(skb);
-
- ext = tc_skb_ext_alloc(skb);
- if (WARN_ON_ONCE(!ext))
- return TC_ACT_SHOT;
- ext->chain = last_executed_chain;
- ext->mru = cb->mru;
- ext->post_ct = cb->post_ct;
- ext->post_ct_snat = cb->post_ct_snat;
- ext->post_ct_dnat = cb->post_ct_dnat;
- ext->zone = cb->zone;
+ if (tc_skb_ext_tc_enabled()) {
+ /* If we missed on some chain */
+ if (ret == TC_ACT_UNSPEC && last_executed_chain) {
+ struct tc_skb_cb *cb = tc_skb_cb(skb);
+
+ ext = tc_skb_ext_alloc(skb);
+ if (WARN_ON_ONCE(!ext))
+ return TC_ACT_SHOT;
+ ext->chain = last_executed_chain;
+ ext->mru = cb->mru;
+ ext->post_ct = cb->post_ct;
+ ext->post_ct_snat = cb->post_ct_snat;
+ ext->post_ct_dnat = cb->post_ct_dnat;
+ ext->zone = cb->zone;
+ }
}
return ret;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 8c89d0b0ca18..00b2e9deabb0 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2626,8 +2626,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_state != SMC_CLOSED) {
if (!val) {
SMC_STAT_INC(smc, cork_cnt);
- mod_delayed_work(smc->conn.lgr->tx_wq,
- &smc->conn.tx_work, 0);
+ smc_tx_pending(&smc->conn);
+ cancel_delayed_work(&smc->conn.tx_work);
}
}
break;
@@ -2765,8 +2765,10 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page,
rc = kernel_sendpage(smc->clcsock, page, offset,
size, flags);
} else {
+ lock_sock(sk);
+ rc = smc_tx_sendpage(smc, page, offset, size, flags);
+ release_sock(sk);
SMC_STAT_INC(smc, sendpage_cnt);
- rc = sock_no_sendpage(sock, page, offset, size, flags);
}
out:
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index be241d53020f..a96ce162825e 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -31,7 +31,6 @@
#include "smc_tracepoint.h"
#define SMC_TX_WORK_DELAY 0
-#define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */
/***************************** sndbuf producer *******************************/
@@ -236,16 +235,15 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
*/
if ((msg->msg_flags & MSG_OOB) && !send_remaining)
conn->urg_tx_pend = true;
- if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) &&
- (atomic_read(&conn->sndbuf_space) >
- (conn->sndbuf_desc->len >> 1)))
- /* for a corked socket defer the RDMA writes if there
- * is still sufficient sndbuf_space available
+ if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc) ||
+ msg->msg_flags & MSG_SENDPAGE_NOTLAST) &&
+ (atomic_read(&conn->sndbuf_space)))
+ /* for a corked socket defer the RDMA writes if
+ * sndbuf_space is still available. The applications
+ * should known how/when to uncork it.
*/
- queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
- SMC_TX_CORK_DELAY);
- else
- smc_tx_sndbuf_nonempty(conn);
+ continue;
+ smc_tx_sndbuf_nonempty(conn);
trace_smc_tx_sendmsg(smc, copylen);
} /* while (msg_data_left(msg)) */
@@ -260,6 +258,22 @@ out_err:
return rc;
}
+int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset,
+ size_t size, int flags)
+{
+ struct msghdr msg = {.msg_flags = flags};
+ char *kaddr = kmap(page);
+ struct kvec iov;
+ int rc;
+
+ iov.iov_base = kaddr + offset;
+ iov.iov_len = size;
+ iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size);
+ rc = smc_tx_sendmsg(smc, &msg, size);
+ kunmap(page);
+ return rc;
+}
+
/***************************** sndbuf consumer *******************************/
/* sndbuf consumer: actual data transfer of one target chunk with ISM write */
@@ -597,27 +611,32 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
return rc;
}
-/* Wakeup sndbuf consumers from process context
- * since there is more data to transmit
- */
-void smc_tx_work(struct work_struct *work)
+void smc_tx_pending(struct smc_connection *conn)
{
- struct smc_connection *conn = container_of(to_delayed_work(work),
- struct smc_connection,
- tx_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
int rc;
- lock_sock(&smc->sk);
if (smc->sk.sk_err)
- goto out;
+ return;
rc = smc_tx_sndbuf_nonempty(conn);
if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
!atomic_read(&conn->bytes_to_rcv))
conn->local_rx_ctrl.prod_flags.write_blocked = 0;
+}
+
+/* Wakeup sndbuf consumers from process context
+ * since there is more data to transmit
+ */
+void smc_tx_work(struct work_struct *work)
+{
+ struct smc_connection *conn = container_of(to_delayed_work(work),
+ struct smc_connection,
+ tx_work);
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
-out:
+ lock_sock(&smc->sk);
+ smc_tx_pending(conn);
release_sock(&smc->sk);
}
diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h
index 07e6ad76224a..34b578498b1f 100644
--- a/net/smc/smc_tx.h
+++ b/net/smc/smc_tx.h
@@ -27,9 +27,12 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn)
return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
}
+void smc_tx_pending(struct smc_connection *conn);
void smc_tx_work(struct work_struct *work);
void smc_tx_init(struct smc_sock *smc);
int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
+int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset,
+ size_t size, int flags);
int smc_tx_sndbuf_nonempty(struct smc_connection *conn);
void smc_tx_sndbuf_nonfull(struct smc_sock *smc);
void smc_tx_consumer_update(struct smc_connection *conn, bool force);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 5f42aa5fc612..8eb7e8544815 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -72,7 +72,8 @@ struct gss_auth {
struct gss_api_mech *mech;
enum rpc_gss_svc service;
struct rpc_clnt *client;
- struct net *net;
+ struct net *net;
+ netns_tracker ns_tracker;
/*
* There are two upcall pipes; dentry[1], named "gssd", is used
* for the new text-based upcall; dentry[0] is named after the
@@ -1013,7 +1014,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
goto err_free;
}
gss_auth->client = clnt;
- gss_auth->net = get_net(rpc_net_ns(clnt));
+ gss_auth->net = get_net_track(rpc_net_ns(clnt), &gss_auth->ns_tracker,
+ GFP_KERNEL);
err = -EINVAL;
gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
if (!gss_auth->mech)
@@ -1068,7 +1070,7 @@ err_destroy_credcache:
err_put_mech:
gss_mech_put(gss_auth->mech);
err_put_net:
- put_net(gss_auth->net);
+ put_net_track(gss_auth->net, &gss_auth->ns_tracker);
err_free:
kfree(gss_auth->target_name);
kfree(gss_auth);
@@ -1084,7 +1086,7 @@ gss_free(struct gss_auth *gss_auth)
gss_pipe_free(gss_auth->gss_pipe[0]);
gss_pipe_free(gss_auth->gss_pipe[1]);
gss_mech_put(gss_auth->mech);
- put_net(gss_auth->net);
+ put_net_track(gss_auth->net, &gss_auth->ns_tracker);
kfree(gss_auth->target_name);
kfree(gss_auth);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b21ad7994147..db878e833b67 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -162,7 +162,7 @@ static void svc_xprt_free(struct kref *kref)
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
svcauth_unix_info_release(xprt);
put_cred(xprt->xpt_cred);
- put_net(xprt->xpt_net);
+ put_net_track(xprt->xpt_net, &xprt->ns_tracker);
/* See comment on corresponding get in xs_setup_bc_tcp(): */
if (xprt->xpt_bc_xprt)
xprt_put(xprt->xpt_bc_xprt);
@@ -198,7 +198,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
mutex_init(&xprt->xpt_mutex);
spin_lock_init(&xprt->xpt_lock);
set_bit(XPT_BUSY, &xprt->xpt_flags);
- xprt->xpt_net = get_net(net);
+ xprt->xpt_net = get_net_track(net, &xprt->ns_tracker, GFP_ATOMIC);
strcpy(xprt->xpt_remotebuf, "uninitialized");
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a02de2bddb28..5af484d6ba5e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1835,7 +1835,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc);
void xprt_free(struct rpc_xprt *xprt)
{
- put_net(xprt->xprt_net);
+ put_net_track(xprt->xprt_net, &xprt->ns_tracker);
xprt_free_all_slots(xprt);
xprt_free_id(xprt);
rpc_sysfs_xprt_destroy(xprt);
@@ -2027,7 +2027,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
xprt_init_xid(xprt);
- xprt->xprt_net = get_net(net);
+ xprt->xprt_net = get_net_track(net, &xprt->ns_tracker, GFP_KERNEL);
}
/**
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index b62565278fac..12e6b4146bfb 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -85,7 +85,7 @@ static int switchdev_deferred_enqueue(struct net_device *dev,
{
struct switchdev_deferred_item *dfitem;
- dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
+ dfitem = kmalloc(struct_size(dfitem, data, data_len), GFP_ATOMIC);
if (!dfitem)
return -ENOMEM;
dfitem->dev = dev;
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 64ae4c4c44f8..c5eec16213d7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -226,14 +226,6 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w,
m->hdr[w] |= htonl(val);
}
-static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b)
-{
- u32 temp = msg->hdr[a];
-
- msg->hdr[a] = msg->hdr[b];
- msg->hdr[b] = temp;
-}
-
/*
* Word 0
*/
@@ -480,11 +472,6 @@ static inline void msg_incr_reroute_cnt(struct tipc_msg *m)
msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1);
}
-static inline void msg_reset_reroute_cnt(struct tipc_msg *m)
-{
- msg_set_bits(m, 1, 21, 0xf, 0);
-}
-
static inline u32 msg_lookup_scope(struct tipc_msg *m)
{
return msg_bits(m, 1, 19, 0x3);
@@ -800,11 +787,6 @@ static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n)
msg_set_word(m, 2, n);
}
-static inline u32 msg_bcgap_after(struct tipc_msg *m)
-{
- return msg_bits(m, 2, 16, 0xffff);
-}
-
static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n)
{
msg_set_bits(m, 2, 16, 0xffff, n);
@@ -868,11 +850,6 @@ static inline void msg_set_next_sent(struct tipc_msg *m, u16 n)
msg_set_bits(m, 4, 0, 0xffff, n);
}
-static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n)
-{
- msg_set_bits(m, 4, 0, 0xffff, n);
-}
-
static inline u32 msg_bc_netid(struct tipc_msg *m)
{
return msg_word(m, 4);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index efc84845bb6b..0024a692f0f8 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1433,7 +1433,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
if (*zc && (out_iov || out_sg)) {
if (out_iov)
- n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1;
+ n_sgout = 1 +
+ iov_iter_npages_cap(out_iov, INT_MAX, data_len);
else
n_sgout = sg_nents(out_sg);
n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c19569819866..3e0d6281fd1e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -3240,49 +3240,58 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
return sk;
}
-static struct sock *unix_next_socket(struct seq_file *seq,
- struct sock *sk,
- loff_t *pos)
+static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
{
unsigned long bucket = get_bucket(*pos);
+ struct sock *sk;
- while (sk > (struct sock *)SEQ_START_TOKEN) {
- sk = sk_next(sk);
- if (!sk)
- goto next_bucket;
- if (sock_net(sk) == seq_file_net(seq))
- return sk;
- }
-
- do {
+ while (bucket < ARRAY_SIZE(unix_socket_table)) {
spin_lock(&unix_table_locks[bucket]);
+
sk = unix_from_bucket(seq, pos);
if (sk)
return sk;
-next_bucket:
- spin_unlock(&unix_table_locks[bucket++]);
- *pos = set_bucket_offset(bucket, 1);
- } while (bucket < ARRAY_SIZE(unix_socket_table));
+ spin_unlock(&unix_table_locks[bucket]);
+
+ *pos = set_bucket_offset(++bucket, 1);
+ }
return NULL;
}
+static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
+ loff_t *pos)
+{
+ unsigned long bucket = get_bucket(*pos);
+
+ for (sk = sk_next(sk); sk; sk = sk_next(sk))
+ if (sock_net(sk) == seq_file_net(seq))
+ return sk;
+
+ spin_unlock(&unix_table_locks[bucket]);
+
+ *pos = set_bucket_offset(++bucket, 1);
+
+ return unix_get_first(seq, pos);
+}
+
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
{
if (!*pos)
return SEQ_START_TOKEN;
- if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
- return NULL;
-
- return unix_next_socket(seq, NULL, pos);
+ return unix_get_first(seq, pos);
}
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
++*pos;
- return unix_next_socket(seq, v, pos);
+
+ if (v == SEQ_START_TOKEN)
+ return unix_get_first(seq, pos);
+
+ return unix_get_next(seq, v, pos);
}
static void unix_seq_stop(struct seq_file *seq, void *v)
@@ -3347,6 +3356,15 @@ static const struct seq_operations unix_seq_ops = {
};
#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_unix_iter_state {
+ struct seq_net_private p;
+ unsigned int cur_sk;
+ unsigned int end_sk;
+ unsigned int max_sk;
+ struct sock **batch;
+ bool st_bucket_done;
+};
+
struct bpf_iter__unix {
__bpf_md_ptr(struct bpf_iter_meta *, meta);
__bpf_md_ptr(struct unix_sock *, unix_sk);
@@ -3365,24 +3383,156 @@ static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
return bpf_iter_run_prog(prog, &ctx);
}
+static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
+
+{
+ struct bpf_unix_iter_state *iter = seq->private;
+ unsigned int expected = 1;
+ struct sock *sk;
+
+ sock_hold(start_sk);
+ iter->batch[iter->end_sk++] = start_sk;
+
+ for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
+ if (sock_net(sk) != seq_file_net(seq))
+ continue;
+
+ if (iter->end_sk < iter->max_sk) {
+ sock_hold(sk);
+ iter->batch[iter->end_sk++] = sk;
+ }
+
+ expected++;
+ }
+
+ spin_unlock(&unix_table_locks[start_sk->sk_hash]);
+
+ return expected;
+}
+
+static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
+{
+ while (iter->cur_sk < iter->end_sk)
+ sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
+ unsigned int new_batch_sz)
+{
+ struct sock **new_batch;
+
+ new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+ GFP_USER | __GFP_NOWARN);
+ if (!new_batch)
+ return -ENOMEM;
+
+ bpf_iter_unix_put_batch(iter);
+ kvfree(iter->batch);
+ iter->batch = new_batch;
+ iter->max_sk = new_batch_sz;
+
+ return 0;
+}
+
+static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
+ loff_t *pos)
+{
+ struct bpf_unix_iter_state *iter = seq->private;
+ unsigned int expected;
+ bool resized = false;
+ struct sock *sk;
+
+ if (iter->st_bucket_done)
+ *pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
+
+again:
+ /* Get a new batch */
+ iter->cur_sk = 0;
+ iter->end_sk = 0;
+
+ sk = unix_get_first(seq, pos);
+ if (!sk)
+ return NULL; /* Done */
+
+ expected = bpf_iter_unix_hold_batch(seq, sk);
+
+ if (iter->end_sk == expected) {
+ iter->st_bucket_done = true;
+ return sk;
+ }
+
+ if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
+ resized = true;
+ goto again;
+ }
+
+ return sk;
+}
+
+static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ if (!*pos)
+ return SEQ_START_TOKEN;
+
+ /* bpf iter does not support lseek, so it always
+ * continue from where it was stop()-ped.
+ */
+ return bpf_iter_unix_batch(seq, pos);
+}
+
+static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_unix_iter_state *iter = seq->private;
+ struct sock *sk;
+
+ /* Whenever seq_next() is called, the iter->cur_sk is
+ * done with seq_show(), so advance to the next sk in
+ * the batch.
+ */
+ if (iter->cur_sk < iter->end_sk)
+ sock_put(iter->batch[iter->cur_sk++]);
+
+ ++*pos;
+
+ if (iter->cur_sk < iter->end_sk)
+ sk = iter->batch[iter->cur_sk];
+ else
+ sk = bpf_iter_unix_batch(seq, pos);
+
+ return sk;
+}
+
static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
{
struct bpf_iter_meta meta;
struct bpf_prog *prog;
struct sock *sk = v;
uid_t uid;
+ bool slow;
+ int ret;
if (v == SEQ_START_TOKEN)
return 0;
+ slow = lock_sock_fast(sk);
+
+ if (unlikely(sk_unhashed(sk))) {
+ ret = SEQ_SKIP;
+ goto unlock;
+ }
+
uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
meta.seq = seq;
prog = bpf_iter_get_info(&meta, false);
- return unix_prog_seq_show(prog, &meta, v, uid);
+ ret = unix_prog_seq_show(prog, &meta, v, uid);
+unlock:
+ unlock_sock_fast(sk, slow);
+ return ret;
}
static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
{
+ struct bpf_unix_iter_state *iter = seq->private;
struct bpf_iter_meta meta;
struct bpf_prog *prog;
@@ -3393,12 +3543,13 @@ static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
(void)unix_prog_seq_show(prog, &meta, v, 0);
}
- unix_seq_stop(seq, v);
+ if (iter->cur_sk < iter->end_sk)
+ bpf_iter_unix_put_batch(iter);
}
static const struct seq_operations bpf_iter_unix_seq_ops = {
- .start = unix_seq_start,
- .next = unix_seq_next,
+ .start = bpf_iter_unix_seq_start,
+ .next = bpf_iter_unix_seq_next,
.stop = bpf_iter_unix_seq_stop,
.show = bpf_iter_unix_seq_show,
};
@@ -3447,13 +3598,55 @@ static struct pernet_operations unix_net_ops = {
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
struct unix_sock *unix_sk, uid_t uid)
+#define INIT_BATCH_SZ 16
+
+static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+ struct bpf_unix_iter_state *iter = priv_data;
+ int err;
+
+ err = bpf_iter_init_seq_net(priv_data, aux);
+ if (err)
+ return err;
+
+ err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
+ if (err) {
+ bpf_iter_fini_seq_net(priv_data);
+ return err;
+ }
+
+ return 0;
+}
+
+static void bpf_iter_fini_unix(void *priv_data)
+{
+ struct bpf_unix_iter_state *iter = priv_data;
+
+ bpf_iter_fini_seq_net(priv_data);
+ kvfree(iter->batch);
+}
+
static const struct bpf_iter_seq_info unix_seq_info = {
.seq_ops = &bpf_iter_unix_seq_ops,
- .init_seq_private = bpf_iter_init_seq_net,
- .fini_seq_private = bpf_iter_fini_seq_net,
- .seq_priv_size = sizeof(struct seq_net_private),
+ .init_seq_private = bpf_iter_init_unix,
+ .fini_seq_private = bpf_iter_fini_unix,
+ .seq_priv_size = sizeof(struct bpf_unix_iter_state),
};
+static const struct bpf_func_proto *
+bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_setsockopt:
+ return &bpf_sk_setsockopt_proto;
+ case BPF_FUNC_getsockopt:
+ return &bpf_sk_getsockopt_proto;
+ default:
+ return NULL;
+ }
+}
+
static struct bpf_iter_reg unix_reg_info = {
.target = "unix",
.ctx_arg_info_size = 1,
@@ -3461,6 +3654,7 @@ static struct bpf_iter_reg unix_reg_info = {
{ offsetof(struct bpf_iter__unix, unix_sk),
PTR_TO_BTF_ID_OR_NULL },
},
+ .get_func_proto = bpf_iter_unix_get_func_proto,
.seq_info = &unix_seq_info,
};
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 28ef3f4465ae..2abd64e4d589 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -343,9 +343,9 @@ out:
}
EXPORT_SYMBOL(xsk_tx_peek_desc);
-static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_desc *descs,
- u32 max_entries)
+static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, u32 max_entries)
{
+ struct xdp_desc *descs = pool->tx_descs;
u32 nb_pkts = 0;
while (nb_pkts < max_entries && xsk_tx_peek_desc(pool, &descs[nb_pkts]))
@@ -355,8 +355,7 @@ static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_d
return nb_pkts;
}
-u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *descs,
- u32 max_entries)
+u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries)
{
struct xdp_sock *xs;
u32 nb_pkts;
@@ -365,7 +364,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *
if (!list_is_singular(&pool->xsk_tx_list)) {
/* Fallback to the non-batched version */
rcu_read_unlock();
- return xsk_tx_peek_release_fallback(pool, descs, max_entries);
+ return xsk_tx_peek_release_fallback(pool, max_entries);
}
xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list);
@@ -374,7 +373,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *
goto out;
}
- nb_pkts = xskq_cons_peek_desc_batch(xs->tx, descs, pool, max_entries);
+ nb_pkts = xskq_cons_peek_desc_batch(xs->tx, pool, max_entries);
if (!nb_pkts) {
xs->tx->queue_empty_descs++;
goto out;
@@ -386,7 +385,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *
* packets. This avoids having to implement any buffering in
* the Tx path.
*/
- nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, descs, nb_pkts);
+ nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, pool->tx_descs, nb_pkts);
if (!nb_pkts)
goto out;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index fd39bb660ebc..b34fca6ada86 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -37,6 +37,7 @@ void xp_destroy(struct xsk_buff_pool *pool)
if (!pool)
return;
+ kvfree(pool->tx_descs);
kvfree(pool->heads);
kvfree(pool);
}
@@ -58,6 +59,12 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
if (!pool->heads)
goto out;
+ if (xs->tx) {
+ pool->tx_descs = kcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL);
+ if (!pool->tx_descs)
+ goto out;
+ }
+
pool->chunk_mask = ~((u64)umem->chunk_size - 1);
pool->addrs_cnt = umem->size;
pool->heads_cnt = umem->chunks;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index e9aa2c236356..801cda5d1938 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -205,11 +205,11 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q,
return false;
}
-static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q,
- struct xdp_desc *descs,
- struct xsk_buff_pool *pool, u32 max)
+static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ u32 max)
{
u32 cached_cons = q->cached_cons, nb_entries = 0;
+ struct xdp_desc *descs = pool->tx_descs;
while (cached_cons != q->cached_prod && nb_entries < max) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
@@ -282,12 +282,12 @@ static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
return xskq_cons_read_desc(q, desc, pool);
}
-static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xdp_desc *descs,
- struct xsk_buff_pool *pool, u32 max)
+static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ u32 max)
{
u32 entries = xskq_cons_nb_entries(q, max);
- return xskq_cons_read_desc_batch(q, descs, pool, entries);
+ return xskq_cons_read_desc_batch(q, pool, entries);
}
/* To improve performance in the xskq_cons_release functions, only update local state here.
@@ -304,13 +304,6 @@ static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
q->cached_cons += cnt;
}
-static inline bool xskq_cons_is_full(struct xsk_queue *q)
-{
- /* No barriers needed since data is not accessed */
- return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) ==
- q->nentries;
-}
-
static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
{
/* No barriers needed since data is not accessed */
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 319fd31522f3..e69651a6902f 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -413,7 +413,7 @@ static void fixup_map(struct bpf_object *obj)
for (i = 0; i < NR_TESTS; i++) {
if (!strcmp(test_map_names[i], name) &&
(check_test_flags(i))) {
- bpf_map__resize(map, num_map_entries);
+ bpf_map__set_max_entries(map, num_map_entries);
continue;
}
}
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index 8675fa5273df..631f0cabe139 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -26,12 +26,12 @@ static void int_exit(int sig)
{
__u32 curr_prog_id = 0;
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given interface\n");
else
@@ -79,13 +79,11 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
const char *optstr = "FSN";
int prog_fd, map_fd, opt;
+ struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_map *map;
char filename[256];
@@ -123,11 +121,19 @@ int main(int argc, char **argv)
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
+ return 1;
+
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ err = bpf_object__load(obj);
+ if (err)
return 1;
+ prog_fd = bpf_program__fd(prog);
+
map = bpf_object__next_map(obj, NULL);
if (!map) {
printf("finding a map in obj file failed\n");
@@ -143,7 +149,7 @@ int main(int argc, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
index a70b094c8ec5..b3f6e49676ed 100644
--- a/samples/bpf/xdp_adjust_tail_user.c
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -34,12 +34,12 @@ static void int_exit(int sig)
__u32 curr_prog_id = 0;
if (ifindex > -1) {
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given iface\n");
else
@@ -82,15 +82,13 @@ static void usage(const char *cmd)
int main(int argc, char **argv)
{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
unsigned char opt_flags[256] = {};
const char *optstr = "i:T:P:SNFh";
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
unsigned int kill_after_s = 0;
int i, prog_fd, map_fd, opt;
+ struct bpf_program *prog;
struct bpf_object *obj;
__u32 max_pckt_size = 0;
__u32 key = 0;
@@ -148,11 +146,20 @@ int main(int argc, char **argv)
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
return 1;
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
+
+ err = bpf_object__load(obj);
+ if (err)
+ return 1;
+
+ prog_fd = bpf_program__fd(prog);
+
/* static global var 'max_pcktsz' is accessible from .data section */
if (max_pckt_size) {
map_fd = bpf_object__find_map_fd_by_name(obj, "xdp_adju.data");
@@ -173,7 +180,7 @@ int main(int argc, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index 4ad896782f77..1828487bae9a 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -33,7 +33,7 @@ static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
{
int err;
- err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags);
+ err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL);
if (err < 0) {
printf("ERROR: failed to attach program to %s\n", name);
return err;
@@ -51,7 +51,7 @@ static int do_detach(int idx, const char *name)
{
int err;
- err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+ err = bpf_xdp_detach(idx, xdp_flags, NULL);
if (err < 0)
printf("ERROR: failed to detach program from %s\n", name);
@@ -75,14 +75,11 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
const char *prog_name = "xdp_fwd";
struct bpf_program *prog = NULL;
struct bpf_program *pos;
const char *sec_name;
- int prog_fd, map_fd = -1;
+ int prog_fd = -1, map_fd = -1;
char filename[PATH_MAX];
struct bpf_object *obj;
int opt, i, idx, err;
@@ -119,7 +116,6 @@ int main(int argc, char **argv)
if (attach) {
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
if (access(filename, O_RDONLY) < 0) {
printf("error accessing file %s: %s\n",
@@ -127,7 +123,14 @@ int main(int argc, char **argv)
return 1;
}
- err = bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
+ return 1;
+
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
+
+ err = bpf_object__load(obj);
if (err) {
printf("Does kernel support devmap lookup?\n");
/* If not, the error message will be:
diff --git a/samples/bpf/xdp_redirect_cpu.bpf.c b/samples/bpf/xdp_redirect_cpu.bpf.c
index 25e3a405375f..87c54bfdbb70 100644
--- a/samples/bpf/xdp_redirect_cpu.bpf.c
+++ b/samples/bpf/xdp_redirect_cpu.bpf.c
@@ -491,7 +491,7 @@ int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpumap/redirect")
+SEC("xdp/cpumap")
int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
@@ -507,19 +507,19 @@ int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
return bpf_redirect_map(&tx_port, 0, 0);
}
-SEC("xdp_cpumap/pass")
+SEC("xdp/cpumap")
int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
return XDP_PASS;
}
-SEC("xdp_cpumap/drop")
+SEC("xdp/cpumap")
int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
return XDP_DROP;
}
-SEC("xdp_devmap/egress")
+SEC("xdp/devmap")
int xdp_redirect_egress_prog(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index a81704d3317b..5f74a70a9021 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -70,7 +70,7 @@ static void print_avail_progs(struct bpf_object *obj)
printf(" Programs to be used for -p/--progname:\n");
bpf_object__for_each_program(pos, obj) {
- if (bpf_program__is_xdp(pos)) {
+ if (bpf_program__type(pos) == BPF_PROG_TYPE_XDP) {
if (!strncmp(bpf_program__name(pos), "xdp_prognum",
sizeof("xdp_prognum") - 1))
printf(" %s\n", bpf_program__name(pos));
diff --git a/samples/bpf/xdp_redirect_map.bpf.c b/samples/bpf/xdp_redirect_map.bpf.c
index 59efd656e1b2..415bac1758e3 100644
--- a/samples/bpf/xdp_redirect_map.bpf.c
+++ b/samples/bpf/xdp_redirect_map.bpf.c
@@ -68,7 +68,7 @@ int xdp_redirect_map_native(struct xdp_md *ctx)
return xdp_redirect_map(ctx, &tx_port_native);
}
-SEC("xdp_devmap/egress")
+SEC("xdp/devmap")
int xdp_redirect_map_egress(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/samples/bpf/xdp_redirect_map_multi.bpf.c b/samples/bpf/xdp_redirect_map_multi.bpf.c
index bb0a5a3bfcf0..8b2fd4ec2c76 100644
--- a/samples/bpf/xdp_redirect_map_multi.bpf.c
+++ b/samples/bpf/xdp_redirect_map_multi.bpf.c
@@ -53,7 +53,7 @@ int xdp_redirect_map_native(struct xdp_md *ctx)
return xdp_redirect_map(ctx, &forward_map_native);
}
-SEC("xdp_devmap/egress")
+SEC("xdp/devmap")
int xdp_devmap_prog(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index cfaf7e50e431..6dae87d83e1c 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -43,13 +43,13 @@ static void int_exit(int sig)
int i = 0;
for (i = 0; i < total_ifindex; i++) {
- if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) {
- printf("bpf_get_link_xdp_id on iface %d failed\n",
+ if (bpf_xdp_query_id(ifindex_list[i], flags, &prog_id)) {
+ printf("bpf_xdp_query_id on iface %d failed\n",
ifindex_list[i]);
exit(1);
}
if (prog_id_list[i] == prog_id)
- bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+ bpf_xdp_detach(ifindex_list[i], flags, NULL);
else if (!prog_id)
printf("couldn't find a prog id on iface %d\n",
ifindex_list[i]);
@@ -640,12 +640,10 @@ static void usage(const char *prog)
int main(int ac, char **argv)
{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
const char *optstr = "SF";
+ struct bpf_program *prog;
struct bpf_object *obj;
char filename[256];
char **ifname_list;
@@ -653,7 +651,6 @@ int main(int ac, char **argv)
int err, i = 1;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
total_ifindex = ac - 1;
ifname_list = (argv + 1);
@@ -684,14 +681,20 @@ int main(int ac, char **argv)
return 1;
}
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
return 1;
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
+
printf("\n******************loading bpf file*********************\n");
- if (!prog_fd) {
- printf("bpf_prog_load_xattr: %s\n", strerror(errno));
+ err = bpf_object__load(obj);
+ if (err) {
+ printf("bpf_object__load(): %s\n", strerror(errno));
return 1;
}
+ prog_fd = bpf_program__fd(prog);
lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map");
rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
@@ -716,12 +719,12 @@ int main(int ac, char **argv)
}
prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *));
for (i = 0; i < total_ifindex; i++) {
- if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) {
+ if (bpf_xdp_attach(ifindex_list[i], prog_fd, flags, NULL) < 0) {
printf("link set xdp fd failed\n");
int recovery_index = i;
for (i = 0; i < recovery_index; i++)
- bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+ bpf_xdp_detach(ifindex_list[i], flags, NULL);
return 1;
}
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index 74a2926eba08..f2d90cba5164 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -62,15 +62,15 @@ static void int_exit(int sig)
__u32 curr_prog_id = 0;
if (ifindex > -1) {
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(EXIT_FAIL);
}
if (prog_id == curr_prog_id) {
fprintf(stderr,
"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
ifindex, ifname);
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
} else if (!curr_prog_id) {
printf("couldn't find a prog id on a given iface\n");
} else {
@@ -209,7 +209,7 @@ static struct datarec *alloc_record_per_cpu(void)
static struct record *alloc_record_per_rxq(void)
{
- unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
struct record *array;
array = calloc(nr_rxqs, sizeof(struct record));
@@ -222,7 +222,7 @@ static struct record *alloc_record_per_rxq(void)
static struct stats_record *alloc_stats_record(void)
{
- unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
struct stats_record *rec;
int i;
@@ -241,7 +241,7 @@ static struct stats_record *alloc_stats_record(void)
static void free_stats_record(struct stats_record *r)
{
- unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
int i;
for (i = 0; i < nr_rxqs; i++)
@@ -289,7 +289,7 @@ static void stats_collect(struct stats_record *rec)
map_collect_percpu(fd, 0, &rec->stats);
fd = bpf_map__fd(rx_queue_index_map);
- max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ max_rxqs = bpf_map__max_entries(rx_queue_index_map);
for (i = 0; i < max_rxqs; i++)
map_collect_percpu(fd, i, &rec->rxq[i]);
}
@@ -335,7 +335,7 @@ static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
int action, __u32 cfg_opt)
{
- unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
unsigned int nr_cpus = bpf_num_possible_cpus();
double pps = 0, err = 0;
struct record *rec, *prev;
@@ -450,14 +450,12 @@ static void stats_poll(int interval, int action, __u32 cfg_opt)
int main(int argc, char **argv)
{
__u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
int prog_fd, map_fd, opt, err;
bool use_separators = true;
struct config cfg = { 0 };
+ struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_map *map;
char filename[256];
@@ -471,11 +469,19 @@ int main(int argc, char **argv)
char *action_str = NULL;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
return EXIT_FAIL;
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
+
+ err = bpf_object__load(obj);
+ if (err)
+ return EXIT_FAIL;
+ prog_fd = bpf_program__fd(prog);
+
map = bpf_object__find_map_by_name(obj, "config_map");
stats_global_map = bpf_object__find_map_by_name(obj, "stats_global_map");
rx_queue_index_map = bpf_object__find_map_by_name(obj, "rx_queue_index_map");
@@ -582,7 +588,7 @@ int main(int argc, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
return EXIT_FAIL_XDP;
}
diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
index 587eacb49103..0a2b3e997aed 100644
--- a/samples/bpf/xdp_sample_pkts_user.c
+++ b/samples/bpf/xdp_sample_pkts_user.c
@@ -30,7 +30,7 @@ static int do_attach(int idx, int fd, const char *name)
__u32 info_len = sizeof(info);
int err;
- err = bpf_set_link_xdp_fd(idx, fd, xdp_flags);
+ err = bpf_xdp_attach(idx, fd, xdp_flags, NULL);
if (err < 0) {
printf("ERROR: failed to attach program to %s\n", name);
return err;
@@ -51,13 +51,13 @@ static int do_detach(int idx, const char *name)
__u32 curr_prog_id = 0;
int err = 0;
- err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags);
+ err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id);
if (err) {
- printf("bpf_get_link_xdp_id failed\n");
+ printf("bpf_xdp_query_id failed\n");
return err;
}
if (prog_id == curr_prog_id) {
- err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+ err = bpf_xdp_detach(idx, xdp_flags, NULL);
if (err < 0)
printf("ERROR: failed to detach prog from %s\n", name);
} else if (!curr_prog_id) {
diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c
index 8740838e7767..c4332d068b91 100644
--- a/samples/bpf/xdp_sample_user.c
+++ b/samples/bpf/xdp_sample_user.c
@@ -1218,7 +1218,7 @@ int sample_setup_maps(struct bpf_map **maps)
default:
return -EINVAL;
}
- if (bpf_map__resize(sample_map[i], sample_map_count[i]) < 0)
+ if (bpf_map__set_max_entries(sample_map[i], sample_map_count[i]) < 0)
return -errno;
}
sample_map[MAP_DEVMAP_XMIT_MULTI] = maps[MAP_DEVMAP_XMIT_MULTI];
@@ -1265,7 +1265,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
int ret;
if (prog_id) {
- ret = bpf_get_link_xdp_id(ifindex, &cur_prog_id, xdp_flags);
+ ret = bpf_xdp_query_id(ifindex, xdp_flags, &cur_prog_id);
if (ret < 0)
return -errno;
@@ -1278,7 +1278,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
}
}
- return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ return bpf_xdp_detach(ifindex, xdp_flags, NULL);
}
int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
@@ -1295,8 +1295,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0;
xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
- ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog),
- xdp_flags);
+ ret = bpf_xdp_attach(ifindex, bpf_program__fd(xdp_prog), xdp_flags, NULL);
if (ret < 0) {
ret = -errno;
fprintf(stderr,
@@ -1308,7 +1307,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
return ret;
}
- ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags);
+ ret = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id);
if (ret < 0) {
ret = -errno;
fprintf(stderr,
diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h
index 5f44b877ecf5..f45051679977 100644
--- a/samples/bpf/xdp_sample_user.h
+++ b/samples/bpf/xdp_sample_user.h
@@ -61,7 +61,7 @@ static inline char *safe_strncpy(char *dst, const char *src, size_t size)
#define __attach_tp(name) \
({ \
- if (!bpf_program__is_tracing(skel->progs.name)) \
+ if (bpf_program__type(skel->progs.name) != BPF_PROG_TYPE_TRACING)\
return -EINVAL; \
skel->links.name = bpf_program__attach(skel->progs.name); \
if (!skel->links.name) \
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index 1d4f305d02aa..2e811e4331cc 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -32,12 +32,12 @@ static void int_exit(int sig)
__u32 curr_prog_id = 0;
if (ifindex > -1) {
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given iface\n");
else
@@ -152,9 +152,6 @@ static int parse_ports(const char *port_str, int *min_port, int *max_port)
int main(int argc, char **argv)
{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
int min_port = 0, max_port = 0, vip2tnl_map_fd;
const char *optstr = "i:a:p:s:d:m:T:P:FSNh";
unsigned char opt_flags[256] = {};
@@ -162,6 +159,7 @@ int main(int argc, char **argv)
__u32 info_len = sizeof(info);
unsigned int kill_after_s = 0;
struct iptnl_info tnl = {};
+ struct bpf_program *prog;
struct bpf_object *obj;
struct vip vip = {};
char filename[256];
@@ -259,15 +257,20 @@ int main(int argc, char **argv)
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
return 1;
- if (!prog_fd) {
- printf("bpf_prog_load_xattr: %s\n", strerror(errno));
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
+
+ err = bpf_object__load(obj);
+ if (err) {
+ printf("bpf_object__load(): %s\n", strerror(errno));
return 1;
}
+ prog_fd = bpf_program__fd(prog);
rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
vip2tnl_map_fd = bpf_object__find_map_fd_by_name(obj, "vip2tnl");
@@ -288,7 +291,7 @@ int main(int argc, char **argv)
}
}
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
@@ -302,7 +305,7 @@ int main(int argc, char **argv)
poll_stats(kill_after_s);
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
return 0;
}
diff --git a/samples/bpf/xdpsock_ctrl_proc.c b/samples/bpf/xdpsock_ctrl_proc.c
index cc4408797ab7..28b5f2a9fa08 100644
--- a/samples/bpf/xdpsock_ctrl_proc.c
+++ b/samples/bpf/xdpsock_ctrl_proc.c
@@ -173,7 +173,7 @@ main(int argc, char **argv)
unlink(SOCKET_NAME);
/* Unset fd for given ifindex */
- err = bpf_set_link_xdp_fd(ifindex, -1, 0);
+ err = bpf_xdp_detach(ifindex, 0, NULL);
if (err) {
fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex);
return err;
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index aa50864e4415..19288a2bbc75 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -571,13 +571,13 @@ static void remove_xdp_program(void)
{
u32 curr_prog_id = 0;
- if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(EXIT_FAILURE);
}
if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
+ bpf_xdp_detach(opt_ifindex, opt_xdp_flags, NULL);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given interface\n");
else
@@ -1027,7 +1027,7 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
if (ret)
exit_with_error(-ret);
- ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
+ ret = bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &prog_id);
if (ret)
exit_with_error(-ret);
@@ -1760,7 +1760,7 @@ static void load_xdp_program(char **argv, struct bpf_object **obj)
exit(EXIT_FAILURE);
}
- if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
+ if (bpf_xdp_attach(opt_ifindex, prog_fd, opt_xdp_flags, NULL) < 0) {
fprintf(stderr, "ERROR: link set xdp fd failed\n");
exit(EXIT_FAILURE);
}
diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c
index 52e7c4ffd228..2220509588a0 100644
--- a/samples/bpf/xsk_fwd.c
+++ b/samples/bpf/xsk_fwd.c
@@ -974,8 +974,8 @@ static void remove_xdp_program(void)
int i;
for (i = 0 ; i < n_ports; i++)
- bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1,
- port_params[i].xsk_cfg.xdp_flags);
+ bpf_xdp_detach(if_nametoindex(port_params[i].iface),
+ port_params[i].xsk_cfg.xdp_flags, NULL);
}
int main(int argc, char **argv)
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index a6403ddf5de7..096625242475 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -87,21 +87,25 @@ class HeaderParser(object):
self.line = ''
self.helpers = []
self.commands = []
+ self.desc_unique_helpers = set()
+ self.define_unique_helpers = []
+ self.desc_syscalls = []
+ self.enum_syscalls = []
def parse_element(self):
proto = self.parse_symbol()
- desc = self.parse_desc()
- ret = self.parse_ret()
+ desc = self.parse_desc(proto)
+ ret = self.parse_ret(proto)
return APIElement(proto=proto, desc=desc, ret=ret)
def parse_helper(self):
proto = self.parse_proto()
- desc = self.parse_desc()
- ret = self.parse_ret()
+ desc = self.parse_desc(proto)
+ ret = self.parse_ret(proto)
return Helper(proto=proto, desc=desc, ret=ret)
def parse_symbol(self):
- p = re.compile(' \* ?(.+)$')
+ p = re.compile(' \* ?(BPF\w+)$')
capture = p.match(self.line)
if not capture:
raise NoSyscallCommandFound
@@ -127,16 +131,15 @@ class HeaderParser(object):
self.line = self.reader.readline()
return capture.group(1)
- def parse_desc(self):
+ def parse_desc(self, proto):
p = re.compile(' \* ?(?:\t| {5,8})Description$')
capture = p.match(self.line)
if not capture:
- # Helper can have empty description and we might be parsing another
- # attribute: return but do not consume.
- return ''
+ raise Exception("No description section found for " + proto)
# Description can be several lines, some of them possibly empty, and it
# stops when another subsection title is met.
desc = ''
+ desc_present = False
while True:
self.line = self.reader.readline()
if self.line == ' *\n':
@@ -145,21 +148,24 @@ class HeaderParser(object):
p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line)
if capture:
+ desc_present = True
desc += capture.group(1) + '\n'
else:
break
+
+ if not desc_present:
+ raise Exception("No description found for " + proto)
return desc
- def parse_ret(self):
+ def parse_ret(self, proto):
p = re.compile(' \* ?(?:\t| {5,8})Return$')
capture = p.match(self.line)
if not capture:
- # Helper can have empty retval and we might be parsing another
- # attribute: return but do not consume.
- return ''
+ raise Exception("No return section found for " + proto)
# Return value description can be several lines, some of them possibly
# empty, and it stops when another subsection title is met.
ret = ''
+ ret_present = False
while True:
self.line = self.reader.readline()
if self.line == ' *\n':
@@ -168,44 +174,101 @@ class HeaderParser(object):
p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line)
if capture:
+ ret_present = True
ret += capture.group(1) + '\n'
else:
break
+
+ if not ret_present:
+ raise Exception("No return found for " + proto)
return ret
- def seek_to(self, target, help_message):
+ def seek_to(self, target, help_message, discard_lines = 1):
self.reader.seek(0)
offset = self.reader.read().find(target)
if offset == -1:
raise Exception(help_message)
self.reader.seek(offset)
self.reader.readline()
- self.reader.readline()
+ for _ in range(discard_lines):
+ self.reader.readline()
self.line = self.reader.readline()
- def parse_syscall(self):
+ def parse_desc_syscall(self):
self.seek_to('* DOC: eBPF Syscall Commands',
'Could not find start of eBPF syscall descriptions list')
while True:
try:
command = self.parse_element()
self.commands.append(command)
+ self.desc_syscalls.append(command.proto)
+
except NoSyscallCommandFound:
break
- def parse_helpers(self):
+ def parse_enum_syscall(self):
+ self.seek_to('enum bpf_cmd {',
+ 'Could not find start of bpf_cmd enum', 0)
+ # Searches for either one or more BPF\w+ enums
+ bpf_p = re.compile('\s*(BPF\w+)+')
+ # Searches for an enum entry assigned to another entry,
+ # for e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN, which is
+ # not documented hence should be skipped in check to
+ # determine if the right number of syscalls are documented
+ assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)')
+ bpf_cmd_str = ''
+ while True:
+ capture = assign_p.match(self.line)
+ if capture:
+ # Skip line if an enum entry is assigned to another entry
+ self.line = self.reader.readline()
+ continue
+ capture = bpf_p.match(self.line)
+ if capture:
+ bpf_cmd_str += self.line
+ else:
+ break
+ self.line = self.reader.readline()
+ # Find the number of occurences of BPF\w+
+ self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str)
+
+ def parse_desc_helpers(self):
self.seek_to('* Start of BPF helper function descriptions:',
'Could not find start of eBPF helper descriptions list')
while True:
try:
helper = self.parse_helper()
self.helpers.append(helper)
+ proto = helper.proto_break_down()
+ self.desc_unique_helpers.add(proto['name'])
except NoHelperFound:
break
+ def parse_define_helpers(self):
+ # Parse the number of FN(...) in #define __BPF_FUNC_MAPPER to compare
+ # later with the number of unique function names present in description.
+ # Note: seek_to(..) discards the first line below the target search text,
+ # resulting in FN(unspec) being skipped and not added to self.define_unique_helpers.
+ self.seek_to('#define __BPF_FUNC_MAPPER(FN)',
+ 'Could not find start of eBPF helper definition list')
+ # Searches for either one or more FN(\w+) defines or a backslash for newline
+ p = re.compile('\s*(FN\(\w+\))+|\\\\')
+ fn_defines_str = ''
+ while True:
+ capture = p.match(self.line)
+ if capture:
+ fn_defines_str += self.line
+ else:
+ break
+ self.line = self.reader.readline()
+ # Find the number of occurences of FN(\w+)
+ self.define_unique_helpers = re.findall('FN\(\w+\)', fn_defines_str)
+
def run(self):
- self.parse_syscall()
- self.parse_helpers()
+ self.parse_desc_syscall()
+ self.parse_enum_syscall()
+ self.parse_desc_helpers()
+ self.parse_define_helpers()
self.reader.close()
###############################################################################
@@ -235,6 +298,25 @@ class Printer(object):
self.print_one(elem)
self.print_footer()
+ def elem_number_check(self, desc_unique_elem, define_unique_elem, type, instance):
+ """
+ Checks the number of helpers/syscalls documented within the header file
+ description with those defined as part of enum/macro and raise an
+ Exception if they don't match.
+ """
+ nr_desc_unique_elem = len(desc_unique_elem)
+ nr_define_unique_elem = len(define_unique_elem)
+ if nr_desc_unique_elem != nr_define_unique_elem:
+ exception_msg = '''
+The number of unique %s in description (%d) doesn\'t match the number of unique %s defined in %s (%d)
+''' % (type, nr_desc_unique_elem, type, instance, nr_define_unique_elem)
+ if nr_desc_unique_elem < nr_define_unique_elem:
+ # Function description is parsed until no helper is found (which can be due to
+ # misformatting). Hence, only print the first missing/misformatted helper/enum.
+ exception_msg += '''
+The description for %s is not present or formatted correctly.
+''' % (define_unique_elem[nr_desc_unique_elem])
+ raise Exception(exception_msg)
class PrinterRST(Printer):
"""
@@ -295,7 +377,6 @@ class PrinterRST(Printer):
print('')
-
class PrinterHelpersRST(PrinterRST):
"""
A printer for dumping collected information about helpers as a ReStructured
@@ -305,6 +386,7 @@ class PrinterHelpersRST(PrinterRST):
"""
def __init__(self, parser):
self.elements = parser.helpers
+ self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER')
def print_header(self):
header = '''\
@@ -478,6 +560,7 @@ class PrinterSyscallRST(PrinterRST):
"""
def __init__(self, parser):
self.elements = parser.commands
+ self.elem_number_check(parser.desc_syscalls, parser.enum_syscalls, 'syscall', 'bpf_cmd')
def print_header(self):
header = '''\
@@ -509,6 +592,7 @@ class PrinterHelpers(Printer):
"""
def __init__(self, parser):
self.elements = parser.helpers
+ self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER')
type_fwds = [
'struct bpf_fib_lookup',
diff --git a/scripts/pahole-flags.sh b/scripts/pahole-flags.sh
index e6093adf4c06..c293941612e7 100755
--- a/scripts/pahole-flags.sh
+++ b/scripts/pahole-flags.sh
@@ -7,7 +7,7 @@ if ! [ -x "$(command -v ${PAHOLE})" ]; then
exit 0
fi
-pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/')
+pahole_ver=$($(dirname $0)/pahole-version.sh ${PAHOLE})
if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then
# pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars
diff --git a/scripts/pahole-version.sh b/scripts/pahole-version.sh
new file mode 100755
index 000000000000..f8a32ab93ad1
--- /dev/null
+++ b/scripts/pahole-version.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Usage: $ ./pahole-version.sh pahole
+#
+# Prints pahole's version in a 3-digit form, such as 119 for v1.19.
+
+if [ ! -x "$(command -v "$@")" ]; then
+ echo 0
+ exit 1
+fi
+
+"$@" --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 842889f3dcb7..a9f8c63a96d1 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -838,7 +838,7 @@ int devcgroup_check_permission(short type, u32 major, u32 minor, short access)
int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access);
if (rc)
- return -EPERM;
+ return rc;
#ifdef CONFIG_CGROUP_DEVICE
return devcgroup_legacy_check_permission(type, major, minor, access);
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 59833125ac0a..a2c665beda87 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -902,7 +902,7 @@ static int do_show(int argc, char **argv)
equal_fn_for_key_as_id, NULL);
btf_map_table = hashmap__new(hash_fn_for_key_as_id,
equal_fn_for_key_as_id, NULL);
- if (!btf_prog_table || !btf_map_table) {
+ if (IS_ERR(btf_prog_table) || IS_ERR(btf_map_table)) {
hashmap__free(btf_prog_table);
hashmap__free(btf_map_table);
if (fd >= 0)
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 3571a281c43f..effe136119d7 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -50,6 +50,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
const char *attach_flags_str,
int level)
{
+ char prog_name[MAX_PROG_FULL_NAME];
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
int prog_fd;
@@ -63,6 +64,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
return -1;
}
+ get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name));
if (json_output) {
jsonw_start_object(json_wtr);
jsonw_uint_field(json_wtr, "id", info.id);
@@ -73,7 +75,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
jsonw_uint_field(json_wtr, "attach_type", attach_type);
jsonw_string_field(json_wtr, "attach_flags",
attach_flags_str);
- jsonw_string_field(json_wtr, "name", info.name);
+ jsonw_string_field(json_wtr, "name", prog_name);
jsonw_end_object(json_wtr);
} else {
printf("%s%-8u ", level ? " " : "", info.id);
@@ -81,7 +83,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
printf("%-15s", attach_type_name[attach_type]);
else
printf("type %-10u", attach_type);
- printf(" %-15s %-15s\n", attach_flags_str, info.name);
+ printf(" %-15s %-15s\n", attach_flags_str, prog_name);
}
close(prog_fd);
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index fa8eb8134344..606743c6db41 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -24,6 +24,7 @@
#include <bpf/bpf.h>
#include <bpf/hashmap.h>
#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
+#include <bpf/btf.h>
#include "main.h"
@@ -304,6 +305,49 @@ const char *get_fd_type_name(enum bpf_obj_type type)
return names[type];
}
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+ char *name_buff, size_t buff_len)
+{
+ const char *prog_name = prog_info->name;
+ const struct btf_type *func_type;
+ const struct bpf_func_info finfo = {};
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ struct btf *prog_btf = NULL;
+
+ if (buff_len <= BPF_OBJ_NAME_LEN ||
+ strlen(prog_info->name) < BPF_OBJ_NAME_LEN - 1)
+ goto copy_name;
+
+ if (!prog_info->btf_id || prog_info->nr_func_info == 0)
+ goto copy_name;
+
+ info.nr_func_info = 1;
+ info.func_info_rec_size = prog_info->func_info_rec_size;
+ if (info.func_info_rec_size > sizeof(finfo))
+ info.func_info_rec_size = sizeof(finfo);
+ info.func_info = ptr_to_u64(&finfo);
+
+ if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len))
+ goto copy_name;
+
+ prog_btf = btf__load_from_kernel_by_id(info.btf_id);
+ if (!prog_btf)
+ goto copy_name;
+
+ func_type = btf__type_by_id(prog_btf, finfo.type_id);
+ if (!func_type || !btf_is_func(func_type))
+ goto copy_name;
+
+ prog_name = btf__name_by_offset(prog_btf, func_type->name_off);
+
+copy_name:
+ snprintf(name_buff, buff_len, "%s", prog_name);
+
+ if (prog_btf)
+ btf__free(prog_btf);
+}
+
int get_fd_type(int fd)
{
char path[PATH_MAX];
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index e999159fa28d..9c894b1447de 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -487,17 +487,12 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
size_t maxlen;
bool res;
- if (ifindex)
- /* Only test offload-able program types */
- switch (prog_type) {
- case BPF_PROG_TYPE_SCHED_CLS:
- case BPF_PROG_TYPE_XDP:
- break;
- default:
- return;
- }
+ if (ifindex) {
+ p_info("BPF offload feature probing is not supported");
+ return;
+ }
- res = bpf_probe_prog_type(prog_type, ifindex);
+ res = libbpf_probe_bpf_prog_type(prog_type, NULL);
#ifdef USE_LIBCAP
/* Probe may succeed even if program load fails, for unprivileged users
* check that we did not fail because of insufficient permissions
@@ -535,7 +530,12 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
size_t maxlen;
bool res;
- res = bpf_probe_map_type(map_type, ifindex);
+ if (ifindex) {
+ p_info("BPF offload feature probing is not supported");
+ return;
+ }
+
+ res = libbpf_probe_bpf_map_type(map_type, NULL);
/* Probe result depends on the success of map creation, no additional
* check required for unprivileged users
@@ -567,7 +567,12 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
bool res = false;
if (supported_type) {
- res = bpf_probe_helper(id, prog_type, ifindex);
+ if (ifindex) {
+ p_info("BPF offload feature probing is not supported");
+ return;
+ }
+
+ res = libbpf_probe_bpf_helper(prog_type, id, NULL);
#ifdef USE_LIBCAP
/* Probe may succeed even if program load fails, for
* unprivileged users check that we did not fail because of
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index b4695df2ea3d..eacfc6a2060d 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -227,7 +227,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
/* only generate definitions for memory-mapped internal maps */
if (!bpf_map__is_internal(map))
continue;
- if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
continue;
if (!get_map_ident(map, map_ident, sizeof(map_ident)))
@@ -378,13 +378,16 @@ static void codegen_attach_detach(struct bpf_object *obj, const char *obj_name)
int prog_fd = skel->progs.%2$s.prog_fd; \n\
", obj_name, bpf_program__name(prog));
- switch (bpf_program__get_type(prog)) {
+ switch (bpf_program__type(prog)) {
case BPF_PROG_TYPE_RAW_TRACEPOINT:
tp_name = strchr(bpf_program__section_name(prog), '/') + 1;
- printf("\tint fd = bpf_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name);
+ printf("\tint fd = skel_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name);
break;
case BPF_PROG_TYPE_TRACING:
- printf("\tint fd = bpf_raw_tracepoint_open(NULL, prog_fd);\n");
+ if (bpf_program__expected_attach_type(prog) == BPF_TRACE_ITER)
+ printf("\tint fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER);\n");
+ else
+ printf("\tint fd = skel_raw_tracepoint_open(NULL, prog_fd);\n");
break;
default:
printf("\tint fd = ((void)prog_fd, 0); /* auto-attach not supported */\n");
@@ -468,7 +471,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
if (!get_map_ident(map, ident, sizeof(ident)))
continue;
if (bpf_map__is_internal(map) &&
- (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ (bpf_map__map_flags(map) & BPF_F_MMAPABLE))
printf("\tmunmap(skel->%1$s, %2$zd);\n",
ident, bpf_map_mmap_sz(map));
codegen("\
@@ -536,7 +539,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
continue;
if (!bpf_map__is_internal(map) ||
- !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
continue;
codegen("\
@@ -600,10 +603,10 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
continue;
if (!bpf_map__is_internal(map) ||
- !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
continue;
- if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG)
+ if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG)
mmap_flags = "PROT_READ";
else
mmap_flags = "PROT_READ | PROT_WRITE";
@@ -927,7 +930,6 @@ static int do_skeleton(int argc, char **argv)
s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
if (!s) \n\
goto err; \n\
- obj->skeleton = s; \n\
\n\
s->sz = sizeof(*s); \n\
s->name = \"%1$s\"; \n\
@@ -962,7 +964,7 @@ static int do_skeleton(int argc, char **argv)
i, bpf_map__name(map), i, ident);
/* memory-mapped internal maps */
if (bpf_map__is_internal(map) &&
- (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) {
+ (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) {
printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
i, ident);
}
@@ -1000,6 +1002,7 @@ static int do_skeleton(int argc, char **argv)
\n\
s->data = (void *)%2$s__elf_bytes(&s->data_sz); \n\
\n\
+ obj->skeleton = s; \n\
return 0; \n\
err: \n\
bpf_object__destroy_skeleton(s); \n\
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 2c258db0d352..97dec81950e5 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2020 Facebook */
#include <errno.h>
+#include <linux/err.h>
#include <net/if.h>
#include <stdio.h>
#include <unistd.h>
@@ -306,7 +307,7 @@ static int do_show(int argc, char **argv)
if (show_pinned) {
link_table = hashmap__new(hash_fn_for_key_as_id,
equal_fn_for_key_as_id, NULL);
- if (!link_table) {
+ if (IS_ERR(link_table)) {
p_err("failed to create hashmap for pinned paths");
return -1;
}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 020e91a542d5..490f7bd54e4c 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -478,7 +478,11 @@ int main(int argc, char **argv)
}
if (!legacy_libbpf) {
- ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ /* Allow legacy map definitions for skeleton generation.
+ * It will still be rejected if users use LIBBPF_STRICT_ALL
+ * mode for loading generated skeleton.
+ */
+ ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS);
if (ret)
p_err("failed to enable libbpf strict mode: %d", ret);
}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 8d76d937a62b..0c3840596b5a 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -140,6 +140,10 @@ struct cmd {
int cmd_select(const struct cmd *cmds, int argc, char **argv,
int (*help)(int argc, char **argv));
+#define MAX_PROG_FULL_NAME 128
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+ char *name_buff, size_t buff_len);
+
int get_fd_type(int fd);
const char *get_fd_type_name(enum bpf_obj_type type);
char *get_fdinfo(int fd, const char *key);
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index cc530a229812..c66a3c979b7a 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -699,7 +699,7 @@ static int do_show(int argc, char **argv)
if (show_pinned) {
map_table = hashmap__new(hash_fn_for_key_as_id,
equal_fn_for_key_as_id, NULL);
- if (!map_table) {
+ if (IS_ERR(map_table)) {
p_err("failed to create hashmap for pinned paths");
return -1;
}
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index 649053704bd7..526a332c48e6 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -551,7 +551,7 @@ static int do_attach_detach_xdp(int progfd, enum net_attach_type attach_type,
if (attach_type == NET_ATTACH_TYPE_XDP_OFFLOAD)
flags |= XDP_FLAGS_HW_MODE;
- return bpf_set_link_xdp_fd(ifindex, progfd, flags);
+ return bpf_xdp_attach(ifindex, progfd, flags, NULL);
}
static int do_attach(int argc, char **argv)
diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index 56b598eee043..7c384d10e95f 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2020 Facebook */
#include <errno.h>
+#include <linux/err.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
@@ -101,7 +102,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type)
libbpf_print_fn_t default_print;
*map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL);
- if (!*map) {
+ if (IS_ERR(*map)) {
p_err("failed to create hashmap for PID references");
return -1;
}
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 2a21d50516bc..92a6f679ef7d 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -424,8 +424,10 @@ out_free:
free(value);
}
-static void print_prog_header_json(struct bpf_prog_info *info)
+static void print_prog_header_json(struct bpf_prog_info *info, int fd)
{
+ char prog_name[MAX_PROG_FULL_NAME];
+
jsonw_uint_field(json_wtr, "id", info->id);
if (info->type < ARRAY_SIZE(prog_type_name))
jsonw_string_field(json_wtr, "type",
@@ -433,8 +435,10 @@ static void print_prog_header_json(struct bpf_prog_info *info)
else
jsonw_uint_field(json_wtr, "type", info->type);
- if (*info->name)
- jsonw_string_field(json_wtr, "name", info->name);
+ if (*info->name) {
+ get_prog_full_name(info, fd, prog_name, sizeof(prog_name));
+ jsonw_string_field(json_wtr, "name", prog_name);
+ }
jsonw_name(json_wtr, "tag");
jsonw_printf(json_wtr, "\"" BPF_TAG_FMT "\"",
@@ -455,7 +459,7 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
char *memlock;
jsonw_start_object(json_wtr);
- print_prog_header_json(info);
+ print_prog_header_json(info, fd);
print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
if (info->load_time) {
@@ -507,16 +511,20 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
jsonw_end_object(json_wtr);
}
-static void print_prog_header_plain(struct bpf_prog_info *info)
+static void print_prog_header_plain(struct bpf_prog_info *info, int fd)
{
+ char prog_name[MAX_PROG_FULL_NAME];
+
printf("%u: ", info->id);
if (info->type < ARRAY_SIZE(prog_type_name))
printf("%s ", prog_type_name[info->type]);
else
printf("type %u ", info->type);
- if (*info->name)
- printf("name %s ", info->name);
+ if (*info->name) {
+ get_prog_full_name(info, fd, prog_name, sizeof(prog_name));
+ printf("name %s ", prog_name);
+ }
printf("tag ");
fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
@@ -534,7 +542,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
{
char *memlock;
- print_prog_header_plain(info);
+ print_prog_header_plain(info, fd);
if (info->load_time) {
char buf[32];
@@ -641,7 +649,7 @@ static int do_show(int argc, char **argv)
if (show_pinned) {
prog_table = hashmap__new(hash_fn_for_key_as_id,
equal_fn_for_key_as_id, NULL);
- if (!prog_table) {
+ if (IS_ERR(prog_table)) {
p_err("failed to create hashmap for pinned paths");
return -1;
}
@@ -972,10 +980,10 @@ static int do_dump(int argc, char **argv)
if (json_output && nb_fds > 1) {
jsonw_start_object(json_wtr); /* prog object */
- print_prog_header_json(&info);
+ print_prog_header_json(&info, fds[i]);
jsonw_name(json_wtr, "insns");
} else if (nb_fds > 1) {
- print_prog_header_plain(&info);
+ print_prog_header_plain(&info, fds[i]);
}
err = prog_dump(&info, mode, filepath, opcodes, visual, linum);
@@ -1264,12 +1272,12 @@ static int do_run(int argc, char **argv)
{
char *data_fname_in = NULL, *data_fname_out = NULL;
char *ctx_fname_in = NULL, *ctx_fname_out = NULL;
- struct bpf_prog_test_run_attr test_attr = {0};
const unsigned int default_size = SZ_32K;
void *data_in = NULL, *data_out = NULL;
void *ctx_in = NULL, *ctx_out = NULL;
unsigned int repeat = 1;
int fd, err;
+ LIBBPF_OPTS(bpf_test_run_opts, test_attr);
if (!REQ_ARGS(4))
return -1;
@@ -1387,14 +1395,13 @@ static int do_run(int argc, char **argv)
goto free_ctx_in;
}
- test_attr.prog_fd = fd;
test_attr.repeat = repeat;
test_attr.data_in = data_in;
test_attr.data_out = data_out;
test_attr.ctx_in = ctx_in;
test_attr.ctx_out = ctx_out;
- err = bpf_prog_test_run_xattr(&test_attr);
+ err = bpf_prog_test_run_opts(fd, &test_attr);
if (err) {
p_err("failed to run program: %s", strerror(errno));
goto free_ctx_out;
@@ -2275,10 +2282,10 @@ static int do_profile(int argc, char **argv)
profile_obj->rodata->num_metric = num_metric;
/* adjust map sizes */
- bpf_map__resize(profile_obj->maps.events, num_metric * num_cpu);
- bpf_map__resize(profile_obj->maps.fentry_readings, num_metric);
- bpf_map__resize(profile_obj->maps.accum_readings, num_metric);
- bpf_map__resize(profile_obj->maps.counts, 1);
+ bpf_map__set_max_entries(profile_obj->maps.events, num_metric * num_cpu);
+ bpf_map__set_max_entries(profile_obj->maps.fentry_readings, num_metric);
+ bpf_map__set_max_entries(profile_obj->maps.accum_readings, num_metric);
+ bpf_map__set_max_entries(profile_obj->maps.counts, 1);
/* change target name */
profile_tgt_name = profile_target_name(profile_tgt_fd);
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index 2f693b082bdb..e08a6ff2866c 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -480,7 +480,6 @@ static int do_unregister(int argc, char **argv)
static int do_register(int argc, char **argv)
{
LIBBPF_OPTS(bpf_object_open_opts, open_opts);
- const struct bpf_map_def *def;
struct bpf_map_info info = {};
__u32 info_len = sizeof(info);
int nr_errs = 0, nr_maps = 0;
@@ -510,8 +509,7 @@ static int do_register(int argc, char **argv)
}
bpf_object__for_each_map(map, obj) {
- def = bpf_map__def(map);
- if (def->type != BPF_MAP_TYPE_STRUCT_OPS)
+ if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS)
continue;
link = bpf_map__attach_struct_ops(map);
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index 320a88ac28c9..19a3112e271a 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -24,6 +24,8 @@ LD = $(HOSTLD)
ARCH = $(HOSTARCH)
RM ?= rm
CROSS_COMPILE =
+CFLAGS := $(KBUILD_HOSTCFLAGS)
+LDFLAGS := $(KBUILD_HOSTLDFLAGS)
OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
@@ -51,10 +53,10 @@ $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd
$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT)
$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT) \
- DESTDIR=$(LIBBPF_DESTDIR) prefix= \
+ DESTDIR=$(LIBBPF_DESTDIR) prefix= EXTRA_CFLAGS="$(CFLAGS)" \
$(abspath $@) install_headers
-CFLAGS := -g \
+CFLAGS += -g \
-I$(srctree)/tools/include \
-I$(srctree)/tools/include/uapi \
-I$(LIBBPF_INCLUDE) \
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b0383d371b9a..afe3d0d7f5f2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -330,6 +330,8 @@ union bpf_iter_link_info {
* *ctx_out*, *data_in* and *data_out* must be NULL.
* *repeat* must be zero.
*
+ * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
@@ -1111,6 +1113,11 @@ enum bpf_link_type {
*/
#define BPF_F_SLEEPABLE (1U << 4)
+/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program
+ * fully support xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS (1U << 5)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
@@ -1775,6 +1782,8 @@ union bpf_attr {
* 0 on success, or a negative error in case of failure.
*
* u64 bpf_get_current_pid_tgid(void)
+ * Description
+ * Get the current pid and tgid.
* Return
* A 64-bit integer containing the current tgid and pid, and
* created as such:
@@ -1782,6 +1791,8 @@ union bpf_attr {
* *current_task*\ **->pid**.
*
* u64 bpf_get_current_uid_gid(void)
+ * Description
+ * Get the current uid and gid.
* Return
* A 64-bit integer containing the current GID and UID, and
* created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2256,6 +2267,8 @@ union bpf_attr {
* The 32-bit hash.
*
* u64 bpf_get_current_task(void)
+ * Description
+ * Get the current task.
* Return
* A pointer to the current task struct.
*
@@ -2369,6 +2382,8 @@ union bpf_attr {
* indicate that the hash is outdated and to trigger a
* recalculation the next time the kernel tries to access this
* hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ * Return
+ * void.
*
* long bpf_get_numa_node_id(void)
* Description
@@ -2466,6 +2481,8 @@ union bpf_attr {
* A 8-byte long unique number or 0 if *sk* is NULL.
*
* u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Description
+ * Get the owner UID of the socked associated to *skb*.
* Return
* The owner UID of the socket associated to *skb*. If the socket
* is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -3240,6 +3257,9 @@ union bpf_attr {
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_get_current_cgroup_id(void)
+ * Description
+ * Get the current cgroup id based on the cgroup within which
+ * the current task is running.
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
@@ -5018,6 +5038,54 @@ union bpf_attr {
*
* Return
* The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ * Description
+ * Get the syscall's return value that will be returned to userspace.
+ *
+ * This helper is currently supported by cgroup programs only.
+ * Return
+ * The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ * Description
+ * Set the syscall's return value that will be returned to userspace.
+ *
+ * This helper is currently supported by cgroup programs only.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ * Description
+ * Get the total size of a given xdp buff (linear and paged area)
+ * Return
+ * The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ * Description
+ * This helper is provided as an easy way to load data from a
+ * xdp buffer. It can be used to load *len* bytes from *offset* from
+ * the frame associated to *xdp_md*, into the buffer pointed by
+ * *buf*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ * Description
+ * Store *len* bytes from buffer *buf* into the frame
+ * associated to *xdp_md*, at *offset*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
+ * Description
+ * Read *size* bytes from user space address *user_ptr* in *tsk*'s
+ * address space, and stores the data in *dst*. *flags* is not
+ * used yet and is provided for future extensibility. This helper
+ * can only be used by sleepable programs.
+ * Return
+ * 0 on success, or a negative error in case of failure. On error
+ * *dst* buffer is zeroed out.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5206,6 +5274,12 @@ union bpf_attr {
FN(get_func_arg), \
FN(get_func_ret), \
FN(get_func_arg_cnt), \
+ FN(get_retval), \
+ FN(set_retval), \
+ FN(xdp_get_buff_len), \
+ FN(xdp_load_bytes), \
+ FN(xdp_store_bytes), \
+ FN(copy_from_user_task), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5500,7 +5574,8 @@ struct bpf_sock {
__u32 src_ip4;
__u32 src_ip6[4];
__u32 src_port; /* host byte order */
- __u32 dst_port; /* network byte order */
+ __be16 dst_port; /* network byte order */
+ __u16 :16; /* zero padding */
__u32 dst_ip4;
__u32 dst_ip6[4];
__u32 state;
@@ -6378,7 +6453,8 @@ struct bpf_sk_lookup {
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
__u32 remote_ip4; /* Network byte order */
__u32 remote_ip6[4]; /* Network byte order */
- __u32 remote_port; /* Network byte order */
+ __be16 remote_port; /* Network byte order */
+ __u16 :16; /* Zero padding */
__u32 local_ip4; /* Network byte order */
__u32 local_ip6[4]; /* Network byte order */
__u32 local_port; /* Host byte order */
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index f947b61b2107..b8b37fe76006 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -131,7 +131,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
sort -u | wc -l)
VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
sed 's/\[.*\]//' | \
- awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
@@ -194,7 +194,7 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
sed 's/\[.*\]//' | \
- awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}'| \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
diff -u $(OUTPUT)libbpf_global_syms.tmp \
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 550b4cbb6c99..418b259166f8 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -754,10 +754,10 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
.flags = flags,
);
- return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
+ return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
}
-int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+int bpf_prog_attach_opts(int prog_fd, int target_fd,
enum bpf_attach_type type,
const struct bpf_prog_attach_opts *opts)
{
@@ -778,6 +778,11 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
return libbpf_err_errno(ret);
}
+__attribute__((alias("bpf_prog_attach_opts")))
+int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+ enum bpf_attach_type type,
+ const struct bpf_prog_attach_opts *opts);
+
int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 14e0d97ad2cf..16b21757b8bf 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -391,6 +391,10 @@ struct bpf_prog_attach_opts {
LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
+ enum bpf_attach_type type,
+ const struct bpf_prog_attach_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead")
LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
enum bpf_attach_type type,
const struct bpf_prog_attach_opts *opts);
@@ -449,12 +453,14 @@ struct bpf_prog_test_run_attr {
* out: length of cxt_out */
};
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
/*
* bpf_prog_test_run does not check that data_out is large enough. Consider
- * using bpf_prog_test_run_xattr instead.
+ * using bpf_prog_test_run_opts instead.
*/
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
__u32 size, void *data_out, __u32 *size_out,
__u32 *retval, __u32 *duration);
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 963b1060d944..44df982d2a5c 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -133,7 +133,7 @@ struct bpf_map_def {
unsigned int value_size;
unsigned int max_entries;
unsigned int map_flags;
-};
+} __attribute__((deprecated("use BTF-defined maps in .maps section")));
enum libbpf_pin_type {
LIBBPF_PIN_NONE,
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 90f56b0f585f..e3a8c947e89f 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -76,6 +76,9 @@
#define __PT_RC_REG ax
#define __PT_SP_REG sp
#define __PT_IP_REG ip
+/* syscall uses r10 for PARM4 */
+#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10)
+#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10)
#else
@@ -105,6 +108,9 @@
#define __PT_RC_REG rax
#define __PT_SP_REG rsp
#define __PT_IP_REG rip
+/* syscall uses r10 for PARM4 */
+#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10)
+#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10)
#endif /* __i386__ */
@@ -112,6 +118,10 @@
#elif defined(bpf_target_s390)
+struct pt_regs___s390 {
+ unsigned long orig_gpr2;
+};
+
/* s390 provides user_pt_regs instead of struct pt_regs to userspace */
#define __PT_REGS_CAST(x) ((const user_pt_regs *)(x))
#define __PT_PARM1_REG gprs[2]
@@ -124,6 +134,8 @@
#define __PT_RC_REG gprs[2]
#define __PT_SP_REG gprs[15]
#define __PT_IP_REG psw.addr
+#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
+#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2)
#elif defined(bpf_target_arm)
@@ -140,6 +152,10 @@
#elif defined(bpf_target_arm64)
+struct pt_regs___arm64 {
+ unsigned long orig_x0;
+};
+
/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x))
#define __PT_PARM1_REG regs[0]
@@ -152,6 +168,8 @@
#define __PT_RC_REG regs[0]
#define __PT_SP_REG sp
#define __PT_IP_REG pc
+#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
+#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0)
#elif defined(bpf_target_mips)
@@ -178,6 +196,8 @@
#define __PT_RC_REG gpr[3]
#define __PT_SP_REG sp
#define __PT_IP_REG nip
+/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
#elif defined(bpf_target_sparc)
@@ -206,10 +226,12 @@
#define __PT_PARM4_REG a3
#define __PT_PARM5_REG a4
#define __PT_RET_REG ra
-#define __PT_FP_REG fp
+#define __PT_FP_REG s0
#define __PT_RC_REG a5
#define __PT_SP_REG sp
-#define __PT_IP_REG epc
+#define __PT_IP_REG pc
+/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
#endif
@@ -263,6 +285,26 @@ struct pt_regs;
#endif
+#ifndef PT_REGS_PARM1_SYSCALL
+#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1(x)
+#endif
+#define PT_REGS_PARM2_SYSCALL(x) PT_REGS_PARM2(x)
+#define PT_REGS_PARM3_SYSCALL(x) PT_REGS_PARM3(x)
+#ifndef PT_REGS_PARM4_SYSCALL
+#define PT_REGS_PARM4_SYSCALL(x) PT_REGS_PARM4(x)
+#endif
+#define PT_REGS_PARM5_SYSCALL(x) PT_REGS_PARM5(x)
+
+#ifndef PT_REGS_PARM1_CORE_SYSCALL
+#define PT_REGS_PARM1_CORE_SYSCALL(x) PT_REGS_PARM1_CORE(x)
+#endif
+#define PT_REGS_PARM2_CORE_SYSCALL(x) PT_REGS_PARM2_CORE(x)
+#define PT_REGS_PARM3_CORE_SYSCALL(x) PT_REGS_PARM3_CORE(x)
+#ifndef PT_REGS_PARM4_CORE_SYSCALL
+#define PT_REGS_PARM4_CORE_SYSCALL(x) PT_REGS_PARM4_CORE(x)
+#endif
+#define PT_REGS_PARM5_CORE_SYSCALL(x) PT_REGS_PARM5_CORE(x)
+
#else /* defined(bpf_target_defined) */
#define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
@@ -290,8 +332,30 @@ struct pt_regs;
#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define PT_REGS_PARM1_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
#endif /* defined(bpf_target_defined) */
+/*
+ * When invoked from a syscall handler kprobe, returns a pointer to a
+ * struct pt_regs containing syscall arguments and suitable for passing to
+ * PT_REGS_PARMn_SYSCALL() and PT_REGS_PARMn_CORE_SYSCALL().
+ */
+#ifndef PT_REGS_SYSCALL_REGS
+/* By default, assume that the arch selects ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ((struct pt_regs *)PT_REGS_PARM1(ctx))
+#endif
+
#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
#endif
@@ -406,4 +470,39 @@ typeof(name(0)) name(struct pt_regs *ctx) \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+#define ___bpf_syscall_args0() ctx
+#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for
+ * tracing syscall functions, like __x64_sys_close. It hides the underlying
+ * platform-specific low-level way of getting syscall input arguments from
+ * struct pt_regs, and provides a familiar typed and named function arguments
+ * syntax and semantics of accessing syscall input parameters.
+ *
+ * Original struct pt_regs* context is preserved as 'ctx' argument. This might
+ * be necessary when using BPF helpers like bpf_perf_event_output().
+ *
+ * This macro relies on BPF CO-RE support.
+ */
+#define BPF_KPROBE_SYSCALL(name, args...) \
+name(struct pt_regs *ctx); \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args); \
+typeof(name(0)) name(struct pt_regs *ctx) \
+{ \
+ struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx); \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_syscall_args(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args)
+
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 9aa19c89f758..1383e26c5d1f 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1620,20 +1620,37 @@ static int btf_commit_type(struct btf *btf, int data_sz)
struct btf_pipe {
const struct btf *src;
struct btf *dst;
+ struct hashmap *str_off_map; /* map string offsets from src to dst */
};
static int btf_rewrite_str(__u32 *str_off, void *ctx)
{
struct btf_pipe *p = ctx;
- int off;
+ void *mapped_off;
+ int off, err;
if (!*str_off) /* nothing to do for empty strings */
return 0;
+ if (p->str_off_map &&
+ hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) {
+ *str_off = (__u32)(long)mapped_off;
+ return 0;
+ }
+
off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
if (off < 0)
return off;
+ /* Remember string mapping from src to dst. It avoids
+ * performing expensive string comparisons.
+ */
+ if (p->str_off_map) {
+ err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off);
+ if (err)
+ return err;
+ }
+
*str_off = off;
return 0;
}
@@ -1680,6 +1697,9 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
return 0;
}
+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx);
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx);
+
int btf__add_btf(struct btf *btf, const struct btf *src_btf)
{
struct btf_pipe p = { .src = src_btf, .dst = btf };
@@ -1713,6 +1733,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
if (!off)
return libbpf_err(-ENOMEM);
+ /* Map the string offsets from src_btf to the offsets from btf to improve performance */
+ p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+ if (IS_ERR(p.str_off_map))
+ return libbpf_err(-ENOMEM);
+
/* bulk copy types data for all types from src_btf */
memcpy(t, src_btf->types_data, data_sz);
@@ -1754,6 +1779,8 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
btf->hdr->str_off += data_sz;
btf->nr_types += cnt;
+ hashmap__free(p.str_off_map);
+
/* return type ID of the first added BTF type */
return btf->start_id + btf->nr_types - cnt;
err_out:
@@ -1767,6 +1794,8 @@ err_out:
* wasn't modified, so doesn't need restoring, see big comment above */
btf->hdr->str_len = old_strs_len;
+ hashmap__free(p.str_off_map);
+
return libbpf_err(err);
}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 061839f04525..951ac7475794 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -147,11 +147,10 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
LIBBPF_API int btf__fd(const struct btf *btf);
LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__raw_data() instead")
-LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
+LIBBPF_DEPRECATED_SINCE(0, 7, "this API is not necessary when BTF-defined maps are used")
LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
__u32 expected_key_size,
__u32 expected_value_size,
@@ -159,8 +158,7 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size);
LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
-LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
- __u32 *size);
+LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size);
LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
int btf_ext__reloc_func_info(const struct btf *btf,
const struct btf_ext *btf_ext,
@@ -171,8 +169,10 @@ int btf_ext__reloc_line_info(const struct btf *btf,
const struct btf_ext *btf_ext,
const char *sec_name, __u32 insns_cnt,
void **line_info, __u32 *cnt);
-LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
-LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
+LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info is deprecated; write custom func_info parsing to fetch rec_size")
+__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
+LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info is deprecated; write custom line_info parsing to fetch rec_size")
+__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
@@ -375,8 +375,28 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
const struct btf_dump_type_data_opts *opts);
/*
- * A set of helpers for easier BTF types handling
+ * A set of helpers for easier BTF types handling.
+ *
+ * The inline functions below rely on constants from the kernel headers which
+ * may not be available for applications including this header file. To avoid
+ * compilation errors, we define all the constants here that were added after
+ * the initial introduction of the BTF_KIND* constants.
*/
+#ifndef BTF_KIND_FUNC
+#define BTF_KIND_FUNC 12 /* Function */
+#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
+#endif
+#ifndef BTF_KIND_VAR
+#define BTF_KIND_VAR 14 /* Variable */
+#define BTF_KIND_DATASEC 15 /* Section */
+#endif
+#ifndef BTF_KIND_FLOAT
+#define BTF_KIND_FLOAT 16 /* Floating point */
+#endif
+/* The kernel header switched to enums, so these two were never #defined */
+#define BTF_KIND_DECL_TAG 17 /* Decl Tag */
+#define BTF_KIND_TYPE_TAG 18 /* Type Tag */
+
static inline __u16 btf_kind(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info);
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index b9a3260c83cb..07ebe70d3a30 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -1861,14 +1861,16 @@ static int btf_dump_array_data(struct btf_dump *d,
{
const struct btf_array *array = btf_array(t);
const struct btf_type *elem_type;
- __u32 i, elem_size = 0, elem_type_id;
+ __u32 i, elem_type_id;
+ __s64 elem_size;
bool is_array_member;
elem_type_id = array->type;
elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
elem_size = btf__resolve_size(d->btf, elem_type_id);
if (elem_size <= 0) {
- pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id);
+ pr_warn("unexpected elem size %zd for array type [%u]\n",
+ (ssize_t)elem_size, id);
return -EINVAL;
}
diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
index 3c20b126d60d..aeb09c288716 100644
--- a/tools/lib/bpf/hashmap.c
+++ b/tools/lib/bpf/hashmap.c
@@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map)
void hashmap__free(struct hashmap *map)
{
- if (!map)
+ if (IS_ERR_OR_NULL(map))
return;
hashmap__clear(map);
@@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key,
return true;
}
-
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7f10dd501a52..2262bcdfee92 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -156,14 +156,6 @@ enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;
int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
{
- /* __LIBBPF_STRICT_LAST is the last power-of-2 value used + 1, so to
- * get all possible values we compensate last +1, and then (2*x - 1)
- * to get the bit mask
- */
- if (mode != LIBBPF_STRICT_ALL
- && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1)))
- return errno = EINVAL, -EINVAL;
-
libbpf_mode = mode;
return 0;
}
@@ -235,6 +227,10 @@ enum sec_def_flags {
SEC_SLEEPABLE = 8,
/* allow non-strict prefix matching */
SEC_SLOPPY_PFX = 16,
+ /* BPF program support non-linear XDP buffer */
+ SEC_XDP_FRAGS = 32,
+ /* deprecated sec definitions not supposed to be used */
+ SEC_DEPRECATED = 64,
};
struct bpf_sec_def {
@@ -1937,6 +1933,11 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
if (obj->efile.maps_shndx < 0)
return 0;
+ if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) {
+ pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n");
+ return -EOPNOTSUPP;
+ }
+
if (!symbols)
return -EINVAL;
@@ -1999,6 +2000,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
return -LIBBPF_ERRNO__FORMAT;
}
+ pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name);
+
if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
return -ENOTSUP;
@@ -4190,9 +4193,13 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
return 0;
if (!bpf_map__is_internal(map)) {
+ pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n");
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
def->value_size, &key_type_id,
&value_type_id);
+#pragma GCC diagnostic pop
} else {
/*
* LLVM annotates global data differently in BTF, that is,
@@ -6562,6 +6569,13 @@ static int libbpf_preload_prog(struct bpf_program *prog,
if (def & SEC_SLEEPABLE)
opts->prog_flags |= BPF_F_SLEEPABLE;
+ if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
+ opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
+
+ if (def & SEC_DEPRECATED)
+ pr_warn("SEC(\"%s\") is deprecated, please see https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations for details\n",
+ prog->sec_name);
+
if ((prog->type == BPF_PROG_TYPE_TRACING ||
prog->type == BPF_PROG_TYPE_LSM ||
prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
@@ -7883,10 +7897,8 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
return 0;
}
-const char *bpf_map__get_pin_path(const struct bpf_map *map)
-{
- return map->pin_path;
-}
+__alias(bpf_map__pin_path)
+const char *bpf_map__get_pin_path(const struct bpf_map *map);
const char *bpf_map__pin_path(const struct bpf_map *map)
{
@@ -8451,7 +8463,10 @@ static int bpf_program_nth_fd(const struct bpf_program *prog, int n)
return fd;
}
-enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog)
+__alias(bpf_program__type)
+enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
+
+enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
{
return prog->type;
}
@@ -8495,8 +8510,10 @@ BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
-enum bpf_attach_type
-bpf_program__get_expected_attach_type(const struct bpf_program *prog)
+__alias(bpf_program__expected_attach_type)
+enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
+
+enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
{
return prog->expected_attach_type;
}
@@ -8580,7 +8597,7 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe),
SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE),
SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE),
- SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX),
+ SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED),
SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX),
SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp),
SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp),
@@ -8599,9 +8616,15 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
+ SEC_DEF("iter.s/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
- SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
- SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
+ SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
+ SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
+ SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE | SEC_DEPRECATED),
+ SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
+ SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
+ SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE | SEC_DEPRECATED),
+ SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS),
SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX),
SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX),
@@ -9443,7 +9466,7 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr,
open_attr.file = attr->file;
open_attr.prog_type = attr->prog_type;
- obj = bpf_object__open_xattr(&open_attr);
+ obj = __bpf_object__open_xattr(&open_attr, 0);
err = libbpf_get_error(obj);
if (err)
return libbpf_err(-ENOENT);
@@ -9460,7 +9483,7 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr,
bpf_program__set_expected_attach_type(prog,
attach_type);
}
- if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
+ if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) {
/*
* we haven't guessed from section name and user
* didn't provide a fallback type, too bad...
@@ -9477,7 +9500,7 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr,
}
bpf_object__for_each_map(map, obj) {
- if (!bpf_map__is_offload_neutral(map))
+ if (map->def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
map->map_ifindex = attr->ifindex;
}
@@ -10511,7 +10534,7 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id
return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
- attach_type = bpf_program__get_expected_attach_type(prog);
+ attach_type = bpf_program__expected_attach_type(prog);
link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
if (link_fd < 0) {
link_fd = -errno;
@@ -11795,6 +11818,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
+ if (!s)
+ return;
+
if (s->progs)
bpf_object__detach_skeleton(s);
if (s->obj)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 8b9bc5e90c2b..c8d8daad212e 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -180,9 +180,11 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts);
/* deprecated bpf_object__open variants */
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open_mem() instead")
LIBBPF_API struct bpf_object *
bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
const char *name);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open_file() instead")
LIBBPF_API struct bpf_object *
bpf_object__open_xattr(struct bpf_object_open_attr *attr);
@@ -244,8 +246,10 @@ struct bpf_object *bpf_object__next(struct bpf_object *prev);
(pos) = (tmp), (tmp) = bpf_object__next(tmp))
typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
+LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
bpf_object_clear_priv_t clear_priv);
+LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);
LIBBPF_API int
@@ -277,9 +281,10 @@ bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog)
typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *);
+LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
bpf_program_clear_priv_t clear_priv);
-
+LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
__u32 ifindex);
@@ -591,26 +596,39 @@ LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n);
/*
* Adjust type of BPF program. Default is kprobe.
*/
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
-LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
+LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog);
LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
enum bpf_prog_type type);
LIBBPF_API enum bpf_attach_type
-bpf_program__get_expected_attach_type(const struct bpf_program *prog);
+bpf_program__expected_attach_type(const struct bpf_program *prog);
LIBBPF_API void
bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type);
@@ -631,18 +649,31 @@ LIBBPF_API int
bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
const char *attach_func_name);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
/*
@@ -706,7 +737,8 @@ bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map);
LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
/* get map definition */
-LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead")
+const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
/* get map name */
LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
/* get/set map type */
@@ -715,6 +747,7 @@ LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type);
/* get/set map size (max_entries) */
LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__set_max_entries() instead")
LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
/* get/set map flags */
LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map);
@@ -739,8 +772,10 @@ LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra);
typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
+LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
bpf_map_clear_priv_t clear_priv);
+LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
@@ -757,7 +792,6 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
*/
LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
-LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
@@ -832,13 +866,42 @@ struct bpf_xdp_set_link_opts {
};
#define bpf_xdp_set_link_opts__last_field old_fd
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
const struct bpf_xdp_set_link_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead")
LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead")
LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags);
+struct bpf_xdp_attach_opts {
+ size_t sz;
+ int old_prog_fd;
+ size_t :0;
+};
+#define bpf_xdp_attach_opts__last_field old_prog_fd
+
+struct bpf_xdp_query_opts {
+ size_t sz;
+ __u32 prog_id; /* output */
+ __u32 drv_prog_id; /* output */
+ __u32 hw_prog_id; /* output */
+ __u32 skb_prog_id; /* output */
+ __u8 attach_mode; /* output */
+ size_t :0;
+};
+#define bpf_xdp_query_opts__last_field attach_mode
+
+LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags,
+ const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 flags,
+ const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts);
+LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id);
+
/* TC related API */
enum bpf_tc_attach_point {
BPF_TC_INGRESS = 1 << 0,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 529783967793..aef6253a90c8 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -247,6 +247,7 @@ LIBBPF_0.0.8 {
bpf_link_create;
bpf_link_update;
bpf_map__set_initial_value;
+ bpf_prog_attach_opts;
bpf_program__attach_cgroup;
bpf_program__attach_lsm;
bpf_program__is_lsm;
@@ -423,10 +424,16 @@ LIBBPF_0.6.0 {
LIBBPF_0.7.0 {
global:
bpf_btf_load;
+ bpf_program__expected_attach_type;
bpf_program__log_buf;
bpf_program__log_level;
bpf_program__set_log_buf;
bpf_program__set_log_level;
+ bpf_program__type;
+ bpf_xdp_attach;
+ bpf_xdp_detach;
+ bpf_xdp_query;
+ bpf_xdp_query_id;
libbpf_probe_bpf_helper;
libbpf_probe_bpf_map_type;
libbpf_probe_bpf_prog_type;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 1565679eb432..bc86b82e90d1 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -92,6 +92,9 @@
# define offsetofend(TYPE, FIELD) \
(offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
#endif
+#ifndef __alias
+#define __alias(symbol) __attribute__((alias(#symbol)))
+#endif
/* Check whether a string `str` has prefix `pfx`, regardless if `pfx` is
* a string literal known at compilation time or char * pointer known only at
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
index 79131f761a27..a283cf031665 100644
--- a/tools/lib/bpf/libbpf_legacy.h
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -73,6 +73,11 @@ enum libbpf_strict_mode {
* operation.
*/
LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,
+ /*
+ * Error out on any SEC("maps") map definition, which are deprecated
+ * in favor of BTF-defined map definitions in SEC(".maps").
+ */
+ LIBBPF_STRICT_MAP_DEFINITIONS = 0x20,
__LIBBPF_STRICT_LAST,
};
@@ -81,6 +86,23 @@ LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
#define DECLARE_LIBBPF_OPTS LIBBPF_OPTS
+/* "Discouraged" APIs which don't follow consistent libbpf naming patterns.
+ * They are normally a trivial aliases or wrappers for proper APIs and are
+ * left to minimize unnecessary disruption for users of libbpf. But they
+ * shouldn't be used going forward.
+ */
+
+struct bpf_program;
+struct bpf_map;
+struct btf;
+struct btf_ext;
+
+LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
+LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
+LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
+LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 39f25e09b51e..c39c37f99d5c 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -217,6 +217,28 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
}
+int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+ int old_prog_fd, err;
+
+ if (!OPTS_VALID(opts, bpf_xdp_attach_opts))
+ return libbpf_err(-EINVAL);
+
+ old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+ if (old_prog_fd)
+ flags |= XDP_FLAGS_REPLACE;
+ else
+ old_prog_fd = -1;
+
+ err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags);
+ return libbpf_err(err);
+}
+
+int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+ return bpf_xdp_attach(ifindex, -1, flags, opts);
+}
+
int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
const struct bpf_xdp_set_link_opts *opts)
{
@@ -303,69 +325,98 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
return 0;
}
-int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
- size_t info_size, __u32 flags)
+int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
{
- struct xdp_id_md xdp_id = {};
- __u32 mask;
- int ret;
struct libbpf_nla_req req = {
.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
.nh.nlmsg_type = RTM_GETLINK,
.nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.ifinfo.ifi_family = AF_PACKET,
};
+ struct xdp_id_md xdp_id = {};
+ int err;
- if (flags & ~XDP_FLAGS_MASK || !info_size)
+ if (!OPTS_VALID(opts, bpf_xdp_query_opts))
+ return libbpf_err(-EINVAL);
+
+ if (xdp_flags & ~XDP_FLAGS_MASK)
return libbpf_err(-EINVAL);
/* Check whether the single {HW,DRV,SKB} mode is set */
- flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
- mask = flags - 1;
- if (flags && flags & mask)
+ xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE;
+ if (xdp_flags & (xdp_flags - 1))
return libbpf_err(-EINVAL);
xdp_id.ifindex = ifindex;
- xdp_id.flags = flags;
+ xdp_id.flags = xdp_flags;
- ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
+ err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
get_xdp_info, &xdp_id);
- if (!ret) {
- size_t sz = min(info_size, sizeof(xdp_id.info));
+ if (err)
+ return libbpf_err(err);
- memcpy(info, &xdp_id.info, sz);
- memset((void *) info + sz, 0, info_size - sz);
- }
+ OPTS_SET(opts, prog_id, xdp_id.info.prog_id);
+ OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id);
+ OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id);
+ OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id);
+ OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode);
- return libbpf_err(ret);
+ return 0;
}
-static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
+int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+ size_t info_size, __u32 flags)
{
- flags &= XDP_FLAGS_MODES;
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ size_t sz;
+ int err;
+
+ if (!info_size)
+ return libbpf_err(-EINVAL);
- if (info->attach_mode != XDP_ATTACHED_MULTI && !flags)
- return info->prog_id;
- if (flags & XDP_FLAGS_DRV_MODE)
- return info->drv_prog_id;
- if (flags & XDP_FLAGS_HW_MODE)
- return info->hw_prog_id;
- if (flags & XDP_FLAGS_SKB_MODE)
- return info->skb_prog_id;
+ err = bpf_xdp_query(ifindex, flags, &opts);
+ if (err)
+ return libbpf_err(err);
+
+ /* struct xdp_link_info field layout matches struct bpf_xdp_query_opts
+ * layout after sz field
+ */
+ sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode));
+ memcpy(info, &opts.prog_id, sz);
+ memset((void *)info + sz, 0, info_size - sz);
return 0;
}
-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
{
- struct xdp_link_info info;
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
int ret;
- ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
- if (!ret)
- *prog_id = get_xdp_id(&info, flags);
+ ret = bpf_xdp_query(ifindex, flags, &opts);
+ if (ret)
+ return libbpf_err(ret);
+
+ flags &= XDP_FLAGS_MODES;
- return libbpf_err(ret);
+ if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags)
+ *prog_id = opts.prog_id;
+ else if (flags & XDP_FLAGS_DRV_MODE)
+ *prog_id = opts.drv_prog_id;
+ else if (flags & XDP_FLAGS_HW_MODE)
+ *prog_id = opts.hw_prog_id;
+ else if (flags & XDP_FLAGS_SKB_MODE)
+ *prog_id = opts.skb_prog_id;
+ else
+ *prog_id = 0;
+
+ return 0;
+}
+
+
+int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+{
+ return bpf_xdp_query_id(ifindex, flags, prog_id);
}
typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index 0b84d8e6b72a..dcd3336512d4 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -70,19 +70,85 @@ static inline int skel_closenz(int fd)
return -EINVAL;
}
+#ifndef offsetofend
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
+#endif
+
+static inline int skel_map_create(enum bpf_map_type map_type,
+ const char *map_name,
+ __u32 key_size,
+ __u32 value_size,
+ __u32 max_entries)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+
+ attr.map_type = map_type;
+ strncpy(attr.map_name, map_name, sizeof(attr.map_name));
+ attr.key_size = key_size;
+ attr.value_size = value_size;
+ attr.max_entries = max_entries;
+
+ return skel_sys_bpf(BPF_MAP_CREATE, &attr, attr_sz);
+}
+
+static inline int skel_map_update_elem(int fd, const void *key,
+ const void *value, __u64 flags)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+ attr.map_fd = fd;
+ attr.key = (long) key;
+ attr.value = (long) value;
+ attr.flags = flags;
+
+ return skel_sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
+}
+
+static inline int skel_raw_tracepoint_open(const char *name, int prog_fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint.prog_fd);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+ attr.raw_tracepoint.name = (long) name;
+ attr.raw_tracepoint.prog_fd = prog_fd;
+
+ return skel_sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz);
+}
+
+static inline int skel_link_create(int prog_fd, int target_fd,
+ enum bpf_attach_type attach_type)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, link_create.iter_info_len);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+ attr.link_create.prog_fd = prog_fd;
+ attr.link_create.target_fd = target_fd;
+ attr.link_create.attach_type = attach_type;
+
+ return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz);
+}
+
static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
{
int map_fd = -1, prog_fd = -1, key = 0, err;
union bpf_attr attr;
- map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, NULL);
+ map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
if (map_fd < 0) {
opts->errstr = "failed to create loader map";
err = -errno;
goto out;
}
- err = bpf_map_update_elem(map_fd, &key, opts->data, 0);
+ err = skel_map_update_elem(map_fd, &key, opts->data, 0);
if (err < 0) {
opts->errstr = "failed to update loader map";
err = -errno;
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index 8ac0a3a457ef..0bc25a56cfef 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -13,7 +13,7 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
{
struct bpf_object *obj;
- obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL);
+ obj = bpf_object__open_mem(obj_buf, obj_buf_sz, NULL);
if (libbpf_get_error(obj))
return TEST_FAIL;
bpf_object__close(obj);
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 7ecfaac7536a..3cd5ae200f2c 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -54,6 +54,7 @@ static bool libbpf_initialized;
struct bpf_object *
bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = name);
struct bpf_object *obj;
if (!libbpf_initialized) {
@@ -61,7 +62,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
libbpf_initialized = true;
}
- obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name);
+ obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
if (IS_ERR_OR_NULL(obj)) {
pr_debug("bpf: failed to load buffer\n");
return ERR_PTR(-EINVAL);
@@ -72,6 +73,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
struct bpf_object *bpf__prepare_load(const char *filename, bool source)
{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = filename);
struct bpf_object *obj;
if (!libbpf_initialized) {
@@ -94,7 +96,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
} else
pr_debug("bpf: successful builtin compilation\n");
- obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
+ obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
llvm__dump_obj(filename, obj_buf, obj_buf_sz);
@@ -654,11 +656,11 @@ int bpf__probe(struct bpf_object *obj)
}
if (priv->is_tp) {
- bpf_program__set_tracepoint(prog);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
continue;
}
- bpf_program__set_kprobe(prog);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_KPROBE);
pev = &priv->pev;
err = convert_perf_probe_events(pev, 1);
@@ -1005,24 +1007,22 @@ __bpf_map__config_value(struct bpf_map *map,
{
struct bpf_map_op *op;
const char *map_name = bpf_map__name(map);
- const struct bpf_map_def *def = bpf_map__def(map);
- if (IS_ERR(def)) {
- pr_debug("Unable to get map definition from '%s'\n",
- map_name);
+ if (!map) {
+ pr_debug("Map '%s' is invalid\n", map_name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
- if (def->type != BPF_MAP_TYPE_ARRAY) {
+ if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
}
- if (def->key_size < sizeof(unsigned int)) {
+ if (bpf_map__key_size(map) < sizeof(unsigned int)) {
pr_debug("Map %s has incorrect key size\n", map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
}
- switch (def->value_size) {
+ switch (bpf_map__value_size(map)) {
case 1:
case 2:
case 4:
@@ -1064,7 +1064,6 @@ __bpf_map__config_event(struct bpf_map *map,
struct parse_events_term *term,
struct evlist *evlist)
{
- const struct bpf_map_def *def;
struct bpf_map_op *op;
const char *map_name = bpf_map__name(map);
struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str);
@@ -1075,18 +1074,16 @@ __bpf_map__config_event(struct bpf_map *map,
return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
}
- def = bpf_map__def(map);
- if (IS_ERR(def)) {
- pr_debug("Unable to get map definition from '%s'\n",
- map_name);
- return PTR_ERR(def);
+ if (!map) {
+ pr_debug("Map '%s' is invalid\n", map_name);
+ return PTR_ERR(map);
}
/*
* No need to check key_size and value_size:
* kernel has already checked them.
*/
- if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
@@ -1135,7 +1132,6 @@ config_map_indices_range_check(struct parse_events_term *term,
const char *map_name)
{
struct parse_events_array *array = &term->array;
- const struct bpf_map_def *def;
unsigned int i;
if (!array->nr_ranges)
@@ -1146,10 +1142,8 @@ config_map_indices_range_check(struct parse_events_term *term,
return -BPF_LOADER_ERRNO__INTERNAL;
}
- def = bpf_map__def(map);
- if (IS_ERR(def)) {
- pr_debug("ERROR: Unable to get map definition from '%s'\n",
- map_name);
+ if (!map) {
+ pr_debug("Map '%s' is invalid\n", map_name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -1158,7 +1152,7 @@ config_map_indices_range_check(struct parse_events_term *term,
size_t length = array->ranges[i].length;
unsigned int idx = start + length - 1;
- if (idx >= def->max_entries) {
+ if (idx >= bpf_map__max_entries(map)) {
pr_debug("ERROR: index %d too large\n", idx);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
}
@@ -1251,21 +1245,21 @@ out:
}
typedef int (*map_config_func_t)(const char *name, int map_fd,
- const struct bpf_map_def *pdef,
+ const struct bpf_map *map,
struct bpf_map_op *op,
void *pkey, void *arg);
static int
foreach_key_array_all(map_config_func_t func,
void *arg, const char *name,
- int map_fd, const struct bpf_map_def *pdef,
+ int map_fd, const struct bpf_map *map,
struct bpf_map_op *op)
{
unsigned int i;
int err;
- for (i = 0; i < pdef->max_entries; i++) {
- err = func(name, map_fd, pdef, op, &i, arg);
+ for (i = 0; i < bpf_map__max_entries(map); i++) {
+ err = func(name, map_fd, map, op, &i, arg);
if (err) {
pr_debug("ERROR: failed to insert value to %s[%u]\n",
name, i);
@@ -1278,7 +1272,7 @@ foreach_key_array_all(map_config_func_t func,
static int
foreach_key_array_ranges(map_config_func_t func, void *arg,
const char *name, int map_fd,
- const struct bpf_map_def *pdef,
+ const struct bpf_map *map,
struct bpf_map_op *op)
{
unsigned int i, j;
@@ -1291,7 +1285,7 @@ foreach_key_array_ranges(map_config_func_t func, void *arg,
for (j = 0; j < length; j++) {
unsigned int idx = start + j;
- err = func(name, map_fd, pdef, op, &idx, arg);
+ err = func(name, map_fd, map, op, &idx, arg);
if (err) {
pr_debug("ERROR: failed to insert value to %s[%u]\n",
name, idx);
@@ -1307,9 +1301,8 @@ bpf_map_config_foreach_key(struct bpf_map *map,
map_config_func_t func,
void *arg)
{
- int err, map_fd;
+ int err, map_fd, type;
struct bpf_map_op *op;
- const struct bpf_map_def *def;
const char *name = bpf_map__name(map);
struct bpf_map_priv *priv = bpf_map__priv(map);
@@ -1322,9 +1315,8 @@ bpf_map_config_foreach_key(struct bpf_map *map,
return 0;
}
- def = bpf_map__def(map);
- if (IS_ERR(def)) {
- pr_debug("ERROR: failed to get definition from map %s\n", name);
+ if (!map) {
+ pr_debug("Map '%s' is invalid\n", name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
map_fd = bpf_map__fd(map);
@@ -1333,19 +1325,19 @@ bpf_map_config_foreach_key(struct bpf_map *map,
return map_fd;
}
+ type = bpf_map__type(map);
list_for_each_entry(op, &priv->ops_list, list) {
- switch (def->type) {
+ switch (type) {
case BPF_MAP_TYPE_ARRAY:
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
switch (op->key_type) {
case BPF_MAP_KEY_ALL:
err = foreach_key_array_all(func, arg, name,
- map_fd, def, op);
+ map_fd, map, op);
break;
case BPF_MAP_KEY_RANGES:
err = foreach_key_array_ranges(func, arg, name,
- map_fd, def,
- op);
+ map_fd, map, op);
break;
default:
pr_debug("ERROR: keytype for map '%s' invalid\n",
@@ -1454,7 +1446,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
static int
apply_obj_config_map_for_key(const char *name, int map_fd,
- const struct bpf_map_def *pdef,
+ const struct bpf_map *map,
struct bpf_map_op *op,
void *pkey, void *arg __maybe_unused)
{
@@ -1463,7 +1455,7 @@ apply_obj_config_map_for_key(const char *name, int map_fd,
switch (op->op_type) {
case BPF_MAP_OP_SET_VALUE:
err = apply_config_value_for_key(map_fd, pkey,
- pdef->value_size,
+ bpf_map__value_size(map),
op->v.value);
break;
case BPF_MAP_OP_SET_EVSEL:
diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c
index eb853ca67cf4..c863ae0c5cb5 100644
--- a/tools/perf/util/bpf_map.c
+++ b/tools/perf/util/bpf_map.c
@@ -9,25 +9,25 @@
#include <stdlib.h>
#include <unistd.h>
-static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def)
+static bool bpf_map__is_per_cpu(enum bpf_map_type type)
{
- return def->type == BPF_MAP_TYPE_PERCPU_HASH ||
- def->type == BPF_MAP_TYPE_PERCPU_ARRAY ||
- def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
- def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
+ return type == BPF_MAP_TYPE_PERCPU_HASH ||
+ type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+ type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+ type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
}
-static void *bpf_map_def__alloc_value(const struct bpf_map_def *def)
+static void *bpf_map__alloc_value(const struct bpf_map *map)
{
- if (bpf_map_def__is_per_cpu(def))
- return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF));
+ if (bpf_map__is_per_cpu(bpf_map__type(map)))
+ return malloc(round_up(bpf_map__value_size(map), 8) *
+ sysconf(_SC_NPROCESSORS_CONF));
- return malloc(def->value_size);
+ return malloc(bpf_map__value_size(map));
}
int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
{
- const struct bpf_map_def *def = bpf_map__def(map);
void *prev_key = NULL, *key, *value;
int fd = bpf_map__fd(map), err;
int printed = 0;
@@ -35,15 +35,15 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
if (fd < 0)
return fd;
- if (IS_ERR(def))
- return PTR_ERR(def);
+ if (!map)
+ return PTR_ERR(map);
err = -ENOMEM;
- key = malloc(def->key_size);
+ key = malloc(bpf_map__key_size(map));
if (key == NULL)
goto out;
- value = bpf_map_def__alloc_value(def);
+ value = bpf_map__alloc_value(map);
if (value == NULL)
goto out_free_key;
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 42ffc24e9e71..91ea729990da 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -21,7 +21,7 @@ endif
BPF_GCC ?= $(shell command -v bpf-gcc;)
SAN_CFLAGS ?=
-CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \
+CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
LDFLAGS += $(SAN_CFLAGS)
@@ -292,7 +292,7 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
-BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
+BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
-I$(abspath $(OUTPUT)/../usr/include)
@@ -330,7 +330,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \
- map_ptr_kern.c core_kern.c
+ map_ptr_kern.c core_kern.c core_kern_overflow.c
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test_subprog.c
SKEL_BLACKLIST += $$(LSKELS)
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 42ef250c7acc..d099d91adc3b 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -206,6 +206,8 @@ btf_tag test and Clang version
The btf_tag selftest requires LLVM support to recognize the btf_decl_tag and
btf_type_tag attributes. They are introduced in `Clang 14` [0_, 1_].
+The subtests ``btf_type_tag_user_{mod1, mod2, vmlinux}`` also requires
+pahole version ``1.23``.
Without them, the btf_tag selftest will be skipped and you will observe:
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index da8593b3494a..c2554f9695ff 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -151,7 +151,7 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)
/* record data + header take 16 bytes */
skel->rodata->wakeup_data_size = args.sample_rate * 16;
- bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz);
+ bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
if (ringbuf_bench__load(skel)) {
fprintf(stderr, "failed to load skeleton\n");
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 7f957c55a3ca..0c481de2833d 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -154,7 +154,6 @@ static void *uprobe_producer_without_nop(void *input)
static void usetup(bool use_retprobe, bool use_nop)
{
size_t uprobe_offset;
- ssize_t base_addr;
struct bpf_link *link;
setup_libbpf();
@@ -165,11 +164,10 @@ static void usetup(bool use_retprobe, bool use_nop)
exit(1);
}
- base_addr = get_base_addr();
if (use_nop)
- uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop, base_addr);
+ uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop);
else
- uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop, base_addr);
+ uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop);
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
use_retprobe,
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index df3b292a8ffe..27d63be47b95 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -13,6 +13,10 @@
#define CREATE_TRACE_POINTS
#include "bpf_testmod-events.h"
+typedef int (*func_proto_typedef)(long);
+typedef int (*func_proto_typedef_nested1)(func_proto_typedef);
+typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1);
+
DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123;
noinline void
@@ -21,6 +25,27 @@ bpf_testmod_test_mod_kfunc(int i)
*(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i;
}
+struct bpf_testmod_btf_type_tag_1 {
+ int a;
+};
+
+struct bpf_testmod_btf_type_tag_2 {
+ struct bpf_testmod_btf_type_tag_1 __user *p;
+};
+
+noinline int
+bpf_testmod_test_btf_type_tag_user_1(struct bpf_testmod_btf_type_tag_1 __user *arg) {
+ BTF_TYPE_EMIT(func_proto_typedef);
+ BTF_TYPE_EMIT(func_proto_typedef_nested1);
+ BTF_TYPE_EMIT(func_proto_typedef_nested2);
+ return arg->a;
+}
+
+noinline int
+bpf_testmod_test_btf_type_tag_user_2(struct bpf_testmod_btf_type_tag_2 *arg) {
+ return arg->p->a;
+}
+
noinline int bpf_testmod_loop_test(int n)
{
int i, sum = 0;
@@ -109,26 +134,31 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
.write = bpf_testmod_test_write,
};
-BTF_SET_START(bpf_testmod_kfunc_ids)
+BTF_SET_START(bpf_testmod_check_kfunc_ids)
BTF_ID(func, bpf_testmod_test_mod_kfunc)
-BTF_SET_END(bpf_testmod_kfunc_ids)
+BTF_SET_END(bpf_testmod_check_kfunc_ids)
+
+static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &bpf_testmod_check_kfunc_ids,
+};
-static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set);
+extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
{
int ret;
- ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
- if (ret)
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
+ if (ret < 0)
return ret;
- register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
- return 0;
+ if (bpf_fentry_test1(0) < 0)
+ return -EINVAL;
+ return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
}
static void bpf_testmod_exit(void)
{
- unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
}
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index f6287132fa89..763db63a3890 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -48,3 +48,8 @@ CONFIG_IMA_READ_POLICY=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_FUNCTION_TRACER=y
CONFIG_DYNAMIC_FTRACE=y
+CONFIG_NETFILTER=y
+CONFIG_NF_DEFRAG_IPV4=y
+CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_USERFAULTFD=y
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 86b7d5d84eec..ab62aba10e2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -7,18 +7,18 @@
static void test_add(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
int link_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
link_fd = atomics_lskel__add__attach(skel);
if (!ASSERT_GT(link_fd, 0, "attach(add)"))
return;
prog_fd = skel->progs.add.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run add",
- "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ goto cleanup;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
goto cleanup;
ASSERT_EQ(skel->data->add64_value, 3, "add64_value");
@@ -39,19 +39,18 @@ cleanup:
static void test_sub(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
int link_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
link_fd = atomics_lskel__sub__attach(skel);
if (!ASSERT_GT(link_fd, 0, "attach(sub)"))
return;
prog_fd = skel->progs.sub.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run sub",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration))
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ goto cleanup;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
goto cleanup;
ASSERT_EQ(skel->data->sub64_value, -1, "sub64_value");
@@ -72,18 +71,18 @@ cleanup:
static void test_and(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
int link_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
link_fd = atomics_lskel__and__attach(skel);
if (!ASSERT_GT(link_fd, 0, "attach(and)"))
return;
prog_fd = skel->progs.and.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run and",
- "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ goto cleanup;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
goto cleanup;
ASSERT_EQ(skel->data->and64_value, 0x010ull << 32, "and64_value");
@@ -100,19 +99,18 @@ cleanup:
static void test_or(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
int link_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
link_fd = atomics_lskel__or__attach(skel);
if (!ASSERT_GT(link_fd, 0, "attach(or)"))
return;
prog_fd = skel->progs.or.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run or",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration))
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ goto cleanup;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
goto cleanup;
ASSERT_EQ(skel->data->or64_value, 0x111ull << 32, "or64_value");
@@ -129,18 +127,18 @@ cleanup:
static void test_xor(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
int link_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
link_fd = atomics_lskel__xor__attach(skel);
if (!ASSERT_GT(link_fd, 0, "attach(xor)"))
return;
prog_fd = skel->progs.xor.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run xor",
- "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ goto cleanup;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
goto cleanup;
ASSERT_EQ(skel->data->xor64_value, 0x101ull << 32, "xor64_value");
@@ -157,18 +155,18 @@ cleanup:
static void test_cmpxchg(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
int link_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
link_fd = atomics_lskel__cmpxchg__attach(skel);
if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)"))
return;
prog_fd = skel->progs.cmpxchg.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run cmpxchg",
- "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ goto cleanup;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
goto cleanup;
ASSERT_EQ(skel->data->cmpxchg64_value, 2, "cmpxchg64_value");
@@ -186,18 +184,18 @@ cleanup:
static void test_xchg(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
int link_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
link_fd = atomics_lskel__xchg__attach(skel);
if (!ASSERT_GT(link_fd, 0, "attach(xchg)"))
return;
prog_fd = skel->progs.xchg.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run xchg",
- "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ goto cleanup;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
goto cleanup;
ASSERT_EQ(skel->data->xchg64_value, 2, "xchg64_value");
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index d0bd51eb23c8..d48f6e533e1e 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -5,9 +5,10 @@
/* this is how USDT semaphore is actually defined, except volatile modifier */
volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
-/* attach point */
-static void method(void) {
- return ;
+/* uprobe attach point */
+static void trigger_func(void)
+{
+ asm volatile ("");
}
void test_attach_probe(void)
@@ -17,8 +18,7 @@ void test_attach_probe(void)
struct bpf_link *kprobe_link, *kretprobe_link;
struct bpf_link *uprobe_link, *uretprobe_link;
struct test_attach_probe* skel;
- size_t uprobe_offset;
- ssize_t base_addr, ref_ctr_offset;
+ ssize_t uprobe_offset, ref_ctr_offset;
bool legacy;
/* Check if new-style kprobe/uprobe API is supported.
@@ -34,11 +34,9 @@ void test_attach_probe(void)
*/
legacy = access("/sys/bus/event_source/devices/kprobe/type", F_OK) != 0;
- base_addr = get_base_addr();
- if (CHECK(base_addr < 0, "get_base_addr",
- "failed to find base addr: %zd", base_addr))
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
return;
- uprobe_offset = get_uprobe_offset(&method, base_addr);
ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
@@ -103,7 +101,7 @@ void test_attach_probe(void)
goto cleanup;
/* trigger & validate uprobe & uretprobe */
- method();
+ trigger_func();
if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
"wrong uprobe res: %d\n", skel->bss->uprobe_res))
diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
index d0f06e40c16d..eac71fbb24ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
+++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
@@ -1,13 +1,24 @@
// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include "bind_perm.skel.h"
-
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/capability.h>
+#include "test_progs.h"
+#include "bind_perm.skel.h"
+
static int duration;
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ return 0;
+}
+
void try_bind(int family, int port, int expected_errno)
{
struct sockaddr_storage addr = {};
@@ -75,6 +86,9 @@ void test_bind_perm(void)
struct bind_perm *skel;
int cgroup_fd;
+ if (create_netns())
+ return;
+
cgroup_fd = test__join_cgroup("/bind_perm");
if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 5eea3c3a40fe..cd10df6cd0fc 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -8,6 +8,12 @@
#include <test_progs.h>
#include "test_bpf_cookie.skel.h"
+/* uprobe attach point */
+static void trigger_func(void)
+{
+ asm volatile ("");
+}
+
static void kprobe_subtest(struct test_bpf_cookie *skel)
{
DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
@@ -62,11 +68,11 @@ static void uprobe_subtest(struct test_bpf_cookie *skel)
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
struct bpf_link *link1 = NULL, *link2 = NULL;
struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
- size_t uprobe_offset;
- ssize_t base_addr;
+ ssize_t uprobe_offset;
- base_addr = get_base_addr();
- uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ goto cleanup;
/* attach two uprobes */
opts.bpf_cookie = 0x100;
@@ -99,7 +105,7 @@ static void uprobe_subtest(struct test_bpf_cookie *skel)
goto cleanup;
/* trigger uprobe && uretprobe */
- get_base_addr();
+ trigger_func();
ASSERT_EQ(skel->bss->uprobe_res, 0x100 | 0x200, "uprobe_res");
ASSERT_EQ(skel->bss->uretprobe_res, 0x1000 | 0x2000, "uretprobe_res");
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index b84f859b1267..5142a7d130b2 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -138,6 +138,24 @@ static void test_task(void)
bpf_iter_task__destroy(skel);
}
+static void test_task_sleepable(void)
+{
+ struct bpf_iter_task *skel;
+
+ skel = bpf_iter_task__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_task_sleepable);
+
+ ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
+ "num_expected_failure_copy_from_user_task");
+ ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
+ "num_success_copy_from_user_task");
+
+ bpf_iter_task__destroy(skel);
+}
+
static void test_task_stack(void)
{
struct bpf_iter_task_stack *skel;
@@ -1252,6 +1270,8 @@ void test_bpf_iter(void)
test_bpf_map();
if (test__start_subtest("task"))
test_task();
+ if (test__start_subtest("task_sleepable"))
+ test_task_sleepable();
if (test__start_subtest("task_stack"))
test_task_stack();
if (test__start_subtest("task_file"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
new file mode 100644
index 000000000000..ee725d4d98a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <test_progs.h>
+#include "bpf_iter_setsockopt_unix.skel.h"
+
+#define NR_CASES 5
+
+static int create_unix_socket(struct bpf_iter_setsockopt_unix *skel)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ .sun_path = "",
+ };
+ socklen_t len;
+ int fd, err;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (!ASSERT_NEQ(fd, -1, "socket"))
+ return -1;
+
+ len = offsetof(struct sockaddr_un, sun_path);
+ err = bind(fd, (struct sockaddr *)&addr, len);
+ if (!ASSERT_OK(err, "bind"))
+ return -1;
+
+ len = sizeof(addr);
+ err = getsockname(fd, (struct sockaddr *)&addr, &len);
+ if (!ASSERT_OK(err, "getsockname"))
+ return -1;
+
+ memcpy(&skel->bss->sun_path, &addr.sun_path,
+ len - offsetof(struct sockaddr_un, sun_path));
+
+ return fd;
+}
+
+static void test_sndbuf(struct bpf_iter_setsockopt_unix *skel, int fd)
+{
+ socklen_t optlen;
+ int i, err;
+
+ for (i = 0; i < NR_CASES; i++) {
+ if (!ASSERT_NEQ(skel->data->sndbuf_getsockopt[i], -1,
+ "bpf_(get|set)sockopt"))
+ return;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+ &(skel->data->sndbuf_setsockopt[i]),
+ sizeof(skel->data->sndbuf_setsockopt[i]));
+ if (!ASSERT_OK(err, "setsockopt"))
+ return;
+
+ optlen = sizeof(skel->bss->sndbuf_getsockopt_expected[i]);
+ err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+ &(skel->bss->sndbuf_getsockopt_expected[i]),
+ &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ return;
+
+ if (!ASSERT_EQ(skel->data->sndbuf_getsockopt[i],
+ skel->bss->sndbuf_getsockopt_expected[i],
+ "bpf_(get|set)sockopt"))
+ return;
+ }
+}
+
+void test_bpf_iter_setsockopt_unix(void)
+{
+ struct bpf_iter_setsockopt_unix *skel;
+ int err, unix_fd, iter_fd;
+ char buf;
+
+ skel = bpf_iter_setsockopt_unix__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ unix_fd = create_unix_socket(skel);
+ if (!ASSERT_NEQ(unix_fd, -1, "create_unix_server"))
+ goto destroy;
+
+ skel->links.change_sndbuf = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL);
+ if (!ASSERT_OK_PTR(skel->links.change_sndbuf, "bpf_program__attach_iter"))
+ goto destroy;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.change_sndbuf));
+ if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
+ goto destroy;
+
+ while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+ errno == EAGAIN)
+ ;
+ if (!ASSERT_OK(err, "read iter error"))
+ goto destroy;
+
+ test_sndbuf(skel, unix_fd);
+destroy:
+ bpf_iter_setsockopt_unix__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
new file mode 100644
index 000000000000..d43f548c572c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <stdatomic.h>
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include <linux/module.h>
+#include <linux/userfaultfd.h>
+
+#include "ksym_race.skel.h"
+#include "bpf_mod_race.skel.h"
+#include "kfunc_call_race.skel.h"
+
+/* This test crafts a race between btf_try_get_module and do_init_module, and
+ * checks whether btf_try_get_module handles the invocation for a well-formed
+ * but uninitialized module correctly. Unless the module has completed its
+ * initcalls, the verifier should fail the program load and return ENXIO.
+ *
+ * userfaultfd is used to trigger a fault in an fmod_ret program, and make it
+ * sleep, then the BPF program is loaded and the return value from verifier is
+ * inspected. After this, the userfaultfd is closed so that the module loading
+ * thread makes forward progress, and fmod_ret injects an error so that the
+ * module load fails and it is freed.
+ *
+ * If the verifier succeeded in loading the supplied program, it will end up
+ * taking reference to freed module, and trigger a crash when the program fd
+ * is closed later. This is true for both kfuncs and ksyms. In both cases,
+ * the crash is triggered inside bpf_prog_free_deferred, when module reference
+ * is finally released.
+ */
+
+struct test_config {
+ const char *str_open;
+ void *(*bpf_open_and_load)();
+ void (*bpf_destroy)(void *);
+};
+
+enum test_state {
+ _TS_INVALID,
+ TS_MODULE_LOAD,
+ TS_MODULE_LOAD_FAIL,
+};
+
+static _Atomic enum test_state state = _TS_INVALID;
+
+static int sys_finit_module(int fd, const char *param_values, int flags)
+{
+ return syscall(__NR_finit_module, fd, param_values, flags);
+}
+
+static int sys_delete_module(const char *name, unsigned int flags)
+{
+ return syscall(__NR_delete_module, name, flags);
+}
+
+static int load_module(const char *mod)
+{
+ int ret, fd;
+
+ fd = open("bpf_testmod.ko", O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ ret = sys_finit_module(fd, "", 0);
+ close(fd);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+static void *load_module_thread(void *p)
+{
+
+ if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail"))
+ atomic_store(&state, TS_MODULE_LOAD);
+ else
+ atomic_store(&state, TS_MODULE_LOAD_FAIL);
+ return p;
+}
+
+static int sys_userfaultfd(int flags)
+{
+ return syscall(__NR_userfaultfd, flags);
+}
+
+static int test_setup_uffd(void *fault_addr)
+{
+ struct uffdio_register uffd_register = {};
+ struct uffdio_api uffd_api = {};
+ int uffd;
+
+ uffd = sys_userfaultfd(O_CLOEXEC);
+ if (uffd < 0)
+ return -errno;
+
+ uffd_api.api = UFFD_API;
+ uffd_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffd_api)) {
+ close(uffd);
+ return -1;
+ }
+
+ uffd_register.range.start = (unsigned long)fault_addr;
+ uffd_register.range.len = 4096;
+ uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) {
+ close(uffd);
+ return -1;
+ }
+ return uffd;
+}
+
+static void test_bpf_mod_race_config(const struct test_config *config)
+{
+ void *fault_addr, *skel_fail;
+ struct bpf_mod_race *skel;
+ struct uffd_msg uffd_msg;
+ pthread_t load_mod_thrd;
+ _Atomic int *blockingp;
+ int uffd, ret;
+
+ fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
+ return;
+
+ if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod"))
+ goto end_mmap;
+
+ skel = bpf_mod_race__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open"))
+ goto end_module;
+
+ skel->rodata->bpf_mod_race_config.tgid = getpid();
+ skel->rodata->bpf_mod_race_config.inject_error = -4242;
+ skel->rodata->bpf_mod_race_config.fault_addr = fault_addr;
+ if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load"))
+ goto end_destroy;
+ blockingp = (_Atomic int *)&skel->bss->bpf_blocking;
+
+ if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach"))
+ goto end_destroy;
+
+ uffd = test_setup_uffd(fault_addr);
+ if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address"))
+ goto end_destroy;
+
+ if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL),
+ "load module thread"))
+ goto end_uffd;
+
+ /* Now, we either fail loading module, or block in bpf prog, spin to find out */
+ while (!atomic_load(&state) && !atomic_load(blockingp))
+ ;
+ if (!ASSERT_EQ(state, _TS_INVALID, "module load should block"))
+ goto end_join;
+ if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) {
+ pthread_kill(load_mod_thrd, SIGKILL);
+ goto end_uffd;
+ }
+
+ /* We might have set bpf_blocking to 1, but may have not blocked in
+ * bpf_copy_from_user. Read userfaultfd descriptor to verify that.
+ */
+ if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg),
+ "read uffd block event"))
+ goto end_join;
+ if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault"))
+ goto end_join;
+
+ /* We know that load_mod_thrd is blocked in the fmod_ret program, the
+ * module state is still MODULE_STATE_COMING because mod->init hasn't
+ * returned. This is the time we try to load a program calling kfunc and
+ * check if we get ENXIO from verifier.
+ */
+ skel_fail = config->bpf_open_and_load();
+ ret = errno;
+ if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) {
+ /* Close uffd to unblock load_mod_thrd */
+ close(uffd);
+ uffd = -1;
+ while (atomic_load(blockingp) != 2)
+ ;
+ ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+ config->bpf_destroy(skel_fail);
+ goto end_join;
+
+ }
+ ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO");
+ ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false");
+
+ close(uffd);
+ uffd = -1;
+end_join:
+ pthread_join(load_mod_thrd, NULL);
+ if (uffd < 0)
+ ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success");
+end_uffd:
+ if (uffd >= 0)
+ close(uffd);
+end_destroy:
+ bpf_mod_race__destroy(skel);
+ ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+end_module:
+ sys_delete_module("bpf_testmod", 0);
+ ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod");
+end_mmap:
+ munmap(fault_addr, 4096);
+ atomic_store(&state, _TS_INVALID);
+}
+
+static const struct test_config ksym_config = {
+ .str_open = "ksym_race__open_and_load",
+ .bpf_open_and_load = (void *)ksym_race__open_and_load,
+ .bpf_destroy = (void *)ksym_race__destroy,
+};
+
+static const struct test_config kfunc_config = {
+ .str_open = "kfunc_call_race__open_and_load",
+ .bpf_open_and_load = (void *)kfunc_call_race__open_and_load,
+ .bpf_destroy = (void *)kfunc_call_race__destroy,
+};
+
+void serial_test_bpf_mod_race(void)
+{
+ if (test__start_subtest("ksym (used_btfs UAF)"))
+ test_bpf_mod_race_config(&ksym_config);
+ if (test__start_subtest("kfunc (kfunc_btf_tab UAF)"))
+ test_bpf_mod_race_config(&kfunc_config);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
new file mode 100644
index 000000000000..dd30b1e3a67c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_bpf_nf.skel.h"
+
+enum {
+ TEST_XDP,
+ TEST_TC_BPF,
+};
+
+void test_bpf_nf_ct(int mode)
+{
+ struct test_bpf_nf *skel;
+ int prog_fd, err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = test_bpf_nf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
+ return;
+
+ if (mode == TEST_XDP)
+ prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
+ else
+ prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test);
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run"))
+ goto end;
+
+ ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
+ ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
+ ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
+ ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
+ ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
+ ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id");
+ ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup");
+ ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple");
+end:
+ test_bpf_nf__destroy(skel);
+}
+
+void test_bpf_nf(void)
+{
+ if (test__start_subtest("xdp-ct"))
+ test_bpf_nf_ct(TEST_XDP);
+ if (test__start_subtest("tc-bpf-ct"))
+ test_bpf_nf_ct(TEST_TC_BPF);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 8ba53acf9eb4..8b652f5ce423 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3939,6 +3939,25 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "Invalid component_idx",
},
{
+ .descr = "decl_tag test #15, func, invalid func proto",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, 0), /* [2] */
+ BTF_FUNC_ENC(NAME_TBD, 8), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag\0func"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
.descr = "type_tag test #1",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
@@ -4560,6 +4579,8 @@ static void do_test_file(unsigned int test_num)
has_btf_ext = btf_ext != NULL;
btf_ext__free(btf_ext);
+ /* temporary disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS);
obj = bpf_object__open(test->file);
err = libbpf_get_error(obj);
if (CHECK(err, "obj: %d", err))
@@ -4684,6 +4705,8 @@ skip:
fprintf(stderr, "OK");
done:
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
btf__free(btf);
free(func_info);
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
index 88d63e23e35f..f7560b54a6bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_tag.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
@@ -1,19 +1,21 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include <test_progs.h>
-#include "btf_decl_tag.skel.h"
+#include <bpf/btf.h>
+#include "test_btf_decl_tag.skel.h"
/* struct btf_type_tag_test is referenced in btf_type_tag.skel.h */
struct btf_type_tag_test {
int **p;
};
#include "btf_type_tag.skel.h"
+#include "btf_type_tag_user.skel.h"
static void test_btf_decl_tag(void)
{
- struct btf_decl_tag *skel;
+ struct test_btf_decl_tag *skel;
- skel = btf_decl_tag__open_and_load();
+ skel = test_btf_decl_tag__open_and_load();
if (!ASSERT_OK_PTR(skel, "btf_decl_tag"))
return;
@@ -22,7 +24,7 @@ static void test_btf_decl_tag(void)
test__skip();
}
- btf_decl_tag__destroy(skel);
+ test_btf_decl_tag__destroy(skel);
}
static void test_btf_type_tag(void)
@@ -41,10 +43,101 @@ static void test_btf_type_tag(void)
btf_type_tag__destroy(skel);
}
+static void test_btf_type_tag_mod_user(bool load_test_user1)
+{
+ const char *module_name = "bpf_testmod";
+ struct btf *vmlinux_btf, *module_btf;
+ struct btf_type_tag_user *skel;
+ __s32 type_id;
+ int err;
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ /* skip the test if the module does not have __user tags */
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+ return;
+
+ module_btf = btf__load_module_btf(module_name, vmlinux_btf);
+ if (!ASSERT_OK_PTR(module_btf, "could not load module BTF"))
+ goto free_vmlinux_btf;
+
+ type_id = btf__find_by_name_kind(module_btf, "user", BTF_KIND_TYPE_TAG);
+ if (type_id <= 0) {
+ printf("%s:SKIP: btf_type_tag attribute not in %s", __func__, module_name);
+ test__skip();
+ goto free_module_btf;
+ }
+
+ skel = btf_type_tag_user__open();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag_user"))
+ goto free_module_btf;
+
+ bpf_program__set_autoload(skel->progs.test_sys_getsockname, false);
+ if (load_test_user1)
+ bpf_program__set_autoload(skel->progs.test_user2, false);
+ else
+ bpf_program__set_autoload(skel->progs.test_user1, false);
+
+ err = btf_type_tag_user__load(skel);
+ ASSERT_ERR(err, "btf_type_tag_user");
+
+ btf_type_tag_user__destroy(skel);
+
+free_module_btf:
+ btf__free(module_btf);
+free_vmlinux_btf:
+ btf__free(vmlinux_btf);
+}
+
+static void test_btf_type_tag_vmlinux_user(void)
+{
+ struct btf_type_tag_user *skel;
+ struct btf *vmlinux_btf;
+ __s32 type_id;
+ int err;
+
+ /* skip the test if the vmlinux does not have __user tags */
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+ return;
+
+ type_id = btf__find_by_name_kind(vmlinux_btf, "user", BTF_KIND_TYPE_TAG);
+ if (type_id <= 0) {
+ printf("%s:SKIP: btf_type_tag attribute not in vmlinux btf", __func__);
+ test__skip();
+ goto free_vmlinux_btf;
+ }
+
+ skel = btf_type_tag_user__open();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag_user"))
+ goto free_vmlinux_btf;
+
+ bpf_program__set_autoload(skel->progs.test_user2, false);
+ bpf_program__set_autoload(skel->progs.test_user1, false);
+
+ err = btf_type_tag_user__load(skel);
+ ASSERT_ERR(err, "btf_type_tag_user");
+
+ btf_type_tag_user__destroy(skel);
+
+free_vmlinux_btf:
+ btf__free(vmlinux_btf);
+}
+
void test_btf_tag(void)
{
if (test__start_subtest("btf_decl_tag"))
test_btf_decl_tag();
if (test__start_subtest("btf_type_tag"))
test_btf_type_tag();
+ if (test__start_subtest("btf_type_tag_user_mod1"))
+ test_btf_type_tag_mod_user(true);
+ if (test__start_subtest("btf_type_tag_user_mod2"))
+ test_btf_type_tag_mod_user(false);
+ if (test__start_subtest("btf_type_tag_sys_user_vmlinux"))
+ test_btf_type_tag_vmlinux_user();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index d3e8f729c623..38b3c47293da 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -194,14 +194,14 @@ void serial_test_cgroup_attach_multi(void)
attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
attach_opts.replace_prog_fd = allow_prog[0];
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_override", "unexpected success\n"))
goto err;
CHECK_FAIL(errno != EINVAL);
attach_opts.flags = BPF_F_REPLACE;
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_no_multi", "unexpected success\n"))
goto err;
@@ -209,7 +209,7 @@ void serial_test_cgroup_attach_multi(void)
attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
attach_opts.replace_prog_fd = -1;
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_bad_fd", "unexpected success\n"))
goto err;
@@ -217,7 +217,7 @@ void serial_test_cgroup_attach_multi(void)
/* replacing a program that is not attached to cgroup should fail */
attach_opts.replace_prog_fd = allow_prog[3];
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_no_ent", "unexpected success\n"))
goto err;
@@ -225,14 +225,14 @@ void serial_test_cgroup_attach_multi(void)
/* replace 1st from the top program */
attach_opts.replace_prog_fd = allow_prog[0];
- if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"prog_replace", "errno=%d\n", errno))
goto err;
/* replace program with itself */
attach_opts.replace_prog_fd = allow_prog[6];
- if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"prog_replace", "errno=%d\n", errno))
goto err;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
new file mode 100644
index 000000000000..0b47c3c000c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "cgroup_getset_retval_setsockopt.skel.h"
+#include "cgroup_getset_retval_getsockopt.skel.h"
+
+#define SOL_CUSTOM 0xdeadbeef
+
+static int zero;
+
+static void test_setsockopt_set(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_set_eunatch = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ /* Attach setsockopt that sets EUNATCH, assert that
+ * we actually get that error when we run setsockopt()
+ */
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eunatch);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_set_eunatch = NULL, *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ /* Attach setsockopt that sets EUNATCH, and one that gets the
+ * previously set errno. Assert that we get the same errno back.
+ */
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eunatch);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ /* Attach setsockopt that gets the previously set errno.
+ * Assert that, without anything setting one, we get 0.
+ */
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_OK(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_get_retval = NULL, *link_set_eunatch = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ /* Attach setsockopt that gets the previously set errno, and then
+ * one that sets the errno to EUNATCH. Assert that the get does not
+ * see EUNATCH set later, and does not prevent EUNATCH from being set.
+ */
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_get_retval);
+ bpf_link__destroy(link_set_eunatch);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_override(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_set_eunatch = NULL, *link_set_eisconn = NULL;
+ struct bpf_link *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN,
+ * and then one that gets the exported errno. Assert both the syscall
+ * and the helper sees the last set errno.
+ */
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+ link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EISCONN, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EISCONN, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eunatch);
+ bpf_link__destroy(link_set_eisconn);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_legacy_eperm = NULL, *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ /* Attach setsockopt that return a reject without setting errno
+ * (legacy reject), and one that gets the errno. Assert that for
+ * backward compatibility the syscall result in EPERM, and this
+ * is also visible to the helper.
+ */
+ link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EPERM, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EPERM, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_legacy_eperm);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_set_eunatch = NULL, *link_legacy_eperm = NULL;
+ struct bpf_link *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ /* Attach setsockopt that sets EUNATCH, then one that return a reject
+ * without setting errno, and then one that gets the exported errno.
+ * Assert both the syscall and the helper's errno are unaffected by
+ * the second prog (i.e. legacy rejects does not override the errno
+ * to EPERM).
+ */
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+ link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eunatch);
+ bpf_link__destroy(link_legacy_eperm);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_getsockopt_get(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_getsockopt *obj;
+ struct bpf_link *link_get_retval = NULL;
+ int buf;
+ socklen_t optlen = sizeof(buf);
+
+ obj = cgroup_getset_retval_getsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ /* Attach getsockopt that gets previously set errno. Assert that the
+ * error from kernel is in both ctx_retval_value and retval_value.
+ */
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+ &buf, &optlen), "getsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EOPNOTSUPP, "getsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EOPNOTSUPP, "retval_value"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->ctx_retval_value, -EOPNOTSUPP, "ctx_retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_override(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_getsockopt *obj;
+ struct bpf_link *link_set_eisconn = NULL;
+ int buf;
+ socklen_t optlen = sizeof(buf);
+
+ obj = cgroup_getset_retval_getsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ /* Attach getsockopt that sets retval to -EISCONN. Assert that this
+ * overrides the value from kernel.
+ */
+ link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+ &buf, &optlen), "getsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EISCONN, "getsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eisconn);
+
+ cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_getsockopt *obj;
+ struct bpf_link *link_set_eisconn = NULL, *link_clear_retval = NULL;
+ struct bpf_link *link_get_retval = NULL;
+ int buf;
+ socklen_t optlen = sizeof(buf);
+
+ obj = cgroup_getset_retval_getsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ /* Attach getsockopt that sets retval to -EISCONN, and one that clears
+ * ctx retval. Assert that the clearing ctx retval is synced to helper
+ * and clears any errors both from kernel and BPF..
+ */
+ link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+ goto close_bpf_object;
+ link_clear_retval = bpf_program__attach_cgroup(obj->progs.clear_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_clear_retval, "cg-attach-clear_retval"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_OK(getsockopt(sock_fd, SOL_CUSTOM, 0,
+ &buf, &optlen), "getsockopt"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->ctx_retval_value, 0, "ctx_retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eisconn);
+ bpf_link__destroy(link_clear_retval);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+void test_cgroup_getset_retval(void)
+{
+ int cgroup_fd = -1;
+ int sock_fd = -1;
+
+ cgroup_fd = test__join_cgroup("/cgroup_getset_retval");
+ if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
+ goto close_fd;
+
+ sock_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+ if (!ASSERT_GE(sock_fd, 0, "start-server"))
+ goto close_fd;
+
+ if (test__start_subtest("setsockopt-set"))
+ test_setsockopt_set(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-set_and_get"))
+ test_setsockopt_set_and_get(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-default_zero"))
+ test_setsockopt_default_zero(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-default_zero_and_set"))
+ test_setsockopt_default_zero_and_set(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-override"))
+ test_setsockopt_override(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-legacy_eperm"))
+ test_setsockopt_legacy_eperm(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-legacy_no_override"))
+ test_setsockopt_legacy_no_override(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("getsockopt-get"))
+ test_getsockopt_get(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("getsockopt-override"))
+ test_getsockopt_override(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("getsockopt-retval_sync"))
+ test_getsockopt_retval_sync(cgroup_fd, sock_fd);
+
+close_fd:
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index f73e6e36b74d..12f4395f18b3 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -79,28 +79,21 @@ static void test_check_mtu_run_xdp(struct test_check_mtu *skel,
struct bpf_program *prog,
__u32 mtu_expect)
{
- const char *prog_name = bpf_program__name(prog);
int retval_expect = XDP_PASS;
__u32 mtu_result = 0;
char buf[256] = {};
- int err;
- struct bpf_prog_test_run_attr tattr = {
+ int err, prog_fd = bpf_program__fd(prog);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.repeat = 1,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.data_out = buf,
.data_size_out = sizeof(buf),
- .prog_fd = bpf_program__fd(prog),
- };
-
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != 0, "bpf_prog_test_run",
- "prog_name:%s (err %d errno %d retval %d)\n",
- prog_name, err, errno, tattr.retval);
+ );
- CHECK(tattr.retval != retval_expect, "retval",
- "progname:%s unexpected retval=%d expected=%d\n",
- prog_name, tattr.retval, retval_expect);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, retval_expect, "retval");
/* Extract MTU that BPF-prog got */
mtu_result = skel->bss->global_bpf_mtu_xdp;
@@ -139,28 +132,21 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel,
struct bpf_program *prog,
__u32 mtu_expect)
{
- const char *prog_name = bpf_program__name(prog);
int retval_expect = BPF_OK;
__u32 mtu_result = 0;
char buf[256] = {};
- int err;
- struct bpf_prog_test_run_attr tattr = {
- .repeat = 1,
+ int err, prog_fd = bpf_program__fd(prog);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.data_out = buf,
.data_size_out = sizeof(buf),
- .prog_fd = bpf_program__fd(prog),
- };
-
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != 0, "bpf_prog_test_run",
- "prog_name:%s (err %d errno %d retval %d)\n",
- prog_name, err, errno, tattr.retval);
+ .repeat = 1,
+ );
- CHECK(tattr.retval != retval_expect, "retval",
- "progname:%s unexpected retval=%d expected=%d\n",
- prog_name, tattr.retval, retval_expect);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, retval_expect, "retval");
/* Extract MTU that BPF-prog got */
mtu_result = skel->bss->global_bpf_mtu_tc;
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index e075d03ab630..224f016b0a53 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -161,7 +161,7 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
}
}
-static bool was_decapsulated(struct bpf_prog_test_run_attr *tattr)
+static bool was_decapsulated(struct bpf_test_run_opts *tattr)
{
return tattr->data_size_out < tattr->data_size_in;
}
@@ -367,12 +367,12 @@ static void close_fds(int *fds, int n)
static void test_cls_redirect_common(struct bpf_program *prog)
{
- struct bpf_prog_test_run_attr tattr = {};
+ LIBBPF_OPTS(bpf_test_run_opts, tattr);
int families[] = { AF_INET, AF_INET6 };
struct sockaddr_storage ss;
struct sockaddr *addr;
socklen_t slen;
- int i, j, err;
+ int i, j, err, prog_fd;
int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
@@ -394,7 +394,7 @@ static void test_cls_redirect_common(struct bpf_program *prog)
goto cleanup;
}
- tattr.prog_fd = bpf_program__fd(prog);
+ prog_fd = bpf_program__fd(prog);
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct test_cfg *test = &tests[i];
@@ -415,7 +415,7 @@ static void test_cls_redirect_common(struct bpf_program *prog)
if (CHECK_FAIL(!tattr.data_size_in))
continue;
- err = bpf_prog_test_run_xattr(&tattr);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
if (CHECK_FAIL(err))
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern.c b/tools/testing/selftests/bpf/prog_tests/core_kern.c
index 561c5185d886..6a5a1c019a5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_kern.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_kern.c
@@ -7,8 +7,22 @@
void test_core_kern_lskel(void)
{
struct core_kern_lskel *skel;
+ int link_fd;
skel = core_kern_lskel__open_and_load();
- ASSERT_OK_PTR(skel, "open_and_load");
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ link_fd = core_kern_lskel__core_relo_proto__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(core_relo_proto)"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+ ASSERT_TRUE(skel->bss->proto_out[0], "bpf_core_type_exists");
+ ASSERT_FALSE(skel->bss->proto_out[1], "!bpf_core_type_exists");
+ ASSERT_TRUE(skel->bss->proto_out[2], "bpf_core_type_exists. nested");
+
+cleanup:
core_kern_lskel__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c b/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c
new file mode 100644
index 000000000000..04cc145bc26a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "test_progs.h"
+#include "core_kern_overflow.lskel.h"
+
+void test_core_kern_overflow_lskel(void)
+{
+ struct core_kern_overflow_lskel *skel;
+
+ skel = core_kern_overflow_lskel__open_and_load();
+ if (!ASSERT_NULL(skel, "open_and_load"))
+ core_kern_overflow_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
index cbaa44ffb8c6..5aa52cc31dc2 100644
--- a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
+++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
@@ -26,10 +26,10 @@ static void test_dummy_st_ops_attach(void)
static void test_dummy_init_ret_value(void)
{
__u64 args[1] = {0};
- struct bpf_prog_test_run_attr attr = {
- .ctx_size_in = sizeof(args),
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
.ctx_in = args,
- };
+ .ctx_size_in = sizeof(args),
+ );
struct dummy_st_ops *skel;
int fd, err;
@@ -38,8 +38,7 @@ static void test_dummy_init_ret_value(void)
return;
fd = bpf_program__fd(skel->progs.test_1);
- attr.prog_fd = fd;
- err = bpf_prog_test_run_xattr(&attr);
+ err = bpf_prog_test_run_opts(fd, &attr);
ASSERT_OK(err, "test_run");
ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret");
@@ -53,10 +52,10 @@ static void test_dummy_init_ptr_arg(void)
.val = exp_retval,
};
__u64 args[1] = {(unsigned long)&in_state};
- struct bpf_prog_test_run_attr attr = {
- .ctx_size_in = sizeof(args),
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
.ctx_in = args,
- };
+ .ctx_size_in = sizeof(args),
+ );
struct dummy_st_ops *skel;
int fd, err;
@@ -65,8 +64,7 @@ static void test_dummy_init_ptr_arg(void)
return;
fd = bpf_program__fd(skel->progs.test_1);
- attr.prog_fd = fd;
- err = bpf_prog_test_run_xattr(&attr);
+ err = bpf_prog_test_run_opts(fd, &attr);
ASSERT_OK(err, "test_run");
ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret");
ASSERT_EQ(attr.retval, exp_retval, "test_ret");
@@ -77,10 +75,10 @@ static void test_dummy_init_ptr_arg(void)
static void test_dummy_multiple_args(void)
{
__u64 args[5] = {0, -100, 0x8a5f, 'c', 0x1234567887654321ULL};
- struct bpf_prog_test_run_attr attr = {
- .ctx_size_in = sizeof(args),
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
.ctx_in = args,
- };
+ .ctx_size_in = sizeof(args),
+ );
struct dummy_st_ops *skel;
int fd, err;
size_t i;
@@ -91,8 +89,7 @@ static void test_dummy_multiple_args(void)
return;
fd = bpf_program__fd(skel->progs.test_2);
- attr.prog_fd = fd;
- err = bpf_prog_test_run_xattr(&attr);
+ err = bpf_prog_test_run_opts(fd, &attr);
ASSERT_OK(err, "test_run");
for (i = 0; i < ARRAY_SIZE(args); i++) {
snprintf(name, sizeof(name), "arg %zu", i);
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 4374ac8a8a91..130f5b82d2e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -9,38 +9,34 @@ void test_fentry_fexit(void)
struct fentry_test_lskel *fentry_skel = NULL;
struct fexit_test_lskel *fexit_skel = NULL;
__u64 *fentry_res, *fexit_res;
- __u32 duration = 0, retval;
int err, prog_fd, i;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
fentry_skel = fentry_test_lskel__open_and_load();
- if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
goto close_prog;
fexit_skel = fexit_test_lskel__open_and_load();
- if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
goto close_prog;
err = fentry_test_lskel__attach(fentry_skel);
- if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "fentry_attach"))
goto close_prog;
err = fexit_test_lskel__attach(fexit_skel);
- if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "fexit_attach"))
goto close_prog;
prog_fd = fexit_skel->progs.test1.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_OK(topts.retval, "ipv6 test retval");
fentry_res = (__u64 *)fentry_skel->bss;
fexit_res = (__u64 *)fexit_skel->bss;
printf("%lld\n", fentry_skel->bss->test1_result);
for (i = 0; i < 8; i++) {
- CHECK(fentry_res[i] != 1, "result",
- "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]);
- CHECK(fexit_res[i] != 1, "result",
- "fexit_test%d failed err %lld\n", i + 1, fexit_res[i]);
+ ASSERT_EQ(fentry_res[i], 1, "fentry result");
+ ASSERT_EQ(fexit_res[i], 1, "fexit result");
}
close_prog:
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 12921b3850d2..c0d1d61d5f66 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -6,9 +6,9 @@
static int fentry_test(struct fentry_test_lskel *fentry_skel)
{
int err, prog_fd, i;
- __u32 duration = 0, retval;
int link_fd;
__u64 *result;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
err = fentry_test_lskel__attach(fentry_skel);
if (!ASSERT_OK(err, "fentry_attach"))
@@ -20,10 +20,9 @@ static int fentry_test(struct fentry_test_lskel *fentry_skel)
return -1;
prog_fd = fentry_skel->progs.test1.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
- ASSERT_EQ(retval, 0, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
result = (__u64 *)fentry_skel->bss;
for (i = 0; i < sizeof(*fentry_skel->bss) / sizeof(__u64); i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index c52f99f6a909..d9aad15e0d24 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -58,12 +58,17 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
test_cb cb)
{
struct bpf_object *obj = NULL, *tgt_obj;
- __u32 retval, tgt_prog_id, info_len;
+ __u32 tgt_prog_id, info_len;
struct bpf_prog_info prog_info = {};
struct bpf_program **prog = NULL, *p;
struct bpf_link **link = NULL;
int err, tgt_fd, i;
struct btf *btf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v6,
+ .data_size_in = sizeof(pkt_v6),
+ .repeat = 1,
+ );
err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
&tgt_obj, &tgt_fd);
@@ -132,7 +137,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
&link_info, &info_len);
ASSERT_OK(err, "link_fd_get_info");
ASSERT_EQ(link_info.tracing.attach_type,
- bpf_program__get_expected_attach_type(prog[i]),
+ bpf_program__expected_attach_type(prog[i]),
"link_attach_type");
ASSERT_EQ(link_info.tracing.target_obj_id, tgt_prog_id, "link_tgt_obj_id");
ASSERT_EQ(link_info.tracing.target_btf_id, btf_id, "link_tgt_btf_id");
@@ -147,10 +152,9 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
if (!run_prog)
goto close_prog;
- err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
- NULL, NULL, &retval, NULL);
+ err = bpf_prog_test_run_opts(tgt_fd, &topts);
ASSERT_OK(err, "prog_run");
- ASSERT_EQ(retval, 0, "prog_run_ret");
+ ASSERT_EQ(topts.retval, 0, "prog_run_ret");
if (check_data_map(obj, prog_cnt, false))
goto close_prog;
@@ -225,29 +229,31 @@ static int test_second_attach(struct bpf_object *obj)
const char *tgt_obj_file = "./test_pkt_access.o";
struct bpf_program *prog = NULL;
struct bpf_object *tgt_obj;
- __u32 duration = 0, retval;
struct bpf_link *link;
int err = 0, tgt_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v6,
+ .data_size_in = sizeof(pkt_v6),
+ .repeat = 1,
+ );
prog = bpf_object__find_program_by_name(obj, prog_name);
- if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name))
+ if (!ASSERT_OK_PTR(prog, "find_prog"))
return -ENOENT;
err = bpf_prog_test_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC,
&tgt_obj, &tgt_fd);
- if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n",
- tgt_obj_file, err, errno))
+ if (!ASSERT_OK(err, "second_prog_load"))
return err;
link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
if (!ASSERT_OK_PTR(link, "second_link"))
goto out;
- err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration))
+ err = bpf_prog_test_run_opts(tgt_fd, &topts);
+ if (!ASSERT_OK(err, "ipv6 test_run"))
+ goto out;
+ if (!ASSERT_OK(topts.retval, "ipv6 retval"))
goto out;
err = check_data_map(obj, 1, true);
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index e4cede6b4b2d..3ee2107bbf7a 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -10,9 +10,7 @@ void test_fexit_stress(void)
char test_skb[128] = {};
int fexit_fd[CNT] = {};
int link_fd[CNT] = {};
- __u32 duration = 0;
char error[4096];
- __u32 prog_ret;
int err, i, filter_fd;
const struct bpf_insn trace_program[] = {
@@ -36,9 +34,15 @@ void test_fexit_stress(void)
.log_size = sizeof(error),
);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = test_skb,
+ .data_size_in = sizeof(test_skb),
+ .repeat = 1,
+ );
+
err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1",
trace_opts.expected_attach_type);
- if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err))
+ if (!ASSERT_GT(err, 0, "find_vmlinux_btf_id"))
goto out;
trace_opts.attach_btf_id = err;
@@ -47,24 +51,20 @@ void test_fexit_stress(void)
trace_program,
sizeof(trace_program) / sizeof(struct bpf_insn),
&trace_opts);
- if (CHECK(fexit_fd[i] < 0, "fexit loaded",
- "failed: %d errno %d\n", fexit_fd[i], errno))
+ if (!ASSERT_GE(fexit_fd[i], 0, "fexit load"))
goto out;
link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]);
- if (CHECK(link_fd[i] < 0, "fexit attach failed",
- "prog %d failed: %d err %d\n", i, link_fd[i], errno))
+ if (!ASSERT_GE(link_fd[i], 0, "fexit attach"))
goto out;
}
filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
skb_program, sizeof(skb_program) / sizeof(struct bpf_insn),
&skb_opts);
- if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
- filter_fd, errno))
+ if (!ASSERT_GE(filter_fd, 0, "test_program_loaded"))
goto out;
- err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
- 0, &prog_ret, 0);
+ err = bpf_prog_test_run_opts(filter_fd, &topts);
close(filter_fd);
CHECK_FAIL(err);
out:
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index d4887d8bb396..101b7343036b 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -6,9 +6,9 @@
static int fexit_test(struct fexit_test_lskel *fexit_skel)
{
int err, prog_fd, i;
- __u32 duration = 0, retval;
int link_fd;
__u64 *result;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
err = fexit_test_lskel__attach(fexit_skel);
if (!ASSERT_OK(err, "fexit_attach"))
@@ -20,10 +20,9 @@ static int fexit_test(struct fexit_test_lskel *fexit_skel)
return -1;
prog_fd = fexit_skel->progs.test1.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
- ASSERT_EQ(retval, 0, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
result = (__u64 *)fexit_skel->bss;
for (i = 0; i < sizeof(*fexit_skel->bss) / sizeof(__u64); i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index ac54e3f91d42..0c1661ea996e 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -13,8 +13,9 @@
#endif
#define CHECK_FLOW_KEYS(desc, got, expected) \
- CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \
+ _CHECK(memcmp(&got, &expected, sizeof(got)) != 0, \
desc, \
+ topts.duration, \
"nhoff=%u/%u " \
"thoff=%u/%u " \
"addr_proto=0x%x/0x%x " \
@@ -457,7 +458,7 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
if (map_fd < 0)
return -1;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -487,7 +488,7 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
/* Keep in sync with 'flags' from eth_get_headlen. */
__u32 eth_get_headlen_flags =
BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
- struct bpf_prog_test_run_attr tattr = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
struct bpf_flow_keys flow_keys = {};
__u32 key = (__u32)(tests[i].keys.sport) << 16 |
tests[i].keys.dport;
@@ -503,13 +504,12 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
- CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
+ ASSERT_OK(err, "bpf_map_lookup_elem");
- CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
err = bpf_map_delete_elem(keys_fd, &key);
- CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
+ ASSERT_OK(err, "bpf_map_delete_elem");
}
}
@@ -573,27 +573,24 @@ void test_flow_dissector(void)
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_flow_keys flow_keys;
- struct bpf_prog_test_run_attr tattr = {
- .prog_fd = prog_fd,
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &tests[i].pkt,
.data_size_in = sizeof(tests[i].pkt),
.data_out = &flow_keys,
- };
+ );
static struct bpf_flow_keys ctx = {};
if (tests[i].flags) {
- tattr.ctx_in = &ctx;
- tattr.ctx_size_in = sizeof(ctx);
+ topts.ctx_in = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
ctx.flags = tests[i].flags;
}
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) ||
- err || tattr.retval != 1,
- tests[i].name,
- "err %d errno %d retval %d duration %d size %u/%zu\n",
- err, errno, tattr.retval, tattr.duration,
- tattr.data_size_out, sizeof(flow_keys));
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 1, "test_run retval");
+ ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
+ "test_run data_size_out");
CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
index 93ac3f28226c..36afb409c25f 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -5,7 +5,6 @@
void serial_test_flow_dissector_load_bytes(void)
{
struct bpf_flow_keys flow_keys;
- __u32 duration = 0, retval, size;
struct bpf_insn prog[] = {
// BPF_REG_1 - 1st argument: context
// BPF_REG_2 - 2nd argument: offset, start at first byte
@@ -27,22 +26,25 @@ void serial_test_flow_dissector_load_bytes(void)
BPF_EXIT_INSN(),
};
int fd, err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = &flow_keys,
+ .data_size_out = sizeof(flow_keys),
+ .repeat = 1,
+ );
/* make sure bpf_skb_load_bytes is not allowed from skb-less context
*/
fd = bpf_test_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
- CHECK(fd < 0,
- "flow_dissector-bpf_skb_load_bytes-load",
- "fd %d errno %d\n",
- fd, errno);
+ ASSERT_GE(fd, 0, "bpf_test_load_program good fd");
- err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4),
- &flow_keys, &size, &retval, &duration);
- CHECK(size != sizeof(flow_keys) || err || retval != 1,
- "flow_dissector-bpf_skb_load_bytes",
- "err %d errno %d retval %d duration %d size %u/%zu\n",
- err, errno, retval, duration, size, sizeof(flow_keys));
+ err = bpf_prog_test_run_opts(fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
+ "test_run data_size_out");
+ ASSERT_EQ(topts.retval, 1, "test_run retval");
if (fd >= -1)
close(fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 68eb12a287d4..044df13ee069 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -12,8 +12,13 @@ static void test_hash_map(void)
int i, err, hashmap_fd, max_entries, percpu_map_fd;
struct for_each_hash_map_elem *skel;
__u64 *percpu_valbuf = NULL;
- __u32 key, num_cpus, retval;
+ __u32 key, num_cpus;
__u64 val;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
skel = for_each_hash_map_elem__open_and_load();
if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
@@ -42,11 +47,10 @@ static void test_hash_map(void)
if (!ASSERT_OK(err, "percpu_map_update"))
goto out;
- err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
- 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
- &retval, &duration);
- if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
- err, errno, retval))
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ duration = topts.duration;
+ if (CHECK(err || topts.retval, "ipv4", "err %d errno %d retval %d\n",
+ err, errno, topts.retval))
goto out;
ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output");
@@ -69,11 +73,16 @@ out:
static void test_array_map(void)
{
- __u32 key, num_cpus, max_entries, retval;
+ __u32 key, num_cpus, max_entries;
int i, arraymap_fd, percpu_map_fd, err;
struct for_each_array_map_elem *skel;
__u64 *percpu_valbuf = NULL;
__u64 val, expected_total;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
skel = for_each_array_map_elem__open_and_load();
if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
@@ -106,11 +115,10 @@ static void test_array_map(void)
if (!ASSERT_OK(err, "percpu_map_update"))
goto out;
- err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
- 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
- &retval, &duration);
- if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
- err, errno, retval))
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ duration = topts.duration;
+ if (CHECK(err || topts.retval, "ipv4", "err %d errno %d retval %d\n",
+ err, errno, topts.retval))
goto out;
ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output");
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
index 85c427119fe9..28cf63963cb7 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
@@ -5,8 +5,8 @@
void test_get_func_args_test(void)
{
struct get_func_args_test *skel = NULL;
- __u32 duration = 0, retval;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
skel = get_func_args_test__open_and_load();
if (!ASSERT_OK_PTR(skel, "get_func_args_test__open_and_load"))
@@ -20,19 +20,17 @@ void test_get_func_args_test(void)
* fentry/fexit programs.
*/
prog_fd = bpf_program__fd(skel->progs.test1);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
- ASSERT_EQ(retval, 0, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
/* This runs bpf_modify_return_test function and triggers
* fmod_ret_test and fexit_test programs.
*/
prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
- ASSERT_EQ(retval, 1234, "test_run");
+ ASSERT_EQ(topts.retval, 1234, "test_run");
ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
index 02a465f36d59..938dbd4d7c2f 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
@@ -5,8 +5,8 @@
void test_get_func_ip_test(void)
{
struct get_func_ip_test *skel = NULL;
- __u32 duration = 0, retval;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
skel = get_func_ip_test__open();
if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
@@ -29,14 +29,12 @@ void test_get_func_ip_test(void)
goto cleanup;
prog_fd = bpf_program__fd(skel->progs.test1);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
- ASSERT_EQ(retval, 0, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
prog_fd = bpf_program__fd(skel->progs.test5);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
index 8d5a6023a1bb..5308de1ed478 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -27,7 +27,7 @@ void test_get_stackid_cannot_attach(void)
return;
/* override program type */
- bpf_program__set_perf_event(skel->progs.oncpu);
+ bpf_program__set_type(skel->progs.oncpu, BPF_PROG_TYPE_PERF_EVENT);
err = test_stacktrace_build_id__load(skel);
if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index 9da131b32e13..6fb3d3155c35 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -121,7 +121,7 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
if (CHECK_FAIL(map_fd < 0))
return;
- buff = malloc(bpf_map__def(map)->value_size);
+ buff = malloc(bpf_map__value_size(map));
if (buff)
err = bpf_map_update_elem(map_fd, &zero, buff, 0);
free(buff);
@@ -132,24 +132,26 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
void test_global_data(void)
{
const char *file = "./test_global_data.o";
- __u32 duration = 0, retval;
struct bpf_object *obj;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (CHECK(err, "load program", "error %d loading %s\n", err, file))
+ if (!ASSERT_OK(err, "load program"))
return;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "pass global data run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "pass global data run err");
+ ASSERT_OK(topts.retval, "pass global data run retval");
- test_global_data_number(obj, duration);
- test_global_data_string(obj, duration);
- test_global_data_struct(obj, duration);
- test_global_data_rdonly(obj, duration);
+ test_global_data_number(obj, topts.duration);
+ test_global_data_string(obj, topts.duration);
+ test_global_data_struct(obj, topts.duration);
+ test_global_data_rdonly(obj, topts.duration);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 1db86eab101b..57331c606964 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -20,7 +20,7 @@ void test_global_data_init(void)
if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
goto out;
- sz = bpf_map__def(map)->value_size;
+ sz = bpf_map__value_size(map);
newval = malloc(sz);
if (CHECK_FAIL(!newval))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_args.c b/tools/testing/selftests/bpf/prog_tests/global_func_args.c
index 93a2439237b0..29039a36cce5 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_func_args.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c
@@ -40,19 +40,21 @@ static void test_global_func_args0(struct bpf_object *obj)
void test_global_func_args(void)
{
const char *file = "./test_global_func_args.o";
- __u32 retval;
struct bpf_object *obj;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
if (CHECK(err, "load program", "error %d loading %s\n", err, file))
return;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "pass global func args run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
test_global_func_args0(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index ce10d2fc3a6c..1cee6957285e 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -53,24 +53,24 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
void serial_test_kfree_skb(void)
{
struct __sk_buff skb = {};
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v6,
.data_size_in = sizeof(pkt_v6),
.ctx_in = &skb,
.ctx_size_in = sizeof(skb),
- };
+ );
struct kfree_skb *skel = NULL;
struct bpf_link *link;
struct bpf_object *obj;
struct perf_buffer *pb = NULL;
- int err;
+ int err, prog_fd;
bool passed = false;
__u32 duration = 0;
const int zero = 0;
bool test_ok[2];
err = bpf_prog_test_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
- &obj, &tattr.prog_fd);
+ &obj, &prog_fd);
if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
return;
@@ -100,11 +100,9 @@ void serial_test_kfree_skb(void)
goto close_prog;
memcpy(skb.cb, &cb, sizeof(cb));
- err = bpf_prog_test_run_xattr(&tattr);
- duration = tattr.duration;
- CHECK(err || tattr.retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, tattr.retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_OK(topts.retval, "ipv6 test_run retval");
/* read perf buffer */
err = perf_buffer__poll(pb, 100);
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 7d7445ccc141..c00eb974eb85 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -9,23 +9,31 @@
static void test_main(void)
{
struct kfunc_call_test_lskel *skel;
- int prog_fd, retval, err;
+ int prog_fd, err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
skel = kfunc_call_test_lskel__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel"))
return;
prog_fd = skel->progs.kfunc_call_test1.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, (__u32 *)&retval, NULL);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "bpf_prog_test_run(test1)");
- ASSERT_EQ(retval, 12, "test1-retval");
+ ASSERT_EQ(topts.retval, 12, "test1-retval");
prog_fd = skel->progs.kfunc_call_test2.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, (__u32 *)&retval, NULL);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "bpf_prog_test_run(test2)");
- ASSERT_EQ(retval, 3, "test2-retval");
+ ASSERT_EQ(topts.retval, 3, "test2-retval");
+
+ prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)");
+ ASSERT_EQ(topts.retval, 0, "test_ref_btf_id-retval");
kfunc_call_test_lskel__destroy(skel);
}
@@ -33,17 +41,21 @@ static void test_main(void)
static void test_subprog(void)
{
struct kfunc_call_test_subprog *skel;
- int prog_fd, retval, err;
+ int prog_fd, err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
skel = kfunc_call_test_subprog__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel"))
return;
prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, (__u32 *)&retval, NULL);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "bpf_prog_test_run(test1)");
- ASSERT_EQ(retval, 10, "test1-retval");
+ ASSERT_EQ(topts.retval, 10, "test1-retval");
ASSERT_NEQ(skel->data->active_res, -1, "active_res");
ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
@@ -53,17 +65,21 @@ static void test_subprog(void)
static void test_subprog_lskel(void)
{
struct kfunc_call_test_subprog_lskel *skel;
- int prog_fd, retval, err;
+ int prog_fd, err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
skel = kfunc_call_test_subprog_lskel__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel"))
return;
prog_fd = skel->progs.kfunc_call_test1.prog_fd;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, (__u32 *)&retval, NULL);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "bpf_prog_test_run(test1)");
- ASSERT_EQ(retval, 10, "test1-retval");
+ ASSERT_EQ(topts.retval, 10, "test1-retval");
ASSERT_NEQ(skel->data->active_res, -1, "active_res");
ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
index d490ad80eccb..a1ebac70ec29 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
@@ -6,11 +6,15 @@
#include "test_ksyms_module.lskel.h"
#include "test_ksyms_module.skel.h"
-void test_ksyms_module_lskel(void)
+static void test_ksyms_module_lskel(void)
{
struct test_ksyms_module_lskel *skel;
- int retval;
int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
if (!env.has_testmod) {
test__skip();
@@ -20,20 +24,24 @@ void test_ksyms_module_lskel(void)
skel = test_ksyms_module_lskel__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_ksyms_module_lskel__open_and_load"))
return;
- err = bpf_prog_test_run(skel->progs.load.prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, (__u32 *)&retval, NULL);
+ err = bpf_prog_test_run_opts(skel->progs.load.prog_fd, &topts);
if (!ASSERT_OK(err, "bpf_prog_test_run"))
goto cleanup;
- ASSERT_EQ(retval, 0, "retval");
+ ASSERT_EQ(topts.retval, 0, "retval");
ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym");
cleanup:
test_ksyms_module_lskel__destroy(skel);
}
-void test_ksyms_module_libbpf(void)
+static void test_ksyms_module_libbpf(void)
{
struct test_ksyms_module *skel;
- int retval, err;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
if (!env.has_testmod) {
test__skip();
@@ -43,11 +51,10 @@ void test_ksyms_module_libbpf(void)
skel = test_ksyms_module__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open"))
return;
- err = bpf_prog_test_run(bpf_program__fd(skel->progs.load), 1, &pkt_v4,
- sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL);
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.load), &topts);
if (!ASSERT_OK(err, "bpf_prog_test_run"))
goto cleanup;
- ASSERT_EQ(retval, 0, "retval");
+ ASSERT_EQ(topts.retval, 0, "retval");
ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym");
cleanup:
test_ksyms_module__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index 540ef28fabff..55f733ff4109 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -23,12 +23,16 @@ static void test_l4lb(const char *file)
__u8 flags;
} real_def = {.dst = MAGIC_VAL};
__u32 ch_key = 11, real_num = 3;
- __u32 duration, retval, size;
int err, i, prog_fd, map_fd;
__u64 bytes = 0, pkts = 0;
struct bpf_object *obj;
char buf[128];
u32 *magic = (u32 *)buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = NUM_ITER,
+ );
err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
@@ -49,19 +53,24 @@ static void test_l4lb(const char *file)
goto out;
bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
- *magic != MAGIC_VAL, "ipv4",
- "err %d errno %d retval %d size %d magic %x\n",
- err, errno, retval, size, *magic);
+ topts.data_in = &pkt_v4;
+ topts.data_size_in = sizeof(pkt_v4);
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
- *magic != MAGIC_VAL, "ipv6",
- "err %d errno %d retval %d size %d magic %x\n",
- err, errno, retval, size, *magic);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 7 /*TC_ACT_REDIRECT*/, "ipv4 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 54, "ipv4 test_run data_size_out");
+ ASSERT_EQ(*magic, MAGIC_VAL, "ipv4 magic");
+
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = sizeof(buf); /* reset out size */
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 7 /*TC_ACT_REDIRECT*/, "ipv6 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 74, "ipv6 test_run data_size_out");
+ ASSERT_EQ(*magic, MAGIC_VAL, "ipv6 magic");
map_fd = bpf_find_map(__func__, obj, "stats");
if (map_fd < 0)
diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
index e469b023962b..1ef377a7e731 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_buf.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -202,7 +202,7 @@ static void bpf_btf_load_log_buf(void)
const void *raw_btf_data;
__u32 raw_btf_size;
struct btf *btf;
- char *log_buf;
+ char *log_buf = NULL;
int fd = -1;
btf = btf__new_empty();
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index 23d19e9cf26a..e4e99b37df64 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -4,14 +4,17 @@
static void *spin_lock_thread(void *arg)
{
- __u32 duration, retval;
int err, prog_fd = *(u32 *) arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10000,
+ );
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+ ASSERT_OK(topts.retval, "test_run_opts retval");
- err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
pthread_exit(arg);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
index 273725504f11..43e502acf050 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -9,10 +9,16 @@
void test_map_ptr(void)
{
struct map_ptr_kern_lskel *skel;
- __u32 duration = 0, retval;
char buf[128];
int err;
int page_size = getpagesize();
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
skel = map_ptr_kern_lskel__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
@@ -26,14 +32,12 @@ void test_map_ptr(void)
skel->bss->page_size = page_size;
- err = bpf_prog_test_run(skel->progs.cg_skb.prog_fd, 1, &pkt_v4,
- sizeof(pkt_v4), buf, NULL, &retval, NULL);
+ err = bpf_prog_test_run_opts(skel->progs.cg_skb.prog_fd, &topts);
- if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno))
+ if (!ASSERT_OK(err, "test_run"))
goto cleanup;
- if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval,
- skel->bss->g_map_type, skel->bss->g_line))
+ if (!ASSERT_NEQ(topts.retval, 0, "test_run retval"))
goto cleanup;
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c
index b772fe30ce9b..5d9955af6247 100644
--- a/tools/testing/selftests/bpf/prog_tests/modify_return.c
+++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c
@@ -15,39 +15,31 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret)
{
struct modify_return *skel = NULL;
int err, prog_fd;
- __u32 duration = 0, retval;
__u16 side_effect;
__s16 ret;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
skel = modify_return__open_and_load();
- if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
goto cleanup;
err = modify_return__attach(skel);
- if (CHECK(err, "modify_return", "attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "modify_return__attach failed"))
goto cleanup;
skel->bss->input_retval = input_retval;
prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0,
- &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
- CHECK(err, "test_run", "err %d errno %d\n", err, errno);
+ side_effect = UPPER(topts.retval);
+ ret = LOWER(topts.retval);
- side_effect = UPPER(retval);
- ret = LOWER(retval);
-
- CHECK(ret != want_ret, "test_run",
- "unexpected ret: %d, expected: %d\n", ret, want_ret);
- CHECK(side_effect != want_side_effect, "modify_return",
- "unexpected side_effect: %d\n", side_effect);
-
- CHECK(skel->bss->fentry_result != 1, "modify_return",
- "fentry failed\n");
- CHECK(skel->bss->fexit_result != 1, "modify_return",
- "fexit failed\n");
- CHECK(skel->bss->fmod_ret_result != 1, "modify_return",
- "fmod_ret failed\n");
+ ASSERT_EQ(ret, want_ret, "test_run ret");
+ ASSERT_EQ(side_effect, want_side_effect, "modify_return side_effect");
+ ASSERT_EQ(skel->bss->fentry_result, 1, "modify_return fentry_result");
+ ASSERT_EQ(skel->bss->fexit_result, 1, "modify_return fexit_result");
+ ASSERT_EQ(skel->bss->fmod_ret_result, 1, "modify_return fmod_ret_result");
cleanup:
modify_return__destroy(skel);
@@ -63,4 +55,3 @@ void serial_test_modify_return(void)
0 /* want_side_effect */,
-EINVAL /* want_ret */);
}
-
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index 6628710ec3c6..0bcccdc34fbc 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -6,23 +6,27 @@ void test_pkt_access(void)
{
const char *file = "./test_pkt_access.o";
struct bpf_object *obj;
- __u32 duration, retval;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 100000,
+ );
err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv4",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv4 test_run_opts err");
+ ASSERT_OK(topts.retval, "ipv4 test_run_opts retval");
+
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = 0; /* reset from last call */
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6 test_run_opts err");
+ ASSERT_OK(topts.retval, "ipv6 test_run_opts retval");
- err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index c9d2d6a1bfcc..00ee1dd792aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -6,18 +6,20 @@ void test_pkt_md_access(void)
{
const char *file = "./test_pkt_md_access.o";
struct bpf_object *obj;
- __u32 duration, retval;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10,
+ );
err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+ ASSERT_OK(topts.retval, "test_run_opts retval");
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c
new file mode 100644
index 000000000000..1ccd2bdf8fa8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_pkt_access.skel.h"
+
+static const __u32 duration;
+
+static void check_run_cnt(int prog_fd, __u64 run_cnt)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
+ return;
+
+ CHECK(run_cnt != info.run_cnt, "run_cnt",
+ "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt);
+}
+
+void test_prog_run_opts(void)
+{
+ struct test_pkt_access *skel;
+ int err, stats_fd = -1, prog_fd;
+ char buf[10] = {};
+ __u64 run_cnt = 0;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .repeat = 1,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = 5,
+ );
+
+ stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+ if (!ASSERT_GE(stats_fd, 0, "enable_stats good fd"))
+ return;
+
+ skel = test_pkt_access__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_EQ(errno, ENOSPC, "test_run errno");
+ ASSERT_ERR(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
+
+ ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4), "test_run data_size_out");
+ ASSERT_EQ(buf[5], 0, "overflow, BPF_PROG_TEST_RUN ignored size hint");
+
+ run_cnt += topts.repeat;
+ check_run_cnt(prog_fd, run_cnt);
+
+ topts.data_out = NULL;
+ topts.data_size_out = 0;
+ topts.repeat = 2;
+ errno = 0;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(errno, "run_no_output errno");
+ ASSERT_OK(err, "run_no_output err");
+ ASSERT_OK(topts.retval, "run_no_output retval");
+
+ run_cnt += topts.repeat;
+ check_run_cnt(prog_fd, run_cnt);
+
+cleanup:
+ if (skel)
+ test_pkt_access__destroy(skel);
+ if (stats_fd >= 0)
+ close(stats_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
deleted file mode 100644
index 89fc98faf19e..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include <network_helpers.h>
-
-#include "test_pkt_access.skel.h"
-
-static const __u32 duration;
-
-static void check_run_cnt(int prog_fd, __u64 run_cnt)
-{
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- int err;
-
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
- return;
-
- CHECK(run_cnt != info.run_cnt, "run_cnt",
- "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt);
-}
-
-void test_prog_run_xattr(void)
-{
- struct test_pkt_access *skel;
- int err, stats_fd = -1;
- char buf[10] = {};
- __u64 run_cnt = 0;
-
- struct bpf_prog_test_run_attr tattr = {
- .repeat = 1,
- .data_in = &pkt_v4,
- .data_size_in = sizeof(pkt_v4),
- .data_out = buf,
- .data_size_out = 5,
- };
-
- stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
- if (CHECK_ATTR(stats_fd < 0, "enable_stats", "failed %d\n", errno))
- return;
-
- skel = test_pkt_access__open_and_load();
- if (CHECK_ATTR(!skel, "open_and_load", "failed\n"))
- goto cleanup;
-
- tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
-
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err >= 0 || errno != ENOSPC || tattr.retval, "run",
- "err %d errno %d retval %d\n", err, errno, tattr.retval);
-
- CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
- "incorrect output size, want %zu have %u\n",
- sizeof(pkt_v4), tattr.data_size_out);
-
- CHECK_ATTR(buf[5] != 0, "overflow",
- "BPF_PROG_TEST_RUN ignored size hint\n");
-
- run_cnt += tattr.repeat;
- check_run_cnt(tattr.prog_fd, run_cnt);
-
- tattr.data_out = NULL;
- tattr.data_size_out = 0;
- tattr.repeat = 2;
- errno = 0;
-
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err || errno || tattr.retval, "run_no_output",
- "err %d errno %d retval %d\n", err, errno, tattr.retval);
-
- tattr.data_size_out = 1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err);
-
- run_cnt += tattr.repeat;
- check_run_cnt(tattr.prog_fd, run_cnt);
-
-cleanup:
- if (skel)
- test_pkt_access__destroy(skel);
- if (stats_fd >= 0)
- close(stats_fd);
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index b9822f914eeb..d2743fc10032 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -10,11 +10,18 @@ enum {
static void test_queue_stack_map_by_type(int type)
{
const int MAP_SIZE = 32;
- __u32 vals[MAP_SIZE], duration, retval, size, val;
+ __u32 vals[MAP_SIZE], val;
int i, err, prog_fd, map_in_fd, map_out_fd;
char file[32], buf[128];
struct bpf_object *obj;
struct iphdr iph;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
/* Fill test values to be used */
for (i = 0; i < MAP_SIZE; i++)
@@ -58,38 +65,37 @@ static void test_queue_stack_map_by_type(int type)
pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5;
}
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- if (err || retval || size != sizeof(pkt_v4))
+ topts.data_size_out = sizeof(buf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (err || topts.retval ||
+ topts.data_size_out != sizeof(pkt_v4))
break;
memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
if (iph.daddr != val)
break;
}
- CHECK(err || retval || size != sizeof(pkt_v4) || iph.daddr != val,
- "bpf_map_pop_elem",
- "err %d errno %d retval %d size %d iph->daddr %u\n",
- err, errno, retval, size, iph.daddr);
+ ASSERT_OK(err, "bpf_map_pop_elem");
+ ASSERT_OK(topts.retval, "bpf_map_pop_elem test retval");
+ ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4),
+ "bpf_map_pop_elem data_size_out");
+ ASSERT_EQ(iph.daddr, val, "bpf_map_pop_elem iph.daddr");
/* Queue is empty, program should return TC_ACT_SHOT */
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4),
- "check-queue-stack-map-empty",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ topts.data_size_out = sizeof(buf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "check-queue-stack-map-empty");
+ ASSERT_EQ(topts.retval, 2 /* TC_ACT_SHOT */,
+ "check-queue-stack-map-empty test retval");
+ ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4),
+ "check-queue-stack-map-empty data_size_out");
/* Check that the program pushed elements correctly */
for (i = 0; i < MAP_SIZE; i++) {
err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val);
- if (err || val != vals[i] * 5)
- break;
+ ASSERT_OK(err, "bpf_map_lookup_and_delete_elem");
+ ASSERT_EQ(val, vals[i] * 5, "bpf_map_push_elem val");
}
-
- CHECK(i != MAP_SIZE && (err || val != vals[i] * 5),
- "bpf_map_push_elem", "err %d value %u\n", err, val);
-
out:
pkt_v4.iph.saddr = 0;
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
index 41720a62c4fa..fe5b8fae2c36 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
@@ -5,59 +5,54 @@
#include "bpf/libbpf_internal.h"
#include "test_raw_tp_test_run.skel.h"
-static int duration;
-
void test_raw_tp_test_run(void)
{
- struct bpf_prog_test_run_attr test_attr = {};
int comm_fd = -1, err, nr_online, i, prog_fd;
__u64 args[2] = {0x1234ULL, 0x5678ULL};
int expected_retval = 0x1234 + 0x5678;
struct test_raw_tp_test_run *skel;
char buf[] = "new_name";
bool *online = NULL;
- DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
- .ctx_in = args,
- .ctx_size_in = sizeof(args),
- .flags = BPF_F_TEST_RUN_ON_CPU,
- );
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ );
err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online,
&nr_online);
- if (CHECK(err, "parse_cpu_mask_file", "err %d\n", err))
+ if (!ASSERT_OK(err, "parse_cpu_mask_file"))
return;
skel = test_raw_tp_test_run__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
goto cleanup;
err = test_raw_tp_test_run__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "skel_attach"))
goto cleanup;
comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
- if (CHECK(comm_fd < 0, "open /proc/self/comm", "err %d\n", errno))
+ if (!ASSERT_GE(comm_fd, 0, "open /proc/self/comm"))
goto cleanup;
err = write(comm_fd, buf, sizeof(buf));
- CHECK(err < 0, "task rename", "err %d", errno);
+ ASSERT_GE(err, 0, "task rename");
- CHECK(skel->bss->count == 0, "check_count", "didn't increase\n");
- CHECK(skel->data->on_cpu != 0xffffffff, "check_on_cpu", "got wrong value\n");
+ ASSERT_NEQ(skel->bss->count, 0, "check_count");
+ ASSERT_EQ(skel->data->on_cpu, 0xffffffff, "check_on_cpu");
prog_fd = bpf_program__fd(skel->progs.rename);
- test_attr.prog_fd = prog_fd;
- test_attr.ctx_in = args;
- test_attr.ctx_size_in = sizeof(__u64);
+ opts.ctx_in = args;
+ opts.ctx_size_in = sizeof(__u64);
- err = bpf_prog_test_run_xattr(&test_attr);
- CHECK(err == 0, "test_run", "should fail for too small ctx\n");
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_NEQ(err, 0, "test_run should fail for too small ctx");
- test_attr.ctx_size_in = sizeof(args);
- err = bpf_prog_test_run_xattr(&test_attr);
- CHECK(err < 0, "test_run", "err %d\n", errno);
- CHECK(test_attr.retval != expected_retval, "check_retval",
- "expect 0x%x, got 0x%x\n", expected_retval, test_attr.retval);
+ opts.ctx_size_in = sizeof(args);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(opts.retval, expected_retval, "check_retval");
for (i = 0; i < nr_online; i++) {
if (!online[i])
@@ -66,28 +61,23 @@ void test_raw_tp_test_run(void)
opts.cpu = i;
opts.retval = 0;
err = bpf_prog_test_run_opts(prog_fd, &opts);
- CHECK(err < 0, "test_run_opts", "err %d\n", errno);
- CHECK(skel->data->on_cpu != i, "check_on_cpu",
- "expect %d got %d\n", i, skel->data->on_cpu);
- CHECK(opts.retval != expected_retval,
- "check_retval", "expect 0x%x, got 0x%x\n",
- expected_retval, opts.retval);
+ ASSERT_OK(err, "test_run_opts");
+ ASSERT_EQ(skel->data->on_cpu, i, "check_on_cpu");
+ ASSERT_EQ(opts.retval, expected_retval, "check_retval");
}
/* invalid cpu ID should fail with ENXIO */
opts.cpu = 0xffffffff;
err = bpf_prog_test_run_opts(prog_fd, &opts);
- CHECK(err >= 0 || errno != ENXIO,
- "test_run_opts_fail",
- "should failed with ENXIO\n");
+ ASSERT_EQ(errno, ENXIO, "test_run_opts should fail with ENXIO");
+ ASSERT_ERR(err, "test_run_opts_fail");
/* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */
opts.cpu = 1;
opts.flags = 0;
err = bpf_prog_test_run_opts(prog_fd, &opts);
- CHECK(err >= 0 || errno != EINVAL,
- "test_run_opts_fail",
- "should failed with EINVAL\n");
+ ASSERT_EQ(errno, EINVAL, "test_run_opts should fail with EINVAL");
+ ASSERT_ERR(err, "test_run_opts_fail");
cleanup:
close(comm_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
index 239baccabccb..f4aa7dab4766 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
@@ -56,21 +56,23 @@ void serial_test_raw_tp_writable_test_run(void)
0,
};
- __u32 prog_ret;
- int err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
- 0, &prog_ret, 0);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = test_skb,
+ .data_size_in = sizeof(test_skb),
+ .repeat = 1,
+ );
+ int err = bpf_prog_test_run_opts(filter_fd, &topts);
CHECK(err != 42, "test_run",
"tracepoint did not modify return value\n");
- CHECK(prog_ret != 0, "test_run_ret",
+ CHECK(topts.retval != 0, "test_run_ret",
"socket_filter did not return 0\n");
close(tp_fd);
- err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0,
- &prog_ret, 0);
+ err = bpf_prog_test_run_opts(filter_fd, &topts);
CHECK(err != 0, "test_run_notrace",
"test_run failed with %d errno %d\n", err, errno);
- CHECK(prog_ret != 0, "test_run_ret_notrace",
+ CHECK(topts.retval != 0, "test_run_ret_notrace",
"socket_filter did not return 0\n");
out_filterfd:
diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
index aecfe662c070..70b49da5ca0a 100644
--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c
+++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
@@ -13,10 +13,14 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type)
struct itimerval timeo = {
.it_value.tv_usec = 100000, /* 100ms */
};
- __u32 duration = 0, retval;
int prog_fd;
int err;
int i;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 0xffffffff,
+ );
for (i = 0; i < ARRAY_SIZE(prog); i++)
prog[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
@@ -24,20 +28,17 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type)
prog_fd = bpf_test_load_program(prog_type, prog, ARRAY_SIZE(prog),
"GPL", 0, NULL, 0);
- CHECK(prog_fd < 0, "test-run", "errno %d\n", errno);
+ ASSERT_GE(prog_fd, 0, "test-run load");
err = sigaction(SIGALRM, &sigalrm_action, NULL);
- CHECK(err, "test-run-signal-sigaction", "errno %d\n", errno);
+ ASSERT_OK(err, "test-run-signal-sigaction");
err = setitimer(ITIMER_REAL, &timeo, NULL);
- CHECK(err, "test-run-signal-timer", "errno %d\n", errno);
-
- err = bpf_prog_test_run(prog_fd, 0xffffffff, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(duration > 500000000, /* 500ms */
- "test-run-signal-duration",
- "duration %dns > 500ms\n",
- duration);
+ ASSERT_OK(err, "test-run-signal-timer");
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_LE(topts.duration, 500000000 /* 500ms */,
+ "test-run-signal-duration");
signal(SIGALRM, SIG_DFL);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index b5319ba2ee27..ce0e555b5e38 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -20,97 +20,72 @@ void test_skb_ctx(void)
.gso_size = 10,
.hwtstamp = 11,
};
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, tattr,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.ctx_in = &skb,
.ctx_size_in = sizeof(skb),
.ctx_out = &skb,
.ctx_size_out = sizeof(skb),
- };
+ );
struct bpf_object *obj;
- int err;
- int i;
+ int err, prog_fd, i;
- err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (!ASSERT_OK(err, "load"))
return;
/* ctx_in != NULL, ctx_size_in == 0 */
tattr.ctx_size_in = 0;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "ctx_size_in", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "ctx_size_in");
tattr.ctx_size_in = sizeof(skb);
/* ctx_out != NULL, ctx_size_out == 0 */
tattr.ctx_size_out = 0;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "ctx_size_out", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "ctx_size_out");
tattr.ctx_size_out = sizeof(skb);
/* non-zero [len, tc_index] fields should be rejected*/
skb.len = 1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "len", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "len");
skb.len = 0;
skb.tc_index = 1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "tc_index", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "tc_index");
skb.tc_index = 0;
/* non-zero [hash, sk] fields should be rejected */
skb.hash = 1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "hash", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "hash");
skb.hash = 0;
skb.sk = (struct bpf_sock *)1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "sk", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "sk");
skb.sk = 0;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != 0 || tattr.retval,
- "run",
- "err %d errno %d retval %d\n",
- err, errno, tattr.retval);
-
- CHECK_ATTR(tattr.ctx_size_out != sizeof(skb),
- "ctx_size_out",
- "incorrect output size, want %zu have %u\n",
- sizeof(skb), tattr.ctx_size_out);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(tattr.retval, "test_run retval");
+ ASSERT_EQ(tattr.ctx_size_out, sizeof(skb), "ctx_size_out");
for (i = 0; i < 5; i++)
- CHECK_ATTR(skb.cb[i] != i + 2,
- "ctx_out_cb",
- "skb->cb[i] == %d, expected %d\n",
- skb.cb[i], i + 2);
- CHECK_ATTR(skb.priority != 7,
- "ctx_out_priority",
- "skb->priority == %d, expected %d\n",
- skb.priority, 7);
- CHECK_ATTR(skb.ifindex != 1,
- "ctx_out_ifindex",
- "skb->ifindex == %d, expected %d\n",
- skb.ifindex, 1);
- CHECK_ATTR(skb.ingress_ifindex != 11,
- "ctx_out_ingress_ifindex",
- "skb->ingress_ifindex == %d, expected %d\n",
- skb.ingress_ifindex, 11);
- CHECK_ATTR(skb.tstamp != 8,
- "ctx_out_tstamp",
- "skb->tstamp == %lld, expected %d\n",
- skb.tstamp, 8);
- CHECK_ATTR(skb.mark != 10,
- "ctx_out_mark",
- "skb->mark == %u, expected %d\n",
- skb.mark, 10);
+ ASSERT_EQ(skb.cb[i], i + 2, "ctx_out_cb");
+ ASSERT_EQ(skb.priority, 7, "ctx_out_priority");
+ ASSERT_EQ(skb.ifindex, 1, "ctx_out_ifindex");
+ ASSERT_EQ(skb.ingress_ifindex, 11, "ctx_out_ingress_ifindex");
+ ASSERT_EQ(skb.tstamp, 8, "ctx_out_tstamp");
+ ASSERT_EQ(skb.mark, 10, "ctx_out_mark");
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
index 6f802a1c0800..97dc8b14be48 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
@@ -9,22 +9,22 @@ void test_skb_helpers(void)
.gso_segs = 8,
.gso_size = 10,
};
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.ctx_in = &skb,
.ctx_size_in = sizeof(skb),
.ctx_out = &skb,
.ctx_size_out = sizeof(skb),
- };
+ );
struct bpf_object *obj;
- int err;
+ int err, prog_fd;
- err = bpf_prog_test_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load("./test_skb_helpers.o",
+ BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (!ASSERT_OK(err, "load"))
return;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err, "len", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index 9fc040eaa482..9d211b5c22c4 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
+#define _GNU_SOURCE
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
+#include <sched.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
@@ -20,6 +22,7 @@
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
INGRESS_LINUM_IDX,
+ READ_SK_DST_PORT_LINUM_IDX,
__NR_BPF_LINUM_ARRAY_IDX,
};
@@ -42,8 +45,16 @@ static __u64 child_cg_id;
static int linum_map_fd;
static __u32 duration;
-static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
-static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
+static bool create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return false;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+ return false;
+
+ return true;
+}
static void print_sk(const struct bpf_sock *sk, const char *prefix)
{
@@ -91,19 +102,24 @@ static void check_result(void)
{
struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
struct bpf_sock srv_sk, cli_sk, listen_sk;
- __u32 ingress_linum, egress_linum;
+ __u32 idx, ingress_linum, egress_linum, linum;
int err;
- err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
- &egress_linum);
+ idx = EGRESS_LINUM_IDX;
+ err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum);
CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d\n", err, errno);
- err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
- &ingress_linum);
+ idx = INGRESS_LINUM_IDX;
+ err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum);
CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d\n", err, errno);
+ idx = READ_SK_DST_PORT_LINUM_IDX;
+ err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum);
+ ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)");
+ ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line");
+
memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
@@ -262,7 +278,7 @@ static void test(void)
char buf[DATA_LEN];
/* Prepare listen_fd */
- listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0);
/* start_server() has logged the error details */
if (CHECK_FAIL(listen_fd == -1))
goto done;
@@ -330,8 +346,12 @@ done:
void serial_test_sock_fields(void)
{
- struct bpf_link *egress_link = NULL, *ingress_link = NULL;
int parent_cg_fd = -1, child_cg_fd = -1;
+ struct bpf_link *link;
+
+ /* Use a dedicated netns to have a fixed listen port */
+ if (!create_netns())
+ return;
/* Create a cgroup, get fd, and join it */
parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
@@ -352,15 +372,20 @@ void serial_test_sock_fields(void)
if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
goto done;
- egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
- child_cg_fd);
- if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
+ link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)"))
+ goto done;
+ skel->links.egress_read_sock_fields = link;
+
+ link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)"))
goto done;
+ skel->links.ingress_read_sock_fields = link;
- ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
- child_cg_fd);
- if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
+ link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port"))
goto done;
+ skel->links.read_sk_dst_port = link;
linum_map_fd = bpf_map__fd(skel->maps.linum_map);
sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
@@ -369,8 +394,7 @@ void serial_test_sock_fields(void)
test();
done:
- bpf_link__destroy(egress_link);
- bpf_link__destroy(ingress_link);
+ test_sock_fields__detach(skel);
test_sock_fields__destroy(skel);
if (child_cg_fd >= 0)
close(child_cg_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 85db0f4cdd95..cec5c0882372 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -8,6 +8,7 @@
#include "test_sockmap_update.skel.h"
#include "test_sockmap_invalid_update.skel.h"
#include "test_sockmap_skb_verdict_attach.skel.h"
+#include "test_sockmap_progs_query.skel.h"
#include "bpf_iter_sockmap.skel.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
@@ -139,12 +140,16 @@ out:
static void test_sockmap_update(enum bpf_map_type map_type)
{
- struct bpf_prog_test_run_attr tattr;
int err, prog, src, duration = 0;
struct test_sockmap_update *skel;
struct bpf_map *dst_map;
const __u32 zero = 0;
char dummy[14] = {0};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = dummy,
+ .data_size_in = sizeof(dummy),
+ .repeat = 1,
+ );
__s64 sk;
sk = connected_socket_v4();
@@ -166,16 +171,10 @@ static void test_sockmap_update(enum bpf_map_type map_type)
if (CHECK(err, "update_elem(src)", "errno=%u\n", errno))
goto out;
- tattr = (struct bpf_prog_test_run_attr){
- .prog_fd = prog,
- .repeat = 1,
- .data_in = dummy,
- .data_size_in = sizeof(dummy),
- };
-
- err = bpf_prog_test_run_xattr(&tattr);
- if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run",
- "errno=%u retval=%u\n", errno, tattr.retval))
+ err = bpf_prog_test_run_opts(prog, &topts);
+ if (!ASSERT_OK(err, "test_run"))
+ goto out;
+ if (!ASSERT_NEQ(topts.retval, 0, "test_run retval"))
goto out;
compare_cookies(skel->maps.src, dst_map);
@@ -315,6 +314,63 @@ out:
test_sockmap_skb_verdict_attach__destroy(skel);
}
+static __u32 query_prog_id(int prog_fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") ||
+ !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd"))
+ return 0;
+
+ return info.id;
+}
+
+static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
+{
+ struct test_sockmap_progs_query *skel;
+ int err, map_fd, verdict_fd;
+ __u32 attach_flags = 0;
+ __u32 prog_ids[3] = {};
+ __u32 prog_cnt = 3;
+
+ skel = test_sockmap_progs_query__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ if (attach_type == BPF_SK_MSG_VERDICT)
+ verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict);
+ else
+ verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict);
+
+ err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+ &attach_flags, prog_ids, &prog_cnt);
+ ASSERT_OK(err, "bpf_prog_query failed");
+ ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+ ASSERT_EQ(prog_cnt, 0, "wrong program count on query");
+
+ err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach failed"))
+ goto out;
+
+ prog_cnt = 1;
+ err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+ &attach_flags, prog_ids, &prog_cnt);
+ ASSERT_OK(err, "bpf_prog_query failed");
+ ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+ ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
+ ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
+ "wrong prog_ids on query");
+
+ bpf_prog_detach2(verdict_fd, map_fd, attach_type);
+out:
+ test_sockmap_progs_query__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -341,4 +397,12 @@ void test_sockmap_basic(void)
test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
BPF_SK_SKB_VERDICT);
}
+ if (test__start_subtest("sockmap msg_verdict progs query"))
+ test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
+ if (test__start_subtest("sockmap stream_parser progs query"))
+ test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
+ if (test__start_subtest("sockmap stream_verdict progs query"))
+ test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
+ if (test__start_subtest("sockmap skb_verdict progs query"))
+ test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 7e21bfab6358..2cf0c7a3fe23 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1413,14 +1413,12 @@ close_srv1:
static void test_ops_cleanup(const struct bpf_map *map)
{
- const struct bpf_map_def *def;
int err, mapfd;
u32 key;
- def = bpf_map__def(map);
mapfd = bpf_map__fd(map);
- for (key = 0; key < def->max_entries; key++) {
+ for (key = 0; key < bpf_map__max_entries(map); key++) {
err = bpf_map_delete_elem(mapfd, &key);
if (err && errno != EINVAL && errno != ENOENT)
FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
@@ -1443,13 +1441,13 @@ static const char *family_str(sa_family_t family)
static const char *map_type_str(const struct bpf_map *map)
{
- const struct bpf_map_def *def;
+ int type;
- def = bpf_map__def(map);
- if (IS_ERR(def))
+ if (!map)
return "invalid";
+ type = bpf_map__type(map);
- switch (def->type) {
+ switch (type) {
case BPF_MAP_TYPE_SOCKMAP:
return "sockmap";
case BPF_MAP_TYPE_SOCKHASH:
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 4b937e5dbaca..30a99d2ed5c6 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -173,11 +173,11 @@ static int getsetsockopt(void)
}
memset(&buf, 0, sizeof(buf));
- buf.zc.address = 12345; /* rejected by BPF */
+ buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */
optlen = sizeof(buf.zc);
errno = 0;
err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen);
- if (errno != EPERM) {
+ if (errno != EINVAL) {
log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d",
err, errno);
goto err;
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 6307f5d2b417..8e329eaee6d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -4,14 +4,16 @@
static void *spin_lock_thread(void *arg)
{
- __u32 duration, retval;
int err, prog_fd = *(u32 *) arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10000,
+ );
- err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
pthread_exit(arg);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 0a91d8d9954b..f45a1d7b0a28 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -42,7 +42,7 @@ retry:
return;
/* override program type */
- bpf_program__set_perf_event(skel->progs.oncpu);
+ bpf_program__set_type(skel->progs.oncpu, BPF_PROG_TYPE_PERF_EVENT);
err = test_stacktrace_build_id__load(skel);
if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c
index 81e997a69f7a..f4d40001155a 100644
--- a/tools/testing/selftests/bpf/prog_tests/syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/syscall.c
@@ -20,20 +20,20 @@ void test_syscall(void)
.log_buf = (uintptr_t) verifier_log,
.log_size = sizeof(verifier_log),
};
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, tattr,
.ctx_in = &ctx,
.ctx_size_in = sizeof(ctx),
- };
+ );
struct syscall *skel = NULL;
__u64 key = 12, value = 0;
- int err;
+ int err, prog_fd;
skel = syscall__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_load"))
goto cleanup;
- tattr.prog_fd = bpf_program__fd(skel->progs.bpf_prog);
- err = bpf_prog_test_run_xattr(&tattr);
+ prog_fd = bpf_program__fd(skel->progs.bpf_prog);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
ASSERT_EQ(err, 0, "err");
ASSERT_EQ(tattr.retval, 1, "retval");
ASSERT_GT(ctx.map_fd, 0, "ctx.map_fd");
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 5dc0f425bd11..c4da87ec3ba4 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -12,9 +12,13 @@ static void test_tailcall_1(void)
struct bpf_map *prog_array;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char prog_name[32];
char buff[128] = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
err = bpf_prog_test_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
&prog_fd);
@@ -37,7 +41,7 @@ static void test_tailcall_1(void)
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -53,23 +57,21 @@ static void test_tailcall_1(void)
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != i, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, i, "tailcall retval");
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -85,13 +87,12 @@ static void test_tailcall_1(void)
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_OK(topts.retval, "tailcall retval");
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- j = bpf_map__def(prog_array)->max_entries - 1 - i;
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ j = bpf_map__max_entries(prog_array) - 1 - i;
snprintf(prog_name, sizeof(prog_name), "classifier_%d", j);
prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -107,33 +108,30 @@ static void test_tailcall_1(void)
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- j = bpf_map__def(prog_array)->max_entries - 1 - i;
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ j = bpf_map__max_entries(prog_array) - 1 - i;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != j, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, j, "tailcall retval");
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err >= 0 || errno != ENOENT))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
}
out:
@@ -150,9 +148,13 @@ static void test_tailcall_2(void)
struct bpf_map *prog_array;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char prog_name[32];
char buff[128] = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
err = bpf_prog_test_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
&prog_fd);
@@ -175,7 +177,7 @@ static void test_tailcall_2(void)
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -191,30 +193,27 @@ static void test_tailcall_2(void)
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 2, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 2, "tailcall retval");
i = 2;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
i = 0;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
out:
bpf_object__close(obj);
}
@@ -225,8 +224,12 @@ static void test_tailcall_count(const char *which)
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char buff[128] = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj,
&prog_fd);
@@ -262,10 +265,9 @@ static void test_tailcall_count(const char *which)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
@@ -277,18 +279,17 @@ static void test_tailcall_count(const char *which)
i = 0;
err = bpf_map_lookup_elem(data_fd, &i, &val);
- CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
- err, errno, val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 33, "tailcall count");
i = 0;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_OK(topts.retval, "tailcall retval");
out:
bpf_object__close(obj);
}
@@ -319,10 +320,14 @@ static void test_tailcall_4(void)
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
static const int zero = 0;
char buff[128] = {};
char prog_name[32];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
err = bpf_prog_test_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
&prog_fd);
@@ -353,7 +358,7 @@ static void test_tailcall_4(void)
if (CHECK_FAIL(map_fd < 0))
return;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -369,18 +374,17 @@ static void test_tailcall_4(void)
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != i, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, i, "tailcall retval");
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
if (CHECK_FAIL(err))
goto out;
@@ -389,10 +393,9 @@ static void test_tailcall_4(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
}
out:
bpf_object__close(obj);
@@ -407,10 +410,14 @@ static void test_tailcall_5(void)
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
static const int zero = 0;
char buff[128] = {};
char prog_name[32];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
err = bpf_prog_test_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
&prog_fd);
@@ -441,7 +448,7 @@ static void test_tailcall_5(void)
if (CHECK_FAIL(map_fd < 0))
return;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -457,18 +464,17 @@ static void test_tailcall_5(void)
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != i, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, i, "tailcall retval");
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
if (CHECK_FAIL(err))
goto out;
@@ -477,10 +483,9 @@ static void test_tailcall_5(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
}
out:
bpf_object__close(obj);
@@ -495,8 +500,12 @@ static void test_tailcall_bpf2bpf_1(void)
struct bpf_map *prog_array;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char prog_name[32];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
err = bpf_prog_test_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS,
&obj, &prog_fd);
@@ -520,7 +529,7 @@ static void test_tailcall_bpf2bpf_1(void)
goto out;
/* nop -> jmp */
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -536,10 +545,9 @@ static void test_tailcall_bpf2bpf_1(void)
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- 0, &retval, &duration);
- CHECK(err || retval != 1, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
/* jmp -> nop, call subprog that will do tailcall */
i = 1;
@@ -547,10 +555,9 @@ static void test_tailcall_bpf2bpf_1(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- 0, &retval, &duration);
- CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_OK(topts.retval, "tailcall retval");
/* make sure that subprog can access ctx and entry prog that
* called this subprog can properly return
@@ -560,11 +567,9 @@ static void test_tailcall_bpf2bpf_1(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- 0, &retval, &duration);
- CHECK(err || retval != sizeof(pkt_v4) * 2,
- "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 2, "tailcall retval");
out:
bpf_object__close(obj);
}
@@ -579,8 +584,12 @@ static void test_tailcall_bpf2bpf_2(void)
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char buff[128] = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
err = bpf_prog_test_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS,
&obj, &prog_fd);
@@ -616,10 +625,9 @@ static void test_tailcall_bpf2bpf_2(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
@@ -631,18 +639,17 @@ static void test_tailcall_bpf2bpf_2(void)
i = 0;
err = bpf_map_lookup_elem(data_fd, &i, &val);
- CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
- err, errno, val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 33, "tailcall count");
i = 0;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_OK(topts.retval, "tailcall retval");
out:
bpf_object__close(obj);
}
@@ -657,8 +664,12 @@ static void test_tailcall_bpf2bpf_3(void)
struct bpf_map *prog_array;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char prog_name[32];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
err = bpf_prog_test_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS,
&obj, &prog_fd);
@@ -681,7 +692,7 @@ static void test_tailcall_bpf2bpf_3(void)
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -697,33 +708,27 @@ static void test_tailcall_bpf2bpf_3(void)
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != sizeof(pkt_v4) * 3,
- "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 3, "tailcall retval");
i = 1;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != sizeof(pkt_v4),
- "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4), "tailcall retval");
i = 0;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != sizeof(pkt_v4) * 2,
- "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 2, "tailcall retval");
out:
bpf_object__close(obj);
}
@@ -754,8 +759,12 @@ static void test_tailcall_bpf2bpf_4(bool noise)
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char prog_name[32];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
err = bpf_prog_test_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS,
&obj, &prog_fd);
@@ -778,7 +787,7 @@ static void test_tailcall_bpf2bpf_4(bool noise)
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -809,15 +818,14 @@ static void test_tailcall_bpf2bpf_4(bool noise)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 3, "tailcall retval");
i = 0;
err = bpf_map_lookup_elem(data_fd, &i, &val);
- CHECK(err || val.count != 31, "tailcall count", "err %d errno %d count %d\n",
- err, errno, val.count);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val.count, 31, "tailcall count");
out:
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
index 37c20b5ffa70..61935e7e056a 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -3,18 +3,22 @@
#include <test_progs.h>
#include "test_task_pt_regs.skel.h"
+/* uprobe attach point */
+static void trigger_func(void)
+{
+ asm volatile ("");
+}
+
void test_task_pt_regs(void)
{
struct test_task_pt_regs *skel;
struct bpf_link *uprobe_link;
- size_t uprobe_offset;
- ssize_t base_addr;
+ ssize_t uprobe_offset;
bool match;
- base_addr = get_base_addr();
- if (!ASSERT_GT(base_addr, 0, "get_base_addr"))
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
return;
- uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
skel = test_task_pt_regs__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open"))
@@ -32,7 +36,7 @@ void test_task_pt_regs(void)
skel->links.handle_uprobe = uprobe_link;
/* trigger & validate uprobe */
- get_base_addr();
+ trigger_func();
if (!ASSERT_EQ(skel->bss->uprobe_res, 1, "check_uprobe_res"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
new file mode 100644
index 000000000000..c381faaae741
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2022 Sony Group Corporation */
+#include <sys/prctl.h>
+#include <test_progs.h>
+#include "bpf_syscall_macro.skel.h"
+
+void test_bpf_syscall_macro(void)
+{
+ struct bpf_syscall_macro *skel = NULL;
+ int err;
+ int exp_arg1 = 1001;
+ unsigned long exp_arg2 = 12;
+ unsigned long exp_arg3 = 13;
+ unsigned long exp_arg4 = 14;
+ unsigned long exp_arg5 = 15;
+
+ /* check whether it can open program */
+ skel = bpf_syscall_macro__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_syscall_macro__open"))
+ return;
+
+ skel->rodata->filter_pid = getpid();
+
+ /* check whether it can load program */
+ err = bpf_syscall_macro__load(skel);
+ if (!ASSERT_OK(err, "bpf_syscall_macro__load"))
+ goto cleanup;
+
+ /* check whether it can attach kprobe */
+ err = bpf_syscall_macro__attach(skel);
+ if (!ASSERT_OK(err, "bpf_syscall_macro__attach"))
+ goto cleanup;
+
+ /* check whether args of syscall are copied correctly */
+ prctl(exp_arg1, exp_arg2, exp_arg3, exp_arg4, exp_arg5);
+#if defined(__aarch64__) || defined(__s390__)
+ ASSERT_NEQ(skel->bss->arg1, exp_arg1, "syscall_arg1");
+#else
+ ASSERT_EQ(skel->bss->arg1, exp_arg1, "syscall_arg1");
+#endif
+ ASSERT_EQ(skel->bss->arg2, exp_arg2, "syscall_arg2");
+ ASSERT_EQ(skel->bss->arg3, exp_arg3, "syscall_arg3");
+ /* it cannot copy arg4 when uses PT_REGS_PARM4 on x86_64 */
+#ifdef __x86_64__
+ ASSERT_NEQ(skel->bss->arg4_cx, exp_arg4, "syscall_arg4_from_cx");
+#else
+ ASSERT_EQ(skel->bss->arg4_cx, exp_arg4, "syscall_arg4_from_cx");
+#endif
+ ASSERT_EQ(skel->bss->arg4, exp_arg4, "syscall_arg4");
+ ASSERT_EQ(skel->bss->arg5, exp_arg5, "syscall_arg5");
+
+ /* check whether args of syscall are copied correctly for CORE variants */
+ ASSERT_EQ(skel->bss->arg1_core, exp_arg1, "syscall_arg1_core_variant");
+ ASSERT_EQ(skel->bss->arg2_core, exp_arg2, "syscall_arg2_core_variant");
+ ASSERT_EQ(skel->bss->arg3_core, exp_arg3, "syscall_arg3_core_variant");
+ /* it cannot copy arg4 when uses PT_REGS_PARM4_CORE on x86_64 */
+#ifdef __x86_64__
+ ASSERT_NEQ(skel->bss->arg4_core_cx, exp_arg4, "syscall_arg4_from_cx_core_variant");
+#else
+ ASSERT_EQ(skel->bss->arg4_core_cx, exp_arg4, "syscall_arg4_from_cx_core_variant");
+#endif
+ ASSERT_EQ(skel->bss->arg4_core, exp_arg4, "syscall_arg4_core_variant");
+ ASSERT_EQ(skel->bss->arg5_core, exp_arg5, "syscall_arg5_core_variant");
+
+ ASSERT_EQ(skel->bss->option_syscall, exp_arg1, "BPF_KPROBE_SYSCALL_option");
+ ASSERT_EQ(skel->bss->arg2_syscall, exp_arg2, "BPF_KPROBE_SYSCALL_arg2");
+ ASSERT_EQ(skel->bss->arg3_syscall, exp_arg3, "BPF_KPROBE_SYSCALL_arg3");
+ ASSERT_EQ(skel->bss->arg4_syscall, exp_arg4, "BPF_KPROBE_SYSCALL_arg4");
+ ASSERT_EQ(skel->bss->arg5_syscall, exp_arg5, "BPF_KPROBE_SYSCALL_arg5");
+
+cleanup:
+ bpf_syscall_macro__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
index 4ca275101ee0..de24e8f0e738 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_profiler.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
@@ -8,20 +8,20 @@
static int sanity_run(struct bpf_program *prog)
{
- struct bpf_prog_test_run_attr test_attr = {};
+ LIBBPF_OPTS(bpf_test_run_opts, test_attr);
__u64 args[] = {1, 2, 3};
- __u32 duration = 0;
int err, prog_fd;
prog_fd = bpf_program__fd(prog);
- test_attr.prog_fd = prog_fd;
test_attr.ctx_in = args;
test_attr.ctx_size_in = sizeof(args);
- err = bpf_prog_test_run_xattr(&test_attr);
- if (CHECK(err || test_attr.retval, "test_run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, test_attr.retval, duration))
+ err = bpf_prog_test_run_opts(prog_fd, &test_attr);
+ if (!ASSERT_OK(err, "test_run"))
+ return -1;
+
+ if (!ASSERT_OK(test_attr.retval, "test_run retval"))
return -1;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
index cf1215531920..ae93411fd582 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
@@ -6,15 +6,18 @@
static int sanity_run(struct bpf_program *prog)
{
- __u32 duration, retval;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
prog_fd = bpf_program__fd(prog);
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval != 123, "test_run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration))
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run"))
+ return -1;
+ if (!ASSERT_EQ(topts.retval, 123, "test_run retval"))
return -1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index 0f4e49e622cd..7eb049214859 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -6,7 +6,7 @@
static int timer(struct timer *timer_skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
err = timer__attach(timer_skel);
if (!ASSERT_OK(err, "timer_attach"))
@@ -16,10 +16,9 @@ static int timer(struct timer *timer_skel)
ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
prog_fd = bpf_program__fd(timer_skel->progs.test1);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
- ASSERT_EQ(retval, 0, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
timer__detach(timer_skel);
usleep(50); /* 10 usecs should be enough, but give it extra */
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
index 949a0617869d..2ee5f5ae11d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -6,19 +6,18 @@
static int timer_mim(struct timer_mim *timer_skel)
{
- __u32 duration = 0, retval;
__u64 cnt1, cnt2;
int err, prog_fd, key1 = 1;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
err = timer_mim__attach(timer_skel);
if (!ASSERT_OK(err, "timer_attach"))
return err;
prog_fd = bpf_program__fd(timer_skel->progs.test1);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
- ASSERT_EQ(retval, 0, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
timer_mim__detach(timer_skel);
/* check that timer_cb[12] are incrementing 'cnt' */
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
index 924441d4362d..aabdff7bea3e 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_ext.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
@@ -23,8 +23,12 @@ void test_trace_ext(void)
int err, pkt_fd, ext_fd;
struct bpf_program *prog;
char buf[100];
- __u32 retval;
__u64 len;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
/* open/load/attach test_pkt_md_access */
skel_pkt = test_pkt_md_access__open_and_load();
@@ -77,32 +81,32 @@ void test_trace_ext(void)
/* load/attach tracing */
err = test_trace_ext_tracing__load(skel_trace);
- if (CHECK(err, "setup", "tracing/test_pkt_md_access_new load failed\n")) {
+ if (!ASSERT_OK(err, "tracing/test_pkt_md_access_new load")) {
libbpf_strerror(err, buf, sizeof(buf));
fprintf(stderr, "%s\n", buf);
goto cleanup;
}
err = test_trace_ext_tracing__attach(skel_trace);
- if (CHECK(err, "setup", "tracing/test_pkt_md_access_new attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "tracing/test_pkt_md_access_new attach"))
goto cleanup;
/* trigger the test */
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "run", "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(pkt_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+ ASSERT_OK(topts.retval, "test_run_opts retval");
bss_ext = skel_ext->bss;
bss_trace = skel_trace->bss;
len = bss_ext->ext_called;
- CHECK(bss_ext->ext_called == 0,
- "check", "failed to trigger freplace/test_pkt_md_access\n");
- CHECK(bss_trace->fentry_called != len,
- "check", "failed to trigger fentry/test_pkt_md_access_new\n");
- CHECK(bss_trace->fexit_called != len,
- "check", "failed to trigger fexit/test_pkt_md_access_new\n");
+ ASSERT_NEQ(bss_ext->ext_called, 0,
+ "failed to trigger freplace/test_pkt_md_access");
+ ASSERT_EQ(bss_trace->fentry_called, len,
+ "failed to trigger fentry/test_pkt_md_access_new");
+ ASSERT_EQ(bss_trace->fexit_called, len,
+ "failed to trigger fexit/test_pkt_md_access_new");
cleanup:
test_trace_ext_tracing__destroy(skel_trace);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c
index ac65456b7ab8..ec21c53cb1da 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -13,8 +13,14 @@ void test_xdp(void)
char buf[128];
struct ipv6hdr iph6;
struct iphdr iph;
- __u32 duration, retval, size;
int err, prog_fd, map_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
@@ -26,21 +32,23 @@ void test_xdp(void)
bpf_map_update_elem(map_fd, &key4, &value4, 0);
bpf_map_update_elem(map_fd, &key6, &value6, 0);
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
- CHECK(err || retval != XDP_TX || size != 74 ||
- iph.protocol != IPPROTO_IPIP, "ipv4",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, XDP_TX, "ipv4 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 74, "ipv4 test_run data_size_out");
+ ASSERT_EQ(iph.protocol, IPPROTO_IPIP, "ipv4 test_run iph.protocol");
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
- buf, &size, &retval, &duration);
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = sizeof(buf);
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
memcpy(&iph6, buf + sizeof(struct ethhdr), sizeof(iph6));
- CHECK(err || retval != XDP_TX || size != 114 ||
- iph6.nexthdr != IPPROTO_IPV6, "ipv6",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, XDP_TX, "ipv6 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 114, "ipv6 test_run data_size_out");
+ ASSERT_EQ(iph6.nexthdr, IPPROTO_IPV6, "ipv6 test_run iph6.nexthdr");
out:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
new file mode 100644
index 000000000000..d18e6f343c48
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+static void test_xdp_update_frags(void)
+{
+ const char *file = "./test_xdp_update_frags.o";
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err, prog_fd;
+ __u32 *offset;
+ __u8 *buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ return;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(128);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 128b"))
+ goto out;
+
+ memset(buf, 0, 128);
+ offset = (__u32 *)buf;
+ *offset = 16;
+ buf[*offset] = 0xaa; /* marker at offset 16 (head) */
+ buf[*offset + 15] = 0xaa; /* marker at offset 31 (head) */
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 128;
+ topts.data_size_out = 128;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ /* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */
+ ASSERT_OK(err, "xdp_update_frag");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
+ ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]");
+ ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]");
+
+ free(buf);
+
+ buf = malloc(9000);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+ goto out;
+
+ memset(buf, 0, 9000);
+ offset = (__u32 *)buf;
+ *offset = 5000;
+ buf[*offset] = 0xaa; /* marker at offset 5000 (frag0) */
+ buf[*offset + 15] = 0xaa; /* marker at offset 5015 (frag0) */
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 9000;
+ topts.data_size_out = 9000;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ /* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */
+ ASSERT_OK(err, "xdp_update_frag");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
+ ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]");
+ ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]");
+
+ memset(buf, 0, 9000);
+ offset = (__u32 *)buf;
+ *offset = 3510;
+ buf[*offset] = 0xaa; /* marker at offset 3510 (head) */
+ buf[*offset + 15] = 0xaa; /* marker at offset 3525 (frag0) */
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ /* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */
+ ASSERT_OK(err, "xdp_update_frag");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
+ ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]");
+ ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]");
+
+ memset(buf, 0, 9000);
+ offset = (__u32 *)buf;
+ *offset = 7606;
+ buf[*offset] = 0xaa; /* marker at offset 7606 (frag0) */
+ buf[*offset + 15] = 0xaa; /* marker at offset 7621 (frag1) */
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ /* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */
+ ASSERT_OK(err, "xdp_update_frag");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
+ ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]");
+ ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]");
+
+ free(buf);
+out:
+ bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags(void)
+{
+ if (test__start_subtest("xdp_adjust_frags"))
+ test_xdp_update_frags();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 3f5a17c38be5..21ceac24e174 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -5,28 +5,35 @@
static void test_xdp_adjust_tail_shrink(void)
{
const char *file = "./test_xdp_adjust_tail_shrink.o";
- __u32 duration, retval, size, expect_sz;
+ __u32 expect_sz;
struct bpf_object *obj;
int err, prog_fd;
char buf[128];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (CHECK_FAIL(err))
+ if (ASSERT_OK(err, "test_xdp_adjust_tail_shrink"))
return;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
-
- CHECK(err || retval != XDP_DROP,
- "ipv4", "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv4");
+ ASSERT_EQ(topts.retval, XDP_DROP, "ipv4 retval");
expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
- buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_TX || size != expect_sz,
- "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
- err, errno, retval, size, expect_sz);
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = sizeof(buf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6");
+ ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval");
+ ASSERT_EQ(topts.data_size_out, expect_sz, "ipv6 size");
+
bpf_object__close(obj);
}
@@ -35,25 +42,31 @@ static void test_xdp_adjust_tail_grow(void)
const char *file = "./test_xdp_adjust_tail_grow.o";
struct bpf_object *obj;
char buf[4096]; /* avoid segfault: large buf to hold grow results */
- __u32 duration, retval, size, expect_sz;
+ __u32 expect_sz;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (CHECK_FAIL(err))
+ if (ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
return;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_DROP,
- "ipv4", "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv4");
+ ASSERT_EQ(topts.retval, XDP_DROP, "ipv4 retval");
expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
- buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_TX || size != expect_sz,
- "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
- err, errno, retval, size, expect_sz);
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6");
+ ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval");
+ ASSERT_EQ(topts.data_size_out, expect_sz, "ipv6 size");
bpf_object__close(obj);
}
@@ -65,18 +78,18 @@ static void test_xdp_adjust_tail_grow2(void)
int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/;
struct bpf_object *obj;
int err, cnt, i;
- int max_grow;
+ int max_grow, prog_fd;
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, tattr,
.repeat = 1,
.data_in = &buf,
.data_out = &buf,
.data_size_in = 0, /* Per test */
.data_size_out = 0, /* Per test */
- };
+ );
- err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
return;
/* Test case-64 */
@@ -84,49 +97,171 @@ static void test_xdp_adjust_tail_grow2(void)
tattr.data_size_in = 64; /* Determine test case via pkt size */
tattr.data_size_out = 128; /* Limit copy_size */
/* Kernel side alloc packet memory area that is zero init */
- err = bpf_prog_test_run_xattr(&tattr);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
- CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
- || tattr.retval != XDP_TX
- || tattr.data_size_out != 192, /* Expected grow size */
- "case-64",
- "err %d errno %d retval %d size %d\n",
- err, errno, tattr.retval, tattr.data_size_out);
+ ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due limit copy_size in bpf_test_finish */
+ ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval");
+ ASSERT_EQ(tattr.data_size_out, 192, "case-64 data_size_out"); /* Expected grow size */
/* Extra checks for data contents */
- CHECK_ATTR(tattr.data_size_out != 192
- || buf[0] != 1 || buf[63] != 1 /* 0-63 memset to 1 */
- || buf[64] != 0 || buf[127] != 0 /* 64-127 memset to 0 */
- || buf[128] != 1 || buf[191] != 1, /*128-191 memset to 1 */
- "case-64-data",
- "err %d errno %d retval %d size %d\n",
- err, errno, tattr.retval, tattr.data_size_out);
+ ASSERT_EQ(buf[0], 1, "case-64-data buf[0]"); /* 0-63 memset to 1 */
+ ASSERT_EQ(buf[63], 1, "case-64-data buf[63]");
+ ASSERT_EQ(buf[64], 0, "case-64-data buf[64]"); /* 64-127 memset to 0 */
+ ASSERT_EQ(buf[127], 0, "case-64-data buf[127]");
+ ASSERT_EQ(buf[128], 1, "case-64-data buf[128]"); /* 128-191 memset to 1 */
+ ASSERT_EQ(buf[191], 1, "case-64-data buf[191]");
/* Test case-128 */
memset(buf, 2, sizeof(buf));
tattr.data_size_in = 128; /* Determine test case via pkt size */
tattr.data_size_out = sizeof(buf); /* Copy everything */
- err = bpf_prog_test_run_xattr(&tattr);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */
- CHECK_ATTR(err
- || tattr.retval != XDP_TX
- || tattr.data_size_out != max_grow,/* Expect max grow size */
- "case-128",
- "err %d errno %d retval %d size %d expect-size %d\n",
- err, errno, tattr.retval, tattr.data_size_out, max_grow);
+ ASSERT_OK(err, "case-128");
+ ASSERT_EQ(tattr.retval, XDP_TX, "case-128 retval");
+ ASSERT_EQ(tattr.data_size_out, max_grow, "case-128 data_size_out"); /* Expect max grow */
/* Extra checks for data content: Count grow size, will contain zeros */
for (i = 0, cnt = 0; i < sizeof(buf); i++) {
if (buf[i] == 0)
cnt++;
}
- CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
- || tattr.data_size_out != max_grow, /* Total grow size */
- "case-128-data",
- "err %d errno %d retval %d size %d grow-size %d\n",
- err, errno, tattr.retval, tattr.data_size_out, cnt);
+ ASSERT_EQ(cnt, max_grow - tattr.data_size_in, "case-128-data cnt"); /* Grow increase */
+ ASSERT_EQ(tattr.data_size_out, max_grow, "case-128-data data_size_out"); /* Total grow */
+
+ bpf_object__close(obj);
+}
+
+static void test_xdp_adjust_frags_tail_shrink(void)
+{
+ const char *file = "./test_xdp_adjust_tail_shrink.o";
+ __u32 exp_size;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err, prog_fd;
+ __u8 *buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ /* For the individual test cases, the first byte in the packet
+ * indicates which test will be run.
+ */
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ return;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(9000);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+ goto out;
+
+ memset(buf, 0, 9000);
+
+ /* Test case removing 10 bytes from last frag, NOT freeing it */
+ exp_size = 8990; /* 9000 - 10 */
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 9000;
+ topts.data_size_out = 9000;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb-10b");
+ ASSERT_EQ(topts.retval, XDP_TX, "9Kb-10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-10b size");
+
+ /* Test case removing one of two pages, assuming 4K pages */
+ buf[0] = 1;
+ exp_size = 4900; /* 9000 - 4100 */
+
+ topts.data_size_out = 9000; /* reset from previous invocation */
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb-4Kb");
+ ASSERT_EQ(topts.retval, XDP_TX, "9Kb-4Kb retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-4Kb size");
+
+ /* Test case removing two pages resulting in a linear xdp_buff */
+ buf[0] = 2;
+ exp_size = 800; /* 9000 - 8200 */
+ topts.data_size_out = 9000; /* reset from previous invocation */
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb-9Kb");
+ ASSERT_EQ(topts.retval, XDP_TX, "9Kb-9Kb retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-9Kb size");
+
+ free(buf);
+out:
+ bpf_object__close(obj);
+}
+
+static void test_xdp_adjust_frags_tail_grow(void)
+{
+ const char *file = "./test_xdp_adjust_tail_grow.o";
+ __u32 exp_size;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err, i, prog_fd;
+ __u8 *buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ return;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(16384);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 16Kb"))
+ goto out;
+
+ /* Test case add 10 bytes to last frag */
+ memset(buf, 1, 16384);
+ exp_size = 9000 + 10;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 9000;
+ topts.data_size_out = 16384;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size");
+
+ for (i = 0; i < 9000; i++)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+
+ for (i = 9000; i < 9010; i++)
+ ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+
+ for (i = 9010; i < 16384; i++)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+
+ /* Test a too large grow */
+ memset(buf, 1, 16384);
+ exp_size = 9001;
+
+ topts.data_in = topts.data_out = buf;
+ topts.data_size_in = 9001;
+ topts.data_size_out = 16384;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_DROP, "9Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size");
+ free(buf);
+out:
bpf_object__close(obj);
}
@@ -138,4 +273,8 @@ void test_xdp_adjust_tail(void)
test_xdp_adjust_tail_grow();
if (test__start_subtest("xdp_adjust_tail_grow2"))
test_xdp_adjust_tail_grow2();
+ if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
+ test_xdp_adjust_frags_tail_shrink();
+ if (test__start_subtest("xdp_adjust_frags_tail_grow"))
+ test_xdp_adjust_frags_tail_grow();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index c6fa390e3aa1..62aa3edda5e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -11,8 +11,7 @@ void serial_test_xdp_attach(void)
const char *file = "./test_xdp.o";
struct bpf_prog_info info = {};
int err, fd1, fd2, fd3;
- DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts,
- .old_fd = -1);
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
len = sizeof(info);
@@ -38,49 +37,47 @@ void serial_test_xdp_attach(void)
if (CHECK_FAIL(err))
goto out_2;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE,
- &opts);
+ err = bpf_xdp_attach(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE, &opts);
if (CHECK(err, "load_ok", "initial load failed"))
goto out_close;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
if (CHECK(err || id0 != id1, "id1_check",
"loaded prog id %u != id1 %u, err %d", id0, id1, err))
goto out_close;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE,
- &opts);
+ err = bpf_xdp_attach(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE, &opts);
if (CHECK(!err, "load_fail", "load with expected id didn't fail"))
goto out;
- opts.old_fd = fd1;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts);
+ opts.old_prog_fd = fd1;
+ err = bpf_xdp_attach(IFINDEX_LO, fd2, 0, &opts);
if (CHECK(err, "replace_ok", "replace valid old_fd failed"))
goto out;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
if (CHECK(err || id0 != id2, "id2_check",
"loaded prog id %u != id2 %u, err %d", id0, id2, err))
goto out_close;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts);
+ err = bpf_xdp_attach(IFINDEX_LO, fd3, 0, &opts);
if (CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail"))
goto out;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+ err = bpf_xdp_detach(IFINDEX_LO, 0, &opts);
if (CHECK(!err, "remove_fail", "remove invalid old_fd didn't fail"))
goto out;
- opts.old_fd = fd2;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+ opts.old_prog_fd = fd2;
+ err = bpf_xdp_detach(IFINDEX_LO, 0, &opts);
if (CHECK(err, "remove_ok", "remove valid old_fd failed"))
goto out;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
if (CHECK(err || id0 != 0, "unload_check",
"loaded prog id %u != 0, err %d", id0, err))
goto out_close;
out:
- bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+ bpf_xdp_detach(IFINDEX_LO, 0, NULL);
out_close:
bpf_object__close(obj3);
out_2:
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index c98a897ad692..76967d8ace9c 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -10,40 +10,101 @@ struct meta {
int pkt_len;
};
+struct test_ctx_s {
+ bool passed;
+ int pkt_size;
+};
+
+struct test_ctx_s test_ctx;
+
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
- int duration = 0;
struct meta *meta = (struct meta *)data;
struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+ unsigned char *raw_pkt = data + sizeof(*meta);
+ struct test_ctx_s *tst_ctx = ctx;
+
+ ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size");
+ ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex");
+ ASSERT_EQ(meta->pkt_len, tst_ctx->pkt_size, "check_meta_pkt_len");
+ ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0,
+ "check_packet_content");
+
+ if (meta->pkt_len > sizeof(pkt_v4)) {
+ for (int i = 0; i < meta->pkt_len - sizeof(pkt_v4); i++)
+ ASSERT_EQ(raw_pkt[i + sizeof(pkt_v4)], (unsigned char)i,
+ "check_packet_content");
+ }
+
+ tst_ctx->passed = true;
+}
- if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
- "check_size", "size %u < %zu\n",
- size, sizeof(pkt_v4) + sizeof(*meta)))
- return;
+#define BUF_SZ 9000
- if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
- "meta->ifindex = %d\n", meta->ifindex))
+static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
+ struct test_xdp_bpf2bpf *ftrace_skel,
+ int pkt_size)
+{
+ __u8 *buf, *buf_in;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") ||
+ !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size"))
return;
- if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
- "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+ buf_in = malloc(BUF_SZ);
+ if (!ASSERT_OK_PTR(buf_in, "buf_in malloc()"))
return;
- if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
- "check_packet_content", "content not the same\n"))
+ buf = malloc(BUF_SZ);
+ if (!ASSERT_OK_PTR(buf, "buf malloc()")) {
+ free(buf_in);
return;
+ }
+
+ test_ctx.passed = false;
+ test_ctx.pkt_size = pkt_size;
+
+ memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
+ if (pkt_size > sizeof(pkt_v4)) {
+ for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
+ buf_in[i + sizeof(pkt_v4)] = i;
+ }
+
+ /* Run test program */
+ topts.data_in = buf_in;
+ topts.data_size_in = pkt_size;
+ topts.data_out = buf;
+ topts.data_size_out = BUF_SZ;
+
+ err = bpf_prog_test_run_opts(pkt_fd, &topts);
+
+ ASSERT_OK(err, "ipv4");
+ ASSERT_EQ(topts.retval, XDP_PASS, "ipv4 retval");
+ ASSERT_EQ(topts.data_size_out, pkt_size, "ipv4 size");
+
+ /* Make sure bpf_xdp_output() was triggered and it sent the expected
+ * data to the perf ring buffer.
+ */
+ err = perf_buffer__poll(pb, 100);
+
+ ASSERT_GE(err, 0, "perf_buffer__poll");
+ ASSERT_TRUE(test_ctx.passed, "test passed");
+ /* Verify test results */
+ ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"),
+ "fentry result");
+ ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_PASS, "fexit result");
- *(bool *)ctx = true;
+ free(buf);
+ free(buf_in);
}
void test_xdp_bpf2bpf(void)
{
- __u32 duration = 0, retval, size;
- char buf[128];
int err, pkt_fd, map_fd;
- bool passed = false;
- struct iphdr iph;
- struct iptnl_info value4 = {.family = AF_INET};
+ int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
+ struct iptnl_info value4 = {.family = AF_INET6};
struct test_xdp *pkt_skel = NULL;
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
struct vip key4 = {.protocol = 6, .family = AF_INET};
@@ -52,7 +113,7 @@ void test_xdp_bpf2bpf(void)
/* Load XDP program to introspect */
pkt_skel = test_xdp__open_and_load();
- if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n"))
+ if (!ASSERT_OK_PTR(pkt_skel, "test_xdp__open_and_load"))
return;
pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel);
@@ -62,7 +123,7 @@ void test_xdp_bpf2bpf(void)
/* Load trace program */
ftrace_skel = test_xdp_bpf2bpf__open();
- if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
+ if (!ASSERT_OK_PTR(ftrace_skel, "test_xdp_bpf2bpf__open"))
goto out;
/* Demonstrate the bpf_program__set_attach_target() API rather than
@@ -77,50 +138,24 @@ void test_xdp_bpf2bpf(void)
bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
err = test_xdp_bpf2bpf__load(ftrace_skel);
- if (CHECK(err, "__load", "ftrace skeleton failed\n"))
+ if (!ASSERT_OK(err, "test_xdp_bpf2bpf__load"))
goto out;
err = test_xdp_bpf2bpf__attach(ftrace_skel);
- if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_xdp_bpf2bpf__attach"))
goto out;
/* Set up perf buffer */
- pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1,
- on_sample, NULL, &passed, NULL);
+ pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 8,
+ on_sample, NULL, &test_ctx, NULL);
if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out;
- /* Run test program */
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
- if (CHECK(err || retval != XDP_TX || size != 74 ||
- iph.protocol != IPPROTO_IPIP, "ipv4",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size))
- goto out;
-
- /* Make sure bpf_xdp_output() was triggered and it sent the expected
- * data to the perf ring buffer.
- */
- err = perf_buffer__poll(pb, 100);
- if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
- goto out;
-
- CHECK_FAIL(!passed);
-
- /* Verify test results */
- if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
- "result", "fentry failed err %llu\n",
- ftrace_skel->bss->test_result_fentry))
- goto out;
-
- CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
- "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
-
+ for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
+ run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
+ pkt_sizes[i]);
out:
- if (pb)
- perf_buffer__free(pb);
+ perf_buffer__free(pb);
test_xdp__destroy(pkt_skel);
test_xdp_bpf2bpf__destroy(ftrace_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index fd812bd43600..f775a1613833 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -3,11 +3,12 @@
#include <linux/if_link.h>
#include <test_progs.h>
+#include "test_xdp_with_cpumap_frags_helpers.skel.h"
#include "test_xdp_with_cpumap_helpers.skel.h"
#define IFINDEX_LO 1
-void serial_test_xdp_cpumap_attach(void)
+static void test_xdp_with_cpumap_helpers(void)
{
struct test_xdp_with_cpumap_helpers *skel;
struct bpf_prog_info info = {};
@@ -23,11 +24,11 @@ void serial_test_xdp_cpumap_attach(void)
return;
prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
- err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP"))
goto out_close;
- err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
ASSERT_OK(err, "XDP program detach");
prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
@@ -45,15 +46,76 @@ void serial_test_xdp_cpumap_attach(void)
ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
/* can not attach BPF_XDP_CPUMAP program to a device */
- err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program"))
- bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+ bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
val.qsize = 192;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
+ /* Try to attach BPF_XDP program with frags to cpumap when we have
+ * already loaded a BPF_XDP program on the map
+ */
+ idx = 1;
+ val.qsize = 192;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry");
+
out_close:
test_xdp_with_cpumap_helpers__destroy(skel);
}
+
+static void test_xdp_with_cpumap_frags_helpers(void)
+{
+ struct test_xdp_with_cpumap_frags_helpers *skel;
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ struct bpf_cpumap_val val = {
+ .qsize = 192,
+ };
+ int err, frags_prog_fd, map_fd;
+ __u32 idx = 0;
+
+ skel = test_xdp_with_cpumap_frags_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load"))
+ return;
+
+ frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+ map_fd = bpf_map__fd(skel->maps.cpu_map);
+ err = bpf_obj_get_info_by_fd(frags_prog_fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+ goto out_close;
+
+ val.bpf_prog.fd = frags_prog_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_OK(err, "Add program to cpumap entry");
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ ASSERT_OK(err, "Read cpumap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id,
+ "Match program id to cpumap entry prog_id");
+
+ /* Try to attach BPF_XDP program to cpumap when we have
+ * already loaded a BPF_XDP program with frags on the map
+ */
+ idx = 1;
+ val.qsize = 192;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program to cpumap entry");
+
+out_close:
+ test_xdp_with_cpumap_frags_helpers__destroy(skel);
+}
+
+void serial_test_xdp_cpumap_attach(void)
+{
+ if (test__start_subtest("CPUMAP with programs in entries"))
+ test_xdp_with_cpumap_helpers();
+
+ if (test__start_subtest("CPUMAP with frags programs in entries"))
+ test_xdp_with_cpumap_frags_helpers();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 3079d5568f8f..ead40016c324 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -4,6 +4,7 @@
#include <test_progs.h>
#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_frags_helpers.skel.h"
#include "test_xdp_with_devmap_helpers.skel.h"
#define IFINDEX_LO 1
@@ -25,11 +26,11 @@ static void test_xdp_with_devmap_helpers(void)
return;
dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
- err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL);
if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap"))
goto out_close;
- err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
ASSERT_OK(err, "XDP program detach");
dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
@@ -47,15 +48,24 @@ static void test_xdp_with_devmap_helpers(void)
ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
/* can not attach BPF_XDP_DEVMAP program to a device */
- err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL);
if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program"))
- bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+ bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
val.ifindex = 1;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
+ /* Try to attach BPF_XDP program with frags to devmap when we have
+ * already loaded a BPF_XDP program on the map
+ */
+ idx = 1;
+ val.ifindex = 1;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry");
+
out_close:
test_xdp_with_devmap_helpers__destroy(skel);
}
@@ -71,12 +81,57 @@ static void test_neg_xdp_devmap_helpers(void)
}
}
+static void test_xdp_with_devmap_frags_helpers(void)
+{
+ struct test_xdp_with_devmap_frags_helpers *skel;
+ struct bpf_prog_info info = {};
+ struct bpf_devmap_val val = {
+ .ifindex = IFINDEX_LO,
+ };
+ __u32 len = sizeof(info);
+ int err, dm_fd_frags, map_fd;
+ __u32 idx = 0;
+
+ skel = test_xdp_with_devmap_frags_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
+ return;
+
+ dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+ map_fd = bpf_map__fd(skel->maps.dm_ports);
+ err = bpf_obj_get_info_by_fd(dm_fd_frags, &info, &len);
+ if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+ goto out_close;
+
+ val.bpf_prog.fd = dm_fd_frags;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_OK(err, "Add frags program to devmap entry");
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ ASSERT_OK(err, "Read devmap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id,
+ "Match program id to devmap entry prog_id");
+
+ /* Try to attach BPF_XDP program to devmap when we have
+ * already loaded a BPF_XDP program with frags on the map
+ */
+ idx = 1;
+ val.ifindex = 1;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program to devmap entry");
+
+out_close:
+ test_xdp_with_devmap_frags_helpers__destroy(skel);
+}
void serial_test_xdp_devmap_attach(void)
{
if (test__start_subtest("DEVMAP with programs in entries"))
test_xdp_with_devmap_helpers();
+ if (test__start_subtest("DEVMAP with frags programs in entries"))
+ test_xdp_with_devmap_frags_helpers();
+
if (test__start_subtest("Verifier check of DEVMAP programs"))
test_neg_xdp_devmap_helpers();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
index abe48e82e1dc..0d01ff6cb91a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
@@ -14,13 +14,13 @@ void serial_test_xdp_info(void)
/* Get prog_id for XDP_ATTACHED_NONE mode */
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id);
if (CHECK(err, "get_xdp_none", "errno=%d\n", errno))
return;
if (CHECK(prog_id, "prog_id_none", "unexpected prog_id=%u\n", prog_id))
return;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id);
if (CHECK(err, "get_xdp_none_skb", "errno=%d\n", errno))
return;
if (CHECK(prog_id, "prog_id_none_skb", "unexpected prog_id=%u\n",
@@ -37,32 +37,32 @@ void serial_test_xdp_info(void)
if (CHECK(err, "get_prog_info", "errno=%d\n", errno))
goto out_close;
- err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
if (CHECK(err, "set_xdp_skb", "errno=%d\n", errno))
goto out_close;
/* Get prog_id for single prog mode */
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id);
if (CHECK(err, "get_xdp", "errno=%d\n", errno))
goto out;
if (CHECK(prog_id != info.id, "prog_id", "prog_id not available\n"))
goto out;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id);
if (CHECK(err, "get_xdp_skb", "errno=%d\n", errno))
goto out;
if (CHECK(prog_id != info.id, "prog_id_skb", "prog_id not available\n"))
goto out;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_DRV_MODE);
+ err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_DRV_MODE, &prog_id);
if (CHECK(err, "get_xdp_drv", "errno=%d\n", errno))
goto out;
if (CHECK(prog_id, "prog_id_drv", "unexpected prog_id=%u\n", prog_id))
goto out;
out:
- bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+ bpf_xdp_detach(IFINDEX_LO, 0, NULL);
out_close:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index b2b357f8c74c..3e9d5c5521f0 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -8,9 +8,9 @@
void serial_test_xdp_link(void)
{
- DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
__u32 id1, id2, id0 = 0, prog_fd1, prog_fd2;
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
struct bpf_link_info link_info;
struct bpf_prog_info prog_info;
struct bpf_link *link;
@@ -41,12 +41,12 @@ void serial_test_xdp_link(void)
id2 = prog_info.id;
/* set initial prog attachment */
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
if (!ASSERT_OK(err, "fd_attach"))
goto cleanup;
/* validate prog ID */
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
goto cleanup;
@@ -55,14 +55,14 @@ void serial_test_xdp_link(void)
if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
bpf_link__destroy(link);
/* best-effort detach prog */
- opts.old_fd = prog_fd1;
- bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+ opts.old_prog_fd = prog_fd1;
+ bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts);
goto cleanup;
}
/* detach BPF program */
- opts.old_fd = prog_fd1;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+ opts.old_prog_fd = prog_fd1;
+ err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts);
if (!ASSERT_OK(err, "prog_detach"))
goto cleanup;
@@ -73,23 +73,23 @@ void serial_test_xdp_link(void)
skel1->links.xdp_handler = link;
/* validate prog ID */
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
goto cleanup;
/* BPF prog attach is not allowed to replace BPF link */
- opts.old_fd = prog_fd1;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
+ opts.old_prog_fd = prog_fd1;
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
if (!ASSERT_ERR(err, "prog_attach_fail"))
goto cleanup;
/* Can't force-update when BPF link is active */
- err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, 0, NULL);
if (!ASSERT_ERR(err, "prog_update_fail"))
goto cleanup;
/* Can't force-detach when BPF link is active */
- err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+ err = bpf_xdp_detach(IFINDEX_LO, 0, NULL);
if (!ASSERT_ERR(err, "prog_detach_fail"))
goto cleanup;
@@ -109,7 +109,7 @@ void serial_test_xdp_link(void)
goto cleanup;
skel2->links.xdp_handler = link;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index 0281095de266..92ef0aa50866 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -25,43 +25,49 @@ void test_xdp_noinline(void)
__u8 flags;
} real_def = {.dst = MAGIC_VAL};
__u32 ch_key = 11, real_num = 3;
- __u32 duration = 0, retval, size;
int err, i;
__u64 bytes = 0, pkts = 0;
char buf[128];
u32 *magic = (u32 *)buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = NUM_ITER,
+ );
skel = test_xdp_noinline__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0);
bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0);
bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0);
- err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4),
- NUM_ITER, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 1 || size != 54 ||
- *magic != MAGIC_VAL, "ipv4",
- "err %d errno %d retval %d size %d magic %x\n",
- err, errno, retval, size, *magic);
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.balancer_ingress_v4), &topts);
+ ASSERT_OK(err, "ipv4 test_run");
+ ASSERT_EQ(topts.retval, 1, "ipv4 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 54, "ipv4 test_run data_size_out");
+ ASSERT_EQ(*magic, MAGIC_VAL, "ipv4 test_run magic");
- err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6),
- NUM_ITER, &pkt_v6, sizeof(pkt_v6),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 1 || size != 74 ||
- *magic != MAGIC_VAL, "ipv6",
- "err %d errno %d retval %d size %d magic %x\n",
- err, errno, retval, size, *magic);
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_out = buf;
+ topts.data_size_out = sizeof(buf);
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.balancer_ingress_v6), &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_EQ(topts.retval, 1, "ipv6 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 74, "ipv6 test_run data_size_out");
+ ASSERT_EQ(*magic, MAGIC_VAL, "ipv6 test_run magic");
bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats);
for (i = 0; i < nr_cpus; i++) {
bytes += stats[i].bytes;
pkts += stats[i].pkts;
}
- CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2,
- "stats", "bytes %lld pkts %lld\n",
- (unsigned long long)bytes, (unsigned long long)pkts);
+ ASSERT_EQ(bytes, MAGIC_BYTES * NUM_ITER * 2, "stats bytes");
+ ASSERT_EQ(pkts, NUM_ITER * 2, "stats pkts");
test_xdp_noinline__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
index 15a3900e4370..f543d1bd21b8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
@@ -4,22 +4,25 @@
void test_xdp_perf(void)
{
const char *file = "./xdp_dummy.o";
- __u32 duration, retval, size;
struct bpf_object *obj;
char in[128], out[128];
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = in,
+ .data_size_in = sizeof(in),
+ .data_out = out,
+ .data_size_out = sizeof(out),
+ .repeat = 1000000,
+ );
err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- err = bpf_prog_test_run(prog_fd, 1000000, &in[0], 128,
- out, &size, &retval, &duration);
-
- CHECK(err || retval != XDP_PASS || size != 128,
- "xdp-perf",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "test_run retval");
+ ASSERT_EQ(topts.data_size_out, 128, "test_run data_size_out");
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
index d9a88dd1ea65..7efcbdbe772d 100644
--- a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
+++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
@@ -5,6 +5,7 @@
#include <linux/bpf.h>
#include <stdbool.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -87,7 +88,7 @@ bloom_callback(struct bpf_map *map, __u32 *key, void *val,
return 0;
}
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bloom_lookup(void *ctx)
{
struct callback_ctx data;
@@ -100,7 +101,7 @@ int bloom_lookup(void *ctx)
return 0;
}
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bloom_update(void *ctx)
{
struct callback_ctx data;
@@ -113,7 +114,7 @@ int bloom_update(void *ctx)
return 0;
}
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bloom_hashmap_lookup(void *ctx)
{
__u64 *result;
diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_map.c b/tools/testing/selftests/bpf/progs/bloom_filter_map.c
index 1316f3db79d9..f245fcfe0c61 100644
--- a/tools/testing/selftests/bpf/progs/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/progs/bloom_filter_map.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -51,7 +52,7 @@ check_elem(struct bpf_map *map, __u32 *key, __u32 *val,
return 0;
}
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int inner_map(void *ctx)
{
struct bpf_map *inner_map;
@@ -70,7 +71,7 @@ int inner_map(void *ctx)
return 0;
}
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int check_bloom(void *ctx)
{
struct callback_ctx data;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
new file mode 100644
index 000000000000..eafc877ea460
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+
+#define AUTOBIND_LEN 6
+char sun_path[AUTOBIND_LEN];
+
+#define NR_CASES 5
+int sndbuf_setsockopt[NR_CASES] = {-1, 0, 8192, INT_MAX / 2, INT_MAX};
+int sndbuf_getsockopt[NR_CASES] = {-1, -1, -1, -1, -1};
+int sndbuf_getsockopt_expected[NR_CASES];
+
+static inline int cmpname(struct unix_sock *unix_sk)
+{
+ int i;
+
+ for (i = 0; i < AUTOBIND_LEN; i++) {
+ if (unix_sk->addr->name->sun_path[i] != sun_path[i])
+ return -1;
+ }
+
+ return 0;
+}
+
+SEC("iter/unix")
+int change_sndbuf(struct bpf_iter__unix *ctx)
+{
+ struct unix_sock *unix_sk = ctx->unix_sk;
+ int i, err;
+
+ if (!unix_sk || !unix_sk->addr)
+ return 0;
+
+ if (unix_sk->addr->name->sun_path[0])
+ return 0;
+
+ if (cmpname(unix_sk))
+ return 0;
+
+ for (i = 0; i < NR_CASES; i++) {
+ err = bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+ &sndbuf_setsockopt[i],
+ sizeof(sndbuf_setsockopt[i]));
+ if (err)
+ break;
+
+ err = bpf_getsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+ &sndbuf_getsockopt[i],
+ sizeof(sndbuf_getsockopt[i]));
+ if (err)
+ break;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index c86b93f33b32..d22741272692 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
@@ -23,3 +24,56 @@ int dump_task(struct bpf_iter__task *ctx)
BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
return 0;
}
+
+int num_expected_failure_copy_from_user_task = 0;
+int num_success_copy_from_user_task = 0;
+
+SEC("iter.s/task")
+int dump_task_sleepable(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ static const char info[] = " === END ===";
+ struct pt_regs *regs;
+ void *ptr;
+ uint32_t user_data = 0;
+ int ret;
+
+ if (task == (void *)0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Read an invalid pointer and ensure we get an error */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
+ if (ret) {
+ ++num_expected_failure_copy_from_user_task;
+ } else {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Try to read the contents of the task's instruction pointer from the
+ * remote task's address space.
+ */
+ regs = (struct pt_regs *)bpf_task_pt_regs(task);
+ if (regs == (void *)0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ ptr = (void *)PT_REGS_IP(regs);
+
+ ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
+ if (ret) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ ++num_success_copy_from_user_task;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " tgid gid data\n");
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
index c21e3f545371..e6aefae38894 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -63,7 +63,7 @@ int dump_unix(struct bpf_iter__unix *ctx)
BPF_SEQ_PRINTF(seq, " @");
for (i = 1; i < len; i++) {
- /* unix_mkname() tests this upper bound. */
+ /* unix_validate_addr() tests this upper bound. */
if (i >= sizeof(struct sockaddr_un))
break;
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c
index 12349e4601e8..e08565282759 100644
--- a/tools/testing/selftests/bpf/progs/bpf_loop.c
+++ b/tools/testing/selftests/bpf/progs/bpf_loop.c
@@ -3,6 +3,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -53,7 +54,7 @@ static int nested_callback1(__u32 index, void *data)
return 0;
}
-SEC("fentry/__x64_sys_nanosleep")
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int test_prog(void *ctx)
{
struct callback_ctx data = {};
@@ -71,7 +72,7 @@ int test_prog(void *ctx)
return 0;
}
-SEC("fentry/__x64_sys_nanosleep")
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int prog_null_ctx(void *ctx)
{
if (bpf_get_current_pid_tgid() >> 32 != pid)
@@ -82,7 +83,7 @@ int prog_null_ctx(void *ctx)
return 0;
}
-SEC("fentry/__x64_sys_nanosleep")
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int prog_invalid_flags(void *ctx)
{
struct callback_ctx data = {};
@@ -95,7 +96,7 @@ int prog_invalid_flags(void *ctx)
return 0;
}
-SEC("fentry/__x64_sys_nanosleep")
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int prog_nested_calls(void *ctx)
{
struct callback_ctx data = {};
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
index 9dafdc244462..4ce76eb064c4 100644
--- a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
+++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
@@ -3,6 +3,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -14,7 +15,7 @@ static int empty_callback(__u32 index, void *data)
return 0;
}
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int benchmark(void *ctx)
{
for (int i = 0; i < 1000; i++) {
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
new file mode 100644
index 000000000000..5bb11fe595a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_MISC_H__
+#define __BPF_MISC_H__
+
+#if defined(__TARGET_ARCH_x86)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__x64_"
+#elif defined(__TARGET_ARCH_s390)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__s390x_"
+#elif defined(__TARGET_ARCH_arm64)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__arm64_"
+#else
+#define SYSCALL_WRAPPER 0
+#define SYS_PREFIX "__se_"
+#endif
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_mod_race.c b/tools/testing/selftests/bpf/progs/bpf_mod_race.c
new file mode 100644
index 000000000000..82a5c6c6ba83
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_mod_race.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile struct {
+ /* thread to activate trace programs for */
+ pid_t tgid;
+ /* return error from __init function */
+ int inject_error;
+ /* uffd monitored range start address */
+ void *fault_addr;
+} bpf_mod_race_config = { -1 };
+
+int bpf_blocking = 0;
+int res_try_get_module = -1;
+
+static __always_inline bool check_thread_id(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return task->tgid == bpf_mod_race_config.tgid;
+}
+
+/* The trace of execution is something like this:
+ *
+ * finit_module()
+ * load_module()
+ * prepare_coming_module()
+ * notifier_call(MODULE_STATE_COMING)
+ * btf_parse_module()
+ * btf_alloc_id() // Visible to userspace at this point
+ * list_add(btf_mod->list, &btf_modules)
+ * do_init_module()
+ * freeinit = kmalloc()
+ * ret = mod->init()
+ * bpf_prog_widen_race()
+ * bpf_copy_from_user()
+ * ...<sleep>...
+ * if (ret < 0)
+ * ...
+ * free_module()
+ * return ret
+ *
+ * At this point, module loading thread is blocked, we now load the program:
+ *
+ * bpf_check
+ * add_kfunc_call/check_pseudo_btf_id
+ * btf_try_get_module
+ * try_get_module_live == false
+ * return -ENXIO
+ *
+ * Without the fix (try_get_module_live in btf_try_get_module):
+ *
+ * bpf_check
+ * add_kfunc_call/check_pseudo_btf_id
+ * btf_try_get_module
+ * try_get_module == true
+ * <store module reference in btf_kfunc_tab or used_btf array>
+ * ...
+ * return fd
+ *
+ * Now, if we inject an error in the blocked program, our module will be freed
+ * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING).
+ * Later, when bpf program is freed, it will try to module_put already freed
+ * module. This is why try_get_module_live returns false if mod->state is not
+ * MODULE_STATE_LIVE.
+ */
+
+SEC("fmod_ret.s/bpf_fentry_test1")
+int BPF_PROG(widen_race, int a, int ret)
+{
+ char dst;
+
+ if (!check_thread_id())
+ return 0;
+ /* Indicate that we will attempt to block */
+ bpf_blocking = 1;
+ bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr);
+ return bpf_mod_race_config.inject_error;
+}
+
+SEC("fexit/do_init_module")
+int BPF_PROG(fexit_init_module, struct module *mod, int ret)
+{
+ if (!check_thread_id())
+ return 0;
+ /* Indicate that we finished blocking */
+ bpf_blocking = 2;
+ return 0;
+}
+
+SEC("fexit/btf_try_get_module")
+int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod)
+{
+ res_try_get_module = !!mod;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
new file mode 100644
index 000000000000..05838ed9b89c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2022 Sony Group Corporation */
+#include <vmlinux.h>
+
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+int arg1 = 0;
+unsigned long arg2 = 0;
+unsigned long arg3 = 0;
+unsigned long arg4_cx = 0;
+unsigned long arg4 = 0;
+unsigned long arg5 = 0;
+
+int arg1_core = 0;
+unsigned long arg2_core = 0;
+unsigned long arg3_core = 0;
+unsigned long arg4_core_cx = 0;
+unsigned long arg4_core = 0;
+unsigned long arg5_core = 0;
+
+int option_syscall = 0;
+unsigned long arg2_syscall = 0;
+unsigned long arg3_syscall = 0;
+unsigned long arg4_syscall = 0;
+unsigned long arg5_syscall = 0;
+
+const volatile pid_t filter_pid = 0;
+
+SEC("kprobe/" SYS_PREFIX "sys_prctl")
+int BPF_KPROBE(handle_sys_prctl)
+{
+ struct pt_regs *real_regs;
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ unsigned long tmp = 0;
+
+ if (pid != filter_pid)
+ return 0;
+
+ real_regs = PT_REGS_SYSCALL_REGS(ctx);
+
+ /* test for PT_REGS_PARM */
+
+#if !defined(bpf_target_arm64) && !defined(bpf_target_s390)
+ bpf_probe_read_kernel(&tmp, sizeof(tmp), &PT_REGS_PARM1_SYSCALL(real_regs));
+#endif
+ arg1 = tmp;
+ bpf_probe_read_kernel(&arg2, sizeof(arg2), &PT_REGS_PARM2_SYSCALL(real_regs));
+ bpf_probe_read_kernel(&arg3, sizeof(arg3), &PT_REGS_PARM3_SYSCALL(real_regs));
+ bpf_probe_read_kernel(&arg4_cx, sizeof(arg4_cx), &PT_REGS_PARM4(real_regs));
+ bpf_probe_read_kernel(&arg4, sizeof(arg4), &PT_REGS_PARM4_SYSCALL(real_regs));
+ bpf_probe_read_kernel(&arg5, sizeof(arg5), &PT_REGS_PARM5_SYSCALL(real_regs));
+
+ /* test for the CORE variant of PT_REGS_PARM */
+ arg1_core = PT_REGS_PARM1_CORE_SYSCALL(real_regs);
+ arg2_core = PT_REGS_PARM2_CORE_SYSCALL(real_regs);
+ arg3_core = PT_REGS_PARM3_CORE_SYSCALL(real_regs);
+ arg4_core_cx = PT_REGS_PARM4_CORE(real_regs);
+ arg4_core = PT_REGS_PARM4_CORE_SYSCALL(real_regs);
+ arg5_core = PT_REGS_PARM5_CORE_SYSCALL(real_regs);
+
+ return 0;
+}
+
+SEC("kprobe/" SYS_PREFIX "sys_prctl")
+int BPF_KPROBE_SYSCALL(prctl_enter, int option, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4, unsigned long arg5)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != filter_pid)
+ return 0;
+
+ option_syscall = option;
+ arg2_syscall = arg2;
+ arg3_syscall = arg3;
+ arg4_syscall = arg4;
+ arg5_syscall = arg5;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index e0f42601be9b..1c1289ba5fc5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -5,6 +5,8 @@
#define AF_INET 2
#define AF_INET6 10
+#define SOL_SOCKET 1
+#define SO_SNDBUF 7
#define __SO_ACCEPTCON (1 << 16)
#define SOL_TCP 6
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_user.c b/tools/testing/selftests/bpf/progs/btf_type_tag_user.c
new file mode 100644
index 000000000000..5523f77c5a44
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_user.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct bpf_testmod_btf_type_tag_1 {
+ int a;
+};
+
+struct bpf_testmod_btf_type_tag_2 {
+ struct bpf_testmod_btf_type_tag_1 *p;
+};
+
+int g;
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_user_1")
+int BPF_PROG(test_user1, struct bpf_testmod_btf_type_tag_1 *arg)
+{
+ g = arg->a;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_user_2")
+int BPF_PROG(test_user2, struct bpf_testmod_btf_type_tag_2 *arg)
+{
+ g = arg->p->a;
+ return 0;
+}
+
+/* int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+ * int __user *usockaddr_len);
+ */
+SEC("fentry/__sys_getsockname")
+int BPF_PROG(test_sys_getsockname, int fd, struct sockaddr *usockaddr,
+ int *usockaddr_len)
+{
+ g = usockaddr->sa_family;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
new file mode 100644
index 000000000000..b2a409e6382a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+__u32 ctx_retval_value = 0;
+
+SEC("cgroup/getsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+ retval_value = bpf_get_retval();
+ ctx_retval_value = ctx->retval;
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ if (bpf_set_retval(-EISCONN))
+ assertion_error = 1;
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int clear_retval(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ ctx->retval = 0;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
new file mode 100644
index 000000000000..d6e5903e06ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+
+SEC("cgroup/setsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+ retval_value = bpf_get_retval();
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup/setsockopt")
+int set_eunatch(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ if (bpf_set_retval(-EUNATCH))
+ assertion_error = 1;
+
+ return 0;
+}
+
+SEC("cgroup/setsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ if (bpf_set_retval(-EISCONN))
+ assertion_error = 1;
+
+ return 0;
+}
+
+SEC("cgroup/setsockopt")
+int legacy_eperm(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/core_kern.c b/tools/testing/selftests/bpf/progs/core_kern.c
index 13499cc15c7d..2715fe27d4cf 100644
--- a/tools/testing/selftests/bpf/progs/core_kern.c
+++ b/tools/testing/selftests/bpf/progs/core_kern.c
@@ -101,4 +101,20 @@ int balancer_ingress(struct __sk_buff *ctx)
return 0;
}
+typedef int (*func_proto_typedef___match)(long);
+typedef int (*func_proto_typedef___doesnt_match)(char *);
+typedef int (*func_proto_typedef_nested1)(func_proto_typedef___match);
+
+int proto_out[3];
+
+SEC("raw_tracepoint/sys_enter")
+int core_relo_proto(void *ctx)
+{
+ proto_out[0] = bpf_core_type_exists(func_proto_typedef___match);
+ proto_out[1] = bpf_core_type_exists(func_proto_typedef___doesnt_match);
+ proto_out[2] = bpf_core_type_exists(func_proto_typedef_nested1);
+
+ return 0;
+}
+
char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/core_kern_overflow.c b/tools/testing/selftests/bpf/progs/core_kern_overflow.c
new file mode 100644
index 000000000000..f0d5652256ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/core_kern_overflow.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+typedef int (*func_proto_typedef)(long);
+typedef int (*func_proto_typedef_nested1)(func_proto_typedef);
+typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1);
+
+int proto_out;
+
+SEC("raw_tracepoint/sys_enter")
+int core_relo_proto(void *ctx)
+{
+ proto_out = bpf_core_type_exists(func_proto_typedef_nested2);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c
index bca92c9bd29a..106dc75efcc4 100644
--- a/tools/testing/selftests/bpf/progs/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c
@@ -3,6 +3,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
char LICENSE[] SEC("license") = "GPL";
@@ -10,8 +11,8 @@ int pid = 0;
int fentry_cnt = 0;
int fexit_cnt = 0;
-SEC("fentry/__x64_sys_nanosleep")
-int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs)
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int nanosleep_fentry(void *ctx)
{
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 0;
@@ -20,8 +21,8 @@ int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs)
return 0;
}
-SEC("fexit/__x64_sys_nanosleep")
-int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret)
+SEC("fexit/" SYS_PREFIX "sys_nanosleep")
+int nanosleep_fexit(void *ctx)
{
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
index 68a5a9db928a..7e94412d47a5 100644
--- a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
@@ -7,12 +7,12 @@
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") sock_map = {
- .type = BPF_MAP_TYPE_SOCKMAP,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 2);
+} sock_map SEC(".maps");
SEC("freplace/cls_redirect")
int freplace_cls_redirect_test(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
new file mode 100644
index 000000000000..4e8fed75a4e0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+
+SEC("tc")
+int kfunc_call_fail(struct __sk_buff *ctx)
+{
+ bpf_testmod_test_mod_kfunc(0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index 8a8cf59017aa..5aecbb9fdc68 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -1,13 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
__u32 c, __u64 d) __ksym;
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
+extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
+extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
+extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
+
SEC("tc")
int kfunc_call_test2(struct __sk_buff *skb)
{
@@ -44,4 +51,45 @@ int kfunc_call_test1(struct __sk_buff *skb)
return ret;
}
+SEC("tc")
+int kfunc_call_test_ref_btf_id(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ if (pt->a != 42 || pt->b != 108)
+ ret = -1;
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+SEC("tc")
+int kfunc_call_test_pass(struct __sk_buff *skb)
+{
+ struct prog_test_pass1 p1 = {};
+ struct prog_test_pass2 p2 = {};
+ short a = 0;
+ __u64 b = 0;
+ long c = 0;
+ char d = 0;
+ int e = 0;
+
+ bpf_kfunc_call_test_pass_ctx(skb);
+ bpf_kfunc_call_test_pass1(&p1);
+ bpf_kfunc_call_test_pass2(&p2);
+
+ bpf_kfunc_call_test_mem_len_pass1(&a, sizeof(a));
+ bpf_kfunc_call_test_mem_len_pass1(&b, sizeof(b));
+ bpf_kfunc_call_test_mem_len_pass1(&c, sizeof(c));
+ bpf_kfunc_call_test_mem_len_pass1(&d, sizeof(d));
+ bpf_kfunc_call_test_mem_len_pass1(&e, sizeof(e));
+ bpf_kfunc_call_test_mem_len_fail2(&b, -1);
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ksym_race.c b/tools/testing/selftests/bpf/progs/ksym_race.c
new file mode 100644
index 000000000000..def97f2fed90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ksym_race.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern int bpf_testmod_ksym_percpu __ksym;
+
+SEC("tc")
+int ksym_fail(struct __sk_buff *ctx)
+{
+ return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
index e5ab4836a641..45204fe0c570 100644
--- a/tools/testing/selftests/bpf/progs/perfbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -18,7 +19,7 @@ const volatile int batch_cnt = 0;
long sample_val = 42;
long dropped __attribute__((aligned(128))) = 0;
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_perfbuf(void *ctx)
{
__u64 *sample;
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
index 123607d314d6..6a468496f539 100644
--- a/tools/testing/selftests/bpf/progs/ringbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -30,7 +31,7 @@ static __always_inline long get_flags()
return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP;
}
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_ringbuf(void *ctx)
{
long *sample, flags;
diff --git a/tools/testing/selftests/bpf/progs/sample_map_ret0.c b/tools/testing/selftests/bpf/progs/sample_map_ret0.c
index 1612a32007b6..495990d355ef 100644
--- a/tools/testing/selftests/bpf/progs/sample_map_ret0.c
+++ b/tools/testing/selftests/bpf/progs/sample_map_ret0.c
@@ -2,19 +2,19 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") htab = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(__u32),
- .value_size = sizeof(long),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, long);
+ __uint(max_entries, 2);
+} htab SEC(".maps");
-struct bpf_map_def SEC("maps") array = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(long),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, long);
+ __uint(max_entries, 2);
+} array SEC(".maps");
/* Sample program which should always load for testing control paths. */
SEC(".text") int func()
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index 95d5b941bc1f..c9abfe3a11af 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -7,8 +7,6 @@ int bpf_prog1(struct __sk_buff *skb)
{
void *data_end = (void *)(long) skb->data_end;
void *data = (void *)(long) skb->data;
- __u32 lport = skb->local_port;
- __u32 rport = skb->remote_port;
__u8 *d = data;
int err;
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index 79c8139b63b8..c8d810010a94 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -72,18 +72,19 @@ int _getsockopt(struct bpf_sockopt *ctx)
* reasons.
*/
- if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
- return 0; /* EPERM, bounds check */
+ /* Check that optval contains address (__u64) */
+ if (optval + sizeof(__u64) > optval_end)
+ return 0; /* bounds check */
if (((struct tcp_zerocopy_receive *)optval)->address != 0)
- return 0; /* EPERM, unexpected data */
+ return 0; /* unexpected data */
return 1;
}
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
ctx->retval = 0; /* Reset system call return value to zero */
@@ -96,24 +97,24 @@ int _getsockopt(struct bpf_sockopt *ctx)
* bytes of data.
*/
if (optval_end - optval != page_size)
- return 0; /* EPERM, unexpected data size */
+ return 0; /* unexpected data size */
return 1;
}
if (ctx->level != SOL_CUSTOM)
- return 0; /* EPERM, deny everything except custom level */
+ return 0; /* deny everything except custom level */
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE);
if (!storage)
- return 0; /* EPERM, couldn't get sk storage */
+ return 0; /* couldn't get sk storage */
if (!ctx->retval)
- return 0; /* EPERM, kernel should not have handled
+ return 0; /* kernel should not have handled
* SOL_CUSTOM, something is wrong!
*/
ctx->retval = 0; /* Reset system call return value to zero */
@@ -152,7 +153,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
/* Overwrite SO_SNDBUF value */
if (optval + sizeof(__u32) > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
*(__u32 *)optval = 0x55AA;
ctx->optlen = 4;
@@ -164,7 +165,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
/* Always use cubic */
if (optval + 5 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
memcpy(optval, "cubic", 5);
ctx->optlen = 5;
@@ -175,10 +176,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
/* Original optlen is larger than PAGE_SIZE. */
if (ctx->optlen != page_size * 2)
- return 0; /* EPERM, unexpected data size */
+ return 0; /* unexpected data size */
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
/* Make sure we can trim the buffer. */
optval[0] = 0;
@@ -189,21 +190,21 @@ int _setsockopt(struct bpf_sockopt *ctx)
* bytes of data.
*/
if (optval_end - optval != page_size)
- return 0; /* EPERM, unexpected data size */
+ return 0; /* unexpected data size */
return 1;
}
if (ctx->level != SOL_CUSTOM)
- return 0; /* EPERM, deny everything except custom level */
+ return 0; /* deny everything except custom level */
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE);
if (!storage)
- return 0; /* EPERM, couldn't get sk storage */
+ return 0; /* couldn't get sk storage */
storage->val = optval[0];
ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
new file mode 100644
index 000000000000..f00a9731930e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define EAFNOSUPPORT 97
+#define EPROTO 71
+#define ENONET 64
+#define EINVAL 22
+#define ENOENT 2
+
+int test_einval_bpf_tuple = 0;
+int test_einval_reserved = 0;
+int test_einval_netns_id = 0;
+int test_einval_len_opts = 0;
+int test_eproto_l4proto = 0;
+int test_enonet_netns_id = 0;
+int test_enoent_lookup = 0;
+int test_eafnosupport = 0;
+
+struct nf_conn;
+
+struct bpf_ct_opts___local {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 reserved[3];
+} __attribute__((preserve_access_index));
+
+struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+void bpf_ct_release(struct nf_conn *) __ksym;
+
+static __always_inline void
+nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32),
+ void *ctx)
+{
+ struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+ struct bpf_sock_tuple bpf_tuple;
+ struct nf_conn *ct;
+
+ __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
+
+ ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def));
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_bpf_tuple = opts_def.error;
+
+ opts_def.reserved[0] = 1;
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ opts_def.reserved[0] = 0;
+ opts_def.l4proto = IPPROTO_TCP;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_reserved = opts_def.error;
+
+ opts_def.netns_id = -2;
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ opts_def.netns_id = -1;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_netns_id = opts_def.error;
+
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1);
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_len_opts = opts_def.error;
+
+ opts_def.l4proto = IPPROTO_ICMP;
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ opts_def.l4proto = IPPROTO_TCP;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_eproto_l4proto = opts_def.error;
+
+ opts_def.netns_id = 0xf00f;
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ opts_def.netns_id = -1;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_enonet_netns_id = opts_def.error;
+
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_enoent_lookup = opts_def.error;
+
+ ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def));
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_eafnosupport = opts_def.error;
+}
+
+SEC("xdp")
+int nf_xdp_ct_test(struct xdp_md *ctx)
+{
+ nf_ct_test((void *)bpf_xdp_ct_lookup, ctx);
+ return 0;
+}
+
+SEC("tc")
+int nf_skb_ct_test(struct __sk_buff *ctx)
+{
+ nf_ct_test((void *)bpf_skb_ct_lookup, ctx);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/btf_decl_tag.c b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
index c88ccc53529a..c88ccc53529a 100644
--- a/tools/testing/selftests/bpf/progs/btf_decl_tag.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
index 160ead6c67b2..07c94df13660 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
@@ -9,12 +9,15 @@ struct ipv_counts {
unsigned int v6;
};
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
struct bpf_map_def SEC("maps") btf_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(struct ipv_counts),
.max_entries = 4,
};
+#pragma GCC diagnostic pop
BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
index 1884a5bd10f5..762671a2e90c 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -9,6 +9,8 @@ struct ipv_counts {
unsigned int v6;
};
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
/* just to validate we can handle maps in multiple sections */
struct bpf_map_def SEC("maps") btf_map_legacy = {
.type = BPF_MAP_TYPE_ARRAY,
@@ -16,6 +18,7 @@ struct bpf_map_def SEC("maps") btf_map_legacy = {
.value_size = sizeof(long long),
.max_entries = 4,
};
+#pragma GCC diagnostic pop
BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
index 15e0f9945fe4..1dabb88f8cb4 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
@@ -8,12 +8,12 @@ struct ipv_counts {
unsigned int v6;
};
-struct bpf_map_def SEC("maps") btf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct ipv_counts),
- .max_entries = 4,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct ipv_counts));
+ __uint(max_entries, 4);
+} btf_map SEC(".maps");
__attribute__((noinline))
int test_long_fname_2(void)
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
index 8812a90da4eb..702578a5e496 100644
--- a/tools/testing/selftests/bpf/progs/test_probe_user.c
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -7,20 +7,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-
-#if defined(__TARGET_ARCH_x86)
-#define SYSCALL_WRAPPER 1
-#define SYS_PREFIX "__x64_"
-#elif defined(__TARGET_ARCH_s390)
-#define SYSCALL_WRAPPER 1
-#define SYS_PREFIX "__s390x_"
-#elif defined(__TARGET_ARCH_arm64)
-#define SYSCALL_WRAPPER 1
-#define SYS_PREFIX "__arm64_"
-#else
-#define SYSCALL_WRAPPER 0
-#define SYS_PREFIX ""
-#endif
+#include "bpf_misc.h"
static struct sockaddr_in old;
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index eaa7d9dba0be..5bdc0d38efc0 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -35,7 +36,7 @@ long prod_pos = 0;
/* inner state */
long seq = 0;
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int test_ringbuf(void *ctx)
{
int cur_pid = bpf_get_current_pid_tgid() >> 32;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index 83b0aaa52ef7..bf5b7caefdd0 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -392,6 +392,7 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
int err, family;
+ __u32 val_u32;
bool v4;
v4 = (ctx->family == AF_INET);
@@ -418,6 +419,11 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
if (LSW(ctx->remote_port, 0) != SRC_PORT)
return SK_DROP;
+ /* Load from remote_port field with zero padding (backward compatibility) */
+ val_u32 = *(__u32 *)&ctx->remote_port;
+ if (val_u32 != bpf_htonl(bpf_ntohs(SRC_PORT) << 16))
+ return SK_DROP;
+
/* Narrow loads from local_port field. Expect DST_PORT. */
if (LSB(ctx->local_port, 0) != ((DST_PORT >> 0) & 0xff) ||
LSB(ctx->local_port, 1) != ((DST_PORT >> 8) & 0xff) ||
diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
index c304cd5b8cad..37aacc66cd68 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
@@ -10,12 +10,12 @@
#define NUM_CGROUP_LEVELS 4
-struct bpf_map_def SEC("maps") cgroup_ids = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u64),
- .max_entries = NUM_CGROUP_LEVELS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(max_entries, NUM_CGROUP_LEVELS);
+} cgroup_ids SEC(".maps");
static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
{
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 81b57b9aaaea..246f1f001813 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -12,6 +12,7 @@
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
INGRESS_LINUM_IDX,
+ READ_SK_DST_PORT_LINUM_IDX,
__NR_BPF_LINUM_ARRAY_IDX,
};
@@ -250,4 +251,44 @@ int ingress_read_sock_fields(struct __sk_buff *skb)
return CG_OK;
}
+static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
+{
+ __u32 *word = (__u32 *)&sk->dst_port;
+ return word[0] == bpf_htonl(0xcafe0000);
+}
+
+static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
+{
+ __u16 *half = (__u16 *)&sk->dst_port;
+ return half[0] == bpf_htons(0xcafe);
+}
+
+static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
+{
+ __u8 *byte = (__u8 *)&sk->dst_port;
+ return byte[0] == 0xca && byte[1] == 0xfe;
+}
+
+SEC("cgroup_skb/egress")
+int read_sk_dst_port(struct __sk_buff *skb)
+{
+ __u32 linum, linum_idx;
+ struct bpf_sock *sk;
+
+ linum_idx = READ_SK_DST_PORT_LINUM_IDX;
+
+ sk = skb->sk;
+ if (!sk)
+ RET_LOG();
+
+ if (!sk_dst_port__load_word(sk))
+ RET_LOG();
+ if (!sk_dst_port__load_half(sk))
+ RET_LOG();
+ if (!sk_dst_port__load_byte(sk))
+ RET_LOG();
+
+ return CG_OK;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
new file mode 100644
index 000000000000..9d58d61c0dee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c
index bf28814bfde5..950a70b61e74 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_edt.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c
@@ -17,12 +17,12 @@
#define THROTTLE_RATE_BPS (5 * 1000 * 1000)
/* flow_key => last_tstamp timestamp used */
-struct bpf_map_def SEC("maps") flow_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(uint32_t),
- .value_size = sizeof(uint64_t),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, uint32_t);
+ __type(value, uint64_t);
+ __uint(max_entries, 1);
+} flow_map SEC(".maps");
static inline int throttle_flow(struct __sk_buff *skb)
{
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
index cd747cd93dbe..6edebce563b5 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
@@ -16,12 +16,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-struct bpf_map_def SEC("maps") results = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 3,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} results SEC(".maps");
static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk,
void *iph, __u32 ip_size,
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index 199c61b7d062..53b64c999450 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -7,11 +7,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
- unsigned int data_len;
+ int data_len = bpf_xdp_get_buff_len(xdp);
int offset = 0;
/* Data length determine test case */
- data_len = data_end - data;
if (data_len == 54) { /* sizeof(pkt_v4) */
offset = 4096; /* test too large offset */
@@ -20,7 +19,12 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
} else if (data_len == 64) {
offset = 128;
} else if (data_len == 128) {
- offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+ /* Max tail grow 3520 */
+ offset = 4096 - 256 - 320 - data_len;
+ } else if (data_len == 9000) {
+ offset = 10;
+ } else if (data_len == 9001) {
+ offset = 4096;
} else {
return XDP_ABORTED; /* No matching test */
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
index b7448253d135..ca68c038357c 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
@@ -12,14 +12,38 @@
SEC("xdp")
int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
{
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
+ __u8 *data_end = (void *)(long)xdp->data_end;
+ __u8 *data = (void *)(long)xdp->data;
int offset = 0;
- if (data_end - data == 54) /* sizeof(pkt_v4) */
+ switch (bpf_xdp_get_buff_len(xdp)) {
+ case 54:
+ /* sizeof(pkt_v4) */
offset = 256; /* shrink too much */
- else
+ break;
+ case 9000:
+ /* non-linear buff test cases */
+ if (data + 1 > data_end)
+ return XDP_DROP;
+
+ switch (data[0]) {
+ case 0:
+ offset = 10;
+ break;
+ case 1:
+ offset = 4100;
+ break;
+ case 2:
+ offset = 8200;
+ break;
+ default:
+ return XDP_DROP;
+ }
+ break;
+ default:
offset = 20;
+ break;
+ }
if (bpf_xdp_adjust_tail(xdp, 0 - offset))
return XDP_DROP;
return XDP_TX;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
index 58cf4345f5cc..3379d303f41a 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
@@ -49,7 +49,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
void *data = (void *)(long)xdp->data;
meta.ifindex = xdp->rxq->dev->ifindex;
- meta.pkt_len = data_end - data;
+ meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
bpf_xdp_output(xdp, &perf_buf_map,
((__u64) meta.pkt_len << 32) |
BPF_F_CURRENT_CPU,
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
new file mode 100644
index 000000000000..2a3496d8e327
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <bpf/bpf_helpers.h>
+
+int _version SEC("version") = 1;
+
+SEC("xdp.frags")
+int xdp_adjust_frags(struct xdp_md *xdp)
+{
+ __u8 *data_end = (void *)(long)xdp->data_end;
+ __u8 *data = (void *)(long)xdp->data;
+ __u8 val[16] = {};
+ __u32 offset;
+ int err;
+
+ if (data + sizeof(__u32) > data_end)
+ return XDP_DROP;
+
+ offset = *(__u32 *)data;
+ err = bpf_xdp_load_bytes(xdp, offset, val, sizeof(val));
+ if (err < 0)
+ return XDP_DROP;
+
+ if (val[0] != 0xaa || val[15] != 0xaa) /* marker */
+ return XDP_DROP;
+
+ val[0] = 0xbb; /* update the marker */
+ val[15] = 0xbb;
+ err = bpf_xdp_store_bytes(xdp, offset, val, sizeof(val));
+ if (err < 0)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
new file mode 100644
index 000000000000..97ed625bb70a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO 1
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
+ __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp/cpumap")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
index 532025057711..20ec6723df18 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
@@ -24,7 +24,7 @@ int xdp_dummy_prog(struct xdp_md *ctx)
return XDP_PASS;
}
-SEC("xdp_cpumap/dummy_cm")
+SEC("xdp/cpumap")
int xdp_dummy_cm(struct xdp_md *ctx)
{
if (ctx->ingress_ifindex == IFINDEX_LO)
@@ -33,4 +33,10 @@ int xdp_dummy_cm(struct xdp_md *ctx)
return XDP_PASS;
}
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
new file mode 100644
index 000000000000..cdcf7de7ec8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp/devmap")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
index 1e6b9c38ea6d..4139a14f9996 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -27,7 +27,7 @@ int xdp_dummy_prog(struct xdp_md *ctx)
/* valid program on DEVMAP entry via SEC name;
* has access to egress and ingress ifindex
*/
-SEC("xdp_devmap/map_prog")
+SEC("xdp/devmap")
int xdp_dummy_dm(struct xdp_md *ctx)
{
char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
@@ -40,4 +40,11 @@ int xdp_dummy_dm(struct xdp_md *ctx)
return XDP_PASS;
}
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c
index 119582aa105a..6695478c2b25 100644
--- a/tools/testing/selftests/bpf/progs/trace_printk.c
+++ b/tools/testing/selftests/bpf/progs/trace_printk.c
@@ -4,6 +4,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -12,7 +13,7 @@ int trace_printk_ran = 0;
const char fmt[] = "Testing,testing %d\n";
-SEC("fentry/__x64_sys_nanosleep")
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int sys_enter(void *ctx)
{
trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
diff --git a/tools/testing/selftests/bpf/progs/trace_vprintk.c b/tools/testing/selftests/bpf/progs/trace_vprintk.c
index d327241ba047..969306cd4f33 100644
--- a/tools/testing/selftests/bpf/progs/trace_vprintk.c
+++ b/tools/testing/selftests/bpf/progs/trace_vprintk.c
@@ -4,6 +4,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -11,7 +12,7 @@ int null_data_vprintk_ret = 0;
int trace_vprintk_ret = 0;
int trace_vprintk_ran = 0;
-SEC("fentry/__x64_sys_nanosleep")
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int sys_enter(void *ctx)
{
static const char one[] = "1";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 2098f3f27f18..2ab049b54d6c 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -5,6 +5,7 @@
#include <asm/unistd.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -25,28 +26,28 @@ int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id)
return 0;
}
-SEC("kprobe/__x64_sys_getpgid")
+SEC("kprobe/" SYS_PREFIX "sys_getpgid")
int bench_trigger_kprobe(void *ctx)
{
__sync_add_and_fetch(&hits, 1);
return 0;
}
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_trigger_fentry(void *ctx)
{
__sync_add_and_fetch(&hits, 1);
return 0;
}
-SEC("fentry.s/__x64_sys_getpgid")
+SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
int bench_trigger_fentry_sleep(void *ctx)
{
__sync_add_and_fetch(&hits, 1);
return 0;
}
-SEC("fmod_ret/__x64_sys_getpgid")
+SEC("fmod_ret/" SYS_PREFIX "sys_getpgid")
int bench_trigger_fmodret(void *ctx)
{
__sync_add_and_fetch(&hits, 1);
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
index 8395782b6e0a..97b26a30b59a 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -70,7 +70,7 @@ int xdp_redirect_map_all_prog(struct xdp_md *ctx)
BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
}
-SEC("xdp_devmap/map_prog")
+SEC("xdp/devmap")
int xdp_devmap_prog(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index b9f1bbbc8aba..6e6235185a86 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -61,7 +61,11 @@ static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key,
};
__u8 data[64] = {};
int mfd, pfd, ret, zero = 0;
- __u32 retval = 0;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = data,
+ .data_size_in = sizeof(data),
+ .repeat = 1,
+ );
mfd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(__u64), 1, NULL);
if (mfd < 0)
@@ -75,9 +79,8 @@ static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key,
return -1;
}
- ret = bpf_prog_test_run(pfd, 1, data, sizeof(data),
- NULL, NULL, &retval, NULL);
- if (ret < 0 || retval != 42) {
+ ret = bpf_prog_test_run_opts(pfd, &topts);
+ if (ret < 0 || topts.retval != 42) {
ret = -1;
} else {
assert(!bpf_map_lookup_elem(mfd, &zero, value));
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
index 5620919fde9e..826f4423ce02 100755
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -23,6 +23,12 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
+readonly NS3="ns3-$(mktemp -u XXXXXX)"
+readonly NS4="ns4-$(mktemp -u XXXXXX)"
+readonly NS5="ns5-$(mktemp -u XXXXXX)"
+readonly NS6="ns6-$(mktemp -u XXXXXX)"
msg="skip all tests:"
if [ $UID != 0 ]; then
@@ -41,23 +47,23 @@ cleanup()
fi
set +e
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
- ip netns del ns3 2> /dev/null
- ip netns del ns4 2> /dev/null
- ip netns del ns5 2> /dev/null
- ip netns del ns6 2> /dev/null
+ ip netns del ${NS1} 2> /dev/null
+ ip netns del ${NS2} 2> /dev/null
+ ip netns del ${NS3} 2> /dev/null
+ ip netns del ${NS4} 2> /dev/null
+ ip netns del ${NS5} 2> /dev/null
+ ip netns del ${NS6} 2> /dev/null
rm -f $TMP_FILE
}
set -e
-ip netns add ns1
-ip netns add ns2
-ip netns add ns3
-ip netns add ns4
-ip netns add ns5
-ip netns add ns6
+ip netns add ${NS1}
+ip netns add ${NS2}
+ip netns add ${NS3}
+ip netns add ${NS4}
+ip netns add ${NS5}
+ip netns add ${NS6}
trap cleanup 0 2 3 6 9
@@ -67,78 +73,78 @@ ip link add veth5 type veth peer name veth6
ip link add veth7 type veth peer name veth8
ip link add veth9 type veth peer name veth10
-ip link set veth1 netns ns1
-ip link set veth2 netns ns2
-ip link set veth3 netns ns2
-ip link set veth4 netns ns3
-ip link set veth5 netns ns3
-ip link set veth6 netns ns4
-ip link set veth7 netns ns4
-ip link set veth8 netns ns5
-ip link set veth9 netns ns5
-ip link set veth10 netns ns6
-
-ip netns exec ns1 ip link set dev veth1 up
-ip netns exec ns2 ip link set dev veth2 up
-ip netns exec ns2 ip link set dev veth3 up
-ip netns exec ns3 ip link set dev veth4 up
-ip netns exec ns3 ip link set dev veth5 up
-ip netns exec ns4 ip link set dev veth6 up
-ip netns exec ns4 ip link set dev veth7 up
-ip netns exec ns5 ip link set dev veth8 up
-ip netns exec ns5 ip link set dev veth9 up
-ip netns exec ns6 ip link set dev veth10 up
-ip netns exec ns6 ip link set dev lo up
+ip link set veth1 netns ${NS1}
+ip link set veth2 netns ${NS2}
+ip link set veth3 netns ${NS2}
+ip link set veth4 netns ${NS3}
+ip link set veth5 netns ${NS3}
+ip link set veth6 netns ${NS4}
+ip link set veth7 netns ${NS4}
+ip link set veth8 netns ${NS5}
+ip link set veth9 netns ${NS5}
+ip link set veth10 netns ${NS6}
+
+ip netns exec ${NS1} ip link set dev veth1 up
+ip netns exec ${NS2} ip link set dev veth2 up
+ip netns exec ${NS2} ip link set dev veth3 up
+ip netns exec ${NS3} ip link set dev veth4 up
+ip netns exec ${NS3} ip link set dev veth5 up
+ip netns exec ${NS4} ip link set dev veth6 up
+ip netns exec ${NS4} ip link set dev veth7 up
+ip netns exec ${NS5} ip link set dev veth8 up
+ip netns exec ${NS5} ip link set dev veth9 up
+ip netns exec ${NS6} ip link set dev veth10 up
+ip netns exec ${NS6} ip link set dev lo up
# All link scope addresses and routes required between veths
-ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
-ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
-ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
-ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
-ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
-ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
-ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
-ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
-ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
-ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
-ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
-ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
-ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
-ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
-ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
-ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link
-
-ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
-ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21
-
-ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
-ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
-
-ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
-ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4
-
-ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6
-ip netns exec ns4 ip -6 addr add fc42::1 dev lo
-ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
-
-ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
-ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8
-
-ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
-ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
-
-ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-
-ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
-ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
-ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
-
-ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
-ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
+ip netns exec ${NS1} ip -6 addr add fb00::12/16 dev veth1 scope link
+ip netns exec ${NS1} ip -6 route add fb00::21 dev veth1 scope link
+ip netns exec ${NS2} ip -6 addr add fb00::21/16 dev veth2 scope link
+ip netns exec ${NS2} ip -6 addr add fb00::34/16 dev veth3 scope link
+ip netns exec ${NS2} ip -6 route add fb00::43 dev veth3 scope link
+ip netns exec ${NS3} ip -6 route add fb00::65 dev veth5 scope link
+ip netns exec ${NS3} ip -6 addr add fb00::43/16 dev veth4 scope link
+ip netns exec ${NS3} ip -6 addr add fb00::56/16 dev veth5 scope link
+ip netns exec ${NS4} ip -6 addr add fb00::65/16 dev veth6 scope link
+ip netns exec ${NS4} ip -6 addr add fb00::78/16 dev veth7 scope link
+ip netns exec ${NS4} ip -6 route add fb00::87 dev veth7 scope link
+ip netns exec ${NS5} ip -6 addr add fb00::87/16 dev veth8 scope link
+ip netns exec ${NS5} ip -6 addr add fb00::910/16 dev veth9 scope link
+ip netns exec ${NS5} ip -6 route add fb00::109 dev veth9 scope link
+ip netns exec ${NS5} ip -6 route add fb00::109 table 117 dev veth9 scope link
+ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link
+
+ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo
+ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21
+
+ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
+ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
+
+ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65
+ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4
+
+ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6
+ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo
+ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87
+
+ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
+ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8
+
+ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo
+ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo
+
+ip netns exec ${NS1} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${NS2} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${NS3} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${NS4} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${NS5} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ip netns exec ${NS6} sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
+ip netns exec ${NS6} sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
+ip netns exec ${NS6} sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
+
+ip netns exec ${NS6} nc -l -6 -u -d 7330 > $TMP_FILE &
+ip netns exec ${NS1} bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
kill -TERM $!
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 50f7e74ca0b9..cbebfaa7c1e8 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -738,7 +738,7 @@ static void test_sockmap(unsigned int tasks, void *data)
sizeof(key), sizeof(value),
6, NULL);
if (fd < 0) {
- if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) {
+ if (!libbpf_probe_bpf_map_type(BPF_MAP_TYPE_SOCKMAP, NULL)) {
printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n",
__func__);
skips++;
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
index 6413c1472554..102e6588e2fe 100755
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
@@ -4,6 +4,7 @@
# Copyright (c) 2019 Cloudflare
set -eu
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
wait_for_ip()
{
@@ -28,12 +29,12 @@ get_prog_id()
ns1_exec()
{
- ip netns exec ns1 "$@"
+ ip netns exec ${NS1} "$@"
}
setup()
{
- ip netns add ns1
+ ip netns add ${NS1}
ns1_exec ip link set lo up
ns1_exec sysctl -w net.ipv4.tcp_syncookies=2
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 76cd903117af..92e3465fbae8 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -31,6 +31,7 @@
#include <linux/if_ether.h>
#include <linux/btf.h>
+#include <bpf/btf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
@@ -66,6 +67,11 @@ static bool unpriv_disabled = false;
static int skips;
static bool verbose = false;
+struct kfunc_btf_id_pair {
+ const char *kfunc;
+ int insn_idx;
+};
+
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
@@ -92,6 +98,7 @@ struct bpf_test {
int fixup_map_reuseport_array[MAX_FIXUPS];
int fixup_map_ringbuf[MAX_FIXUPS];
int fixup_map_timer[MAX_FIXUPS];
+ struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
/* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
* Can be a tab-separated sequence of expected strings. An empty string
* means no log verification.
@@ -449,7 +456,7 @@ static int probe_filter_length(const struct bpf_insn *fp)
static bool skip_unsupported_map(enum bpf_map_type map_type)
{
- if (!bpf_probe_map_type(map_type, 0)) {
+ if (!libbpf_probe_bpf_map_type(map_type, NULL)) {
printf("SKIP (unsupported map type %d)\n", map_type);
skips++;
return true;
@@ -744,6 +751,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
int *fixup_map_ringbuf = test->fixup_map_ringbuf;
int *fixup_map_timer = test->fixup_map_timer;
+ struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
if (test->fill_helper) {
test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -936,6 +944,26 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
fixup_map_timer++;
} while (*fixup_map_timer);
}
+
+ /* Patch in kfunc BTF IDs */
+ if (fixup_kfunc_btf_id->kfunc) {
+ struct btf *btf;
+ int btf_id;
+
+ do {
+ btf_id = 0;
+ btf = btf__load_vmlinux_btf();
+ if (btf) {
+ btf_id = btf__find_by_name_kind(btf,
+ fixup_kfunc_btf_id->kfunc,
+ BTF_KIND_FUNC);
+ btf_id = btf_id < 0 ? 0 : btf_id;
+ }
+ btf__free(btf);
+ prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
+ fixup_kfunc_btf_id++;
+ } while (fixup_kfunc_btf_id->kfunc);
+ }
}
struct libcap {
@@ -993,13 +1021,18 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
{
__u8 tmp[TEST_DATA_LEN << 2];
__u32 size_tmp = sizeof(tmp);
- uint32_t retval;
int err, saved_errno;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = data,
+ .data_size_in = size_data,
+ .data_out = tmp,
+ .data_size_out = size_tmp,
+ .repeat = 1,
+ );
if (unpriv)
set_admin(true);
- err = bpf_prog_test_run(fd_prog, 1, data, size_data,
- tmp, &size_tmp, &retval, NULL);
+ err = bpf_prog_test_run_opts(fd_prog, &topts);
saved_errno = errno;
if (unpriv)
@@ -1023,9 +1056,8 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
}
}
- if (retval != expected_val &&
- expected_val != POINTER_VALUE) {
- printf("FAIL retval %d != %d ", retval, expected_val);
+ if (topts.retval != expected_val && expected_val != POINTER_VALUE) {
+ printf("FAIL retval %d != %d ", topts.retval, expected_val);
return 1;
}
@@ -1148,7 +1180,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
* bpf_probe_prog_type won't give correct answer
*/
if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING &&
- !bpf_probe_prog_type(prog_type, 0)) {
+ !libbpf_probe_bpf_prog_type(prog_type, NULL)) {
printf("SKIP (unsupported program type %d)\n", prog_type);
skips++;
goto close_fds;
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
index d10cefd6eb09..ea69370caae3 100755
--- a/tools/testing/selftests/bpf/test_xdp_meta.sh
+++ b/tools/testing/selftests/bpf/test_xdp_meta.sh
@@ -2,6 +2,8 @@
# Kselftest framework requirement - SKIP code is 4.
readonly KSFT_SKIP=4
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
cleanup()
{
@@ -13,8 +15,8 @@ cleanup()
set +e
ip link del veth1 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
+ ip netns del ${NS1} 2> /dev/null
+ ip netns del ${NS2} 2> /dev/null
}
ip link set dev lo xdp off 2>/dev/null > /dev/null
@@ -24,32 +26,32 @@ if [ $? -ne 0 ];then
fi
set -e
-ip netns add ns1
-ip netns add ns2
+ip netns add ${NS1}
+ip netns add ${NS2}
trap cleanup 0 2 3 6 9
ip link add veth1 type veth peer name veth2
-ip link set veth1 netns ns1
-ip link set veth2 netns ns2
+ip link set veth1 netns ${NS1}
+ip link set veth2 netns ${NS2}
-ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1
-ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2
+ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1
+ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2
-ip netns exec ns1 tc qdisc add dev veth1 clsact
-ip netns exec ns2 tc qdisc add dev veth2 clsact
+ip netns exec ${NS1} tc qdisc add dev veth1 clsact
+ip netns exec ${NS2} tc qdisc add dev veth2 clsact
-ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t
-ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t
+ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t
+ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t
-ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x
-ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x
+ip netns exec ${NS1} ip link set dev veth1 xdp obj test_xdp_meta.o sec x
+ip netns exec ${NS2} ip link set dev veth2 xdp obj test_xdp_meta.o sec x
-ip netns exec ns1 ip link set dev veth1 up
-ip netns exec ns2 ip link set dev veth2 up
+ip netns exec ${NS1} ip link set dev veth1 up
+ip netns exec ${NS2} ip link set dev veth2 up
-ip netns exec ns1 ping -c 1 10.1.1.22
-ip netns exec ns2 ping -c 1 10.1.1.11
+ip netns exec ${NS1} ping -c 1 10.1.1.22
+ip netns exec ${NS2} ping -c 1 10.1.1.11
exit 0
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
index 57c8db9972a6..1d79f31480ad 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -10,6 +10,8 @@
# | xdp forwarding |
# ------------------
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
ret=0
setup()
@@ -17,27 +19,27 @@ setup()
local xdpmode=$1
- ip netns add ns1
- ip netns add ns2
+ ip netns add ${NS1}
+ ip netns add ${NS2}
- ip link add veth1 index 111 type veth peer name veth11 netns ns1
- ip link add veth2 index 222 type veth peer name veth22 netns ns2
+ ip link add veth1 index 111 type veth peer name veth11 netns ${NS1}
+ ip link add veth2 index 222 type veth peer name veth22 netns ${NS2}
ip link set veth1 up
ip link set veth2 up
- ip -n ns1 link set dev veth11 up
- ip -n ns2 link set dev veth22 up
+ ip -n ${NS1} link set dev veth11 up
+ ip -n ${NS2} link set dev veth22 up
- ip -n ns1 addr add 10.1.1.11/24 dev veth11
- ip -n ns2 addr add 10.1.1.22/24 dev veth22
+ ip -n ${NS1} addr add 10.1.1.11/24 dev veth11
+ ip -n ${NS2} addr add 10.1.1.22/24 dev veth22
}
cleanup()
{
ip link del veth1 2> /dev/null
ip link del veth2 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
+ ip netns del ${NS1} 2> /dev/null
+ ip netns del ${NS2} 2> /dev/null
}
test_xdp_redirect()
@@ -52,13 +54,13 @@ test_xdp_redirect()
return 0
fi
- ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
- ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
+ ip -n ${NS1} link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
+ ip -n ${NS2} link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
- if ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null &&
- ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null; then
+ if ip netns exec ${NS1} ping -c 1 10.1.1.22 &> /dev/null &&
+ ip netns exec ${NS2} ping -c 1 10.1.1.11 &> /dev/null; then
echo "selftests: test_xdp_redirect $xdpmode [PASS]";
else
ret=1
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
index 05f872740999..cc57cb87e65f 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
@@ -32,6 +32,11 @@ DRV_MODE="xdpgeneric xdpdrv xdpegress"
PASS=0
FAIL=0
LOG_DIR=$(mktemp -d)
+declare -a NS
+NS[0]="ns0-$(mktemp -u XXXXXX)"
+NS[1]="ns1-$(mktemp -u XXXXXX)"
+NS[2]="ns2-$(mktemp -u XXXXXX)"
+NS[3]="ns3-$(mktemp -u XXXXXX)"
test_pass()
{
@@ -47,11 +52,9 @@ test_fail()
clean_up()
{
- for i in $(seq $NUM); do
- ip link del veth$i 2> /dev/null
- ip netns del ns$i 2> /dev/null
+ for i in $(seq 0 $NUM); do
+ ip netns del ${NS[$i]} 2> /dev/null
done
- ip netns del ns0 2> /dev/null
}
# Kselftest framework requirement - SKIP code is 4.
@@ -79,23 +82,22 @@ setup_ns()
mode="xdpdrv"
fi
- ip netns add ns0
+ ip netns add ${NS[0]}
for i in $(seq $NUM); do
- ip netns add ns$i
- ip -n ns$i link add veth0 index 2 type veth \
- peer name veth$i netns ns0 index $((1 + $i))
- ip -n ns0 link set veth$i up
- ip -n ns$i link set veth0 up
-
- ip -n ns$i addr add 192.0.2.$i/24 dev veth0
- ip -n ns$i addr add 2001:db8::$i/64 dev veth0
+ ip netns add ${NS[$i]}
+ ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]}
+ ip -n ${NS[$i]} link set veth0 up
+ ip -n ${NS[0]} link set veth$i up
+
+ ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0
+ ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0
# Add a neigh entry for IPv4 ping test
- ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
- ip -n ns$i link set veth0 $mode obj \
+ ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
+ ip -n ${NS[$i]} link set veth0 $mode obj \
xdp_dummy.o sec xdp &> /dev/null || \
{ test_fail "Unable to load dummy xdp" && exit 1; }
IFACES="$IFACES veth$i"
- veth_mac[$i]=$(ip -n ns0 link show veth$i | awk '/link\/ether/ {print $2}')
+ veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}')
done
}
@@ -104,10 +106,10 @@ do_egress_tests()
local mode=$1
# mac test
- ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
- ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
+ ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
+ ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
sleep 0.5
- ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+ ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
sleep 0.5
pkill tcpdump
@@ -123,18 +125,18 @@ do_ping_tests()
local mode=$1
# ping6 test: echo request should be redirect back to itself, not others
- ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
+ ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
- ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
- ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
- ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
+ ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
+ ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
+ ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
sleep 0.5
# ARP test
- ip netns exec ns1 arping -q -c 2 -I veth0 192.0.2.254
+ ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254
# IPv4 test
- ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
+ ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
# IPv6 test
- ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
+ ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
sleep 0.5
pkill tcpdump
@@ -180,7 +182,7 @@ do_tests()
xdpgeneric) drv_p="-S";;
esac
- ip netns exec ns0 ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
+ ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
xdp_pid=$!
sleep 1
if ! ps -p $xdp_pid > /dev/null; then
@@ -197,10 +199,10 @@ do_tests()
kill $xdp_pid
}
-trap clean_up EXIT
-
check_env
+trap clean_up EXIT
+
for mode in ${DRV_MODE}; do
setup_ns $mode
do_tests $mode
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
index a3a1eaee26ea..392d28cc4e58 100755
--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -22,6 +22,9 @@ ksft_skip=4
TESTNAME=xdp_veth
BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
BPF_DIR=$BPF_FS/test_$TESTNAME
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
+readonly NS3="ns3-$(mktemp -u XXXXXX)"
_cleanup()
{
@@ -29,9 +32,9 @@ _cleanup()
ip link del veth1 2> /dev/null
ip link del veth2 2> /dev/null
ip link del veth3 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
- ip netns del ns3 2> /dev/null
+ ip netns del ${NS1} 2> /dev/null
+ ip netns del ${NS2} 2> /dev/null
+ ip netns del ${NS3} 2> /dev/null
rm -rf $BPF_DIR 2> /dev/null
}
@@ -77,24 +80,24 @@ set -e
trap cleanup_skip EXIT
-ip netns add ns1
-ip netns add ns2
-ip netns add ns3
+ip netns add ${NS1}
+ip netns add ${NS2}
+ip netns add ${NS3}
-ip link add veth1 index 111 type veth peer name veth11 netns ns1
-ip link add veth2 index 122 type veth peer name veth22 netns ns2
-ip link add veth3 index 133 type veth peer name veth33 netns ns3
+ip link add veth1 index 111 type veth peer name veth11 netns ${NS1}
+ip link add veth2 index 122 type veth peer name veth22 netns ${NS2}
+ip link add veth3 index 133 type veth peer name veth33 netns ${NS3}
ip link set veth1 up
ip link set veth2 up
ip link set veth3 up
-ip -n ns1 addr add 10.1.1.11/24 dev veth11
-ip -n ns3 addr add 10.1.1.33/24 dev veth33
+ip -n ${NS1} addr add 10.1.1.11/24 dev veth11
+ip -n ${NS3} addr add 10.1.1.33/24 dev veth33
-ip -n ns1 link set dev veth11 up
-ip -n ns2 link set dev veth22 up
-ip -n ns3 link set dev veth33 up
+ip -n ${NS1} link set dev veth11 up
+ip -n ${NS2} link set dev veth22 up
+ip -n ${NS3} link set dev veth33 up
mkdir $BPF_DIR
bpftool prog loadall \
@@ -107,12 +110,12 @@ ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0
ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
-ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp
-ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
-ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp
+ip -n ${NS1} link set dev veth11 xdp obj xdp_dummy.o sec xdp
+ip -n ${NS2} link set dev veth22 xdp obj xdp_tx.o sec xdp
+ip -n ${NS3} link set dev veth33 xdp obj xdp_dummy.o sec xdp
trap cleanup EXIT
-ip netns exec ns1 ping -c 1 -W 1 10.1.1.33
+ip netns exec ${NS1} ping -c 1 -W 1 10.1.1.33
exit 0
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
index 0cbc7604a2f8..810c407e0286 100755
--- a/tools/testing/selftests/bpf/test_xdp_vlan.sh
+++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh
@@ -4,6 +4,8 @@
# Kselftest framework requirement - SKIP code is 4.
readonly KSFT_SKIP=4
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
# Allow wrapper scripts to name test
if [ -z "$TESTNAME" ]; then
@@ -49,15 +51,15 @@ cleanup()
if [ -n "$INTERACTIVE" ]; then
echo "Namespace setup still active explore with:"
- echo " ip netns exec ns1 bash"
- echo " ip netns exec ns2 bash"
+ echo " ip netns exec ${NS1} bash"
+ echo " ip netns exec ${NS2} bash"
exit $status
fi
set +e
ip link del veth1 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
+ ip netns del ${NS1} 2> /dev/null
+ ip netns del ${NS2} 2> /dev/null
}
# Using external program "getopt" to get --long-options
@@ -126,8 +128,8 @@ fi
# Interactive mode likely require us to cleanup netns
if [ -n "$INTERACTIVE" ]; then
ip link del veth1 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
+ ip netns del ${NS1} 2> /dev/null
+ ip netns del ${NS2} 2> /dev/null
fi
# Exit on failure
@@ -144,8 +146,8 @@ if [ -n "$VERBOSE" ]; then
fi
# Create two namespaces
-ip netns add ns1
-ip netns add ns2
+ip netns add ${NS1}
+ip netns add ${NS2}
# Run cleanup if failing or on kill
trap cleanup 0 2 3 6 9
@@ -154,44 +156,44 @@ trap cleanup 0 2 3 6 9
ip link add veth1 type veth peer name veth2
# Move veth1 and veth2 into the respective namespaces
-ip link set veth1 netns ns1
-ip link set veth2 netns ns2
+ip link set veth1 netns ${NS1}
+ip link set veth2 netns ${NS2}
# NOTICE: XDP require VLAN header inside packet payload
# - Thus, disable VLAN offloading driver features
# - For veth REMEMBER TX side VLAN-offload
#
# Disable rx-vlan-offload (mostly needed on ns1)
-ip netns exec ns1 ethtool -K veth1 rxvlan off
-ip netns exec ns2 ethtool -K veth2 rxvlan off
+ip netns exec ${NS1} ethtool -K veth1 rxvlan off
+ip netns exec ${NS2} ethtool -K veth2 rxvlan off
#
# Disable tx-vlan-offload (mostly needed on ns2)
-ip netns exec ns2 ethtool -K veth2 txvlan off
-ip netns exec ns1 ethtool -K veth1 txvlan off
+ip netns exec ${NS2} ethtool -K veth2 txvlan off
+ip netns exec ${NS1} ethtool -K veth1 txvlan off
export IPADDR1=100.64.41.1
export IPADDR2=100.64.41.2
# In ns1/veth1 add IP-addr on plain net_device
-ip netns exec ns1 ip addr add ${IPADDR1}/24 dev veth1
-ip netns exec ns1 ip link set veth1 up
+ip netns exec ${NS1} ip addr add ${IPADDR1}/24 dev veth1
+ip netns exec ${NS1} ip link set veth1 up
# In ns2/veth2 create VLAN device
export VLAN=4011
export DEVNS2=veth2
-ip netns exec ns2 ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
-ip netns exec ns2 ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
-ip netns exec ns2 ip link set $DEVNS2 up
-ip netns exec ns2 ip link set $DEVNS2.$VLAN up
+ip netns exec ${NS2} ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
+ip netns exec ${NS2} ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
+ip netns exec ${NS2} ip link set $DEVNS2 up
+ip netns exec ${NS2} ip link set $DEVNS2.$VLAN up
# Bringup lo in netns (to avoids confusing people using --interactive)
-ip netns exec ns1 ip link set lo up
-ip netns exec ns2 ip link set lo up
+ip netns exec ${NS1} ip link set lo up
+ip netns exec ${NS2} ip link set lo up
# At this point, the hosts cannot reach each-other,
# because ns2 are using VLAN tags on the packets.
-ip netns exec ns2 sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"'
+ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"'
# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags
@@ -202,19 +204,19 @@ export FILE=test_xdp_vlan.o
# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
export XDP_PROG=xdp_vlan_change
-ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG
+ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG
# In ns1: egress use TC to add back VLAN tag 4011
# (del cmd)
# tc qdisc del dev $DEVNS1 clsact 2> /dev/null
#
-ip netns exec ns1 tc qdisc add dev $DEVNS1 clsact
-ip netns exec ns1 tc filter add dev $DEVNS1 egress \
+ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact
+ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \
prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
# Now the namespaces can reach each-other, test with ping:
-ip netns exec ns2 ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2
+ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
+ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
# Second test: Replace xdp prog, that fully remove vlan header
#
@@ -223,9 +225,9 @@ ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2
# ETH_P_8021Q indication, and this cause overwriting of our changes.
#
export XDP_PROG=xdp_vlan_remove_outer2
-ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE off
-ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG
+ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off
+ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG
# Now the namespaces should still be able reach each-other, test with ping:
-ip netns exec ns2 ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2
+ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
+ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 7b7f918eda77..ca6abae9b09c 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -138,6 +138,29 @@ void read_trace_pipe(void)
}
}
+ssize_t get_uprobe_offset(const void *addr)
+{
+ size_t start, end, base;
+ char buf[256];
+ bool found = false;
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
+ if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) {
+ found = true;
+ break;
+ }
+ }
+
+ fclose(f);
+
+ if (!found)
+ return -ESRCH;
+
#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
#define OP_RT_RA_MASK 0xffff0000UL
@@ -145,10 +168,6 @@ void read_trace_pipe(void)
#define ADDIS_R2_R12 0x3c4c0000UL
#define ADDI_R2_R2 0x38420000UL
-ssize_t get_uprobe_offset(const void *addr, ssize_t base)
-{
- u32 *insn = (u32 *)(uintptr_t)addr;
-
/*
* A PPC64 ABIv2 function may have a local and a global entry
* point. We need to use the local entry point when patching
@@ -165,43 +184,16 @@ ssize_t get_uprobe_offset(const void *addr, ssize_t base)
* lis r2,XXXX
* addi r2,r2,XXXX
*/
- if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
- ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
- ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
- return (ssize_t)(insn + 2) - base;
- else
- return (uintptr_t)addr - base;
-}
-
-#else
+ {
+ const u32 *insn = (const u32 *)(uintptr_t)addr;
-ssize_t get_uprobe_offset(const void *addr, ssize_t base)
-{
- return (uintptr_t)addr - base;
-}
-
-#endif
-
-ssize_t get_base_addr(void)
-{
- size_t start, offset;
- char buf[256];
- FILE *f;
-
- f = fopen("/proc/self/maps", "r");
- if (!f)
- return -errno;
-
- while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
- &start, buf, &offset) == 3) {
- if (strcmp(buf, "r-xp") == 0) {
- fclose(f);
- return start - offset;
- }
+ if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+ ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+ ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+ return (uintptr_t)(insn + 2) - start + base;
}
-
- fclose(f);
- return -EINVAL;
+#endif
+ return (uintptr_t)addr - start + base;
}
ssize_t get_rel_offset(uintptr_t addr)
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index d907b445524d..238a9c98cde2 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -18,8 +18,7 @@ int kallsyms_find(const char *sym, unsigned long long *addr);
void read_trace_pipe(void);
-ssize_t get_uprobe_offset(const void *addr, ssize_t base);
-ssize_t get_base_addr(void);
+ssize_t get_uprobe_offset(const void *addr);
ssize_t get_rel_offset(uintptr_t addr);
#endif
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index d7b74eb28333..829be2b9e08e 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -22,6 +22,81 @@
.result = ACCEPT,
},
{
+ "calls: invalid kfunc call: ptr_to_mem to struct with non-scalar",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_fail1", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_fail2", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: ptr_to_mem to struct with FAM",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_fail3", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: reg->type != PTR_TO_CTX",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 expected pointer to ctx, but got PTR",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_pass_ctx", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: void * not allowed in func proto without mem size arg",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type UNKNOWN must point to scalar",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_mem_len_fail1", 2 },
+ },
+},
+{
"calls: basic sanity",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index ce13ece08d51..8c224eac93df 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -121,7 +121,25 @@
.result = ACCEPT,
},
{
- "sk_fullsock(skb->sk): sk->dst_port [narrow load]",
+ "sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .result = ACCEPT,
+},
+{
+ "sk_fullsock(skb->sk): sk->dst_port [half load]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
@@ -139,7 +157,64 @@
.result = ACCEPT,
},
{
- "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]",
+ "sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .result = REJECT,
+ .errstr = "invalid sock access",
+},
+{
+ "sk_fullsock(skb->sk): sk->dst_port [byte load]",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .result = ACCEPT,
+},
+{
+ "sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .result = REJECT,
+ .errstr = "invalid sock access",
+},
+{
+ "sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
@@ -149,7 +224,7 @@
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, dst_port)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
index 51c8224b4ccc..aaedbf4955c3 100644
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -32,12 +32,12 @@ static void int_exit(int sig)
int i;
for (i = 0; ifaces[i] > 0; i++) {
- if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(1);
}
if (prog_id)
- bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
+ bpf_xdp_detach(ifaces[i], xdp_flags, NULL);
}
exit(0);
@@ -210,7 +210,7 @@ int main(int argc, char **argv)
}
/* bind prog_fd to each interface */
- ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
+ ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
if (ret) {
printf("Set xdp fd failed on %d\n", ifindex);
goto err_out;
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index baa870a759a2..c567856fd1bc 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -29,7 +29,7 @@ static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static void cleanup(int sig)
{
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
if (sig)
exit(1);
}
@@ -203,7 +203,7 @@ int main(int argc, char **argv)
printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n");
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
fprintf(stderr, "Link set xdp fd failed for %s\n", ifname);
goto done;
}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 0a5d23da486d..5f8296d29e77 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -266,22 +266,24 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size
}
static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
- struct ifobject *ifobject, u32 qid)
+ struct ifobject *ifobject, bool shared)
{
- struct xsk_socket_config cfg;
+ struct xsk_socket_config cfg = {};
struct xsk_ring_cons *rxr;
struct xsk_ring_prod *txr;
xsk->umem = umem;
cfg.rx_size = xsk->rxqsize;
cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- cfg.libbpf_flags = 0;
+ cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
cfg.xdp_flags = ifobject->xdp_flags;
cfg.bind_flags = ifobject->bind_flags;
+ if (shared)
+ cfg.bind_flags |= XDP_SHARED_UMEM;
txr = ifobject->tx_on ? &xsk->tx : NULL;
rxr = ifobject->rx_on ? &xsk->rx : NULL;
- return xsk_socket__create(&xsk->xsk, ifobject->ifname, qid, umem->umem, rxr, txr, &cfg);
+ return xsk_socket__create(&xsk->xsk, ifobject->ifname, 0, umem->umem, rxr, txr, &cfg);
}
static struct option long_options[] = {
@@ -387,7 +389,6 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
for (i = 0; i < MAX_INTERFACES; i++) {
struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
- ifobj->umem = &ifobj->umem_arr[0];
ifobj->xsk = &ifobj->xsk_arr[0];
ifobj->use_poll = false;
ifobj->pacing_on = true;
@@ -401,11 +402,12 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
ifobj->tx_on = false;
}
+ memset(ifobj->umem, 0, sizeof(*ifobj->umem));
+ ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
+ ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+
for (j = 0; j < MAX_SOCKETS; j++) {
- memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j]));
memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
- ifobj->umem_arr[j].num_frames = DEFAULT_UMEM_BUFFERS;
- ifobj->umem_arr[j].frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
}
}
@@ -906,7 +908,10 @@ static bool rx_stats_are_valid(struct ifobject *ifobject)
return true;
case STAT_TEST_RX_FULL:
xsk_stat = stats.rx_ring_full;
- expected_stat -= RX_FULL_RXQSIZE;
+ if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+ expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE;
+ else
+ expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE;
break;
case STAT_TEST_RX_FILL_EMPTY:
xsk_stat = stats.rx_fill_ring_empty_descs;
@@ -947,7 +952,10 @@ static void tx_stats_validate(struct ifobject *ifobject)
static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
{
+ u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ int ret, ifindex;
+ void *bufs;
u32 i;
ifobject->ns_fd = switch_namespace(ifobject->nsname);
@@ -955,23 +963,20 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
if (ifobject->umem->unaligned_mode)
mmap_flags |= MAP_HUGETLB;
- for (i = 0; i < test->nb_sockets; i++) {
- u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
- u32 ctr = 0;
- void *bufs;
- int ret;
+ bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+ if (bufs == MAP_FAILED)
+ exit_with_error(errno);
- bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
- if (bufs == MAP_FAILED)
- exit_with_error(errno);
+ ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz);
+ if (ret)
+ exit_with_error(-ret);
- ret = xsk_configure_umem(&ifobject->umem_arr[i], bufs, umem_sz);
- if (ret)
- exit_with_error(-ret);
+ for (i = 0; i < test->nb_sockets; i++) {
+ u32 ctr = 0;
while (ctr++ < SOCK_RECONF_CTR) {
- ret = xsk_configure_socket(&ifobject->xsk_arr[i], &ifobject->umem_arr[i],
- ifobject, i);
+ ret = xsk_configure_socket(&ifobject->xsk_arr[i], ifobject->umem,
+ ifobject, !!i);
if (!ret)
break;
@@ -982,8 +987,22 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
}
}
- ifobject->umem = &ifobject->umem_arr[0];
ifobject->xsk = &ifobject->xsk_arr[0];
+
+ if (!ifobject->rx_on)
+ return;
+
+ ifindex = if_nametoindex(ifobject->ifname);
+ if (!ifindex)
+ exit_with_error(errno);
+
+ ret = xsk_setup_xdp_prog(ifindex, &ifobject->xsk_map_fd);
+ if (ret)
+ exit_with_error(-ret);
+
+ ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd);
+ if (ret)
+ exit_with_error(-ret);
}
static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
@@ -1139,14 +1158,16 @@ static void testapp_bidi(struct test_spec *test)
static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
{
+ int ret;
+
xsk_socket__delete(ifobj_tx->xsk->xsk);
- xsk_umem__delete(ifobj_tx->umem->umem);
xsk_socket__delete(ifobj_rx->xsk->xsk);
- xsk_umem__delete(ifobj_rx->umem->umem);
- ifobj_tx->umem = &ifobj_tx->umem_arr[1];
ifobj_tx->xsk = &ifobj_tx->xsk_arr[1];
- ifobj_rx->umem = &ifobj_rx->umem_arr[1];
ifobj_rx->xsk = &ifobj_rx->xsk_arr[1];
+
+ ret = xsk_socket__update_xskmap(ifobj_rx->xsk->xsk, ifobj_rx->xsk_map_fd);
+ if (ret)
+ exit_with_error(-ret);
}
static void testapp_bpf_res(struct test_spec *test)
@@ -1405,13 +1426,13 @@ static struct ifobject *ifobject_create(void)
if (!ifobj->xsk_arr)
goto out_xsk_arr;
- ifobj->umem_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->umem_arr));
- if (!ifobj->umem_arr)
- goto out_umem_arr;
+ ifobj->umem = calloc(1, sizeof(*ifobj->umem));
+ if (!ifobj->umem)
+ goto out_umem;
return ifobj;
-out_umem_arr:
+out_umem:
free(ifobj->xsk_arr);
out_xsk_arr:
free(ifobj);
@@ -1420,7 +1441,7 @@ out_xsk_arr:
static void ifobject_delete(struct ifobject *ifobj)
{
- free(ifobj->umem_arr);
+ free(ifobj->umem);
free(ifobj->xsk_arr);
free(ifobj);
}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 2f705f44b748..62a3e6388632 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -125,10 +125,10 @@ struct ifobject {
struct xsk_socket_info *xsk;
struct xsk_socket_info *xsk_arr;
struct xsk_umem_info *umem;
- struct xsk_umem_info *umem_arr;
thread_func_t func_ptr;
struct pkt_stream *pkt_stream;
int ns_fd;
+ int xsk_map_fd;
u32 dst_ip;
u32 src_ip;
u32 xdp_flags;
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 7581a7348e1b..21a411b04890 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -35,4 +35,4 @@ test_unix_oob
gro
ioam6_parser
toeplitz
-cmsg_so_mark
+cmsg_sender
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 9897fa9ab953..3bfeaf06b960 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -30,6 +30,7 @@ TEST_PROGS += ioam6.sh
TEST_PROGS += gro.sh
TEST_PROGS += gre_gso.sh
TEST_PROGS += cmsg_so_mark.sh
+TEST_PROGS += cmsg_time.sh
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -52,7 +53,7 @@ TEST_GEN_FILES += gro
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
TEST_GEN_FILES += toeplitz
-TEST_GEN_FILES += cmsg_so_mark
+TEST_GEN_FILES += cmsg_sender
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
new file mode 100644
index 000000000000..24444dc72543
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <errno.h>
+#include <error.h>
+#include <netdb.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/errqueue.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/net_tstamp.h>
+#include <linux/types.h>
+#include <linux/udp.h>
+#include <sys/socket.h>
+
+enum {
+ ERN_SUCCESS = 0,
+ /* Well defined errors, callers may depend on these */
+ ERN_SEND = 1,
+ /* Informational, can reorder */
+ ERN_HELP,
+ ERN_SEND_SHORT,
+ ERN_SOCK_CREATE,
+ ERN_RESOLVE,
+ ERN_CMSG_WR,
+ ERN_SOCKOPT,
+ ERN_GETTIME,
+ ERN_RECVERR,
+ ERN_CMSG_RD,
+ ERN_CMSG_RCV,
+};
+
+struct options {
+ bool silent_send;
+ const char *host;
+ const char *service;
+ struct {
+ unsigned int mark;
+ } sockopt;
+ struct {
+ unsigned int family;
+ unsigned int type;
+ unsigned int proto;
+ } sock;
+ struct {
+ bool ena;
+ unsigned int val;
+ } mark;
+ struct {
+ bool ena;
+ unsigned int delay;
+ } txtime;
+ struct {
+ bool ena;
+ } ts;
+} opt = {
+ .sock = {
+ .family = AF_UNSPEC,
+ .type = SOCK_DGRAM,
+ .proto = IPPROTO_UDP,
+ },
+};
+
+static struct timespec time_start_real;
+static struct timespec time_start_mono;
+
+static void __attribute__((noreturn)) cs_usage(const char *bin)
+{
+ printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin);
+ printf("Options:\n"
+ "\t\t-s Silent send() failures\n"
+ "\t\t-4/-6 Force IPv4 / IPv6 only\n"
+ "\t\t-p prot Socket protocol\n"
+ "\t\t (u = UDP (default); i = ICMP; r = RAW)\n"
+ "\n"
+ "\t\t-m val Set SO_MARK with given value\n"
+ "\t\t-M val Set SO_MARK via setsockopt\n"
+ "\t\t-d val Set SO_TXTIME with given delay (usec)\n"
+ "\t\t-t Enable time stamp reporting\n"
+ "");
+ exit(ERN_HELP);
+}
+
+static void cs_parse_args(int argc, char *argv[])
+{
+ char o;
+
+ while ((o = getopt(argc, argv, "46sp:m:M:d:t")) != -1) {
+ switch (o) {
+ case 's':
+ opt.silent_send = true;
+ break;
+ case '4':
+ opt.sock.family = AF_INET;
+ break;
+ case '6':
+ opt.sock.family = AF_INET6;
+ break;
+ case 'p':
+ if (*optarg == 'u' || *optarg == 'U') {
+ opt.sock.proto = IPPROTO_UDP;
+ } else if (*optarg == 'i' || *optarg == 'I') {
+ opt.sock.proto = IPPROTO_ICMP;
+ } else if (*optarg == 'r') {
+ opt.sock.type = SOCK_RAW;
+ } else {
+ printf("Error: unknown protocol: %s\n", optarg);
+ cs_usage(argv[0]);
+ }
+ break;
+
+ case 'm':
+ opt.mark.ena = true;
+ opt.mark.val = atoi(optarg);
+ break;
+ case 'M':
+ opt.sockopt.mark = atoi(optarg);
+ break;
+ case 'd':
+ opt.txtime.ena = true;
+ opt.txtime.delay = atoi(optarg);
+ break;
+ case 't':
+ opt.ts.ena = true;
+ break;
+ }
+ }
+
+ if (optind != argc - 2)
+ cs_usage(argv[0]);
+
+ opt.host = argv[optind];
+ opt.service = argv[optind + 1];
+}
+
+static void
+cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
+{
+ struct cmsghdr *cmsg;
+ size_t cmsg_len;
+
+ msg->msg_control = cbuf;
+ cmsg_len = 0;
+
+ if (opt.mark.ena) {
+ cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+ cmsg_len += CMSG_SPACE(sizeof(__u32));
+ if (cbuf_sz < cmsg_len)
+ error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SO_MARK;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+ *(__u32 *)CMSG_DATA(cmsg) = opt.mark.val;
+ }
+ if (opt.txtime.ena) {
+ struct sock_txtime so_txtime = {
+ .clockid = CLOCK_MONOTONIC,
+ };
+ __u64 txtime;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
+ &so_txtime, sizeof(so_txtime)))
+ error(ERN_SOCKOPT, errno, "setsockopt TXTIME");
+
+ txtime = time_start_mono.tv_sec * (1000ULL * 1000 * 1000) +
+ time_start_mono.tv_nsec +
+ opt.txtime.delay * 1000;
+
+ cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+ cmsg_len += CMSG_SPACE(sizeof(txtime));
+ if (cbuf_sz < cmsg_len)
+ error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_TXTIME;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(txtime));
+ memcpy(CMSG_DATA(cmsg), &txtime, sizeof(txtime));
+ }
+ if (opt.ts.ena) {
+ __u32 val = SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_TSONLY;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+ &val, sizeof(val)))
+ error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING");
+
+ cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+ cmsg_len += CMSG_SPACE(sizeof(__u32));
+ if (cbuf_sz < cmsg_len)
+ error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SO_TIMESTAMPING;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+ *(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE;
+ }
+
+ if (cmsg_len)
+ msg->msg_controllen = cmsg_len;
+ else
+ msg->msg_control = NULL;
+}
+
+static const char *cs_ts_info2str(unsigned int info)
+{
+ static const char *names[] = {
+ [SCM_TSTAMP_SND] = "SND",
+ [SCM_TSTAMP_SCHED] = "SCHED",
+ [SCM_TSTAMP_ACK] = "ACK",
+ };
+
+ if (info < sizeof(names) / sizeof(names[0]))
+ return names[info];
+ return "unknown";
+}
+
+static void
+cs_read_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
+{
+ struct sock_extended_err *see;
+ struct scm_timestamping *ts;
+ struct cmsghdr *cmsg;
+ int i, err;
+
+ if (!opt.ts.ena)
+ return;
+ msg->msg_control = cbuf;
+ msg->msg_controllen = cbuf_sz;
+
+ while (true) {
+ ts = NULL;
+ see = NULL;
+ memset(cbuf, 0, cbuf_sz);
+
+ err = recvmsg(fd, msg, MSG_ERRQUEUE);
+ if (err < 0) {
+ if (errno == EAGAIN)
+ break;
+ error(ERN_RECVERR, errno, "recvmsg ERRQ");
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SO_TIMESTAMPING_OLD) {
+ if (cmsg->cmsg_len < sizeof(*ts))
+ error(ERN_CMSG_RD, EINVAL, "TS cmsg");
+
+ ts = (void *)CMSG_DATA(cmsg);
+ }
+ if ((cmsg->cmsg_level == SOL_IP &&
+ cmsg->cmsg_type == IP_RECVERR) ||
+ (cmsg->cmsg_level == SOL_IPV6 &&
+ cmsg->cmsg_type == IPV6_RECVERR)) {
+ if (cmsg->cmsg_len < sizeof(*see))
+ error(ERN_CMSG_RD, EINVAL, "sock_err cmsg");
+
+ see = (void *)CMSG_DATA(cmsg);
+ }
+ }
+
+ if (!ts)
+ error(ERN_CMSG_RCV, ENOENT, "TS cmsg not found");
+ if (!see)
+ error(ERN_CMSG_RCV, ENOENT, "sock_err cmsg not found");
+
+ for (i = 0; i < 3; i++) {
+ unsigned long long rel_time;
+
+ if (!ts->ts[i].tv_sec && !ts->ts[i].tv_nsec)
+ continue;
+
+ rel_time = (ts->ts[i].tv_sec - time_start_real.tv_sec) *
+ (1000ULL * 1000) +
+ (ts->ts[i].tv_nsec - time_start_real.tv_nsec) /
+ 1000;
+ printf(" %5s ts%d %lluus\n",
+ cs_ts_info2str(see->ee_info),
+ i, rel_time);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ char buf[] = "blablablabla";
+ struct addrinfo hints, *ai;
+ struct iovec iov[1];
+ struct msghdr msg;
+ char cbuf[1024];
+ int err;
+ int fd;
+
+ cs_parse_args(argc, argv);
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = opt.sock.family;
+
+ ai = NULL;
+ err = getaddrinfo(opt.host, opt.service, &hints, &ai);
+ if (err) {
+ fprintf(stderr, "Can't resolve address [%s]:%s\n",
+ opt.host, opt.service);
+ return ERN_SOCK_CREATE;
+ }
+
+ if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP)
+ opt.sock.proto = IPPROTO_ICMPV6;
+
+ fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto);
+ if (fd < 0) {
+ fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
+ freeaddrinfo(ai);
+ return ERN_RESOLVE;
+ }
+
+ if (opt.sock.proto == IPPROTO_ICMP) {
+ buf[0] = ICMP_ECHO;
+ buf[1] = 0;
+ } else if (opt.sock.proto == IPPROTO_ICMPV6) {
+ buf[0] = ICMPV6_ECHO_REQUEST;
+ buf[1] = 0;
+ } else if (opt.sock.type == SOCK_RAW) {
+ struct udphdr hdr = { 1, 2, htons(sizeof(buf)), 0 };
+ struct sockaddr_in6 *sin6 = (void *)ai->ai_addr;;
+
+ memcpy(buf, &hdr, sizeof(hdr));
+ sin6->sin6_port = htons(opt.sock.proto);
+ }
+
+ if (opt.sockopt.mark &&
+ setsockopt(fd, SOL_SOCKET, SO_MARK,
+ &opt.sockopt.mark, sizeof(opt.sockopt.mark)))
+ error(ERN_SOCKOPT, errno, "setsockopt SO_MARK");
+
+ if (clock_gettime(CLOCK_REALTIME, &time_start_real))
+ error(ERN_GETTIME, errno, "gettime REALTIME");
+ if (clock_gettime(CLOCK_MONOTONIC, &time_start_mono))
+ error(ERN_GETTIME, errno, "gettime MONOTINIC");
+
+ iov[0].iov_base = buf;
+ iov[0].iov_len = sizeof(buf);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_name = ai->ai_addr;
+ msg.msg_namelen = ai->ai_addrlen;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+
+ cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+
+ err = sendmsg(fd, &msg, 0);
+ if (err < 0) {
+ if (!opt.silent_send)
+ fprintf(stderr, "send failed: %s\n", strerror(errno));
+ err = ERN_SEND;
+ goto err_out;
+ } else if (err != sizeof(buf)) {
+ fprintf(stderr, "short send\n");
+ err = ERN_SEND_SHORT;
+ goto err_out;
+ } else {
+ err = ERN_SUCCESS;
+ }
+
+ /* Make sure all timestamps have time to loop back */
+ usleep(opt.txtime.delay);
+
+ cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+
+err_out:
+ close(fd);
+ freeaddrinfo(ai);
+ return err;
+}
diff --git a/tools/testing/selftests/net/cmsg_so_mark.c b/tools/testing/selftests/net/cmsg_so_mark.c
deleted file mode 100644
index 27f2804892a7..000000000000
--- a/tools/testing/selftests/net/cmsg_so_mark.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-#include <errno.h>
-#include <netdb.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <linux/types.h>
-#include <sys/socket.h>
-
-int main(int argc, const char **argv)
-{
- char cbuf[CMSG_SPACE(sizeof(__u32))];
- struct addrinfo hints, *ai;
- struct cmsghdr *cmsg;
- struct iovec iov[1];
- struct msghdr msg;
- int mark;
- int err;
- int fd;
-
- if (argc != 4) {
- fprintf(stderr, "Usage: %s <dst_ip> <port> <mark>\n", argv[0]);
- return 1;
- }
- mark = atoi(argv[3]);
-
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = AF_UNSPEC;
- hints.ai_socktype = SOCK_DGRAM;
-
- ai = NULL;
- err = getaddrinfo(argv[1], argv[2], &hints, &ai);
- if (err) {
- fprintf(stderr, "Can't resolve address: %s\n", strerror(errno));
- return 1;
- }
-
- fd = socket(ai->ai_family, SOCK_DGRAM, IPPROTO_UDP);
- if (fd < 0) {
- fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
- freeaddrinfo(ai);
- return 1;
- }
-
- iov[0].iov_base = "bla";
- iov[0].iov_len = 4;
-
- msg.msg_name = ai->ai_addr;
- msg.msg_namelen = ai->ai_addrlen;
- msg.msg_iov = iov;
- msg.msg_iovlen = 1;
- msg.msg_control = cbuf;
- msg.msg_controllen = sizeof(cbuf);
-
- cmsg = CMSG_FIRSTHDR(&msg);
- cmsg->cmsg_level = SOL_SOCKET;
- cmsg->cmsg_type = SO_MARK;
- cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
- *(__u32 *)CMSG_DATA(cmsg) = mark;
-
- err = sendmsg(fd, &msg, 0);
-
- close(fd);
- freeaddrinfo(ai);
- return err != 4;
-}
diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh
index 19c6aab8d0e9..1650b8622f2f 100755
--- a/tools/testing/selftests/net/cmsg_so_mark.sh
+++ b/tools/testing/selftests/net/cmsg_so_mark.sh
@@ -18,6 +18,8 @@ trap cleanup EXIT
# Namespaces
ip netns add $NS
+ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
# Connectivity
ip -netns $NS link add type dummy
ip -netns $NS link set dev dummy0 up
@@ -41,15 +43,29 @@ check_result() {
fi
}
-ip netns exec $NS ./cmsg_so_mark $TGT4 1234 $((MARK + 1))
-check_result $? 0 "IPv4 pass"
-ip netns exec $NS ./cmsg_so_mark $TGT6 1234 $((MARK + 1))
-check_result $? 0 "IPv6 pass"
+for ovr in setsock cmsg both; do
+ for i in 4 6; do
+ [ $i == 4 ] && TGT=$TGT4 || TGT=$TGT6
+
+ for p in u i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-M"
+ [ $ovr == "cmsg" ] && m="-m"
+ [ $ovr == "both" ] && m="-M $MARK -m"
+
+ ip netns exec $NS ./cmsg_sender -$i -p $p $m $((MARK + 1)) $TGT 1234
+ check_result $? 0 "$prot $ovr - pass"
+
+ [ $ovr == "diff" ] && m="-M $((MARK + 1)) -m"
-ip netns exec $NS ./cmsg_so_mark $TGT4 1234 $MARK
-check_result $? 1 "IPv4 rejection"
-ip netns exec $NS ./cmsg_so_mark $TGT6 1234 $MARK
-check_result $? 1 "IPv6 rejection"
+ ip netns exec $NS ./cmsg_sender -$i -p $p $m $MARK -s $TGT 1234
+ check_result $? 1 "$prot $ovr - rejection"
+ done
+ done
+done
# Summary
if [ $BAD -ne 0 ]; then
diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh
new file mode 100755
index 000000000000..91161e1da734
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_time.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NS=ns
+IP4=172.16.0.1/24
+TGT4=172.16.0.2
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+
+cleanup()
+{
+ ip netns del $NS
+}
+
+trap cleanup EXIT
+
+# Namespaces
+ip netns add $NS
+
+ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP4 dev dummy0
+ip -netns $NS addr add $IP6 dev dummy0
+
+# Need FQ for TXTIME
+ip netns exec $NS tc qdisc replace dev dummy0 root fq
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+ ((TOTAL++))
+ if [ $1 -ne 0 ]; then
+ echo " Case $4 returned $1, expected 0"
+ ((BAD++))
+ elif [ "$2" != "$3" ]; then
+ echo " Case $4 returned '$2', expected '$3'"
+ ((BAD++))
+ fi
+}
+
+for i in "-4 $TGT4" "-6 $TGT6"; do
+ for p in u i r; do
+ [ $p == "u" ] && prot=UDPv${i:1:2}
+ [ $p == "i" ] && prot=ICMPv${i:1:2}
+ [ $p == "r" ] && prot=RAWv${i:1:2}
+
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234)
+ check_result $? "$ts" "" "$prot - no options"
+
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t | wc -l)
+ check_result $? "$ts" "2" "$prot - ts cnt"
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t |
+ sed -n "s/.*SCHED ts0 [0-9].*/OK/p")
+ check_result $? "$ts" "OK" "$prot - ts0 SCHED"
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t |
+ sed -n "s/.*SND ts0 [0-9].*/OK/p")
+ check_result $? "$ts" "OK" "$prot - ts0 SND"
+
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
+ awk '/SND/ { if ($3 > 1000) print "OK"; }')
+ check_result $? "$ts" "OK" "$prot - TXTIME abs"
+
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
+ awk '/SND/ {snd=$3}
+ /SCHED/ {sch=$3}
+ END { if (snd - sch > 500) print "OK"; }')
+ check_result $? "$ts" "OK" "$prot - TXTIME rel"
+ done
+done
+
+# Summary
+if [ $BAD -ne 0 ]; then
+ echo "FAIL - $BAD/$TOTAL cases failed"
+ exit 1
+else
+ echo "OK"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 43ea8407a82e..4f70baad867d 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -96,7 +96,7 @@ fib_rule6_del()
fib_rule6_del_by_pref()
{
- pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+ pref=$($IP -6 rule show $1 table $RTABLE | cut -d ":" -f 1)
$IP -6 rule del pref $pref
}
@@ -104,17 +104,36 @@ fib_rule6_test_match_n_redirect()
{
local match="$1"
local getmatch="$2"
+ local description="$3"
$IP -6 rule add $match table $RTABLE
$IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
- log_test $? 0 "rule6 check: $1"
+ log_test $? 0 "rule6 check: $description"
fib_rule6_del_by_pref "$match"
- log_test $? 0 "rule6 del by pref: $match"
+ log_test $? 0 "rule6 del by pref: $description"
+}
+
+fib_rule6_test_reject()
+{
+ local match="$1"
+ local rc
+
+ $IP -6 rule add $match table $RTABLE 2>/dev/null
+ rc=$?
+ log_test $rc 2 "rule6 check: $match"
+
+ if [ $rc -eq 0 ]; then
+ $IP -6 rule del $match table $RTABLE
+ fi
}
fib_rule6_test()
{
+ local getmatch
+ local match
+ local cnt
+
# setup the fib rule redirect route
$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
@@ -124,8 +143,21 @@ fib_rule6_test()
match="from $SRC_IP6 iif $DEV"
fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+ # Reject dsfield (tos) options which have ECN bits set
+ for cnt in $(seq 1 3); do
+ match="dsfield $cnt"
+ fib_rule6_test_reject "$match"
+ done
+
+ # Don't take ECN bits into account when matching on dsfield
match="tos 0x10"
- fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+ for cnt in "0x10" "0x11" "0x12" "0x13"; do
+ # Using option 'tos' instead of 'dsfield' as old iproute2
+ # versions don't support 'dsfield' in ip rule show.
+ getmatch="tos $cnt"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getmatch redirect to table"
+ done
match="fwmark 0x64"
getmatch="mark 0x64"
@@ -165,7 +197,7 @@ fib_rule4_del()
fib_rule4_del_by_pref()
{
- pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+ pref=$($IP rule show $1 table $RTABLE | cut -d ":" -f 1)
$IP rule del pref $pref
}
@@ -173,17 +205,36 @@ fib_rule4_test_match_n_redirect()
{
local match="$1"
local getmatch="$2"
+ local description="$3"
$IP rule add $match table $RTABLE
$IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
- log_test $? 0 "rule4 check: $1"
+ log_test $? 0 "rule4 check: $description"
fib_rule4_del_by_pref "$match"
- log_test $? 0 "rule4 del by pref: $match"
+ log_test $? 0 "rule4 del by pref: $description"
+}
+
+fib_rule4_test_reject()
+{
+ local match="$1"
+ local rc
+
+ $IP rule add $match table $RTABLE 2>/dev/null
+ rc=$?
+ log_test $rc 2 "rule4 check: $match"
+
+ if [ $rc -eq 0 ]; then
+ $IP rule del $match table $RTABLE
+ fi
}
fib_rule4_test()
{
+ local getmatch
+ local match
+ local cnt
+
# setup the fib rule redirect route
$IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
@@ -192,14 +243,27 @@ fib_rule4_test()
# need enable forwarding and disable rp_filter temporarily as all the
# addresses are in the same subnet and egress device == ingress device.
- ip netns exec testns sysctl -w net.ipv4.ip_forward=1
- ip netns exec testns sysctl -w net.ipv4.conf.$DEV.rp_filter=0
+ ip netns exec testns sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
match="from $SRC_IP iif $DEV"
fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
- ip netns exec testns sysctl -w net.ipv4.ip_forward=0
+ ip netns exec testns sysctl -qw net.ipv4.ip_forward=0
+
+ # Reject dsfield (tos) options which have ECN bits set
+ for cnt in $(seq 1 3); do
+ match="dsfield $cnt"
+ fib_rule4_test_reject "$match"
+ done
+ # Don't take ECN bits into account when matching on dsfield
match="tos 0x10"
- fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+ for cnt in "0x10" "0x11" "0x12" "0x13"; do
+ # Using option 'tos' instead of 'dsfield' as old iproute2
+ # versions don't support 'dsfield' in ip rule show.
+ getmatch="tos $cnt"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getmatch redirect to table"
+ done
match="fwmark 0x64"
getmatch="mark 0x64"
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 996af1ae3d3d..bb73235976b3 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1447,6 +1447,81 @@ ipv4_local_rt_cache()
log_test $? 0 "Cached route removed from VRF port device"
}
+ipv4_rt_dsfield()
+{
+ echo
+ echo "IPv4 route with dsfield tests"
+
+ run_cmd "$IP route flush 172.16.102.0/24"
+
+ # New routes should reject dsfield options that interfere with ECN
+ run_cmd "$IP route add 172.16.102.0/24 dsfield 0x01 via 172.16.101.2"
+ log_test $? 2 "Reject route with dsfield 0x01"
+
+ run_cmd "$IP route add 172.16.102.0/24 dsfield 0x02 via 172.16.101.2"
+ log_test $? 2 "Reject route with dsfield 0x02"
+
+ run_cmd "$IP route add 172.16.102.0/24 dsfield 0x03 via 172.16.101.2"
+ log_test $? 2 "Reject route with dsfield 0x03"
+
+ # A generic route that doesn't take DSCP into account
+ run_cmd "$IP route add 172.16.102.0/24 via 172.16.101.2"
+
+ # A more specific route for DSCP 0x10
+ run_cmd "$IP route add 172.16.102.0/24 dsfield 0x10 via 172.16.103.2"
+
+ # DSCP 0x10 should match the specific route, no matter the ECN bits
+ $IP route get fibmatch 172.16.102.1 dsfield 0x10 | \
+ grep -q "via 172.16.103.2"
+ log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x11 | \
+ grep -q "via 172.16.103.2"
+ log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x12 | \
+ grep -q "via 172.16.103.2"
+ log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x13 | \
+ grep -q "via 172.16.103.2"
+ log_test $? 0 "IPv4 route with DSCP and ECN:CE"
+
+ # Unknown DSCP should match the generic route, no matter the ECN bits
+ $IP route get fibmatch 172.16.102.1 dsfield 0x14 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x15 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x16 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x17 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE"
+
+ # Null DSCP should match the generic route, no matter the ECN bits
+ $IP route get fibmatch 172.16.102.1 dsfield 0x00 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x01 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x02 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x03 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with no DSCP and ECN:CE"
+}
+
ipv4_route_test()
{
route_setup
@@ -1454,6 +1529,7 @@ ipv4_route_test()
ipv4_rt_add
ipv4_rt_replace
ipv4_local_rt_cache
+ ipv4_rt_dsfield
route_cleanup
}
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index b90dff8d3a94..64bd00fe9a4f 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -28,8 +28,9 @@ h2_destroy()
switch_create()
{
- # 10 Seconds ageing time.
- ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+ ip link add dev br0 type bridge \
+ vlan_filtering 1 \
+ ageing_time $LOW_AGEING_TIME \
mcast_snooping 0
ip link set dev $swp1 master br0
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index c15c6c85c984..1c8a26046589 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -27,8 +27,9 @@ h2_destroy()
switch_create()
{
- # 10 Seconds ageing time.
- ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0
+ ip link add dev br0 type bridge \
+ ageing_time $LOW_AGEING_TIME \
+ mcast_snooping 0
ip link set dev $swp1 master br0
ip link set dev $swp2 master br0
diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
index e134a5f529c9..1b3b46292179 100644
--- a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
+++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
@@ -99,15 +99,15 @@ fib_ipv4_tos_test()
fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" false
check_err $? "Route not in hardware when should"
- ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 2 metric 1024
- fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 2 metric 1024" false
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 8 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 8 metric 1024" false
check_err $? "Highest TOS route not in hardware when should"
fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" true
check_err $? "Lowest TOS route still in hardware when should not"
- ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1 metric 1024
- fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1 metric 1024" true
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 4 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 4 metric 1024" true
check_err $? "Middle TOS route in hardware when should not"
log_test "IPv4 routes with TOS"
@@ -277,11 +277,11 @@ fib_ipv4_replay_tos_test()
ip -n $ns link set dev dummy1 up
ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0
- ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 4
devlink -N $ns dev reload $devlink_dev
- fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1" false
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 4" false
check_err $? "Highest TOS route not in hardware when should"
fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0" true
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index b0980a2efa31..4a546509de90 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -41,6 +41,8 @@ NETIF_CREATE=yes
# Timeout (in seconds) before ping exits regardless of how many packets have
# been sent or received
PING_TIMEOUT=5
+# Minimum ageing_time (in centiseconds) supported by hardware
+LOW_AGEING_TIME=1000
# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process
# filter by software/hardware
TC_FLAG=skip_hw
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 7da783d6f453..e7e434a4758b 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -24,6 +24,7 @@ PING_COUNT=${PING_COUNT:=10}
PING_TIMEOUT=${PING_TIMEOUT:=5}
WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
+LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
REQUIRE_JQ=${REQUIRE_JQ:=yes}
REQUIRE_MZ=${REQUIRE_MZ:=yes}
diff --git a/tools/testing/selftests/net/forwarding/pedit_ip.sh b/tools/testing/selftests/net/forwarding/pedit_ip.sh
new file mode 100755
index 000000000000..d14efb2d23b2
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/pedit_ip.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on
+# egress of $swp2, the traffic is acted upon by a pedit action. An ingress
+# filter installed on $h2 verifies that the packet looks like expected.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ test_ip4_src
+ test_ip4_dst
+ test_ip6_src
+ test_ip6_dst
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+do_test_pedit_ip()
+{
+ local pedit_locus=$1; shift
+ local pedit_action=$1; shift
+ local match_prot=$1; shift
+ local match_flower=$1; shift
+ local mz_flags=$1; shift
+
+ tc filter add $pedit_locus handle 101 pref 1 \
+ flower action pedit ex munge $pedit_action
+ tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
+ flower skip_hw $match_flower action pass
+
+ RET=0
+
+ $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 -a own -b $h2mac -q -t ip
+
+ local pkts
+ pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
+ tc_rule_handle_stats_get "dev $h2 ingress" 101)
+ check_err $? "Expected to get 10 packets, but got $pkts."
+
+ pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+ ((pkts >= 10))
+ check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
+ log_test "$pedit_locus pedit $pedit_action"
+
+ tc filter del dev $h2 ingress pref 1
+ tc filter del $pedit_locus pref 1
+}
+
+do_test_pedit_ip6()
+{
+ local locus=$1; shift
+ local pedit_addr=$1; shift
+ local flower_addr=$1; shift
+
+ do_test_pedit_ip "$locus" "$pedit_addr set 2001:db8:2::1" ipv6 \
+ "$flower_addr 2001:db8:2::1" \
+ "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+}
+
+do_test_pedit_ip4()
+{
+ local locus=$1; shift
+ local pedit_addr=$1; shift
+ local flower_addr=$1; shift
+
+ do_test_pedit_ip "$locus" "$pedit_addr set 198.51.100.1" ip \
+ "$flower_addr 198.51.100.1" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+}
+
+test_ip4_src()
+{
+ do_test_pedit_ip4 "dev $swp1 ingress" "ip src" src_ip
+ do_test_pedit_ip4 "dev $swp2 egress" "ip src" src_ip
+}
+
+test_ip4_dst()
+{
+ do_test_pedit_ip4 "dev $swp1 ingress" "ip dst" dst_ip
+ do_test_pedit_ip4 "dev $swp2 egress" "ip dst" dst_ip
+}
+
+test_ip6_src()
+{
+ do_test_pedit_ip6 "dev $swp1 ingress" "ip6 src" src_ip
+ do_test_pedit_ip6 "dev $swp2 egress" "ip6 src" src_ip
+}
+
+test_ip6_dst()
+{
+ do_test_pedit_ip6 "dev $swp1 ingress" "ip6 dst" dst_ip
+ do_test_pedit_ip6 "dev $swp2 egress" "ip6 dst" dst_ip
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index c0801df15f54..18bb0d0cf4bd 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -15,6 +15,7 @@ timeout_test=$((timeout_poll * 2 + 1))
mptcp_connect=""
capture=0
checksum=0
+ip_mptcp=0
do_all_tests=1
TEST_COUNT=0
@@ -239,6 +240,16 @@ is_v6()
[ -z "${1##*:*}" ]
}
+is_addr()
+{
+ [ -z "${1##*[.:]*}" ]
+}
+
+is_number()
+{
+ [[ $1 == ?(-)+([0-9]) ]]
+}
+
# $1: ns, $2: port
wait_local_port_listen()
{
@@ -278,6 +289,109 @@ wait_rm_addr()
done
}
+pm_nl_set_limits()
+{
+ local ns=$1
+ local addrs=$2
+ local subflows=$3
+
+ if [ $ip_mptcp -eq 1 ]; then
+ ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows
+ else
+ ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows
+ fi
+}
+
+pm_nl_add_endpoint()
+{
+ local ns=$1
+ local addr=$2
+ local flags
+ local port
+ local dev
+ local id
+ local nr=2
+
+ for p in $@
+ do
+ if [ $p = "flags" ]; then
+ eval _flags=\$"$nr"
+ [ ! -z $_flags ]; flags="flags $_flags"
+ fi
+ if [ $p = "dev" ]; then
+ eval _dev=\$"$nr"
+ [ ! -z $_dev ]; dev="dev $_dev"
+ fi
+ if [ $p = "id" ]; then
+ eval _id=\$"$nr"
+ [ ! -z $_id ]; id="id $_id"
+ fi
+ if [ $p = "port" ]; then
+ eval _port=\$"$nr"
+ [ ! -z $_port ]; port="port $_port"
+ fi
+
+ let nr+=1
+ done
+
+ if [ $ip_mptcp -eq 1 ]; then
+ ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port
+ else
+ ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port
+ fi
+}
+
+pm_nl_del_endpoint()
+{
+ local ns=$1
+ local id=$2
+ local addr=$3
+
+ if [ $ip_mptcp -eq 1 ]; then
+ ip -n $ns mptcp endpoint delete id $id $addr
+ else
+ ip netns exec $ns ./pm_nl_ctl del $id $addr
+ fi
+}
+
+pm_nl_flush_endpoint()
+{
+ local ns=$1
+
+ if [ $ip_mptcp -eq 1 ]; then
+ ip -n $ns mptcp endpoint flush
+ else
+ ip netns exec $ns ./pm_nl_ctl flush
+ fi
+}
+
+pm_nl_show_endpoints()
+{
+ local ns=$1
+
+ if [ $ip_mptcp -eq 1 ]; then
+ ip -n $ns mptcp endpoint show
+ else
+ ip netns exec $ns ./pm_nl_ctl dump
+ fi
+}
+
+pm_nl_change_endpoint()
+{
+ local ns=$1
+ local flags=$2
+ local id=$3
+ local addr=$4
+ local port=""
+
+ if [ $ip_mptcp -eq 1 ]; then
+ ip -n $ns mptcp endpoint change id $id ${flags//","/" "}
+ else
+ if [ $5 -ne 0 ]; then port="port $5"; fi
+ ip netns exec $ns ./pm_nl_ctl set $addr flags $flags $port
+ fi
+}
+
do_transfer()
{
listener_ns="$1"
@@ -289,7 +403,7 @@ do_transfer()
addr_nr_ns1="$7"
addr_nr_ns2="$8"
speed="$9"
- bkup="${10}"
+ sflags="${10}"
port=$((10000+$TEST_COUNT))
TEST_COUNT=$((TEST_COUNT+1))
@@ -378,31 +492,36 @@ do_transfer()
else
addr="10.0.$counter.1"
fi
- ip netns exec $ns1 ./pm_nl_ctl add $addr flags signal
+ pm_nl_add_endpoint $ns1 $addr flags signal
let counter+=1
let add_nr_ns1-=1
done
elif [ $addr_nr_ns1 -lt 0 ]; then
let rm_nr_ns1=-addr_nr_ns1
if [ $rm_nr_ns1 -lt 8 ]; then
- counter=1
- pos=1
- dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
- if [ ${#dump[@]} -gt 0 ]; then
- while [ $counter -le $rm_nr_ns1 ]
- do
- id=${dump[$pos]}
- rm_addr=$(rm_addr_count ${connector_ns})
- ip netns exec ${listener_ns} ./pm_nl_ctl del $id
- wait_rm_addr ${connector_ns} ${rm_addr}
- let counter+=1
- let pos+=5
+ counter=0
+ pm_nl_show_endpoints ${listener_ns} | while read line; do
+ local arr=($line)
+ local nr=0
+
+ for i in ${arr[@]}; do
+ if [ $i = "id" ]; then
+ if [ $counter -eq $rm_nr_ns1 ]; then
+ break
+ fi
+ id=${arr[$nr+1]}
+ rm_addr=$(rm_addr_count ${connector_ns})
+ pm_nl_del_endpoint ${listener_ns} $id
+ wait_rm_addr ${connector_ns} ${rm_addr}
+ let counter+=1
+ fi
+ let nr+=1
done
- fi
+ done
elif [ $rm_nr_ns1 -eq 8 ]; then
- ip netns exec ${listener_ns} ./pm_nl_ctl flush
+ pm_nl_flush_endpoint ${listener_ns}
elif [ $rm_nr_ns1 -eq 9 ]; then
- ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr}
+ pm_nl_del_endpoint ${listener_ns} 0 ${connect_addr}
fi
fi
@@ -426,30 +545,36 @@ do_transfer()
else
addr="10.0.$counter.2"
fi
- ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags
+ pm_nl_add_endpoint $ns2 $addr flags $flags
let counter+=1
let add_nr_ns2-=1
done
elif [ $addr_nr_ns2 -lt 0 ]; then
let rm_nr_ns2=-addr_nr_ns2
if [ $rm_nr_ns2 -lt 8 ]; then
- counter=1
- pos=1
- dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
- if [ ${#dump[@]} -gt 0 ]; then
- while [ $counter -le $rm_nr_ns2 ]
- do
- # rm_addr are serialized, allow the previous one to complete
- id=${dump[$pos]}
- rm_addr=$(rm_addr_count ${listener_ns})
- ip netns exec ${connector_ns} ./pm_nl_ctl del $id
- wait_rm_addr ${listener_ns} ${rm_addr}
- let counter+=1
- let pos+=5
+ counter=0
+ pm_nl_show_endpoints ${connector_ns} | while read line; do
+ local arr=($line)
+ local nr=0
+
+ for i in ${arr[@]}; do
+ if [ $i = "id" ]; then
+ if [ $counter -eq $rm_nr_ns2 ]; then
+ break
+ fi
+ # rm_addr are serialized, allow the previous one to
+ # complete
+ id=${arr[$nr+1]}
+ rm_addr=$(rm_addr_count ${listener_ns})
+ pm_nl_del_endpoint ${connector_ns} $id
+ wait_rm_addr ${listener_ns} ${rm_addr}
+ let counter+=1
+ fi
+ let nr+=1
done
- fi
+ done
elif [ $rm_nr_ns2 -eq 8 ]; then
- ip netns exec ${connector_ns} ./pm_nl_ctl flush
+ pm_nl_flush_endpoint ${connector_ns}
elif [ $rm_nr_ns2 -eq 9 ]; then
local addr
if is_v6 "${connect_addr}"; then
@@ -457,19 +582,34 @@ do_transfer()
else
addr="10.0.1.2"
fi
- ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr
+ pm_nl_del_endpoint ${connector_ns} 0 $addr
fi
fi
- if [ ! -z $bkup ]; then
+ if [ ! -z $sflags ]; then
sleep 1
for netns in "$ns1" "$ns2"; do
- dump=(`ip netns exec $netns ./pm_nl_ctl dump`)
- if [ ${#dump[@]} -gt 0 ]; then
- addr=${dump[${#dump[@]} - 1]}
- backup="ip netns exec $netns ./pm_nl_ctl set $addr flags $bkup"
- $backup
- fi
+ pm_nl_show_endpoints $netns | while read line; do
+ local arr=($line)
+ local addr
+ local port=0
+ local id
+
+ for i in ${arr[@]}; do
+ if is_addr $i; then
+ addr=$i
+ elif is_number $i; then
+ # The minimum expected port number is 10000
+ if [ $i -gt 10000 ]; then
+ port=$i
+ # The maximum id number is 255
+ elif [ $i -lt 255 ]; then
+ id=$i
+ fi
+ fi
+ done
+ pm_nl_change_endpoint $netns $sflags $id $addr $port
+ done
done
fi
@@ -545,7 +685,7 @@ run_tests()
addr_nr_ns1="${5:-0}"
addr_nr_ns2="${6:-0}"
speed="${7:-fast}"
- bkup="${8:-""}"
+ sflags="${8:-""}"
lret=0
oldin=""
@@ -574,7 +714,7 @@ run_tests()
fi
do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
- ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
+ ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${sflags}
lret=$?
}
@@ -978,51 +1118,51 @@ subflows_tests()
# subflow limited by client
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 0
- ip netns exec $ns2 ./pm_nl_ctl limits 0 0
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 0
+ pm_nl_set_limits $ns2 0 0
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow, limited by client" 0 0 0
# subflow limited by server
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 0
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 0
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow, limited by server" 1 1 0
# subflow
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow" 1 1 1
# multiple subflows
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows" 2 2 2
# multiple subflows limited by server
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows, limited by server" 2 2 1
# single subflow, dev
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow dev ns2eth3
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow dev ns2eth3
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow, dev" 1 1 1
}
@@ -1032,28 +1172,28 @@ subflows_error_tests()
# If a single subflow is configured, and matches the MPC src
# address, no additional subflow should be created
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
chk_join_nr "no MPC reuse with single endpoint" 0 0 0
# multiple subflows, with subflow creation error
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
chk_join_nr "multi subflows, with failing subflow" 1 1 1
# multiple subflows, with subflow timeout on MPJ
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j DROP
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
chk_join_nr "multi subflows, with subflow timeout" 1 1 1
@@ -1062,9 +1202,9 @@ subflows_error_tests()
# closed subflow (due to reset) is not reused if additional
# subflows are added later
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
@@ -1074,7 +1214,7 @@ subflows_error_tests()
# mpj subflow will be in TW after the reset
wait_for_tw $ns2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
wait
# additional subflow could be created only if the PM select
@@ -1086,16 +1226,16 @@ signal_address_tests()
{
# add_address, unused
reset
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "unused signal address" 0 0 0
chk_add_nr 1 1
# accept and use add_addr
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address" 1 1 1
chk_add_nr 1 1
@@ -1105,59 +1245,59 @@ signal_address_tests()
# belong to different subnets or one of the listed local address could be
# used for 'add_addr' subflow
reset
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl limits 1 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and signal" 2 2 2
chk_add_nr 1 1
# accept and use add_addr with additional subflows
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows and signal" 3 3 3
chk_add_nr 1 1
# signal addresses
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 3 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_set_limits $ns2 3 3
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal addresses" 3 3 3
chk_add_nr 3 3
# signal invalid addresses
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 3 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+ pm_nl_set_limits $ns2 3 3
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal invalid addresses" 1 1 1
chk_add_nr 3 3
# signal addresses race test
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 4 4
- ip netns exec $ns2 ./pm_nl_ctl limits 4 4
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags signal
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
+ pm_nl_set_limits $ns1 4 4
+ pm_nl_set_limits $ns2 4 4
+ pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags signal
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags signal
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal addresses race test" 3 3 3
@@ -1178,11 +1318,11 @@ link_failure_tests()
# active backup and link switch-over.
# Let's set some arbitrary (low) virtual link limits.
init_shapers
- ip netns exec $ns1 ./pm_nl_ctl limits 0 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
run_tests $ns1 $ns2 10.0.1.1 1
chk_join_nr "multiple flows, signal, link failure" 3 3 3
chk_add_nr 1 1
@@ -1192,11 +1332,11 @@ link_failure_tests()
# for bidirectional transfer
reset
init_shapers
- ip netns exec $ns1 ./pm_nl_ctl limits 0 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
run_tests $ns1 $ns2 10.0.1.1 2
chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3
chk_add_nr 1 1
@@ -1206,11 +1346,11 @@ link_failure_tests()
# will never be used
reset
init_shapers
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+ pm_nl_set_limits $ns2 1 2
export FAILING_LINKS="1"
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
run_tests $ns1 $ns2 10.0.1.1 1
chk_join_nr "backup subflow unused, link failure" 2 2 2
chk_add_nr 1 1
@@ -1220,10 +1360,10 @@ link_failure_tests()
# the traffic
reset
init_shapers
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
export FAILING_LINKS="1 2"
run_tests $ns1 $ns2 10.0.1.1 1
chk_join_nr "backup flow used, multi links fail" 2 2 2
@@ -1235,10 +1375,10 @@ link_failure_tests()
# for bidirectional transfer
reset
init_shapers
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
run_tests $ns1 $ns2 10.0.1.1 2
chk_join_nr "backup flow used, bidi, link failure" 2 2 2
chk_add_nr 1 1
@@ -1250,38 +1390,38 @@ add_addr_timeout_tests()
{
# add_addr timeout
reset_with_add_addr_timeout
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1
chk_add_nr 4 0
# add_addr timeout IPv6
reset_with_add_addr_timeout 6
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
- ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1
chk_add_nr 4 0
# signal addresses timeout
reset_with_add_addr_timeout
- ip netns exec $ns1 ./pm_nl_ctl limits 2 2
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_set_limits $ns2 2 2
run_tests $ns1 $ns2 10.0.1.1 0 0 0 least
chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2
chk_add_nr 8 0
# signal invalid addresses timeout
reset_with_add_addr_timeout
- ip netns exec $ns1 ./pm_nl_ctl limits 2 2
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_set_limits $ns2 2 2
run_tests $ns1 $ns2 10.0.1.1 0 0 0 least
chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1
chk_add_nr 8 0
@@ -1291,28 +1431,28 @@ remove_tests()
{
# single subflow, remove
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
chk_join_nr "remove single subflow" 1 1 1
chk_rm_nr 1 1
# multiple subflows, remove
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow
chk_join_nr "remove multiple subflows" 2 2 2
chk_rm_nr 2 2
# single address, remove
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 1
run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
chk_join_nr "remove single address" 1 1 1
chk_add_nr 1 1
@@ -1320,10 +1460,10 @@ remove_tests()
# subflow and signal, remove
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow
chk_join_nr "remove subflow and signal" 2 2 2
chk_add_nr 1 1
@@ -1331,11 +1471,11 @@ remove_tests()
# subflows and signal, remove
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow
chk_join_nr "remove subflows and signal" 3 3 3
chk_add_nr 1 1
@@ -1343,11 +1483,11 @@ remove_tests()
# addresses remove
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 3 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_set_limits $ns2 3 3
run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
chk_join_nr "remove addresses" 3 3 3
chk_add_nr 3 3
@@ -1355,11 +1495,11 @@ remove_tests()
# invalid addresses remove
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 3 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+ pm_nl_set_limits $ns2 3 3
run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
chk_join_nr "remove invalid addresses" 1 1 1
chk_add_nr 3 3
@@ -1367,11 +1507,11 @@ remove_tests()
# subflows and signal, flush
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
chk_join_nr "flush subflows and signal" 3 3 3
chk_add_nr 1 1
@@ -1379,22 +1519,22 @@ remove_tests()
# subflows flush
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 3 3
- ip netns exec $ns2 ./pm_nl_ctl limits 3 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow id 150
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_set_limits $ns2 3 3
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
chk_join_nr "flush subflows" 3 3 3
chk_rm_nr 3 3
# addresses flush
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 3 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_set_limits $ns2 3 3
run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
chk_join_nr "flush addresses" 3 3 3
chk_add_nr 3 3
@@ -1402,11 +1542,11 @@ remove_tests()
# invalid addresses flush
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 3 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+ pm_nl_set_limits $ns2 3 3
run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow
chk_join_nr "flush invalid addresses" 1 1 1
chk_add_nr 3 3
@@ -1414,18 +1554,18 @@ remove_tests()
# remove id 0 subflow
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow
chk_join_nr "remove id 0 subflow" 1 1 1
chk_rm_nr 1 1
# remove id 0 address
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 1
run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow
chk_join_nr "remove id 0 address" 1 1 1
chk_add_nr 1 1
@@ -1436,37 +1576,37 @@ add_tests()
{
# add single subflow
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow
chk_join_nr "add single subflow" 1 1 1
# add signal address
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
chk_join_nr "add signal address" 1 1 1
chk_add_nr 1 1
# add multiple subflows
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow
chk_join_nr "add multiple subflows" 2 2 2
# add multiple subflows IPv6
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow
chk_join_nr "add multiple subflows IPv6" 2 2 2
# add multiple addresses IPv6
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow
chk_join_nr "add multiple addresses IPv6" 2 2 2
chk_add_nr 2 2
@@ -1476,33 +1616,33 @@ ipv6_tests()
{
# subflow IPv6
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
chk_join_nr "single subflow IPv6" 1 1 1
# add_address, unused IPv6
reset
- ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
chk_join_nr "unused signal address IPv6" 0 0 0
chk_add_nr 1 1
# signal address IPv6
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ pm_nl_set_limits $ns2 1 1
run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
chk_join_nr "single address IPv6" 1 1 1
chk_add_nr 1 1
# single address IPv6, remove
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ pm_nl_set_limits $ns2 1 1
run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow
chk_join_nr "remove single address IPv6" 1 1 1
chk_add_nr 1 1
@@ -1510,10 +1650,10 @@ ipv6_tests()
# subflow and signal IPv6, remove
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 2
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow
chk_join_nr "remove subflow and signal IPv6" 2 2 2
chk_add_nr 1 1
@@ -1524,76 +1664,76 @@ v4mapped_tests()
{
# subflow IPv4-mapped to IPv4-mapped
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
run_tests $ns1 $ns2 "::ffff:10.0.1.1"
chk_join_nr "single subflow IPv4-mapped" 1 1 1
# signal address IPv4-mapped with IPv4-mapped sk
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
- ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
run_tests $ns1 $ns2 "::ffff:10.0.1.1"
chk_join_nr "signal address IPv4-mapped" 1 1 1
chk_add_nr 1 1
# subflow v4-map-v6
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 "::ffff:10.0.1.1"
chk_join_nr "single subflow v4-map-v6" 1 1 1
# signal address v4-map-v6
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 "::ffff:10.0.1.1"
chk_join_nr "signal address v4-map-v6" 1 1 1
chk_add_nr 1 1
# subflow v6-map-v4
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow v6-map-v4" 1 1 1
# signal address v6-map-v4
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
- ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address v6-map-v4" 1 1 1
chk_add_nr 1 1
# no subflow IPv6 to v4 address
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::2 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "no JOIN with diff families v4-v6" 0 0 0
# no subflow IPv6 to v4 address even if v6 has a valid v4 at the end
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 dead:beef:2::10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0
# no subflow IPv4 to v6 address, no need to slow down too then
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr "no JOIN with diff families v6-v4" 0 0 0
}
@@ -1602,50 +1742,60 @@ backup_tests()
{
# single subflow, backup
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,backup
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup
chk_join_nr "single subflow, backup" 1 1 1
chk_prio_nr 0 1
# single address, backup
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 1
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
chk_join_nr "single address, backup" 1 1 1
chk_add_nr 1 1
chk_prio_nr 1 0
+
+ # single address with port, backup
+ reset
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_set_limits $ns2 1 1
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+ chk_join_nr "single address with port, backup" 1 1 1
+ chk_add_nr 1 1
+ chk_prio_nr 1 0
}
add_addr_ports_tests()
{
# signal address with port
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address with port" 1 1 1
chk_add_nr 1 1 1
# subflow and signal with port
reset
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl limits 1 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and signal with port" 2 2 2
chk_add_nr 1 1 1
# single address with port, remove
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_set_limits $ns2 1 1
run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
chk_join_nr "remove single address with port" 1 1 1
chk_add_nr 1 1 1
@@ -1653,10 +1803,10 @@ add_addr_ports_tests()
# subflow and signal with port, remove
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
- ip netns exec $ns2 ./pm_nl_ctl limits 1 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow
chk_join_nr "remove subflow and signal with port" 2 2 2
chk_add_nr 1 1 1
@@ -1664,11 +1814,11 @@ add_addr_ports_tests()
# subflows and signal with port, flush
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
- ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow
chk_join_nr "flush subflows and signal with port" 3 3 3
chk_add_nr 1 1
@@ -1676,20 +1826,20 @@ add_addr_ports_tests()
# multiple addresses with port
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 2 2
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10100
- ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10100
+ pm_nl_set_limits $ns2 2 2
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple addresses with port" 2 2 2
chk_add_nr 2 2 2
# multiple addresses with ports
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 2 2
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10101
- ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10101
+ pm_nl_set_limits $ns2 2 2
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple addresses with ports" 2 2 2
chk_add_nr 2 2 2
@@ -1699,56 +1849,56 @@ syncookies_tests()
{
# single subflow, syncookies
reset_with_cookies
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow with syn cookies" 1 1 1
# multiple subflows with syn cookies
reset_with_cookies
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows with syn cookies" 2 2 2
# multiple subflows limited by server
reset_with_cookies
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflows limited by server w cookies" 2 1 1
# test signal address with cookies
reset_with_cookies
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address with syn cookies" 1 1 1
chk_add_nr 1 1
# test cookie with subflow and signal
reset_with_cookies
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl limits 0 2
- ip netns exec $ns2 ./pm_nl_ctl limits 1 2
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and signal w cookies" 2 2 2
chk_add_nr 1 1
# accept and use add_addr with additional subflows
reset_with_cookies
- ip netns exec $ns1 ./pm_nl_ctl limits 0 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflows and signal w. cookies" 3 3 3
chk_add_nr 1 1
@@ -1758,29 +1908,29 @@ checksum_tests()
{
# checksum test 0 0
reset_with_checksum 0 0
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
run_tests $ns1 $ns2 10.0.1.1
chk_csum_nr "checksum test 0 0"
# checksum test 1 1
reset_with_checksum 1 1
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
run_tests $ns1 $ns2 10.0.1.1
chk_csum_nr "checksum test 1 1"
# checksum test 0 1
reset_with_checksum 0 1
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
run_tests $ns1 $ns2 10.0.1.1
chk_csum_nr "checksum test 0 1"
# checksum test 1 0
reset_with_checksum 1 0
- ip netns exec $ns1 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
run_tests $ns1 $ns2 10.0.1.1
chk_csum_nr "checksum test 1 0"
}
@@ -1789,26 +1939,26 @@ deny_join_id0_tests()
{
# subflow allow join id0 ns1
reset_with_allow_join_id0 1 0
- ip netns exec $ns1 ./pm_nl_ctl limits 1 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow allow join id0 ns1" 1 1 1
# subflow allow join id0 ns2
reset_with_allow_join_id0 0 1
- ip netns exec $ns1 ./pm_nl_ctl limits 1 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow allow join id0 ns2" 0 0 0
# signal address allow join id0 ns1
# ADD_ADDRs are not affected by allow_join_id0 value.
reset_with_allow_join_id0 1 0
- ip netns exec $ns1 ./pm_nl_ctl limits 1 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address allow join id0 ns1" 1 1 1
chk_add_nr 1 1
@@ -1816,28 +1966,28 @@ deny_join_id0_tests()
# signal address allow join id0 ns2
# ADD_ADDRs are not affected by allow_join_id0 value.
reset_with_allow_join_id0 0 1
- ip netns exec $ns1 ./pm_nl_ctl limits 1 1
- ip netns exec $ns2 ./pm_nl_ctl limits 1 1
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address allow join id0 ns2" 1 1 1
chk_add_nr 1 1
# subflow and address allow join id0 ns1
reset_with_allow_join_id0 1 0
- ip netns exec $ns1 ./pm_nl_ctl limits 2 2
- ip netns exec $ns2 ./pm_nl_ctl limits 2 2
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and address allow join id0 1" 2 2 2
# subflow and address allow join id0 ns2
reset_with_allow_join_id0 0 1
- ip netns exec $ns1 ./pm_nl_ctl limits 2 2
- ip netns exec $ns2 ./pm_nl_ctl limits 2 2
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and address allow join id0 2" 1 1 1
}
@@ -1848,10 +1998,10 @@ fullmesh_tests()
# 2 fullmesh addrs in ns2, added before the connection,
# 1 non-fullmesh addr in ns1, added during the connection.
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 0 4
- ip netns exec $ns2 ./pm_nl_ctl limits 1 4
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh
+ pm_nl_set_limits $ns1 0 4
+ pm_nl_set_limits $ns2 1 4
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh
run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
chk_join_nr "fullmesh test 2x1" 4 4 4
chk_add_nr 1 1
@@ -1860,9 +2010,9 @@ fullmesh_tests()
# 1 non-fullmesh addr in ns1, added before the connection,
# 1 fullmesh addr in ns2, added during the connection.
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 1 3
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
chk_join_nr "fullmesh test 1x1" 3 3 3
chk_add_nr 1 1
@@ -1871,9 +2021,9 @@ fullmesh_tests()
# 1 non-fullmesh addr in ns1, added before the connection,
# 2 fullmesh addrs in ns2, added during the connection.
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 2 5
- ip netns exec $ns2 ./pm_nl_ctl limits 1 5
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 2 5
+ pm_nl_set_limits $ns2 1 5
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
chk_join_nr "fullmesh test 1x2" 5 5 5
chk_add_nr 1 1
@@ -1883,12 +2033,50 @@ fullmesh_tests()
# 2 fullmesh addrs in ns2, added during the connection,
# limit max_subflows to 4.
reset
- ip netns exec $ns1 ./pm_nl_ctl limits 2 4
- ip netns exec $ns2 ./pm_nl_ctl limits 1 4
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 2 4
+ pm_nl_set_limits $ns2 1 4
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
chk_join_nr "fullmesh test 1x2, limited" 4 4 4
chk_add_nr 1 1
+
+ # set fullmesh flag
+ reset
+ pm_nl_set_limits $ns1 4 4
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+ pm_nl_set_limits $ns2 4 4
+ run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh
+ chk_join_nr "set fullmesh flag test" 2 2 2
+ chk_rm_nr 0 1
+
+ # set nofullmesh flag
+ reset
+ pm_nl_set_limits $ns1 4 4
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh
+ pm_nl_set_limits $ns2 4 4
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh
+ chk_join_nr "set nofullmesh flag test" 2 2 2
+ chk_rm_nr 0 1
+
+ # set backup,fullmesh flags
+ reset
+ pm_nl_set_limits $ns1 4 4
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+ pm_nl_set_limits $ns2 4 4
+ run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh
+ chk_join_nr "set backup,fullmesh flags test" 2 2 2
+ chk_prio_nr 0 1
+ chk_rm_nr 0 1
+
+ # set nobackup,nofullmesh flags
+ reset
+ pm_nl_set_limits $ns1 4 4
+ pm_nl_set_limits $ns2 4 4
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh
+ chk_join_nr "set nobackup,nofullmesh flags test" 2 2 2
+ chk_prio_nr 0 1
+ chk_rm_nr 0 1
}
all_tests()
@@ -1930,6 +2118,7 @@ usage()
echo " -m fullmesh_tests"
echo " -c capture pcap files"
echo " -C enable data checksum"
+ echo " -i use ip mptcp"
echo " -h help"
}
@@ -1951,9 +2140,12 @@ for arg in "$@"; do
if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then
checksum=1
fi
+ if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"i"[0-9a-zA-Z]*$ ]]; then
+ ip_mptcp=1
+ fi
- # exception for the capture/checksum options, the rest means: a part of the tests
- if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then
+ # exception for the capture/checksum/ip_mptcp options, the rest means: a part of the tests
+ if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ] && [ "${arg}" != "-i" ]; then
do_all_tests=0
fi
done
@@ -1963,7 +2155,7 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
-while getopts 'fesltra64bpkdmchCS' opt; do
+while getopts 'fesltra64bpkdmchCSi' opt; do
case $opt in
f)
subflows_tests
@@ -2014,6 +2206,8 @@ while getopts 'fesltra64bpkdmchCS' opt; do
;;
C)
;;
+ i)
+ ;;
h | *)
usage
;;
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index cbacf9f6538b..89839d1ff9d8 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -164,4 +164,22 @@ id 253 flags 10.0.0.5
id 254 flags 10.0.0.2
id 255 flags 10.0.0.3" "wrap-around ids"
+ip netns exec $ns1 ./pm_nl_ctl flush
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow
+ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,backup 10.0.1.1" "set flags (backup)"
+ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow 10.0.1.1" " (nobackup)"
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,fullmesh 10.0.1.1" " (fullmesh)"
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow 10.0.1.1" " (nofullmesh)"
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)"
+
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 354784512748..22a5ec1e128e 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -28,7 +28,7 @@ static void syntax(char *argv[])
fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
- fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
+ fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
fprintf(stderr, "\tflush\n");
fprintf(stderr, "\tdump\n");
fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
@@ -657,8 +657,10 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
u_int32_t flags = 0;
u_int16_t family;
int nest_start;
+ int use_id = 0;
+ u_int8_t id;
int off = 0;
- int arg;
+ int arg = 2;
memset(data, 0, sizeof(data));
nh = (void *)data;
@@ -674,29 +676,45 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
nest->rta_len = RTA_LENGTH(0);
off += NLMSG_ALIGN(nest->rta_len);
- /* addr data */
- rta = (void *)(data + off);
- if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) {
- family = AF_INET;
- rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
- rta->rta_len = RTA_LENGTH(4);
- } else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) {
- family = AF_INET6;
- rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
- rta->rta_len = RTA_LENGTH(16);
+ if (!strcmp(argv[arg], "id")) {
+ if (++arg >= argc)
+ error(1, 0, " missing id value");
+
+ use_id = 1;
+ id = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
} else {
- error(1, errno, "can't parse ip %s", argv[2]);
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, argv[arg], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, argv[arg], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else {
+ error(1, errno, "can't parse ip %s", argv[arg]);
+ }
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
}
- off += NLMSG_ALIGN(rta->rta_len);
- /* family */
- rta = (void *)(data + off);
- rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
- rta->rta_len = RTA_LENGTH(2);
- memcpy(RTA_DATA(rta), &family, 2);
- off += NLMSG_ALIGN(rta->rta_len);
+ if (++arg >= argc)
+ error(1, 0, " missing flags keyword");
- for (arg = 3; arg < argc; arg++) {
+ for (; arg < argc; arg++) {
if (!strcmp(argv[arg], "flags")) {
char *tok, *str;
@@ -704,12 +722,14 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
if (++arg >= argc)
error(1, 0, " missing flags value");
- /* do not support flag list yet */
for (str = argv[arg]; (tok = strtok(str, ","));
str = NULL) {
if (!strcmp(tok, "backup"))
flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
- else if (strcmp(tok, "nobackup"))
+ else if (!strcmp(tok, "fullmesh"))
+ flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
+ else if (strcmp(tok, "nobackup") &&
+ strcmp(tok, "nofullmesh"))
error(1, errno,
"unknown flag %s", argv[arg]);
}
@@ -719,6 +739,21 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
rta->rta_len = RTA_LENGTH(4);
memcpy(RTA_DATA(rta), &flags, 4);
off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "port")) {
+ u_int16_t port;
+
+ if (use_id)
+ error(1, 0, " port can't be used with id");
+
+ if (++arg >= argc)
+ error(1, 0, " missing port value");
+
+ port = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &port, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
} else {
error(1, 0, "unknown keyword %s", argv[arg]);
}
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index c9ce3dfa42ee..0900c5438fbb 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -216,9 +216,9 @@ kci_test_route_get()
check_err $?
ip route get fe80::1 dev "$devdummy" > /dev/null
check_err $?
- ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x1 mark 0x1 > /dev/null
+ ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x10 mark 0x1 > /dev/null
check_err $?
- ip route get ::1 from ::1 iif lo oif lo tos 0x1 mark 0x1 > /dev/null
+ ip route get ::1 from ::1 iif lo oif lo tos 0x10 mark 0x1 > /dev/null
check_err $?
ip addr add dev "$devdummy" 10.23.7.11/24
check_err $?
diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c
index aee631c5284e..044bc0e9ed81 100644
--- a/tools/testing/selftests/net/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
@@ -325,8 +325,8 @@ int main(int argc, char **argv)
struct ifreq device;
struct ifreq hwtstamp;
struct hwtstamp_config hwconfig, hwconfig_requested;
- struct so_timestamping so_timestamping_get = { 0, -1 };
- struct so_timestamping so_timestamping = { 0, -1 };
+ struct so_timestamping so_timestamping_get = { 0, 0 };
+ struct so_timestamping so_timestamping = { 0, 0 };
struct sockaddr_in addr;
struct ip_mreq imr;
struct in_addr iaddr;