-rw-r--r--  .gitignore | 1
-rw-r--r--  .mailmap | 6
-rw-r--r--  Documentation/admin-guide/index.rst | 1
-rw-r--r--  Documentation/admin-guide/kernel-parameters.rst | 1
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 6
-rw-r--r--  Documentation/admin-guide/reporting-issues.rst | 4
-rw-r--r--  Documentation/bpf/bpf_devel_QA.rst | 20
-rw-r--r--  Documentation/bpf/clang-notes.rst | 6
-rw-r--r--  Documentation/bpf/cpumasks.rst | 30
-rw-r--r--  Documentation/bpf/instruction-set.rst | 129
-rw-r--r--  Documentation/bpf/kfuncs.rst | 124
-rw-r--r--  Documentation/bpf/libbpf/index.rst | 25
-rw-r--r--  Documentation/bpf/libbpf/libbpf_overview.rst | 228
-rw-r--r--  Documentation/bpf/linux-notes.rst | 30
-rw-r--r--  Documentation/devicetree/bindings/interrupt-controller/loongson,cpu-interrupt-controller.yaml (renamed from Documentation/devicetree/bindings/interrupt-controller/loongarch,cpu-interrupt-controller.yaml) | 6
-rw-r--r--  Documentation/devicetree/bindings/net/dsa/qca8k.yaml | 22
-rw-r--r--  Documentation/devicetree/bindings/net/ethernet-controller.yaml | 37
-rw-r--r--  Documentation/devicetree/bindings/net/ethernet-phy.yaml | 43
-rw-r--r--  Documentation/devicetree/bindings/net/ethernet-switch.yaml | 2
-rw-r--r--  Documentation/devicetree/bindings/net/qcom,ethqos.txt | 66
-rw-r--r--  Documentation/devicetree/bindings/net/qcom,ethqos.yaml | 111
-rw-r--r--  Documentation/devicetree/bindings/net/snps,dwmac.yaml | 26
-rw-r--r--  Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml | 144
-rw-r--r--  Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml | 2
-rw-r--r--  Documentation/devicetree/bindings/serial/renesas,scif.yaml | 4
-rw-r--r--  Documentation/hwmon/hwmon-kernel-api.rst | 6
-rw-r--r--  Documentation/kbuild/llvm.rst | 4
-rw-r--r--  Documentation/leds/well-known-leds.txt | 30
-rw-r--r--  Documentation/mm/zsmalloc.rst | 135
-rw-r--r--  Documentation/netlink/specs/handshake.yaml | 124
-rw-r--r--  Documentation/networking/device_drivers/ethernet/amd/pds_core.rst | 139
-rw-r--r--  Documentation/networking/device_drivers/ethernet/index.rst | 1
-rw-r--r--  Documentation/networking/devlink/ice.rst | 15
-rw-r--r--  Documentation/networking/driver.rst | 156
-rw-r--r--  Documentation/networking/index.rst | 1
-rw-r--r--  Documentation/networking/ip-sysctl.rst | 9
-rw-r--r--  Documentation/networking/page_pool.rst | 1
-rw-r--r--  Documentation/networking/tls-handshake.rst | 217
-rw-r--r--  Documentation/process/howto.rst | 2
-rw-r--r--  Documentation/process/index.rst | 9
-rw-r--r--  Documentation/process/researcher-guidelines.rst | 2
-rw-r--r--  Documentation/process/security-bugs.rst (renamed from Documentation/admin-guide/security-bugs.rst) | 0
-rw-r--r--  Documentation/process/stable-kernel-rules.rst | 2
-rw-r--r--  Documentation/process/submitting-patches.rst | 2
-rw-r--r--  Documentation/riscv/vm-layout.rst | 6
-rw-r--r--  Documentation/rust/arch-support.rst | 2
-rw-r--r--  Documentation/sound/hd-audio/models.rst | 2
-rw-r--r--  Documentation/translations/it_IT/admin-guide/security-bugs.rst | 2
-rw-r--r--  Documentation/translations/it_IT/process/submitting-patches.rst | 2
-rw-r--r--  Documentation/translations/ja_JP/howto.rst | 2
-rw-r--r--  Documentation/translations/ko_KR/howto.rst | 2
-rw-r--r--  Documentation/translations/sp_SP/howto.rst | 2
-rw-r--r--  Documentation/translations/sp_SP/process/submitting-patches.rst | 2
-rw-r--r--  Documentation/translations/zh_CN/admin-guide/security-bugs.rst | 2
-rw-r--r--  Documentation/translations/zh_CN/process/howto.rst | 2
-rw-r--r--  Documentation/translations/zh_TW/admin-guide/security-bugs.rst | 2
-rw-r--r--  Documentation/translations/zh_TW/process/howto.rst | 2
-rw-r--r--  Documentation/virt/kvm/api.rst | 4
-rw-r--r--  MAINTAINERS | 55
-rw-r--r--  Makefile | 2
-rw-r--r--  arch/arm/boot/dts/armada-370-rd.dts | 12
-rw-r--r--  arch/arm/boot/dts/imx6ull-colibri.dtsi | 12
-rw-r--r--  arch/arm/boot/dts/imx7d-remarkable2.dts | 2
-rw-r--r--  arch/arm/boot/dts/qcom-ipq8064-rb3011.dts | 124
-rw-r--r--  arch/arm/boot/dts/rk3288.dtsi | 2
-rw-r--r--  arch/arm/configs/imx_v6_v7_defconfig | 2
-rw-r--r--  arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi | 15
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi | 4
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mp-verdin-dev.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi | 4
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mp.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/qcom/ipq8074-hk01.dts | 4
-rw-r--r--  arch/arm64/boot/dts/qcom/ipq8074-hk10.dtsi | 4
-rw-r--r--  arch/arm64/boot/dts/qcom/qrb5165-rb5.dts | 4
-rw-r--r--  arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/qcom/sc8280xp-pmics.dtsi | 5
-rw-r--r--  arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts | 4
-rw-r--r--  arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts | 4
-rw-r--r--  arch/arm64/boot/dts/qcom/sm8250-mtp.dts | 4
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3326-anbernic-rg351m.dts | 2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3326-odroid-go.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3326-odroid-go2-v11.dts | 2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts | 2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3368-evb.dtsi | 1
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi | 1
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi | 1
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts | 18
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dtsi | 12
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3399.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353x.dtsi | 6
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg503.dts | 6
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 9
-rw-r--r--  arch/arm64/kernel/compat_alignment.c | 32
-rw-r--r--  arch/arm64/kvm/arm.c | 27
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 5
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/sys_regs.c | 7
-rw-r--r--  arch/arm64/kvm/pmu-emul.c | 1
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 1
-rw-r--r--  arch/arm64/net/bpf_jit.h | 4
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 3
-rw-r--r--  arch/loongarch/Kconfig | 16
-rw-r--r--  arch/loongarch/include/asm/acpi.h | 3
-rw-r--r--  arch/loongarch/include/asm/addrspace.h | 4
-rw-r--r--  arch/loongarch/include/asm/bootinfo.h | 1
-rw-r--r--  arch/loongarch/include/asm/cpu-features.h | 1
-rw-r--r--  arch/loongarch/include/asm/cpu.h | 40
-rw-r--r--  arch/loongarch/include/asm/io.h | 4
-rw-r--r--  arch/loongarch/include/asm/loongarch.h | 6
-rw-r--r--  arch/loongarch/include/asm/module.lds.h | 8
-rw-r--r--  arch/loongarch/include/uapi/asm/ptrace.h | 3
-rw-r--r--  arch/loongarch/kernel/cpu-probe.c | 9
-rw-r--r--  arch/loongarch/kernel/proc.c | 1
-rw-r--r--  arch/loongarch/kernel/ptrace.c | 25
-rw-r--r--  arch/loongarch/kernel/setup.c | 25
-rw-r--r--  arch/loongarch/kernel/stacktrace.c | 2
-rw-r--r--  arch/loongarch/kernel/unwind.c | 1
-rw-r--r--  arch/loongarch/kernel/unwind_prologue.c | 4
-rw-r--r--  arch/loongarch/mm/init.c | 4
-rw-r--r--  arch/loongarch/net/bpf_jit.c | 4
-rw-r--r--  arch/loongarch/power/suspend_asm.S | 4
-rw-r--r--  arch/mips/bmips/dma.c | 5
-rw-r--r--  arch/mips/bmips/setup.c | 8
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush.h | 9
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-view.c | 6
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 6
-rw-r--r--  arch/powerpc/mm/numa.c | 1
-rw-r--r--  arch/powerpc/platforms/pseries/papr_scm.c | 7
-rw-r--r--  arch/powerpc/platforms/pseries/vas.c | 8
-rw-r--r--  arch/riscv/Kconfig | 12
-rw-r--r--  arch/riscv/Kconfig.erratas | 6
-rw-r--r--  arch/riscv/boot/dts/canaan/k210.dtsi | 1
-rw-r--r--  arch/riscv/include/asm/fixmap.h | 8
-rw-r--r--  arch/riscv/include/asm/hwcap.h | 50
-rw-r--r--  arch/riscv/include/asm/pgtable.h | 8
-rw-r--r--  arch/riscv/kernel/setup.c | 6
-rw-r--r--  arch/riscv/kernel/signal.c | 9
-rw-r--r--  arch/riscv/mm/init.c | 82
-rw-r--r--  arch/riscv/purgatory/Makefile | 7
-rw-r--r--  arch/s390/kvm/intercept.c | 32
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 1
-rw-r--r--  arch/s390/net/bpf_jit_comp.c | 11
-rw-r--r--  arch/x86/Makefile.um | 5
-rw-r--r--  arch/x86/include/asm/intel-family.h | 2
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 9
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c | 12
-rw-r--r--  arch/x86/kernel/x86_init.c | 4
-rw-r--r--  arch/x86/kvm/ioapic.c | 36
-rw-r--r--  arch/x86/kvm/kvm_onhyperv.h | 5
-rw-r--r--  arch/x86/kvm/svm/svm.c | 37
-rw-r--r--  arch/x86/kvm/svm/svm_onhyperv.h | 15
-rw-r--r--  arch/x86/kvm/vmx/nested.c | 7
-rw-r--r--  arch/x86/kvm/x86.c | 14
-rw-r--r--  arch/x86/pci/fixup.c | 21
-rw-r--r--  arch/x86/purgatory/Makefile | 3
-rw-r--r--  block/blk-mq.c | 4
-rw-r--r--  block/genhd.c | 8
-rw-r--r--  drivers/accel/habanalabs/common/hwmon.c | 2
-rw-r--r--  drivers/accel/ivpu/ivpu_job.c | 18
-rw-r--r--  drivers/accel/ivpu/ivpu_pm.c | 26
-rw-r--r--  drivers/acpi/acpi_video.c | 15
-rw-r--r--  drivers/acpi/bus.c | 83
-rw-r--r--  drivers/acpi/resource.c | 7
-rw-r--r--  drivers/acpi/video_detect.c | 58
-rw-r--r--  drivers/acpi/x86/utils.c | 1
-rw-r--r--  drivers/base/cacheinfo.c | 16
-rw-r--r--  drivers/block/loop.c | 18
-rw-r--r--  drivers/block/ublk_drv.c | 26
-rw-r--r--  drivers/block/virtio_blk.c | 269
-rw-r--r--  drivers/bluetooth/btbcm.c | 2
-rw-r--r--  drivers/bluetooth/btsdio.c | 1
-rw-r--r--  drivers/bus/imx-weim.c | 6
-rw-r--r--  drivers/clk/clk-renesas-pcie.c | 3
-rw-r--r--  drivers/clk/imx/clk-imx6ul.c | 10
-rw-r--r--  drivers/clk/sprd/common.c | 9
-rw-r--r--  drivers/counter/104-quad-8.c | 31
-rw-r--r--  drivers/cpufreq/amd-pstate.c | 18
-rw-r--r--  drivers/cxl/core/hdm.c | 126
-rw-r--r--  drivers/cxl/core/pci.c | 38
-rw-r--r--  drivers/cxl/core/pmem.c | 6
-rw-r--r--  drivers/cxl/core/port.c | 38
-rw-r--r--  drivers/cxl/core/region.c | 33
-rw-r--r--  drivers/cxl/cxl.h | 8
-rw-r--r--  drivers/cxl/cxlpci.h | 14
-rw-r--r--  drivers/cxl/port.c | 4
-rw-r--r--  drivers/dma/apple-admac.c | 20
-rw-r--r--  drivers/dma/dmaengine.c | 2
-rw-r--r--  drivers/dma/xilinx/xdma.c | 2
-rw-r--r--  drivers/firmware/psci/psci.c | 3
-rw-r--r--  drivers/gpio/Kconfig | 2
-rw-r--r--  drivers/gpio/gpio-davinci.c | 5
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 57
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 6
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 4
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 87
-rw-r--r--  drivers/gpu/drm/armada/armada_drv.c | 1
-rw-r--r--  drivers/gpu/drm/i915/display/icl_dsi.c | 20
-rw-r--r--  drivers/gpu/drm/i915/display/intel_dp_mst.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 5
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 12
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc.c | 7
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc.h | 7
-rw-r--r--  drivers/gpu/drm/i915/i915_perf.c | 6
-rw-r--r--  drivers/gpu/drm/nouveau/dispnv50/disp.c | 32
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_dp.c | 8
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c | 1
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c | 1
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c | 1
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c | 1
-rw-r--r--  drivers/gpu/drm/panfrost/panfrost_mmu.c | 1
-rw-r--r--  drivers/gpu/drm/scheduler/sched_entity.c | 11
-rw-r--r--  drivers/hid/Kconfig | 2
-rw-r--r--  drivers/hid/bpf/hid_bpf_dispatch.c | 3
-rw-r--r--  drivers/hid/hid-ids.h | 4
-rw-r--r--  drivers/hid/hid-input.c | 6
-rw-r--r--  drivers/hid/hid-sensor-custom.c | 2
-rw-r--r--  drivers/hid/hid-topre.c | 2
-rw-r--r--  drivers/hid/intel-ish-hid/ishtp/bus.c | 4
-rw-r--r--  drivers/hv/connection.c | 4
-rw-r--r--  drivers/hwmon/hwmon.c | 4
-rw-r--r--  drivers/hwtracing/coresight/coresight-etm4x-core.c | 24
-rw-r--r--  drivers/hwtracing/coresight/coresight-etm4x.h | 20
-rw-r--r--  drivers/i2c/busses/i2c-mchp-pci1xxxx.c | 60
-rw-r--r--  drivers/i2c/busses/i2c-ocores.c | 35
-rw-r--r--  drivers/i2c/i2c-core-of.c | 5
-rw-r--r--  drivers/iio/accel/kionix-kx022a.c | 2
-rw-r--r--  drivers/iio/adc/ad7791.c | 2
-rw-r--r--  drivers/iio/adc/ltc2497.c | 6
-rw-r--r--  drivers/iio/adc/max11410.c | 22
-rw-r--r--  drivers/iio/adc/palmas_gpadc.c | 2
-rw-r--r--  drivers/iio/adc/qcom-spmi-adc5.c | 10
-rw-r--r--  drivers/iio/adc/ti-ads7950.c | 1
-rw-r--r--  drivers/iio/dac/cio-dac.c | 4
-rw-r--r--  drivers/iio/imu/Kconfig | 1
-rw-r--r--  drivers/iio/industrialio-buffer.c | 21
-rw-r--r--  drivers/iio/light/cm32181.c | 12
-rw-r--r--  drivers/iio/light/vcnl4000.c | 3
-rw-r--r--  drivers/infiniband/core/cma.c | 60
-rw-r--r--  drivers/infiniband/core/verbs.c | 2
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cq.c | 2
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_hw.h | 4
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_main.c | 2
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_qp.c | 4
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_verbs.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/cm.c | 16
-rw-r--r--  drivers/infiniband/hw/irdma/cm.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/hw.c | 3
-rw-r--r--  drivers/infiniband/hw/irdma/utils.c | 5
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 4
-rw-r--r--  drivers/input/joystick/xpad.c | 7
-rw-r--r--  drivers/input/mouse/alps.c | 16
-rw-r--r--  drivers/input/mouse/focaltech.c | 8
-rw-r--r--  drivers/input/serio/i8042-acpipnpio.h | 36
-rw-r--r--  drivers/input/touchscreen/goodix.c | 14
-rw-r--r--  drivers/iommu/exynos-iommu.c | 17
-rw-r--r--  drivers/iommu/intel/dmar.c | 3
-rw-r--r--  drivers/iommu/intel/iommu.h | 2
-rw-r--r--  drivers/iommu/intel/irq_remapping.c | 6
-rw-r--r--  drivers/iommu/intel/perfmon.c | 68
-rw-r--r--  drivers/iommu/iommufd/pages.c | 16
-rw-r--r--  drivers/md/md.c | 3
-rw-r--r--  drivers/media/i2c/imx290.c | 6
-rw-r--r--  drivers/media/platform/qcom/venus/firmware.c | 4
-rw-r--r--  drivers/memstick/core/memstick.c | 5
-rw-r--r--  drivers/mmc/host/sdhci_am654.c | 2
-rw-r--r--  drivers/mtd/mtdblock.c | 12
-rw-r--r--  drivers/mtd/nand/raw/meson_nand.c | 6
-rw-r--r--  drivers/mtd/nand/raw/stm32_fmc2_nand.c | 3
-rw-r--r--  drivers/mtd/spi-nor/core.c | 14
-rw-r--r--  drivers/mtd/spi-nor/core.h | 2
-rw-r--r--  drivers/mtd/spi-nor/debugfs.c | 11
-rw-r--r--  drivers/mtd/ubi/build.c | 21
-rw-r--r--  drivers/mtd/ubi/wl.c | 4
-rw-r--r--  drivers/net/bonding/bond_main.c | 42
-rw-r--r--  drivers/net/dsa/microchip/ksz8.h | 8
-rw-r--r--  drivers/net/dsa/microchip/ksz8795.c | 181
-rw-r--r--  drivers/net/dsa/microchip/ksz_common.c | 2
-rw-r--r--  drivers/net/dsa/mt7530-mdio.c | 16
-rw-r--r--  drivers/net/dsa/mt7530.c | 6
-rw-r--r--  drivers/net/dsa/mt7530.h | 4
-rw-r--r--  drivers/net/dsa/mv88e6xxx/chip.c | 6
-rw-r--r--  drivers/net/dsa/mv88e6xxx/global2.c | 20
-rw-r--r--  drivers/net/dsa/mv88e6xxx/global2.h | 1
-rw-r--r--  drivers/net/dsa/ocelot/felix.c | 5
-rw-r--r--  drivers/net/dsa/ocelot/felix_vsc9959.c | 43
-rw-r--r--  drivers/net/dsa/qca/Kconfig | 8
-rw-r--r--  drivers/net/dsa/qca/Makefile | 3
-rw-r--r--  drivers/net/dsa/qca/qca8k-8xxx.c | 20
-rw-r--r--  drivers/net/dsa/qca/qca8k-leds.c | 277
-rw-r--r--  drivers/net/dsa/qca/qca8k.h | 74
-rw-r--r--  drivers/net/dsa/qca/qca8k_leds.h | 16
-rw-r--r--  drivers/net/ethernet/amd/Kconfig | 12
-rw-r--r--  drivers/net/ethernet/amd/Makefile | 1
-rw-r--r--  drivers/net/ethernet/amd/pds_core/Makefile | 14
-rw-r--r--  drivers/net/ethernet/amd/pds_core/adminq.c | 290
-rw-r--r--  drivers/net/ethernet/amd/pds_core/auxbus.c | 264
-rw-r--r--  drivers/net/ethernet/amd/pds_core/core.c | 597
-rw-r--r--  drivers/net/ethernet/amd/pds_core/core.h | 312
-rw-r--r--  drivers/net/ethernet/amd/pds_core/debugfs.c | 170
-rw-r--r--  drivers/net/ethernet/amd/pds_core/dev.c | 351
-rw-r--r--  drivers/net/ethernet/amd/pds_core/devlink.c | 183
-rw-r--r--  drivers/net/ethernet/amd/pds_core/fw.c | 194
-rw-r--r--  drivers/net/ethernet/amd/pds_core/main.c | 475
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c | 2
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 53
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.h | 9
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 16
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 19
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 29
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h | 6
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 6
-rw-r--r--  drivers/net/ethernet/cadence/macb.h | 6
-rw-r--r--  drivers/net/ethernet/cadence/macb_main.c | 21
-rw-r--r--  drivers/net/ethernet/cadence/macb_ptp.c | 4
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 2
-rw-r--r--  drivers/net/ethernet/freescale/Kconfig | 1
-rw-r--r--  drivers/net/ethernet/freescale/enetc/Kconfig | 1
-rw-r--r--  drivers/net/ethernet/freescale/enetc/enetc.c | 20
-rw-r--r--  drivers/net/ethernet/freescale/enetc/enetc.h | 4
-rw-r--r--  drivers/net/ethernet/freescale/enetc/enetc_ethtool.c | 110
-rw-r--r--  drivers/net/ethernet/freescale/enetc/enetc_hw.h | 7
-rw-r--r--  drivers/net/ethernet/freescale/fec.h | 5
-rw-r--r--  drivers/net/ethernet/freescale/fec_main.c | 32
-rw-r--r--  drivers/net/ethernet/google/gve/gve.h | 2
-rw-r--r--  drivers/net/ethernet/google/gve/gve_tx.c | 16
-rw-r--r--  drivers/net/ethernet/intel/e1000e/netdev.c | 51
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_main.c | 9
-rw-r--r--  drivers/net/ethernet/intel/iavf/iavf.h | 20
-rw-r--r--  drivers/net/ethernet/intel/iavf/iavf_main.c | 44
-rw-r--r--  drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 68
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 23
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 42
-rw-r--r--  drivers/net/ethernet/marvell/Kconfig | 1
-rw-r--r--  drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4
-rw-r--r--  drivers/net/ethernet/mediatek/mtk_eth_soc.h | 3
-rw-r--r--  drivers/net/ethernet/mediatek/mtk_ppe.c | 14
-rw-r--r--  drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c | 2
-rw-r--r--  drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 40
-rw-r--r--  drivers/net/ethernet/mediatek/mtk_wed.c | 101
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_rx.c | 22
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_tx.c | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/Makefile | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/dev.c | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/ecpf.c | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en.h | 46
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 32
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 22
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 16
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c | 20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c | 66
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c | 20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 169
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 31
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h | 11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 13
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 368
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 55
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 522
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h | 60
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c | 286
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c | 204
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 42
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 93
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 215
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 279
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c | 5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 287
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h | 17
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c | 1126
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h | 181
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h | 35
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 14
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 48
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 92
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c | 273
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 60
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c | 30
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 58
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c | 41
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c | 241
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 270
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c | 57
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c | 120
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 73
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 2
-rw-r--r--  drivers/net/ethernet/micrel/ksz884x.c | 294
-rw-r--r--  drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 2
-rw-r--r--  drivers/net/ethernet/microsoft/mana/mana_bpf.c | 22
-rw-r--r--  drivers/net/ethernet/microsoft/mana/mana_en.c | 383
-rw-r--r--  drivers/net/ethernet/mscc/ocelot.c | 60
-rw-r--r--  drivers/net/ethernet/mscc/ocelot.h | 15
-rw-r--r--  drivers/net/ethernet/mscc/ocelot_io.c | 50
-rw-r--r--  drivers/net/ethernet/mscc/ocelot_mm.c | 107
-rw-r--r--  drivers/net/ethernet/mscc/ocelot_stats.c | 42
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_hwmon.c | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_port.c | 1
-rw-r--r--  drivers/net/ethernet/pensando/ionic/ionic_phc.c | 5
-rw-r--r--  drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c | 8
-rw-r--r--  drivers/net/ethernet/qualcomm/Kconfig | 1
-rw-r--r--  drivers/net/ethernet/realtek/r8169_main.c | 51
-rw-r--r--  drivers/net/ethernet/sfc/efx.c | 1
-rw-r--r--  drivers/net/ethernet/sfc/efx_common.c | 2
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/Kconfig | 12
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/Makefile | 1
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/chain_mode.c | 10
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/common.h | 2
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c | 14
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 1
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 8
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 178
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 197
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c | 171
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c | 60
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 36
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 3
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c | 19
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c | 14
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 101
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 50
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 8
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c | 201
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h | 92
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c | 105
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h | 22
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c | 18
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 9
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 6
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 71
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 11
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/hwif.h | 176
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 8
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 10
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac.h | 7
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 9
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 119
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 3
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 3
-rw-r--r--  drivers/net/ethernet/sun/niu.c | 2
-rw-r--r--  drivers/net/ethernet/sun/sunhme.c | 7
-rw-r--r--  drivers/net/ethernet/ti/am65-cpsw-nuss.c | 48
-rw-r--r--  drivers/net/ethernet/ti/cpsw.c | 2
-rw-r--r--  drivers/net/ethernet/ti/cpsw_new.c | 3
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_lib.c | 5
-rw-r--r--  drivers/net/fddi/skfp/rmt.c | 6
-rw-r--r--  drivers/net/hamradio/Kconfig | 2
-rw-r--r--  drivers/net/macsec.c | 14
-rw-r--r--  drivers/net/mdio/Kconfig | 3
-rw-r--r--  drivers/net/phy/Kconfig | 8
-rw-r--r--  drivers/net/phy/aquantia_hwmon.c | 2
-rw-r--r--  drivers/net/phy/bcm54140.c | 2
-rw-r--r--  drivers/net/phy/marvell.c | 83
-rw-r--r--  drivers/net/phy/marvell10g.c | 2
-rw-r--r--  drivers/net/phy/micrel.c | 6
-rw-r--r--  drivers/net/phy/mxl-gpy.c | 2
-rw-r--r--  drivers/net/phy/nxp-c45-tja11xx.c | 14
-rw-r--r--  drivers/net/phy/nxp-tja11xx.c | 2
-rw-r--r--  drivers/net/phy/phy_device.c | 104
-rw-r--r--  drivers/net/phy/phylink.c | 19
-rw-r--r--  drivers/net/phy/sfp.c | 25
-rw-r--r--  drivers/net/thunderbolt/main.c | 25
-rw-r--r--  drivers/net/usb/r8152.c | 2
-rw-r--r--  drivers/net/veth.c | 27
-rw-r--r--  drivers/net/virtio_net.c | 8
-rw-r--r--  drivers/net/vmxnet3/vmxnet3_drv.c | 2
-rw-r--r--  drivers/net/wireless/ath/ath11k/mhi.c | 2
-rw-r--r--  drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 36
-rw-r--r--  drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h | 2
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7603/main.c | 10
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 70
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7615/main.c | 15
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h | 6
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt76x02_util.c | 18
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7915/main.c | 13
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7921/init.c | 7
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7921/main.c | 13
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 2
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7996/main.c | 13
-rw-r--r--  drivers/net/wwan/iosm/iosm_ipc_pcie.c | 3
-rw-r--r--  drivers/net/wwan/rpmsg_wwan_ctrl.c | 1
-rw-r--r--  drivers/net/wwan/t7xx/Makefile | 2
-rw-r--r--  drivers/net/wwan/wwan_core.c | 3
-rw-r--r--  drivers/nvme/host/core.c | 6
-rw-r--r--  drivers/nvme/host/pci.c | 3
-rw-r--r--  drivers/nvme/host/tcp.c | 46
-rw-r--r--  drivers/of/dynamic.c | 1
-rw-r--r--  drivers/of/platform.c | 5
-rw-r--r--  drivers/pci/controller/dwc/pcie-designware.c | 10
-rw-r--r--  drivers/pci/doe.c | 30
-rw-r--r--  drivers/pci/remove.c | 5
-rw-r--r--  drivers/perf/amlogic/meson_g12_ddr_pmu.c | 34
-rw-r--r--  drivers/pinctrl/mediatek/Kconfig | 44
-rw-r--r--  drivers/pinctrl/pinctrl-at91-pio4.c | 1
-rw-r--r--  drivers/pinctrl/pinctrl-ocelot.c | 2
-rw-r--r--  drivers/pinctrl/stm32/pinctrl-stm32.c | 2
-rw-r--r--  drivers/platform/x86/asus-nb-wmi.c | 3
-rw-r--r--  drivers/platform/x86/gigabyte-wmi.c | 2
-rw-r--r--  drivers/platform/x86/ideapad-laptop.c | 23
-rw-r--r--  drivers/platform/x86/intel/pmc/core.c | 13
-rw-r--r--  drivers/platform/x86/think-lmi.c | 20
-rw-r--r--  drivers/platform/x86/thinkpad_acpi.c | 8
-rw-r--r--  drivers/pwm/core.c | 12
-rw-r--r--  drivers/pwm/pwm-cros-ec.c | 1
-rw-r--r--  drivers/pwm/pwm-hibvt.c | 1
-rw-r--r--  drivers/pwm/pwm-iqs620a.c | 1
-rw-r--r--  drivers/pwm/pwm-meson.c | 8
-rw-r--r--  drivers/pwm/pwm-sprd.c | 1
-rw-r--r--  drivers/regulator/fan53555.c | 13
-rw-r--r--  drivers/regulator/fixed.c | 2
-rw-r--r--  drivers/regulator/sm5703-regulator.c | 2
-rw-r--r--  drivers/scsi/iscsi_tcp.c | 3
-rw-r--r--  drivers/scsi/megaraid/megaraid_sas_base.c | 2
-rw-r--r--  drivers/scsi/megaraid/megaraid_sas_fusion.c | 4
-rw-r--r--  drivers/scsi/mpi3mr/mpi3mr_fw.c | 2
-rw-r--r--  drivers/scsi/mpt3sas/mpt3sas_base.c | 5
-rw-r--r--  drivers/scsi/qla2xxx/qla_os.c | 1
-rw-r--r--  drivers/scsi/scsi.c | 11
-rw-r--r--  drivers/scsi/ses.c | 20
-rw-r--r--  drivers/spi/spi-rockchip-sfc.c | 2
-rw-r--r--  drivers/spi/spi.c | 5
-rw-r--r--  drivers/tee/optee/call.c | 2
-rw-r--r--  drivers/tee/tee_shm.c | 2
-rw-r--r--  drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c | 1
-rw-r--r--  drivers/thermal/intel/intel_powerclamp.c | 9
-rw-r--r--  drivers/thermal/intel/therm_throt.c | 73
-rw-r--r--  drivers/thermal/thermal_sysfs.c | 6
-rw-r--r--  drivers/tty/serial/8250/8250_port.c | 11
-rw-r--r--  drivers/tty/serial/fsl_lpuart.c | 10
-rw-r--r--  drivers/tty/serial/sh-sci.c | 10
-rw-r--r--  drivers/ufs/core/ufshcd.c | 47
-rw-r--r--  drivers/usb/cdns3/cdnsp-ep0.c | 3
-rw-r--r--  drivers/usb/dwc3/dwc3-pci.c | 4
-rw-r--r--  drivers/usb/gadget/function/f_fs.c | 2
-rw-r--r--  drivers/usb/gadget/legacy/inode.c | 2
-rw-r--r--  drivers/usb/host/xhci-pci.c | 7
-rw-r--r--  drivers/usb/host/xhci-tegra.c | 6
-rw-r--r--  drivers/usb/host/xhci.c | 7
-rw-r--r--  drivers/usb/serial/cp210x.c | 1
-rw-r--r--  drivers/usb/serial/option.c | 10
-rw-r--r--  drivers/usb/typec/altmodes/displayport.c | 6
-rw-r--r--  drivers/vdpa/mlx5/net/mlx5_vnet.c | 8
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 13
-rw-r--r--  drivers/vhost/scsi.c | 39
-rw-r--r--  drivers/video/fbdev/core/fbcon.c | 18
-rw-r--r--  drivers/video/fbdev/core/fbmem.c | 2
-rw-r--r--  fs/9p/xattr.c | 8
-rw-r--r--  fs/btrfs/backref.c | 85
-rw-r--r--  fs/btrfs/disk-io.c | 14
-rw-r--r--  fs/btrfs/ioctl.c | 2
-rw-r--r--  fs/btrfs/qgroup.c | 11
-rw-r--r--  fs/btrfs/super.c | 4
-rw-r--r--  fs/btrfs/transaction.c | 15
-rw-r--r--  fs/btrfs/volumes.c | 20
-rw-r--r--  fs/cifs/cifsfs.h | 5
-rw-r--r--  fs/cifs/cifssmb.c | 30
-rw-r--r--  fs/cifs/fs_context.c | 13
-rw-r--r--  fs/cifs/fs_context.h | 3
-rw-r--r--  fs/cifs/misc.c | 2
-rw-r--r--  fs/cifs/smb2pdu.c | 42
-rw-r--r--  fs/dax.c | 52
-rw-r--r--  fs/fs-writeback.c | 17
-rw-r--r--  fs/ksmbd/connection.c | 17
-rw-r--r--  fs/ksmbd/ksmbd_work.h | 2
-rw-r--r--  fs/ksmbd/server.c | 5
-rw-r--r--  fs/ksmbd/smb2pdu.c | 59
-rw-r--r--  fs/ksmbd/smb2pdu.h | 1
-rw-r--r--  fs/ksmbd/smb_common.c | 138
-rw-r--r--  fs/ksmbd/smb_common.h | 2
-rw-r--r--  fs/ksmbd/unicode.c | 18
-rw-r--r--  fs/namespace.c | 2
-rw-r--r--  fs/netfs/iterator.c | 2
-rw-r--r--  fs/nfs/Kconfig | 1
-rw-r--r--  fs/nfs/nfs4proc.c | 5
-rw-r--r--  fs/nfsd/blocklayout.c | 1
-rw-r--r--  fs/nfsd/nfs4callback.c | 4
-rw-r--r--  fs/nfsd/nfs4xdr.c | 15
-rw-r--r--  fs/nilfs2/btree.c | 1
-rw-r--r--  fs/nilfs2/direct.c | 1
-rw-r--r--  fs/nilfs2/segment.c | 23
-rw-r--r--  fs/nilfs2/super.c | 2
-rw-r--r--  fs/nilfs2/the_nilfs.c | 12
-rw-r--r--  fs/userfaultfd.c | 6
-rw-r--r--  include/acpi/video.h | 15
-rw-r--r--  include/asm-generic/atomic.h | 4
-rw-r--r--  include/asm-generic/cmpxchg-local.h | 12
-rw-r--r--  include/asm-generic/cmpxchg.h | 6
-rw-r--r--  include/asm-generic/io.h | 16
-rw-r--r--  include/linux/bpf.h | 54
-rw-r--r--  include/linux/bpf_local_storage.h | 19
-rw-r--r--  include/linux/bpf_mem_alloc.h | 2
-rw-r--r--  include/linux/bpf_verifier.h | 72
-rw-r--r--  include/linux/btf.h | 8
-rw-r--r--  include/linux/cpuhotplug.h | 1
-rw-r--r--  include/linux/ethtool.h | 1
-rw-r--r--  include/linux/ethtool_netlink.h | 6
-rw-r--r--  include/linux/filter.h | 9
-rw-r--r--  include/linux/ftrace.h | 2
-rw-r--r--  include/linux/hwmon.h | 2
-rw-r--r--  include/linux/if_bridge.h | 1
-rw-r--r--  include/linux/kmsan.h | 39
-rw-r--r--  include/linux/kvm_host.h | 11
-rw-r--r--  include/linux/kvm_irqfd.h | 2
-rw-r--r--  include/linux/leds.h | 18
-rw-r--r--  include/linux/mlx5/device.h | 16
-rw-r--r--  include/linux/mlx5/driver.h | 6
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h | 43
-rw-r--r--  include/linux/mlx5/qp.h | 10
-rw-r--r--  include/linux/mm_types.h | 3
-rw-r--r--  include/linux/module.h | 127
-rw-r--r--  include/linux/netdevice.h | 17
-rw-r--r--  include/linux/pci-doe.h | 8
-rw-r--r--  include/linux/pci.h | 2
-rw-r--r--  include/linux/pds/pds_adminq.h | 647
-rw-r--r--  include/linux/pds/pds_auxbus.h | 20
-rw-r--r--  include/linux/pds/pds_common.h | 68
-rw-r--r--  include/linux/pds/pds_core_if.h | 571
-rw-r--r--  include/linux/pds/pds_intr.h | 163
-rw-r--r--  include/linux/phy.h | 41
-rw-r--r--  include/linux/phylink.h | 1
-rw-r--r--  include/linux/rtnetlink.h | 3
-rw-r--r--  include/linux/sched.h | 7
-rw-r--r--  include/linux/sctp.h | 18
-rw-r--r--  include/linux/skbuff.h | 105
-rw-r--r--  include/linux/soc/mediatek/mtk_wed.h | 6
-rw-r--r--  include/linux/stmmac.h | 19
-rw-r--r--  include/net/bluetooth/hci_core.h | 1
-rw-r--r--  include/net/bonding.h | 8
-rw-r--r--  include/net/dropreason-core.h | 370
-rw-r--r--  include/net/dropreason.h | 374
-rw-r--r--  include/net/dsa_stubs.h | 48
-rw-r--r--  include/net/flow_dissector.h | 38
-rw-r--r--  include/net/fou.h | 2
-rw-r--r--  include/net/handshake.h | 43
-rw-r--r--  include/net/inet_frag.h | 2
-rw-r--r--  include/net/ip_tunnels.h | 27
-rw-r--r--  include/net/mana/gdma.h | 4
-rw-r--r--  include/net/mana/mana.h | 27
-rw-r--r--  include/net/netdev_queues.h | 173
-rw-r--r--  include/net/netfilter/nf_tables.h | 4
-rw-r--r--  include/net/netns/ipv6.h | 1
-rw-r--r--  include/net/page_pool.h | 8
-rw-r--r--  include/net/pkt_sched.h | 3
-rw-r--r--  include/net/raw.h | 4
-rw-r--r--  include/net/sctp/sctp.h | 12
-rw-r--r--  include/net/sctp/structs.h | 3
-rw-r--r--  include/net/sock.h | 2
-rw-r--r--  include/net/tcp.h | 3
-rw-r--r--  include/net/xdp.h | 76
-rw-r--r--  include/net/xsk_buff_pool.h | 9
-rw-r--r--  include/soc/mscc/ocelot.h | 31
-rw-r--r--  include/trace/events/handshake.h | 159
-rw-r--r--  include/trace/stages/stage5_get_offsets.h | 21
-rw-r--r--  include/uapi/linux/bpf.h | 61
-rw-r--r--  include/uapi/linux/handshake.h | 73
-rw-r--r--  include/uapi/linux/if_bridge.h | 1
-rw-r--r--  include/uapi/linux/if_link.h | 1
-rw-r--r--  include/uapi/linux/if_packet.h | 1
-rw-r--r--  include/uapi/linux/pkt_sched.h | 17
-rw-r--r--  include/uapi/linux/virtio_blk.h | 18
-rw-r--r--  include/ufs/ufshcd.h | 1
-rw-r--r--  init/initramfs.c | 11
-rw-r--r--  io_uring/alloc_cache.h | 1
-rw-r--r--  io_uring/io_uring.c | 4
-rw-r--r--  io_uring/kbuf.c | 7
-rw-r--r--  io_uring/poll.c | 1
-rw-r--r--  io_uring/rsrc.h | 12
-rw-r--r--  kernel/bpf/Makefile | 3
-rw-r--r--  kernel/bpf/arraymap.c | 12
-rw-r--r--  kernel/bpf/bloom_filter.c | 29
-rw-r--r--  kernel/bpf/bpf_cgrp_storage.c | 23
-rw-r--r--  kernel/bpf/bpf_inode_storage.c | 22
-rw-r--r--  kernel/bpf/bpf_iter.c | 70
-rw-r--r--  kernel/bpf/bpf_local_storage.c | 382
-rw-r--r--  kernel/bpf/bpf_struct_ops.c | 260
-rw-r--r--  kernel/bpf/bpf_task_storage.c | 27
-rw-r--r--  kernel/bpf/btf.c | 279
-rw-r--r--  kernel/bpf/cpumap.c | 8
-rw-r--r--  kernel/bpf/cpumask.c | 43
-rw-r--r--  kernel/bpf/devmap.c | 24
-rw-r--r--  kernel/bpf/hashtab.c | 38
-rw-r--r--  kernel/bpf/helpers.c | 139
-rw-r--r--  kernel/bpf/local_storage.c | 6
-rw-r--r--  kernel/bpf/log.c | 330
-rw-r--r--  kernel/bpf/lpm_trie.c | 6
-rw-r--r--  kernel/bpf/memalloc.c | 59
-rw-r--r--  kernel/bpf/queue_stack_maps.c | 22
-rw-r--r--  kernel/bpf/reuseport_array.c | 2
-rw-r--r--  kernel/bpf/ringbuf.c | 6
-rw-r--r--  kernel/bpf/stackmap.c | 6
-rw-r--r--  kernel/bpf/syscall.c | 112
-rw-r--r--  kernel/bpf/trampoline.c | 28
-rw-r--r--  kernel/bpf/verifier.c | 1111
-rw-r--r--  kernel/cgroup/cgroup.c | 14
-rw-r--r--  kernel/cgroup/cpuset.c | 178
-rw-r--r--  kernel/cgroup/legacy_freezer.c | 7
-rw-r--r--  kernel/cgroup/rstat.c | 4
-rw-r--r--  kernel/dma/swiotlb.c | 31
-rw-r--r--  kernel/events/core.c | 14
-rw-r--r--  kernel/fork.c | 4
-rw-r--r--  kernel/module/internal.h | 1
-rw-r--r--  kernel/module/kallsyms.c | 16
-rw-r--r--  kernel/rcu/tree.c | 27
-rw-r--r--  kernel/sched/fair.c | 10
-rw-r--r--  kernel/sys.c | 69
-rw-r--r--  kernel/trace/ftrace.c | 15
-rw-r--r--  kernel/trace/ring_buffer.c | 13
-rw-r--r--  kernel/trace/trace.c | 28
-rw-r--r--  kernel/trace/trace_events_synth.c | 19
-rw-r--r--  kernel/trace/trace_osnoise.c | 4
-rw-r--r--  lib/Kconfig.debug | 4
-rw-r--r--  lib/maple_tree.c | 353
-rw-r--r--  mm/backing-dev.c | 12
-rw-r--r--  mm/huge_memory.c | 19
-rw-r--r--  mm/hugetlb.c | 14
-rw-r--r--  mm/kfence/core.c | 32
-rw-r--r--  mm/khugepaged.c | 4
-rw-r--r--  mm/kmsan/hooks.c | 55
-rw-r--r--  mm/kmsan/shadow.c | 27
-rw-r--r--  mm/maccess.c | 16
-rw-r--r--  mm/memory.c | 16
-rw-r--r--  mm/mempolicy.c | 104
-rw-r--r--  mm/mmap.c | 51
-rw-r--r--  mm/mprotect.c | 2
-rw-r--r--  mm/page_alloc.c | 19
-rw-r--r--  mm/swap.c | 2
-rw-r--r--  mm/swapfile.c | 3
-rw-r--r--  mm/usercopy.c | 2
-rw-r--r--  mm/vmalloc.c | 18
-rw-r--r--  net/8021q/vlan_dev.c | 242
-rw-r--r--  net/9p/trans_xen.c | 4
-rw-r--r--  net/Kconfig | 20
-rw-r--r--  net/Makefile | 3
-rw-r--r--  net/bluetooth/hci_conn.c | 89
-rw-r--r--  net/bluetooth/hci_event.c | 18
-rw-r--r--  net/bluetooth/hci_sync.c | 13
-rw-r--r--  net/bluetooth/hidp/core.c | 2
-rw-r--r--  net/bluetooth/l2cap_core.c | 24
-rw-r--r--  net/bluetooth/sco.c | 85
-rw-r--r--  net/bpf/bpf_dummy_struct_ops.c | 14
-rw-r--r--  net/bpf/test_run.c | 34
-rw-r--r--  net/bridge/br_arp_nd_proxy.c | 33
-rw-r--r--  net/bridge/br_device.c | 8
-rw-r--r--  net/bridge/br_forward.c | 8
-rw-r--r--  net/bridge/br_if.c | 2
-rw-r--r--  net/bridge/br_input.c | 2
-rw-r--r--  net/bridge/br_netfilter_hooks.c | 17
-rw-r--r--  net/bridge/br_netlink.c | 8
-rw-r--r--  net/bridge/br_private.h | 5
-rw-r--r--  net/bridge/br_switchdev.c | 11
-rw-r--r--  net/bridge/br_vlan.c | 1
-rw-r--r--  net/bridge/br_vlan_options.c | 20
-rw-r--r--  net/can/isotp.c | 74
-rw-r--r--  net/can/j1939/transport.c | 5
-rw-r--r--  net/compat.c | 13
-rw-r--r--  net/core/bpf_sk_storage.c | 24
-rw-r--r--  net/core/dev.c | 14
-rw-r--r--  net/core/dev_ioctl.c | 12
-rw-r--r--  net/core/drop_monitor.c | 33
-rw-r--r--  net/core/filter.c | 29
-rw-r--r--  net/core/gro.c | 2
-rw-r--r--  net/core/netpoll.c | 19
-rw-r--r--  net/core/page_pool.c | 36
-rw-r--r--  net/core/rtnetlink.c | 13
-rw-r--r--  net/core/scm.c | 9
-rw-r--r--  net/core/skbuff.c | 121
-rw-r--r--  net/core/sock.c | 13
-rw-r--r--  net/core/sock_map.c | 8
-rw-r--r--  net/core/xdp.c | 29
-rw-r--r--  net/dsa/Makefile | 12
-rw-r--r--  net/dsa/dsa.c | 19
-rw-r--r--  net/dsa/master.c | 2
-rw-r--r--  net/dsa/master.h | 2
-rw-r--r--  net/dsa/slave.c | 11
-rw-r--r--  net/dsa/stubs.c | 10
-rw-r--r--  net/dsa/switch.c | 85
-rw-r--r--  net/dsa/trace.c | 39
-rw-r--r--  net/dsa/trace.h | 447
-rw-r--r--  net/ethtool/linkmodes.c | 7
-rw-r--r--  net/ethtool/mm.c | 33
-rw-r--r--  net/handshake/.kunitconfig | 11
-rw-r--r--  net/handshake/Makefile | 13
-rw-r--r--  net/handshake/genl.c | 58
-rw-r--r--  net/handshake/genl.h | 24
-rw-r--r--  net/handshake/handshake-test.c | 523
-rw-r--r--  net/handshake/handshake.h | 87
-rw-r--r--  net/handshake/netlink.c | 319
-rw-r--r--  net/handshake/request.c | 344
-rw-r--r--  net/handshake/tlshd.c | 418
-rw-r--r--  net/handshake/trace.c | 20
-rw-r--r--  net/ipv4/Makefile | 2
-rw-r--r--  net/ipv4/bpf_tcp_ca.c | 23
-rw-r--r--  net/ipv4/fou_bpf.c | 119
-rw-r--r--  net/ipv4/fou_core.c | 5
-rw-r--r--  net/ipv4/icmp.c | 5
-rw-r--r--  net/ipv4/ip_tunnel.c | 22
-rw-r--r--  net/ipv4/ipip.c | 1
-rw-r--r--  net/ipv4/ping.c | 8
-rw-r--r--  net/ipv4/raw.c | 36
-rw-r--r--  net/ipv4/raw_diag.c | 10
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c | 3
-rw-r--r--  net/ipv4/tcp.c | 4
-rw-r--r--  net/ipv4/tcp_cong.c | 66
-rw-r--r--  net/ipv4/tcp_ipv4.c | 4
-rw-r--r--  net/ipv6/af_inet6.c | 1
-rw-r--r--  net/ipv6/icmp.c | 15
-rw-r--r--  net/ipv6/ip6_output.c | 7
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 1
-rw-r--r--  net/ipv6/raw.c | 10
-rw-r--r--  net/ipv6/rpl.c | 3
-rw-r--r--  net/ipv6/sit.c | 2
-rw-r--r--  net/ipv6/udp.c | 8
-rw-r--r--  net/l2tp/l2tp_ip.c | 8
-rw-r--r--  net/l2tp/l2tp_ip6.c | 8
-rw-r--r--  net/mac80211/drop.h | 56
-rw-r--r--  net/mac80211/ieee80211_i.h | 8
-rw-r--r--  net/mac80211/main.c | 31
-rw-r--r--  net/mac80211/rx.c | 55
-rw-r--r--  net/mac80211/wpa.c | 24
-rw-r--r--  net/mptcp/fastopen.c | 11
-rw-r--r--  net/mptcp/options.c | 14
-rw-r--r--  net/mptcp/pm.c | 4
-rw-r--r--  net/mptcp/pm_netlink.c | 4
-rw-r--r--  net/mptcp/pm_userspace.c | 4
-rw-r--r--  net/mptcp/protocol.c | 173
-rw-r--r--  net/mptcp/protocol.h | 8
-rw-r--r--  net/mptcp/sockopt.c | 24
-rw-r--r--  net/mptcp/subflow.c | 100
-rw-r--r--  net/netfilter/nf_conntrack_bpf.c | 5
-rw-r--r--  net/netfilter/nf_tables_api.c | 69
-rw-r--r--  net/netfilter/nft_lookup.c | 36
-rw-r--r--  net/netlink/af_netlink.c | 15
-rw-r--r--  net/openvswitch/actions.c | 2
-rw-r--r--  net/packet/af_packet.c | 95
-rw-r--r--  net/packet/diag.c | 2
-rw-r--r--  net/packet/internal.h | 2
-rw-r--r--  net/qrtr/af_qrtr.c | 10
-rw-r--r--  net/qrtr/ns.c | 15
-rw-r--r--  net/sched/act_csum.c | 3
-rw-r--r--  net/sched/cls_api.c | 3
-rw-r--r--  net/sched/sch_mqprio.c | 188
-rw-r--r--  net/sched/sch_mqprio_lib.c | 14
-rw-r--r--  net/sched/sch_mqprio_lib.h | 2
-rw-r--r--  net/sched/sch_qfq.c | 13
-rw-r--r--  net/sched/sch_taprio.c | 77
-rw-r--r--  net/sctp/associola.c | 5
-rw-r--r--  net/sctp/auth.c | 2
-rw-r--r--  net/sctp/input.c | 2
-rw-r--r--  net/sctp/outqueue.c | 11
-rw-r--r--  net/sctp/sm_make_chunk.c | 32
-rw-r--r--  net/sctp/sm_sideeffect.c | 3
-rw-r--r--  net/sctp/sm_statefuns.c | 14
-rw-r--r--  net/sctp/socket.c | 9
-rw-r--r--  net/sctp/stream.c | 2
-rw-r--r--  net/sctp/stream_interleave.c | 5
-rw-r--r--  net/smc/af_smc.c | 11
-rw-r--r--  net/socket.c | 2
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_test.c | 6
-rw-r--r--  net/sunrpc/svcauth_unix.c | 17
-rw-r--r--  net/sunrpc/xprtsock.c | 1
-rw-r--r--  net/vmw_vsock/virtio_transport_common.c | 10
-rw-r--r--  net/vmw_vsock/vmci_transport.c | 8
-rw-r--r--  net/vmw_vsock/vsock_loopback.c | 3
-rw-r--r--  net/xdp/xsk.c | 9
-rw-r--r--  net/xdp/xsk_queue.h | 19
-rw-r--r--  net/xdp/xskmap.c | 8
-rw-r--r--  net/xfrm/xfrm_device.c | 2
-rw-r--r--  net/xfrm/xfrm_input.c | 66
-rw-r--r--  net/xfrm/xfrm_output.c | 33
-rw-r--r--  rust/Makefile | 16
-rw-r--r--  rust/kernel/print.rs | 6
-rw-r--r--  rust/kernel/str.rs | 2
-rw-r--r--  samples/bpf/cpustat_kern.c | 4
-rw-r--r--  samples/bpf/hbm.c | 5
-rw-r--r--  samples/bpf/ibumad_kern.c | 4
-rwxr-xr-x  samples/bpf/lwt_len_hist.sh | 2
-rw-r--r--  samples/bpf/offwaketime_kern.c | 2
-rw-r--r--  samples/bpf/task_fd_query_user.c | 4
-rwxr-xr-x  samples/bpf/test_lwt_bpf.sh | 2
-rw-r--r--  samples/bpf/test_overhead_tp.bpf.c | 4
-rw-r--r--  scripts/Makefile.package | 64
-rwxr-xr-x  scripts/bpf_doc.py | 2
-rwxr-xr-x  scripts/generate_rust_analyzer.py | 5
-rwxr-xr-x  scripts/is_rust_module.sh | 2
-rwxr-xr-x  scripts/kconfig/merge_config.sh | 2
-rw-r--r--  scripts/mod/modpost.c | 2
-rwxr-xr-x  scripts/package/builddeb | 3
-rwxr-xr-x  scripts/package/gen-diff-patch | 62
-rwxr-xr-x  scripts/package/mkdebian | 103
-rwxr-xr-x  scripts/package/mkspec | 11
-rw-r--r--  sound/firewire/tascam/tascam-stream.c | 2
-rw-r--r--  sound/i2c/cs8427.c | 7
-rw-r--r--  sound/pci/emu10k1/emupcm.c | 14
-rw-r--r--  sound/pci/hda/patch_hdmi.c | 13
-rw-r--r--  sound/pci/hda/patch_realtek.c | 31
-rw-r--r--  sound/pci/hda/patch_sigmatel.c | 10
-rw-r--r--  sound/soc/amd/yc/acp6x-mach.c | 7
-rw-r--r--  sound/soc/codecs/da7213.c | 6
-rw-r--r--  sound/soc/codecs/hdac_hdmi.c | 17
-rw-r--r--  sound/soc/codecs/lpass-rx-macro.c | 4
-rw-r--r--  sound/soc/codecs/lpass-tx-macro.c | 4
-rw-r--r--  sound/soc/codecs/lpass-wsa-macro.c | 4
-rw-r--r--  sound/soc/intel/boards/bytcr_rt5640.c | 12
-rw-r--r--  sound/soc/intel/boards/sof_sdw.c | 11
-rw-r--r--  sound/soc/intel/common/soc-acpi-intel-adl-match.c | 20
-rw-r--r--  sound/soc/soc-pcm.c | 4
-rw-r--r--  sound/soc/sof/ipc4-topology.c | 8
-rw-r--r--  sound/soc/sof/ipc4.c | 8
-rw-r--r--  tools/Makefile | 14
-rw-r--r--  tools/arch/loongarch/include/uapi/asm/bitsperlong.h | 2
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-prog.rst | 18
-rw-r--r--  tools/bpf/bpftool/bash-completion/bpftool | 42
-rw-r--r--  tools/bpf/bpftool/btf_dumper.c | 83
-rw-r--r--  tools/bpf/bpftool/cfg.c | 29
-rw-r--r--  tools/bpf/bpftool/cfg.h | 5
-rw-r--r--  tools/bpf/bpftool/json_writer.c | 2
-rw-r--r--  tools/bpf/bpftool/json_writer.h | 1
-rw-r--r--  tools/bpf/bpftool/main.h | 2
-rw-r--r--  tools/bpf/bpftool/prog.c | 81
-rw-r--r--  tools/bpf/bpftool/xlated_dumper.c | 54
-rw-r--r--  tools/bpf/bpftool/xlated_dumper.h | 3
-rw-r--r--  tools/include/uapi/linux/bpf.h | 61
-rw-r--r--  tools/lib/bpf/bpf.c | 25
-rw-r--r--  tools/lib/bpf/bpf.h | 25
-rw-r--r--  tools/lib/bpf/bpf_gen_internal.h | 4
-rw-r--r--  tools/lib/bpf/bpf_helpers.h | 5
-rw-r--r--  tools/lib/bpf/gen_loader.c | 48
-rw-r--r--  tools/lib/bpf/libbpf.c | 245
-rw-r--r--  tools/lib/bpf/libbpf.h | 3
-rw-r--r--  tools/lib/bpf/libbpf.map | 1
-rw-r--r--  tools/lib/bpf/linker.c | 14
-rw-r--r--  tools/lib/bpf/zip.c | 6
-rw-r--r--  tools/mm/page_owner_sort.c | 2
-rwxr-xr-x  tools/net/ynl/ethtool.py (renamed from tools/net/ynl/ethtool) | 4
-rw-r--r--  tools/net/ynl/lib/ynl.py | 5
-rw-r--r--  tools/testing/radix-tree/maple.c | 16
-rw-r--r--  tools/testing/selftests/bpf/DENYLIST.aarch64 | 1
-rw-r--r--  tools/testing/selftests/bpf/DENYLIST.s390x | 2
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 22
-rw-r--r--  tools/testing/selftests/bpf/autoconf_helper.h | 9
-rw-r--r--  tools/testing/selftests/bpf/bench.c | 4
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_local_storage_create.c | 264
-rw-r--r--  tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 60
-rw-r--r--  tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h | 6
-rw-r--r--  tools/testing/selftests/bpf/get_cgroup_id_user.c | 9
l---------  tools/testing/selftests/bpf/json_writer.c | 1
l---------  tools/testing/selftests/bpf/json_writer.h | 1
-rw-r--r--  tools/testing/selftests/bpf/network_helpers.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/align.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 160
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c | 8
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/cpumask.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c | 14
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/fib_lookup.c | 12
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c | 1
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/iters.c | 106
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 11
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c | 60
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/log_fixup.c | 1
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/map_ops.c | 162
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c | 128
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c | 3
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/send_signal.c | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c | 19
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c | 32
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c | 9
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/task_kfunc.c | 3
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_local_storage.c | 54
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/test_tunnel.c | 153
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tp_attach_query.c | 9
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/trace_printk.c | 10
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/trace_vprintk.c | 10
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c | 1
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/verifier.c | 108
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/verifier_log.c | 450
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c | 37
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_metadata.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/bench_local_storage_create.c | 82
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_iter_ksym.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_loop.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_misc.h | 148
-rw-r--r--  tools/testing/selftests/bpf/progs/cb_refs.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h | 8
-rw-r--r--  tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c | 104
-rw-r--r--  tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c | 15
-rw-r--r--  tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/connect4_prog.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/core_kern.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/cpumask_common.h | 7
-rw-r--r--  tools/testing/selftests/bpf/progs/cpumask_failure.c | 96
-rw-r--r--  tools/testing/selftests/bpf/progs/cpumask_success.c | 30
-rw-r--r--  tools/testing/selftests/bpf/progs/dynptr_fail.c | 5
-rw-r--r--  tools/testing/selftests/bpf/progs/dynptr_success.c | 5
-rw-r--r--  tools/testing/selftests/bpf/progs/err.h | 18
-rw-r--r--  tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/freplace_attach_probe.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/iters.c | 719
-rw-r--r--  tools/testing/selftests/bpf/progs/iters_looping.c | 163
-rw-r--r--  tools/testing/selftests/bpf/progs/iters_num.c | 242
-rw-r--r--  tools/testing/selftests/bpf/progs/iters_state_safety.c | 426
-rw-r--r--  tools/testing/selftests/bpf/progs/iters_testmod_seq.c | 79
-rw-r--r--  tools/testing/selftests/bpf/progs/linked_funcs1.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/linked_funcs2.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/linked_list.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/linked_list_fail.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/local_kptr_stash.c | 108
-rw-r--r--  tools/testing/selftests/bpf/progs/local_storage.c | 76
-rw-r--r--  tools/testing/selftests/bpf/progs/loop6.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/lsm.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/map_kptr.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/map_kptr_fail.c | 23
-rw-r--r--  tools/testing/selftests/bpf/progs/netcnt_prog.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/netif_receive_skb.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/perfbuf_bench.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/profiler.inc.h | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/pyperf.h | 16
-rw-r--r--  tools/testing/selftests/bpf/progs/pyperf600_iter.c | 7
-rw-r--r--  tools/testing/selftests/bpf/progs/pyperf600_nounroll.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c | 11
-rw-r--r--  tools/testing/selftests/bpf/progs/rbtree_fail.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/rcu_read_lock.c | 13
-rw-r--r--  tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/recvmsg4_prog.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/recvmsg6_prog.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/sendmsg4_prog.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/strobemeta.h | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c | 11
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/task_kfunc_common.h | 6
-rw-r--r--  tools/testing/selftests/bpf/progs/task_kfunc_failure.c | 178
-rw-r--r--  tools/testing/selftests/bpf/progs/task_kfunc_success.c | 78
-rw-r--r--  tools/testing/selftests/bpf/progs/tcp_ca_update.c | 80
-rw-r--r--  tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c | 13
-rw-r--r--  tools/testing/selftests/bpf/progs/test_bpf_nf.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/test_global_func1.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/test_global_func2.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/test_hash_large_key.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/test_ksyms_weak.c | 17
-rw-r--r--  tools/testing/selftests/bpf/progs/test_legacy_printk.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_map_lock.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_map_ops.c | 138
-rw-r--r--  tools/testing/selftests/bpf/progs/test_obj_id.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_pkt_access.c | 5
-rw-r--r--  tools/testing/selftests/bpf/progs/test_ringbuf.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/test_ringbuf_multi.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sk_assign.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sk_lookup.c | 9
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c | 16
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sock_fields.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sockmap_kern.h | 14
-rw-r--r--  tools/testing/selftests/bpf/progs/test_spin_lock.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/test_stacktrace_map.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tc_dtime.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tc_neigh.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tracepoint.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tunnel_kern.c | 123
-rw-r--r--  tools/testing/selftests/bpf/progs/test_usdt_multispec.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_verif_scale1.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_verif_scale2.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_verif_scale3.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c | 38
-rw-r--r--  tools/testing/selftests/bpf/progs/test_xdp_dynptr.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_xdp_noinline.c | 43
-rw-r--r--  tools/testing/selftests/bpf/progs/test_xdp_vlan.c | 13
-rw-r--r--  tools/testing/selftests/bpf/progs/type_cast.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/udp_limit.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/user_ringbuf_success.c | 6
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_and.c | 107
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_array_access.c | 529
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_basic_stack.c | 100
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c | 171
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c | 639
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c | 554
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_cfg.c | 100
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c | 89
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c | 227
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c | 308
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_const_or.c | 82
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c | 228
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c | 56
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_div0.c | 213
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_div_overflow.c | 144
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c | 825
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c | 550
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_helper_restricted.c | 279
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_helper_value_access.c | 1245
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_int_ptr.c | 157
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_ld_ind.c | 110
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_leak_ptr.c | 92
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_map_ptr.c | 159
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_map_ret_val.c | 110
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_masking.c | 410
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_meta_access.c | 284
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_raw_stack.c | 371
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c | 50
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_ringbuf.c | 131
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_spill_fill.c | 374
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_stack_ptr.c | 484
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_uninit.c | 61
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_value.c | 158
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c | 78
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_value_or_null.c | 288
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_var_off.c | 349
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_xadd.c | 124
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_xdp.c | 24
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c | 1722
-rw-r--r--  tools/testing/selftests/bpf/progs/xdp_features.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/xdp_hw_metadata.c | 42
-rw-r--r--  tools/testing/selftests/bpf/progs/xdp_metadata.c | 6
-rw-r--r--  tools/testing/selftests/bpf/progs/xdp_metadata2.c | 7
-rw-r--r--  tools/testing/selftests/bpf/progs/xdping_kern.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/xdpwall.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/xsk_xdp_progs.c | 25
-rwxr-xr-x  tools/testing/selftests/bpf/test_ftrace.sh | 7
-rw-r--r--  tools/testing/selftests/bpf/test_loader.c | 536
-rw-r--r--  tools/testing/selftests/bpf/test_progs.c | 108
-rw-r--r--  tools/testing/selftests/bpf/test_progs.h | 2
-rwxr-xr-x  tools/testing/selftests/bpf/test_tunnel.sh | 13
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c | 27
-rw-r--r--  tools/testing/selftests/bpf/test_verifier_log.c | 175
-rwxr-xr-x  tools/testing/selftests/bpf/test_xsk.sh | 1
-rw-r--r--  tools/testing/selftests/bpf/testing_helpers.c | 22
-rw-r--r--  tools/testing/selftests/bpf/testing_helpers.h | 2
-rw-r--r--  tools/testing/selftests/bpf/trace_helpers.c | 90
-rw-r--r--  tools/testing/selftests/bpf/trace_helpers.h | 5
-rw-r--r--  tools/testing/selftests/bpf/unpriv_helpers.c | 26
-rw-r--r--  tools/testing/selftests/bpf/unpriv_helpers.h | 7
-rw-r--r--  tools/testing/selftests/bpf/verifier/and.c | 68
-rw-r--r--  tools/testing/selftests/bpf/verifier/array_access.c | 379
-rw-r--r--  tools/testing/selftests/bpf/verifier/basic_stack.c | 64
-rw-r--r--  tools/testing/selftests/bpf/verifier/bounds.c | 129
-rw-r--r--  tools/testing/selftests/bpf/verifier/bounds_deduction.c | 136
-rw-r--r--  tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c | 411
-rw-r--r--  tools/testing/selftests/bpf/verifier/btf_ctx_access.c | 13
-rw-r--r--  tools/testing/selftests/bpf/verifier/calls.c | 10
-rw-r--r--  tools/testing/selftests/bpf/verifier/cfg.c | 73
-rw-r--r--  tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c | 72
-rw-r--r--  tools/testing/selftests/bpf/verifier/cgroup_skb.c | 197
-rw-r--r--  tools/testing/selftests/bpf/verifier/cgroup_storage.c | 220
-rw-r--r--  tools/testing/selftests/bpf/verifier/const_or.c | 60
-rw-r--r--  tools/testing/selftests/bpf/verifier/ctx_sk_msg.c | 181
-rw-r--r--  tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c | 40
-rw-r--r--  tools/testing/selftests/bpf/verifier/div0.c | 184
-rw-r--r--  tools/testing/selftests/bpf/verifier/div_overflow.c | 110
-rw-r--r--  tools/testing/selftests/bpf/verifier/helper_access_var_len.c | 650
-rw-r--r--  tools/testing/selftests/bpf/verifier/helper_packet_access.c | 460
-rw-r--r--  tools/testing/selftests/bpf/verifier/helper_restricted.c | 196
-rw-r--r--  tools/testing/selftests/bpf/verifier/helper_value_access.c | 953
-rw-r--r--  tools/testing/selftests/bpf/verifier/int_ptr.c | 161
-rw-r--r--  tools/testing/selftests/bpf/verifier/ld_ind.c | 72
-rw-r--r--  tools/testing/selftests/bpf/verifier/leak_ptr.c | 67
-rw-r--r--  tools/testing/selftests/bpf/verifier/map_ptr.c | 99
-rw-r--r--  tools/testing/selftests/bpf/verifier/map_ret_val.c | 65
-rw-r--r--  tools/testing/selftests/bpf/verifier/masking.c | 322
-rw-r--r--  tools/testing/selftests/bpf/verifier/meta_access.c | 235
-rw-r--r--  tools/testing/selftests/bpf/verifier/raw_stack.c | 305
-rw-r--r--  tools/testing/selftests/bpf/verifier/raw_tp_writable.c | 35
-rw-r--r--  tools/testing/selftests/bpf/verifier/ref_tracking.c | 6
-rw-r--r--  tools/testing/selftests/bpf/verifier/ringbuf.c | 95
-rw-r--r--  tools/testing/selftests/bpf/verifier/spill_fill.c | 345
-rw-r--r--  tools/testing/selftests/bpf/verifier/stack_ptr.c | 359
-rw-r--r--  tools/testing/selftests/bpf/verifier/uninit.c | 39
-rw-r--r--  tools/testing/selftests/bpf/verifier/value.c | 104
-rw-r--r--  tools/testing/selftests/bpf/verifier/value_adj_spill.c | 43
-rw-r--r--  tools/testing/selftests/bpf/verifier/value_or_null.c | 220
-rw-r--r--  tools/testing/selftests/bpf/verifier/var_off.c | 291
-rw-r--r--  tools/testing/selftests/bpf/verifier/xadd.c | 97
-rw-r--r--  tools/testing/selftests/bpf/verifier/xdp.c | 14
-rw-r--r--  tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c | 1468
-rw-r--r--  tools/testing/selftests/bpf/veristat.c | 207
-rw-r--r--  tools/testing/selftests/bpf/xdp_features.c | 67
-rw-r--r--  tools/testing/selftests/bpf/xdp_hw_metadata.c | 10
-rw-r--r--  tools/testing/selftests/bpf/xdp_metadata.h | 4
-rw-r--r--  tools/testing/selftests/bpf/xsk_xdp_metadata.h | 5
-rw-r--r--  tools/testing/selftests/bpf/xskxceiver.c | 96
-rw-r--r--  tools/testing/selftests/bpf/xskxceiver.h | 4
-rw-r--r--  tools/testing/selftests/drivers/net/bonding/Makefile | 3
-rwxr-xr-x  tools/testing/selftests/drivers/net/bonding/bond_options.sh | 264
-rw-r--r--  tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh | 143
-rwxr-xr-x  tools/testing/selftests/drivers/net/bonding/option_prio.sh | 245
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh | 3
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh | 28
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh | 3
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh | 3
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh | 1
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh | 2
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh | 2
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh | 6
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh | 6
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh | 6
-rw-r--r--  tools/testing/selftests/mount_setattr/mount_setattr_test.c | 1
-rw-r--r--  tools/testing/selftests/net/Makefile | 1
-rw-r--r--tools/testing/selftests/net/config1
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_mm.sh288
-rwxr-xr-xtools/testing/selftests/net/forwarding/hw_stats_l3.sh15
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib.sh60
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_tbf_root.sh4
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c8
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh10
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh2
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh89
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py1278
-rwxr-xr-xtools/testing/selftests/net/rps_default_mask.sh1
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c2
-rwxr-xr-xtools/testing/selftests/net/test_bridge_neigh_suppress.sh862
-rw-r--r--tools/virtio/virtio-trace/README2
-rw-r--r--usr/gen_init_cpio.c12
-rw-r--r--virt/kvm/eventfd.c49
-rw-r--r--virt/kvm/kvm_main.c1
1255 files changed, 47805 insertions, 17945 deletions
diff --git a/.gitignore b/.gitignore
index 70ec6037fa7a..51117ba29c88 100644
--- a/.gitignore
+++ b/.gitignore
@@ -105,6 +105,7 @@ modules.order
!.gitignore
!.mailmap
!.rustfmt.toml
+!.kunitconfig
#
# Generated include files
diff --git a/.mailmap b/.mailmap
index e2af78f67f7c..6686879ce0d5 100644
--- a/.mailmap
+++ b/.mailmap
@@ -232,6 +232,8 @@ Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
John Crispin <john@phrozen.org> <blogic@openwrt.org>
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
John Stultz <johnstul@us.ibm.com>
+<jon.toppins+linux@gmail.com> <jtoppins@cumulusnetworks.com>
+<jon.toppins+linux@gmail.com> <jtoppins@redhat.com>
Jordan Crouse <jordan@cosmicpenguin.net> <jcrouse@codeaurora.org>
<josh@joshtriplett.org> <josh@freedesktop.org>
<josh@joshtriplett.org> <josh@kernel.org>
@@ -265,7 +267,9 @@ Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
Krzysztof Kozlowski <krzk@kernel.org> <krzysztof.kozlowski@canonical.com>
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Kuogee Hsieh <quic_khsieh@quicinc.com> <khsieh@codeaurora.org>
+Leonard Crestez <leonard.crestez@nxp.com> Leonard Crestez <cdleonard@gmail.com>
Leonardo Bras <leobras.c@gmail.com> <leonardo@linux.ibm.com>
+Leonard Göhrs <l.goehrs@pengutronix.de>
Leonid I Ananiev <leonid.i.ananiev@intel.com>
Leon Romanovsky <leon@kernel.org> <leon@leon.nu>
Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com>
@@ -295,6 +299,8 @@ Martin Kepplinger <martink@posteo.de> <martin.kepplinger@puri.sm>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com> <martyna.szapar-mudlaw@intel.com>
Mathieu Othacehe <m.othacehe@gmail.com>
+Mat Martineau <martineau@kernel.org> <mathew.j.martineau@linux.intel.com>
+Mat Martineau <martineau@kernel.org> <mathewm@codeaurora.org>
Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
Matthew Wilcox <willy@infradead.org> <matthew@wil.cx>
Matthew Wilcox <willy@infradead.org> <mawilcox@linuxonhyperv.com>
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 0ad7e7ec0d27..09a563bbe3e7 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -36,7 +36,6 @@ problems and bugs in particular.
reporting-issues
reporting-regressions
- security-bugs
bug-hunting
bug-bisect
tainted-kernels
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 19600c50277b..6ae5f129fbca 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -128,6 +128,7 @@ parameter is applicable::
KVM Kernel Virtual Machine support is enabled.
LIBATA Libata driver is enabled
LP Printer support is enabled.
+ LOONGARCH LoongArch architecture is enabled.
LOOP Loopback device support is enabled.
M68k M68k architecture is enabled.
These options have more detailed description inside of
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6221a1d057dd..7016cb12dc4e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6933,6 +6933,12 @@
When enabled, memory and cache locality will be
impacted.
+ writecombine= [LOONGARCH] Control the MAT (Memory Access Type) of
+ ioremap_wc().
+
+ on - Enable writecombine, use WUC for ioremap_wc()
+ off - Disable writecombine, use SUC for ioremap_wc()
+
x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
default x2apic cluster mode on platforms
supporting x2apic.
diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst
index ec62151fe672..2fd5a030235a 100644
--- a/Documentation/admin-guide/reporting-issues.rst
+++ b/Documentation/admin-guide/reporting-issues.rst
@@ -395,7 +395,7 @@ might want to be aware of; it for example explains how to add your issue to the
list of tracked regressions, to ensure it won't fall through the cracks.
What qualifies as security issue is left to your judgment. Consider reading
-Documentation/admin-guide/security-bugs.rst before proceeding, as it
+Documentation/process/security-bugs.rst before proceeding, as it
provides additional details how to best handle security issues.
An issue is a 'really severe problem' when something totally unacceptably bad
@@ -1269,7 +1269,7 @@ them when sending the report by mail. If you filed it in a bug tracker, forward
the report's text to these addresses; but on top of it put a small note where
you mention that you filed it with a link to the ticket.
-See Documentation/admin-guide/security-bugs.rst for more information.
+See Documentation/process/security-bugs.rst for more information.
Duties after the report went out
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 7403f81c995c..609b71f5747d 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -128,7 +128,8 @@ into the bpf-next tree will make their way into net-next tree. net and
net-next are both run by David S. Miller. From there, they will go
into the kernel mainline tree run by Linus Torvalds. To read up on the
process of net and net-next being merged into the mainline tree, see
-the `netdev-FAQ`_.
+the documentation on the netdev subsystem at
+Documentation/process/maintainer-netdev.rst.
@@ -147,7 +148,8 @@ request)::
Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be applied to?
---------------------------------------------------------------------------------
-A: The process is the very same as described in the `netdev-FAQ`_,
+A: The process is the very same as described in the netdev subsystem
+documentation at Documentation/process/maintainer-netdev.rst,
so please read up on it. The subject line must indicate whether the
patch is a fix or rather "next-like" content in order to let the
maintainers know whether it is targeted at bpf or bpf-next.
@@ -206,8 +208,9 @@ ii) run extensive BPF test suite and
Once the BPF pull request was accepted by David S. Miller, then
the patches end up in net or net-next tree, respectively, and
make their way from there further into mainline. Again, see the
-`netdev-FAQ`_ for additional information e.g. on how often they are
-merged to mainline.
+documentation for the netdev subsystem at
+Documentation/process/maintainer-netdev.rst for additional information
+e.g. on how often they are merged to mainline.
Q: How long do I need to wait for feedback on my BPF patches?
-------------------------------------------------------------
@@ -230,7 +233,8 @@ Q: Are patches applied to bpf-next when the merge window is open?
-----------------------------------------------------------------
A: For the time when the merge window is open, bpf-next will not be
processed. This is roughly analogous to net-next patch processing,
-so feel free to read up on the `netdev-FAQ`_ about further details.
+so feel free to read up on the netdev docs at
+Documentation/process/maintainer-netdev.rst about further details.
During those two weeks of merge window, we might ask you to resend
your patch series once bpf-next is open again. Once Linus released
@@ -394,7 +398,8 @@ netdev kernel mailing list in Cc and ask for the fix to be queued up:
netdev@vger.kernel.org
The process in general is the same as on netdev itself, see also the
-`netdev-FAQ`_.
+documentation on the networking subsystem at
+Documentation/process/maintainer-netdev.rst.
Q: Do you also backport to kernels not currently maintained as stable?
----------------------------------------------------------------------
@@ -410,7 +415,7 @@ Q: The BPF patch I am about to submit needs to go to stable as well
What should I do?
A: The same rules apply as with netdev patch submissions in general, see
-the `netdev-FAQ`_.
+the netdev docs at Documentation/process/maintainer-netdev.rst.
Never add "``Cc: stable@vger.kernel.org``" to the patch description, but
ask the BPF maintainers to queue the patches instead. This can be done
@@ -684,7 +689,6 @@ when:
.. Links
-.. _netdev-FAQ: https://www.kernel.org/doc/html/latest/process/maintainer-netdev.html
.. _selftests:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
diff --git a/Documentation/bpf/clang-notes.rst b/Documentation/bpf/clang-notes.rst
index 528feddf2db9..2c872a1ee08e 100644
--- a/Documentation/bpf/clang-notes.rst
+++ b/Documentation/bpf/clang-notes.rst
@@ -20,6 +20,12 @@ Arithmetic instructions
For CPU versions prior to 3, Clang v7.0 and later can enable ``BPF_ALU`` support with
``-Xclang -target-feature -Xclang +alu32``. In CPU version 3, support is automatically included.
+Jump instructions
+=================
+
+If ``-O0`` is used, Clang will generate the ``BPF_CALL | BPF_X | BPF_JMP`` (0x8d)
+instruction, which is not supported by the Linux kernel verifier.
+
Atomic operations
=================
diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst
index 75344cd230e5..41efd8874eeb 100644
--- a/Documentation/bpf/cpumasks.rst
+++ b/Documentation/bpf/cpumasks.rst
@@ -117,12 +117,7 @@ For example:
As mentioned and illustrated above, these ``struct bpf_cpumask *`` objects can
also be stored in a map and used as kptrs. If a ``struct bpf_cpumask *`` is in
a map, the reference can be removed from the map with bpf_kptr_xchg(), or
-opportunistically acquired with bpf_cpumask_kptr_get():
-
-.. kernel-doc:: kernel/bpf/cpumask.c
- :identifiers: bpf_cpumask_kptr_get
-
-Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
+opportunistically acquired using RCU:
.. code-block:: c
@@ -144,7 +139,7 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
/**
* A simple example tracepoint program showing how a
* struct bpf_cpumask * kptr that is stored in a map can
- * be acquired using the bpf_cpumask_kptr_get() kfunc.
+ * be passed to kfuncs using RCU protection.
*/
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path)
@@ -158,26 +153,21 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
if (!v)
return -ENOENT;
+ bpf_rcu_read_lock();
/* Acquire a reference to the bpf_cpumask * kptr that's already stored in the map. */
- kptr = bpf_cpumask_kptr_get(&v->cpumask);
- if (!kptr)
+ kptr = v->cpumask;
+ if (!kptr) {
/* If no bpf_cpumask was present in the map, it's because
* we're racing with another CPU that removed it with
* bpf_kptr_xchg() between the bpf_map_lookup_elem()
- * above, and our call to bpf_cpumask_kptr_get().
- * bpf_cpumask_kptr_get() internally safely handles this
- * race, and will return NULL if the cpumask is no longer
- * present in the map by the time we invoke the kfunc.
+ * above, and our load of the pointer from the map.
*/
+ bpf_rcu_read_unlock();
return -EBUSY;
+ }
- /* Free the reference we just took above. Note that the
- * original struct bpf_cpumask * kptr is still in the map. It will
- * be freed either at a later time if another context deletes
- * it from the map, or automatically by the BPF subsystem if
- * it's still present when the map is destroyed.
- */
- bpf_cpumask_release(kptr);
+ bpf_cpumask_setall(kptr);
+ bpf_rcu_read_unlock();
return 0;
}
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index db8789e6969e..492980ece1ab 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -11,7 +11,8 @@ Documentation conventions
=========================
For brevity, this document uses the type notion "u64", "u32", etc.
-to mean an unsigned integer whose width is the specified number of bits.
+to mean an unsigned integer whose width is the specified number of bits,
+and "s32", etc. to mean a signed integer of the specified number of bits.
Registers and calling convention
================================
@@ -242,28 +243,58 @@ Jump instructions
otherwise identical operations.
The 'code' field encodes the operation as below:
-======== ===== ========================= ============
-code value description notes
-======== ===== ========================= ============
-BPF_JA 0x00 PC += off BPF_JMP only
-BPF_JEQ 0x10 PC += off if dst == src
-BPF_JGT 0x20 PC += off if dst > src unsigned
-BPF_JGE 0x30 PC += off if dst >= src unsigned
-BPF_JSET 0x40 PC += off if dst & src
-BPF_JNE 0x50 PC += off if dst != src
-BPF_JSGT 0x60 PC += off if dst > src signed
-BPF_JSGE 0x70 PC += off if dst >= src signed
-BPF_CALL 0x80 function call
-BPF_EXIT 0x90 function / program return BPF_JMP only
-BPF_JLT 0xa0 PC += off if dst < src unsigned
-BPF_JLE 0xb0 PC += off if dst <= src unsigned
-BPF_JSLT 0xc0 PC += off if dst < src signed
-BPF_JSLE 0xd0 PC += off if dst <= src signed
-======== ===== ========================= ============
+======== ===== === =========================================== =========================================
+code value src description notes
+======== ===== === =========================================== =========================================
+BPF_JA 0x0 0x0 PC += offset BPF_JMP only
+BPF_JEQ 0x1 any PC += offset if dst == src
+BPF_JGT 0x2 any PC += offset if dst > src unsigned
+BPF_JGE 0x3 any PC += offset if dst >= src unsigned
+BPF_JSET 0x4 any PC += offset if dst & src
+BPF_JNE 0x5 any PC += offset if dst != src
+BPF_JSGT 0x6 any PC += offset if dst > src signed
+BPF_JSGE 0x7 any PC += offset if dst >= src signed
+BPF_CALL 0x8 0x0 call helper function by address see `Helper functions`_
+BPF_CALL 0x8 0x1 call PC += offset see `Program-local functions`_
+BPF_CALL 0x8 0x2 call helper function by BTF ID see `Helper functions`_
+BPF_EXIT 0x9 0x0 return BPF_JMP only
+BPF_JLT 0xa any PC += offset if dst < src unsigned
+BPF_JLE 0xb any PC += offset if dst <= src unsigned
+BPF_JSLT 0xc any PC += offset if dst < src signed
+BPF_JSLE 0xd any PC += offset if dst <= src signed
+======== ===== === =========================================== =========================================
The eBPF program needs to store the return value into register R0 before doing a
-BPF_EXIT.
+``BPF_EXIT``.
+Example:
+
+``BPF_JSGE | BPF_X | BPF_JMP32`` (0x7e) means::
+
+ if (s32)dst s>= (s32)src goto +offset
+
+where 's>=' indicates a signed '>=' comparison.
+
+Helper functions
+~~~~~~~~~~~~~~~~
+
+Helper functions are a concept whereby BPF programs can call into a
+set of functions exposed by the underlying platform.
+
+Historically, each helper function was identified by an address
+encoded in the imm field. The available helper functions may differ
+for each program type, but address values are unique across all program types.
+
+Platforms that support the BPF Type Format (BTF) support identifying
+a helper function by a BTF ID encoded in the imm field, where the BTF ID
+identifies the helper name and type.
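+
+Example:
+
+``BPF_CALL | BPF_K | BPF_JMP`` (0x85) with 'src' set to 0x0 and 'imm' set to 1
+means call the helper function with address 1. Which function that is depends
+on the platform; on Linux, for instance, helper 1 is ``bpf_map_lookup_elem()``.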
+
+Program-local functions
+~~~~~~~~~~~~~~~~~~~~~~~
+Program-local functions are functions exposed by the same BPF program as the
+caller, and are referenced by offset from the call instruction, similar to
+``BPF_JA``. A ``BPF_EXIT`` within the program-local function will return to
+the caller.
Load and store instructions
===========================
@@ -385,14 +416,56 @@ and loaded back to ``R0``.
-----------------------------
Instructions with the ``BPF_IMM`` 'mode' modifier use the wide instruction
-encoding for an extra imm64 value.
-
-There is currently only one such instruction.
-
-``BPF_LD | BPF_DW | BPF_IMM`` means::
-
- dst = imm64
-
+encoding defined in `Instruction encoding`_, and use the 'src' field of the
+basic instruction to hold an opcode subtype.
+
+The following table defines a set of ``BPF_IMM | BPF_DW | BPF_LD`` instructions
+with opcode subtypes in the 'src' field, using new terms such as "map"
+defined further below:
+
+========================= ====== === ========================================= =========== ==============
+opcode construction opcode src pseudocode imm type dst type
+========================= ====== === ========================================= =========== ==============
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x0 dst = imm64 integer integer
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x1 dst = map_by_fd(imm) map fd map
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x2 dst = map_val(map_by_fd(imm)) + next_imm map fd data pointer
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x3 dst = var_addr(imm) variable id data pointer
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x4 dst = code_addr(imm) integer code pointer
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x5 dst = map_by_idx(imm) map index map
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x6 dst = map_val(map_by_idx(imm)) + next_imm map index data pointer
+========================= ====== === ========================================= =========== ==============
+
+where
+
+* map_by_fd(imm) means to convert a 32-bit file descriptor into an address of a map (see `Maps`_)
+* map_by_idx(imm) means to convert a 32-bit index into an address of a map
+* map_val(map) gets the address of the first value in a given map
+* var_addr(imm) gets the address of a platform variable (see `Platform Variables`_) with a given id
+* code_addr(imm) gets the address of the instruction at a specified relative offset in number of (64-bit) instructions
+* the 'imm type' can be used by disassemblers for display
+* the 'dst type' can be used for verification and JIT compilation purposes
+
+Maps
+~~~~
+
+Maps are shared memory regions accessible by eBPF programs on some platforms.
+A map can have various semantics as defined in a separate document, and may or
+may not have a single contiguous memory region, but the 'map_val(map)' is
+currently only defined for maps that do have a single contiguous memory region.
+
+Each map can have a file descriptor (fd) if supported by the platform, where
+'map_by_fd(imm)' means to get the map with the specified file descriptor. Each
+BPF program can also be defined to use a set of maps associated with the
+program at load time, and 'map_by_idx(imm)' means to get the map with the given
+index in the set associated with the BPF program containing the instruction.
+
+Platform Variables
+~~~~~~~~~~~~~~~~~~
+
+Platform variables are memory regions, identified by integer ids, exposed by
+the runtime and accessible by BPF programs on some platforms. The
+'var_addr(imm)' operation means to get the address of the memory region
+identified by the given id.
Legacy BPF Packet access instructions
-------------------------------------
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 69eccf6f98ef..3b42cfe12437 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -179,9 +179,10 @@ both are orthogonal to each other.
---------------------
The KF_RELEASE flag is used to indicate that the kfunc releases the pointer
-passed in to it. There can be only one referenced pointer that can be passed in.
-All copies of the pointer being released are invalidated as a result of invoking
-kfunc with this flag.
+passed in to it. Only one referenced pointer may be passed in. All copies of
+the pointer being released are invalidated as a result of invoking a kfunc
+with this flag. KF_RELEASE kfuncs automatically receive the protection
+afforded by the KF_TRUSTED_ARGS flag described below.
2.4.4 KF_KPTR_GET flag
----------------------
@@ -470,13 +471,50 @@ struct_ops callback arg. For example:
struct task_struct *acquired;
acquired = bpf_task_acquire(task);
+ if (acquired)
+ /*
+ * In a typical program you'd do something like store
+ * the task in a map, and the map will automatically
+ * release it later. Here, we release it manually.
+ */
+ bpf_task_release(acquired);
+ return 0;
+ }
+
+
+References acquired on ``struct task_struct *`` objects are RCU protected.
+Therefore, when in an RCU read region, you can obtain a pointer to a task
+embedded in a map value without having to acquire a reference:
+
+.. code-block:: c
+
+ #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+ private(TASK) static struct task_struct *global;
+
+ /**
+ * A trivial example showing how to access a task stored
+ * in a map using RCU.
+ */
+ SEC("tp_btf/task_newtask")
+ int BPF_PROG(task_rcu_read_example, struct task_struct *task, u64 clone_flags)
+ {
+ struct task_struct *local_copy;
+
+ bpf_rcu_read_lock();
+ local_copy = global;
+ if (local_copy)
+ /*
+ * We could also pass local_copy to kfuncs or helper functions here,
+ * as we're guaranteed that local_copy will be valid until we exit
+ * the RCU read region below.
+ */
+ bpf_printk("Global task %s is valid", local_copy->comm);
+ else
+ bpf_printk("No global task found");
+ bpf_rcu_read_unlock();
+
+ /* At this point we can no longer reference local_copy. */
- /*
- * In a typical program you'd do something like store
- * the task in a map, and the map will automatically
- * release it later. Here, we release it manually.
- */
- bpf_task_release(acquired);
return 0;
}
@@ -534,74 +572,6 @@ bpf_task_release() respectively, so we won't provide examples for them.
----
-You may also acquire a reference to a ``struct cgroup`` kptr that's already
-stored in a map using bpf_cgroup_kptr_get():
-
-.. kernel-doc:: kernel/bpf/helpers.c
- :identifiers: bpf_cgroup_kptr_get
-
-Here's an example of how it can be used:
-
-.. code-block:: c
-
- /* struct containing the struct task_struct kptr which is actually stored in the map. */
- struct __cgroups_kfunc_map_value {
- struct cgroup __kptr * cgroup;
- };
-
- /* The map containing struct __cgroups_kfunc_map_value entries. */
- struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __type(key, int);
- __type(value, struct __cgroups_kfunc_map_value);
- __uint(max_entries, 1);
- } __cgroups_kfunc_map SEC(".maps");
-
- /* ... */
-
- /**
- * A simple example tracepoint program showing how a
- * struct cgroup kptr that is stored in a map can
- * be acquired using the bpf_cgroup_kptr_get() kfunc.
- */
- SEC("tp_btf/cgroup_mkdir")
- int BPF_PROG(cgroup_kptr_get_example, struct cgroup *cgrp, const char *path)
- {
- struct cgroup *kptr;
- struct __cgroups_kfunc_map_value *v;
- s32 id = cgrp->self.id;
-
- /* Assume a cgroup kptr was previously stored in the map. */
- v = bpf_map_lookup_elem(&__cgroups_kfunc_map, &id);
- if (!v)
- return -ENOENT;
-
- /* Acquire a reference to the cgroup kptr that's already stored in the map. */
- kptr = bpf_cgroup_kptr_get(&v->cgroup);
- if (!kptr)
- /* If no cgroup was present in the map, it's because
- * we're racing with another CPU that removed it with
- * bpf_kptr_xchg() between the bpf_map_lookup_elem()
- * above, and our call to bpf_cgroup_kptr_get().
- * bpf_cgroup_kptr_get() internally safely handles this
- * race, and will return NULL if the task is no longer
- * present in the map by the time we invoke the kfunc.
- */
- return -EBUSY;
-
- /* Free the reference we just took above. Note that the
- * original struct cgroup kptr is still in the map. It will
- * be freed either at a later time if another context deletes
- * it from the map, or automatically by the BPF subsystem if
- * it's still present when the map is destroyed.
- */
- bpf_cgroup_release(kptr);
-
- return 0;
- }
-
-----
-
Other kfuncs available for interacting with ``struct cgroup *`` objects are
bpf_cgroup_ancestor() and bpf_cgroup_from_id(), allowing callers to access
the ancestor of a cgroup and find a cgroup by its ID, respectively. Both
diff --git a/Documentation/bpf/libbpf/index.rst b/Documentation/bpf/libbpf/index.rst
index f9b3b252e28f..7545a2049692 100644
--- a/Documentation/bpf/libbpf/index.rst
+++ b/Documentation/bpf/libbpf/index.rst
@@ -2,23 +2,32 @@
.. _libbpf:
+======
libbpf
======
+If you are looking to develop BPF applications using the libbpf library, this
+directory contains important documentation that you should read.
+
+To get started, it is recommended to begin with the :doc:`libbpf Overview
+<libbpf_overview>` document, which provides a high-level understanding of the
+libbpf APIs and their usage. This will give you a solid foundation to start
+exploring and utilizing the various features of libbpf to develop your BPF
+applications.
+
.. toctree::
:maxdepth: 1
+ libbpf_overview
API Documentation <https://libbpf.readthedocs.io/en/latest/api.html>
program_types
libbpf_naming_convention
libbpf_build
-This is documentation for libbpf, a userspace library for loading and
-interacting with bpf programs.
-All general BPF questions, including kernel functionality, libbpf APIs and
-their application, should be sent to bpf@vger.kernel.org mailing list.
-You can `subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the
-mailing list search its `archive <https://lore.kernel.org/bpf/>`_.
-Please search the archive before asking new questions. It very well might
-be that this was already addressed or answered before.
+All general BPF questions, including kernel functionality, libbpf APIs and their
+application, should be sent to the bpf@vger.kernel.org mailing list. You can
+`subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the mailing list
+and search its `archive <https://lore.kernel.org/bpf/>`_. Please search the archive
+before asking new questions. It may be that this was already addressed or
+answered before.
diff --git a/Documentation/bpf/libbpf/libbpf_overview.rst b/Documentation/bpf/libbpf/libbpf_overview.rst
new file mode 100644
index 000000000000..f36a2d4ffea2
--- /dev/null
+++ b/Documentation/bpf/libbpf/libbpf_overview.rst
@@ -0,0 +1,228 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+libbpf Overview
+===============
+
+libbpf is a C-based library containing a BPF loader that takes compiled BPF
+object files and prepares and loads them into the Linux kernel. libbpf takes on
+the heavy lifting of loading, verifying, and attaching BPF programs to various
+kernel hooks, allowing BPF application developers to focus only on BPF program
+correctness and performance.
+
+The following are the high-level features supported by libbpf:
+
+* Provides high-level and low-level APIs for user space programs to interact
+ with BPF programs. The low-level APIs wrap all the bpf system call
+ functionality, which is useful when users need more fine-grained control
+ over the interactions between user space and BPF programs.
+* Provides overall support for the BPF object skeleton generated by bpftool.
+ The skeleton file simplifies the process for user space programs to access
+ global variables and work with BPF programs.
+* Provides BPF-side APIs, including BPF helper definitions, BPF maps support,
+ and tracing helpers, allowing developers to simplify BPF code writing.
+* Supports BPF CO-RE mechanism, enabling BPF developers to write portable
+ BPF programs that can be compiled once and run across different kernel
+ versions.
+
+This document will delve into the above concepts in detail, providing a deeper
+understanding of the capabilities and advantages of libbpf and how it can help
+you develop BPF applications efficiently.
+
+BPF App Lifecycle and libbpf APIs
+==================================
+
+A BPF application consists of one or more BPF programs (either cooperating or
+completely independent), BPF maps, and global variables. The global
+variables are shared between all BPF programs, which allows them to cooperate on
+a common set of data. libbpf provides APIs that user space programs can use to
+manipulate the BPF programs by triggering different phases of a BPF application
+lifecycle.
+
+The following section provides a brief overview of each phase in the BPF life
+cycle:
+
+* **Open phase**: In this phase, libbpf parses the BPF
+ object file and discovers BPF maps, BPF programs, and global variables. After
+ a BPF app is opened, user space apps can make additional adjustments
+ (setting BPF program types, if necessary; pre-setting initial values for
+ global variables, etc.) before all the entities are created and loaded.
+
+* **Load phase**: In the load phase, libbpf creates BPF
+ maps, resolves various relocations, and verifies and loads BPF programs into
+ the kernel. At this point, libbpf validates all the parts of a BPF application
+ and loads the BPF program into the kernel, but no BPF program has yet been
+ executed. After the load phase, it’s possible to set up the initial BPF map
+ state without racing with the BPF program code execution.
+
+* **Attachment phase**: In this phase, libbpf
+ attaches BPF programs to various BPF hook points (e.g., tracepoints, kprobes,
+ cgroup hooks, network packet processing pipeline, etc.). During this
+ phase, BPF programs perform useful work such as processing
+ packets, or updating BPF maps and global variables that can be read from user
+ space.
+
+* **Tear down phase**: In the tear down phase,
+ libbpf detaches BPF programs and unloads them from the kernel. BPF maps are
+ destroyed, and all the resources used by the BPF app are freed.
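+
+The following is a minimal sketch of these phases driven through the generic
+libbpf APIs (the ``myprog.bpf.o`` object file and the ``handle_tp`` program
+name here are hypothetical, and error handling is abbreviated):
+
+.. code-block:: C
+
+    #include <bpf/libbpf.h>
+
+    int run_bpf_app(void)
+    {
+        struct bpf_object *obj;
+        struct bpf_program *prog;
+        struct bpf_link *link = NULL;
+        int err;
+
+        /* Open phase: parse the object file, discover maps and programs. */
+        obj = bpf_object__open_file("myprog.bpf.o", NULL);
+        if (!obj)
+            return -1;
+
+        /* Load phase: create maps, resolve relocations, verify and load. */
+        err = bpf_object__load(obj);
+        if (err)
+            goto out;
+
+        /* Attachment phase: the program is hooked up and starts running. */
+        prog = bpf_object__find_program_by_name(obj, "handle_tp");
+        link = prog ? bpf_program__attach(prog) : NULL;
+
+        /* ... read maps and global variables, wait for events, etc. ... */
+
+        /* Tear down phase: detach programs and free all used resources. */
+        bpf_link__destroy(link);
+    out:
+        bpf_object__close(obj);
+        return err;
+    }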
+
+BPF Object Skeleton File
+========================
+
+BPF skeleton is an alternative interface to libbpf APIs for working with BPF
+objects. Skeleton code abstracts away generic libbpf APIs to significantly
+simplify code for manipulating BPF programs from user space. Skeleton code
+includes a bytecode representation of the BPF object file, simplifying the
+process of distributing your BPF code. With BPF bytecode embedded, there are no
+extra files to deploy along with your application binary.
+
+You can generate the skeleton header file (``.skel.h``) for a specific object
+file by passing the BPF object to bpftool. The generated BPF skeleton
+provides the following custom functions that correspond to the BPF lifecycle,
+each of them prefixed with the specific object name:
+
+* ``<name>__open()`` – creates and opens BPF application (``<name>`` stands for
+ the specific bpf object name)
+* ``<name>__load()`` – instantiates, loads, and verifies BPF application parts
+* ``<name>__attach()`` – attaches all auto-attachable BPF programs (it’s
+ optional, you can have more control by using libbpf APIs directly)
+* ``<name>__destroy()`` – detaches all BPF programs and
+ frees up all used resources
+
+Using the skeleton code is the recommended way to work with BPF programs. Keep
+in mind that the BPF skeleton provides access to the underlying BPF object, so whatever
+was possible to do with generic libbpf APIs is still possible even when the BPF
+skeleton is used. It's an additive convenience feature, with no syscalls, and no
+cumbersome code.
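+
+As a rough sketch, for a (hypothetical) ``myprog.bpf.o`` object file the
+skeleton header can be generated with::
+
+    $ bpftool gen skeleton myprog.bpf.o > myprog.skel.h
+
+and driven from user space along these lines (error handling abbreviated):
+
+.. code-block:: C
+
+    #include "myprog.skel.h"
+
+    int main(void)
+    {
+        struct myprog *skel;
+        int err;
+
+        skel = myprog__open();              /* open phase */
+        if (!skel)
+            return 1;
+
+        err = myprog__load(skel);           /* load phase */
+        if (!err)
+            err = myprog__attach(skel);     /* attachment phase */
+
+        /* ... the BPF programs are now running ... */
+
+        myprog__destroy(skel);              /* tear down phase */
+        return err ? 1 : 0;
+    }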
+
+Other Advantages of Using Skeleton File
+---------------------------------------
+
+* BPF skeleton provides an interface for user space programs to work with BPF
+ global variables. The skeleton code memory maps global variables as a struct
+ into user space. The struct interface allows user space programs to initialize
+ BPF programs before the BPF load phase and fetch and update data from user
+ space afterward.
+
+* The ``skel.h`` file reflects the object file structure by listing out the
+ available maps, programs, etc. BPF skeleton provides direct access to all the
+ BPF maps and BPF programs as struct fields. This eliminates the need for
+ string-based lookups with ``bpf_object_find_map_by_name()`` and
+ ``bpf_object_find_program_by_name()`` APIs, reducing errors due to BPF source
+ code and user-space code getting out of sync.
+
+* The embedded bytecode representation of the object file ensures that the
+ skeleton and the BPF object file are always in sync.
+
+BPF Helpers
+===========
+
+libbpf provides BPF-side APIs that BPF programs can use to interact with the
+system. The BPF helpers definition allows developers to use them in BPF code as
+any other plain C function. For example, there are helper functions to print
+debugging messages, get the time since the system was booted, interact with BPF
+maps, manipulate network packets, etc.
+
+For a complete description of what the helpers do, the arguments they take, and
+the return value, see the `bpf-helpers
+<https://man7.org/linux/man-pages/man7/bpf-helpers.7.html>`_ man page.
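+
+As a small sketch (the tracepoint chosen here is arbitrary), helpers are
+called from BPF code like ordinary C functions:
+
+.. code-block:: C
+
+    #include <linux/bpf.h>
+    #include <bpf/bpf_helpers.h>
+
+    char LICENSE[] SEC("license") = "GPL";
+
+    SEC("tracepoint/syscalls/sys_enter_execve")
+    int handle_execve(void *ctx)
+    {
+        /* Nanoseconds since system boot. */
+        __u64 now = bpf_ktime_get_ns();
+
+        /* Print a debugging message to the kernel trace pipe. */
+        bpf_printk("execve at %llu ns since boot", now);
+        return 0;
+    }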
+
+BPF CO-RE (Compile Once – Run Everywhere)
+=========================================
+
+BPF programs work in the kernel space and have access to kernel memory and data
+structures. One limitation that BPF applications come across is the lack of
+portability across different kernel versions and configurations. `BCC
+<https://github.com/iovisor/bcc/>`_ is one of the solutions for BPF
+portability. However, it comes with runtime overhead and a large binary size
+from embedding the compiler with the application.
+
+libbpf steps up the BPF program portability by supporting the BPF CO-RE concept.
+BPF CO-RE brings together BTF type information, libbpf, and the compiler to
+produce a single executable binary that you can run on multiple kernel versions
+and configurations.
+
+To make BPF programs portable, libbpf relies on the BTF type information of the
+running kernel. The kernel also exposes this self-describing authoritative BTF
+information through ``sysfs`` at ``/sys/kernel/btf/vmlinux``.
+
+You can generate the BTF information for the running kernel with the following
+command:
+
+::
+
+ $ bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h
+
+The command generates a ``vmlinux.h`` header file with all kernel types
+(:doc:`BTF types <../btf>`) that the running kernel uses. Including
+``vmlinux.h`` in your BPF program eliminates dependency on system-wide kernel
+headers.
+
+libbpf enables portability of BPF programs by looking at the BPF program’s
+recorded BTF type and relocation information and matching them to BTF
+information (vmlinux) provided by the running kernel. libbpf then resolves and
+matches all the types and fields, and updates necessary offsets and other
+relocatable data to ensure that the BPF program’s logic functions correctly for
+a specific kernel on the host. The BPF CO-RE concept thus eliminates the
+overhead associated with BPF development and allows developers to write portable
+BPF applications without modifications or runtime source code compilation on the
+target machine.
+
+The following code snippet shows how to read the parent field of a kernel
+``task_struct`` using BPF CO-RE and libbpf. The basic helper to read a field in a
+CO-RE relocatable manner is ``bpf_core_read(dst, sz, src)``, which will read
+``sz`` bytes from the field referenced by ``src`` into the memory pointed to by
+``dst``.
+
+.. code-block:: C
+ :emphasize-lines: 6
+
+ //...
+ struct task_struct *task = (void *)bpf_get_current_task();
+ struct task_struct *parent_task;
+ int err;
+
+ err = bpf_core_read(&parent_task, sizeof(void *), &task->parent);
+ if (err) {
+ /* handle error */
+ }
+
+ /* parent_task contains the value of task->parent pointer */
+
+In the code snippet, we first get a pointer to the current ``task_struct`` using
+``bpf_get_current_task()``. We then use ``bpf_core_read()`` to read the parent
+field of task struct into the ``parent_task`` variable. ``bpf_core_read()`` is
+just like the ``bpf_probe_read_kernel()`` BPF helper, except that it records
+information about the field that should be relocated on the target kernel. I.e.,
+if the ``parent`` field gets shifted to a different offset within
+``struct task_struct`` due to some new field added in front of it, libbpf will
+automatically adjust the actual offset to the proper value.
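+
+For reading chains of pointers, libbpf also provides convenience macros built
+on top of this mechanism. A short sketch using the ``BPF_CORE_READ()`` macro
+from ``bpf/bpf_core_read.h``, assuming ``task`` is valid as in the snippet
+above:
+
+.. code-block:: C
+
+    #include <bpf/bpf_core_read.h>
+
+    /* Read task->parent->pid with a CO-RE relocation applied at each
+     * step, equivalent to a sequence of bpf_core_read() calls. */
+    pid_t ppid = BPF_CORE_READ(task, parent, pid);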
+
+Getting Started with libbpf
+===========================
+
+Check out the `libbpf-bootstrap <https://github.com/libbpf/libbpf-bootstrap>`_
+repository with simple examples of using libbpf to build various BPF
+applications.
+
+See also `libbpf API documentation
+<https://libbpf.readthedocs.io/en/latest/api.html>`_.
+
+libbpf and Rust
+===============
+
+If you are building BPF applications in Rust, it is recommended to use the
+`Libbpf-rs <https://github.com/libbpf/libbpf-rs>`_ library instead of using
+bindgen bindings to libbpf directly. Libbpf-rs wraps libbpf functionality in
+Rust-idiomatic interfaces and provides the libbpf-cargo plugin to handle BPF
+code compilation and skeleton generation. Using Libbpf-rs will make building
+the user space part of the BPF application easier. Note that the BPF programs
+themselves must still be written in plain C.
+
+Additional Documentation
+========================
+
+* `Program types and ELF Sections <https://libbpf.readthedocs.io/en/latest/program_types.html>`_
+* `API naming convention <https://libbpf.readthedocs.io/en/latest/libbpf_naming_convention.html>`_
+* `Building libbpf <https://libbpf.readthedocs.io/en/latest/libbpf_build.html>`_
+* `API documentation Convention <https://libbpf.readthedocs.io/en/latest/libbpf_naming_convention.html#api-documentation-convention>`_
diff --git a/Documentation/bpf/linux-notes.rst b/Documentation/bpf/linux-notes.rst
index 956b0c86699d..508d009d3bed 100644
--- a/Documentation/bpf/linux-notes.rst
+++ b/Documentation/bpf/linux-notes.rst
@@ -12,6 +12,36 @@ Byte swap instructions
``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and ``BPF_TO_BE`` respectively.
+Jump instructions
+=================
+
+``BPF_CALL | BPF_X | BPF_JMP`` (0x8d), where the helper function
+integer would be read from a specified register, is not currently supported
+by the verifier. Any programs with this instruction will fail to load
+until such support is added.
+
+Maps
+====
+
+Linux only supports the 'map_val(map)' operation on array maps with a single element.
+
+Linux uses an fd_array to store maps associated with a BPF program. Thus,
+map_by_idx(imm) uses the fd at that index in the array.
+
+Variables
+=========
+
+The following 64-bit immediate instruction loads the address of a variable,
+identified by the integer stored in the 'imm' field:
+
+========================= ====== === ========================================= =========== ==============
+opcode construction opcode src pseudocode imm type dst type
+========================= ====== === ========================================= =========== ==============
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x3 dst = var_addr(imm) variable id data pointer
+========================= ====== === ========================================= =========== ==============
+
+On Linux, this integer is a BTF ID.
+
Legacy BPF Packet access instructions
=====================================
diff --git a/Documentation/devicetree/bindings/interrupt-controller/loongarch,cpu-interrupt-controller.yaml b/Documentation/devicetree/bindings/interrupt-controller/loongson,cpu-interrupt-controller.yaml
index 2a1cf885c99d..adf989976dcc 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/loongarch,cpu-interrupt-controller.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/loongson,cpu-interrupt-controller.yaml
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
-$id: http://devicetree.org/schemas/interrupt-controller/loongarch,cpu-interrupt-controller.yaml#
+$id: http://devicetree.org/schemas/interrupt-controller/loongson,cpu-interrupt-controller.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: LoongArch CPU Interrupt Controller
@@ -11,7 +11,7 @@ maintainers:
properties:
compatible:
- const: loongarch,cpu-interrupt-controller
+ const: loongson,cpu-interrupt-controller
'#interrupt-cells':
const: 1
@@ -28,7 +28,7 @@ required:
examples:
- |
interrupt-controller {
- compatible = "loongarch,cpu-interrupt-controller";
+ compatible = "loongson,cpu-interrupt-controller";
#interrupt-cells = <1>;
interrupt-controller;
};
diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.yaml b/Documentation/devicetree/bindings/net/dsa/qca8k.yaml
index fe9ebe285938..df64eebebe18 100644
--- a/Documentation/devicetree/bindings/net/dsa/qca8k.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/qca8k.yaml
@@ -18,6 +18,8 @@ description:
PHY it is connected to. In this config, an internal mdio-bus is registered and
the MDIO master is used for communication. Mixed external and internal
mdio-bus configurations are not supported by the hardware.
+ Each PHY has at most 3 LEDs connected; these can be declared
+ using the standard LEDs structure.
properties:
compatible:
@@ -117,6 +119,7 @@ unevaluatedProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/leds/common.h>
mdio {
#address-cells = <1>;
@@ -226,6 +229,25 @@ examples:
label = "lan1";
phy-mode = "internal";
phy-handle = <&internal_phy_port1>;
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_WHITE>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+
+ led@1 {
+ reg = <1>;
+ color = <LED_COLOR_ID_AMBER>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
};
port@2 {
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index 00be387984ac..6b0d359367da 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -205,7 +205,7 @@ properties:
duplex is assumed.
pause:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Indicates that pause should be enabled.
@@ -222,6 +222,41 @@ properties:
required:
- speed
+ leds:
+ description:
+ Describes the LEDs associated with the Ethernet Controller.
+ These LEDs are not integrated in the PHY and the PHY doesn't have any
+ control over them. Ethernet Controller registers are used to control
+ these LEDs.
+
+ type: object
+
+ properties:
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ patternProperties:
+ '^led@[a-f0-9]+$':
+ $ref: /schemas/leds/common.yaml#
+
+ properties:
+ reg:
+ maxItems: 1
+ description:
+ This defines the LED index in the PHY or the MAC. It is
+ driver dependent and required for ports that define multiple
+ LEDs for the same port.
+
+ required:
+ - reg
+
+ unevaluatedProperties: false
+
+ additionalProperties: false
+
dependencies:
pcs-handle-names: [pcs-handle]
diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
index ac04f8efa35c..4f574532ee13 100644
--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -197,6 +197,35 @@ properties:
PHY's that have configurable TX internal delays. If this property is
present then the PHY applies the TX delay.
+ leds:
+ type: object
+
+ properties:
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ patternProperties:
+ '^led@[a-f0-9]+$':
+ $ref: /schemas/leds/common.yaml#
+
+ properties:
+ reg:
+ maxItems: 1
+ description:
+ This defines the LED index in the PHY or the MAC. It is
+ driver dependent and required for ports that define multiple
+ LEDs for the same port.
+
+ required:
+ - reg
+
+ unevaluatedProperties: false
+
+ additionalProperties: false
+
required:
- reg
@@ -204,6 +233,8 @@ additionalProperties: true
examples:
- |
+ #include <dt-bindings/leds/common.h>
+
ethernet {
#address-cells = <1>;
#size-cells = <0>;
@@ -219,5 +250,17 @@ examples:
reset-gpios = <&gpio1 4 1>;
reset-assert-us = <1000>;
reset-deassert-us = <2000>;
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_WHITE>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
};
};
diff --git a/Documentation/devicetree/bindings/net/ethernet-switch.yaml b/Documentation/devicetree/bindings/net/ethernet-switch.yaml
index 2ceccce6cbd7..f1b9075dc7fb 100644
--- a/Documentation/devicetree/bindings/net/ethernet-switch.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-switch.yaml
@@ -55,7 +55,7 @@ additionalProperties: true
$defs:
base:
description: An ethernet switch without any extra port properties
- $ref: '#/'
+ $ref: '#'
patternProperties:
"^(ethernet-)?port@[0-9]+$":
diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.txt b/Documentation/devicetree/bindings/net/qcom,ethqos.txt
deleted file mode 100644
index 1f5746849a71..000000000000
--- a/Documentation/devicetree/bindings/net/qcom,ethqos.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Qualcomm Ethernet ETHQOS device
-
-This documents dwmmac based ethernet device which supports Gigabit
-ethernet for version v2.3.0 onwards.
-
-This device has following properties:
-
-Required properties:
-
-- compatible: Should be one of:
- "qcom,qcs404-ethqos"
- "qcom,sm8150-ethqos"
-
-- reg: Address and length of the register set for the device
-
-- reg-names: Should contain register names "stmmaceth", "rgmii"
-
-- clocks: Should contain phandle to clocks
-
-- clock-names: Should contain clock names "stmmaceth", "pclk",
- "ptp_ref", "rgmii"
-
-- interrupts: Should contain phandle to interrupts
-
-- interrupt-names: Should contain interrupt names "macirq", "eth_lpi"
-
-Rest of the properties are defined in stmmac.txt file in same directory
-
-
-Example:
-
-ethernet: ethernet@7a80000 {
- compatible = "qcom,qcs404-ethqos";
- reg = <0x07a80000 0x10000>,
- <0x07a96000 0x100>;
- reg-names = "stmmaceth", "rgmii";
- clock-names = "stmmaceth", "pclk", "ptp_ref", "rgmii";
- clocks = <&gcc GCC_ETH_AXI_CLK>,
- <&gcc GCC_ETH_SLAVE_AHB_CLK>,
- <&gcc GCC_ETH_PTP_CLK>,
- <&gcc GCC_ETH_RGMII_CLK>;
- interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "macirq", "eth_lpi";
- snps,reset-gpio = <&tlmm 60 GPIO_ACTIVE_LOW>;
- snps,reset-active-low;
-
- snps,txpbl = <8>;
- snps,rxpbl = <2>;
- snps,aal;
- snps,tso;
-
- phy-handle = <&phy1>;
- phy-mode = "rgmii";
-
- mdio {
- #address-cells = <0x1>;
- #size-cells = <0x0>;
- compatible = "snps,dwmac-mdio";
- phy1: phy@4 {
- device_type = "ethernet-phy";
- reg = <0x4>;
- };
- };
-
-};
diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
new file mode 100644
index 000000000000..60a38044fb19
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/qcom,ethqos.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Ethernet ETHQOS device
+
+maintainers:
+ - Bhupesh Sharma <bhupesh.sharma@linaro.org>
+
+description:
+ dwmac based Qualcomm ethernet devices which support Gigabit
+ ethernet (version v2.3.0 and onwards).
+
+allOf:
+ - $ref: snps,dwmac.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,qcs404-ethqos
+ - qcom,sc8280xp-ethqos
+ - qcom,sm8150-ethqos
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: stmmaceth
+ - const: rgmii
+
+ interrupts:
+ items:
+ - description: Combined signal for various interrupt events
+ - description: The interrupt that occurs when Rx exits the LPI state
+
+ interrupt-names:
+ items:
+ - const: macirq
+ - const: eth_lpi
+
+ clocks:
+ maxItems: 4
+
+ clock-names:
+ items:
+ - const: stmmaceth
+ - const: pclk
+ - const: ptp_ref
+ - const: rgmii
+
+ iommus:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - reg-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/qcom,gcc-qcs404.h>
+ #include <dt-bindings/gpio/gpio.h>
+
+ ethernet: ethernet@7a80000 {
+ compatible = "qcom,qcs404-ethqos";
+ reg = <0x07a80000 0x10000>,
+ <0x07a96000 0x100>;
+ reg-names = "stmmaceth", "rgmii";
+ clock-names = "stmmaceth", "pclk", "ptp_ref", "rgmii";
+ clocks = <&gcc GCC_ETH_AXI_CLK>,
+ <&gcc GCC_ETH_SLAVE_AHB_CLK>,
+ <&gcc GCC_ETH_PTP_CLK>,
+ <&gcc GCC_ETH_RGMII_CLK>;
+ interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
+
+ rx-fifo-depth = <4096>;
+ tx-fifo-depth = <4096>;
+
+ snps,tso;
+ snps,reset-gpio = <&tlmm 60 GPIO_ACTIVE_LOW>;
+ snps,reset-active-low;
+ snps,reset-delays-us = <0 10000 10000>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&ethernet_defaults>;
+
+ phy-handle = <&phy1>;
+ phy-mode = "rgmii";
+ mdio {
+ #address-cells = <0x1>;
+ #size-cells = <0x0>;
+
+ compatible = "snps,dwmac-mdio";
+ phy1: phy@4 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ device_type = "ethernet-phy";
+ reg = <0x4>;
+
+ #phy-cells = <0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 74f2ddc12018..363b3e3ea3a6 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -30,6 +30,7 @@ select:
- snps,dwmac-4.10a
- snps,dwmac-4.20a
- snps,dwmac-5.10a
+ - snps,dwmac-5.20
- snps,dwxgmac
- snps,dwxgmac-2.10
@@ -65,6 +66,9 @@ properties:
- ingenic,x2000-mac
- loongson,ls2k-dwmac
- loongson,ls7a-dwmac
+ - qcom,qcs404-ethqos
+ - qcom,sc8280xp-ethqos
+ - qcom,sm8150-ethqos
- renesas,r9a06g032-gmac
- renesas,rzn1-gmac
- rockchip,px30-gmac
@@ -87,8 +91,10 @@ properties:
- snps,dwmac-4.10a
- snps,dwmac-4.20a
- snps,dwmac-5.10a
+ - snps,dwmac-5.20
- snps,dwxgmac
- snps,dwxgmac-2.10
+ - starfive,jh7110-dwmac
reg:
minItems: 1
@@ -105,7 +111,7 @@ properties:
minItems: 1
items:
- const: macirq
- - const: eth_wake_irq
+ - enum: [eth_wake_irq, eth_lpi]
- const: eth_lpi
clocks:
@@ -131,12 +137,16 @@ properties:
- ptp_ref
resets:
- maxItems: 1
- description:
- MAC Reset signal.
+ minItems: 1
+ items:
+ - description: GMAC stmmaceth reset
+ - description: AHB reset
reset-names:
- const: stmmaceth
+ minItems: 1
+ items:
+ - const: stmmaceth
+ - const: ahb
power-domains:
maxItems: 1
@@ -572,9 +582,11 @@ allOf:
- ingenic,x1600-mac
- ingenic,x1830-mac
- ingenic,x2000-mac
+ - qcom,sc8280xp-ethqos
- snps,dwmac-3.50a
- snps,dwmac-4.10a
- snps,dwmac-4.20a
+ - snps,dwmac-5.20
- snps,dwxgmac
- snps,dwxgmac-2.10
- st,spear600-gmac
@@ -625,10 +637,14 @@ allOf:
- ingenic,x1600-mac
- ingenic,x1830-mac
- ingenic,x2000-mac
+ - qcom,qcs404-ethqos
+ - qcom,sc8280xp-ethqos
+ - qcom,sm8150-ethqos
- snps,dwmac-4.00
- snps,dwmac-4.10a
- snps,dwmac-4.20a
- snps,dwmac-5.10a
+ - snps,dwmac-5.20
- snps,dwxgmac
- snps,dwxgmac-2.10
- st,spear600-gmac
diff --git a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
new file mode 100644
index 000000000000..5e7cfbbebce6
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2022 StarFive Technology Co., Ltd.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/starfive,jh7110-dwmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive JH7110 DWMAC glue layer
+
+maintainers:
+ - Emil Renner Berthing <kernel@esmil.dk>
+ - Samin Guo <samin.guo@starfivetech.com>
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - starfive,jh7110-dwmac
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - starfive,jh7110-dwmac
+ - const: snps,dwmac-5.20
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: GMAC main clock
+ - description: GMAC AHB clock
+ - description: PTP clock
+ - description: TX clock
+ - description: GTX clock
+
+ clock-names:
+ items:
+ - const: stmmaceth
+ - const: pclk
+ - const: ptp_ref
+ - const: tx
+ - const: gtx
+
+ interrupts:
+ minItems: 3
+ maxItems: 3
+
+ interrupt-names:
+ minItems: 3
+ maxItems: 3
+
+ resets:
+ items:
+ - description: MAC Reset signal.
+ - description: AHB Reset signal.
+
+ reset-names:
+ items:
+ - const: stmmaceth
+ - const: ahb
+
+ starfive,tx-use-rgmii-clk:
+ description:
+ The Tx clock is provided by an external RGMII clock.
+ type: boolean
+
+ starfive,syscon:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ - items:
+ - description: phandle to syscon that configures phy mode
+ - description: Offset of phy mode selection
+ - description: Shift of phy mode selection
+ description:
+ A phandle to the syscon with two arguments that configure the phy
+ mode. The first argument is the offset of the phy mode selection,
+ the second is its shift.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - interrupts
+ - interrupt-names
+ - resets
+ - reset-names
+
+allOf:
+ - $ref: snps,dwmac.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ ethernet@16030000 {
+ compatible = "starfive,jh7110-dwmac", "snps,dwmac-5.20";
+ reg = <0x16030000 0x10000>;
+ clocks = <&clk 3>, <&clk 2>, <&clk 109>,
+ <&clk 6>, <&clk 111>;
+ clock-names = "stmmaceth", "pclk", "ptp_ref",
+ "tx", "gtx";
+ resets = <&rst 1>, <&rst 2>;
+ reset-names = "stmmaceth", "ahb";
+ interrupts = <7>, <6>, <5>;
+ interrupt-names = "macirq", "eth_wake_irq", "eth_lpi";
+ phy-mode = "rgmii-id";
+ snps,multicast-filter-bins = <64>;
+ snps,perfect-filter-entries = <8>;
+ rx-fifo-depth = <2048>;
+ tx-fifo-depth = <2048>;
+ snps,fixed-burst;
+ snps,no-pbl-x8;
+ snps,tso;
+ snps,force_thresh_dma_mode;
+ snps,axi-config = <&stmmac_axi_setup>;
+ snps,en-tx-lpi-clockgating;
+ snps,txpbl = <16>;
+ snps,rxpbl = <16>;
+ starfive,syscon = <&aon_syscon 0xc 0x12>;
+ phy-handle = <&phy0>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,dwmac-mdio";
+
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+ };
+
+ stmmac_axi_setup: stmmac-axi-config {
+ snps,lpi_en;
+ snps,wr_osr_lmt = <4>;
+ snps,rd_osr_lmt = <4>;
+ snps,blen = <256 128 64 32 0 0 0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml
index 5e90051ed314..8f60a9113e7a 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml
@@ -96,9 +96,11 @@ $defs:
2: Lower Slew rate (slower edges)
3: Reserved (No adjustments)
+ bias-bus-hold: true
bias-pull-down: true
bias-pull-up: true
bias-disable: true
+ input-enable: true
output-high: true
output-low: true
diff --git a/Documentation/devicetree/bindings/serial/renesas,scif.yaml b/Documentation/devicetree/bindings/serial/renesas,scif.yaml
index 1989bd67d04e..54e4f41be9b4 100644
--- a/Documentation/devicetree/bindings/serial/renesas,scif.yaml
+++ b/Documentation/devicetree/bindings/serial/renesas,scif.yaml
@@ -92,7 +92,7 @@ properties:
- description: Error interrupt
- description: Receive buffer full interrupt
- description: Transmit buffer empty interrupt
- - description: Transmit End interrupt
+ - description: Break interrupt
- items:
- description: Error interrupt
- description: Receive buffer full interrupt
@@ -107,7 +107,7 @@ properties:
- const: eri
- const: rxi
- const: txi
- - const: tei
+ - const: bri
- items:
- const: eri
- const: rxi
diff --git a/Documentation/hwmon/hwmon-kernel-api.rst b/Documentation/hwmon/hwmon-kernel-api.rst
index 5451a6d4c874..f00e4a01c240 100644
--- a/Documentation/hwmon/hwmon-kernel-api.rst
+++ b/Documentation/hwmon/hwmon-kernel-api.rst
@@ -133,7 +133,7 @@ The hwmon_chip_info structure looks as follows::
struct hwmon_chip_info {
const struct hwmon_ops *ops;
- const struct hwmon_channel_info **info;
+ const struct hwmon_channel_info * const *info;
};
It contains the following fields:
@@ -229,7 +229,7 @@ register (HWMON_T_MAX) as well as a maximum temperature hysteresis register
.config = lm75_temp_config,
};
- static const struct hwmon_channel_info *lm75_info[] = {
+ static const struct hwmon_channel_info * const lm75_info[] = {
&lm75_chip,
&lm75_temp,
NULL
@@ -238,7 +238,7 @@ register (HWMON_T_MAX) as well as a maximum temperature hysteresis register
The HWMON_CHANNEL_INFO() macro can and should be used when possible.
With this macro, the above example can be simplified to
- static const struct hwmon_channel_info *lm75_info[] = {
+ static const struct hwmon_channel_info * const lm75_info[] = {
HWMON_CHANNEL_INFO(chip,
HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL),
HWMON_CHANNEL_INFO(temp,
diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst
index bfb51685073c..c3851fe1900d 100644
--- a/Documentation/kbuild/llvm.rst
+++ b/Documentation/kbuild/llvm.rst
@@ -171,6 +171,10 @@ Getting Help
Getting LLVM
-------------
+We provide prebuilt stable versions of LLVM on `kernel.org <https://kernel.org/pub/tools/llvm/>`_.
+Below are links that may be useful for building LLVM from source or procuring
+it through a distribution's package manager.
+
- https://releases.llvm.org/download.html
- https://github.com/llvm/llvm-project
- https://llvm.org/docs/GettingStarted.html
diff --git a/Documentation/leds/well-known-leds.txt b/Documentation/leds/well-known-leds.txt
index 2160382c86be..e9c30dc75884 100644
--- a/Documentation/leds/well-known-leds.txt
+++ b/Documentation/leds/well-known-leds.txt
@@ -70,3 +70,33 @@ Good: "platform:*:charging" (allwinner sun50i)
* Screen
Good: ":backlight" (Motorola Droid 4)
+
+* Ethernet LEDs
+
+Currently two types of Network LEDs are supported: those controlled by
+the PHY and those controlled by the MAC. In theory both can be present
+at the same time for one Linux netdev, hence the names need to differ
+between MAC and PHY.
+
+Do not use the netdev name, such as eth0 or enp1s0. These are not stable
+and are not unique. They also don't differentiate between MAC and PHY.
+
+** MAC LEDs
+
+Good: f1070000.ethernet:white:WAN
+Good: mdio_mux-0.1:00:green:left
+Good: 0000:02:00.0:yellow:top
+
+The first part must uniquely name the MAC controller. Then follows the
+colour. WAN/LAN should be used for a single LED. If there are
+multiple LEDs, use left/right or top/bottom to indicate their
+position on the RJ45 socket.
+
+** PHY LEDs
+
+Good: f1072004.mdio-mii:00:white:WAN
+Good: !mdio-mux!mdio@2!switch@0!mdio:01:green:right
+Good: r8169-0-200:00:yellow:bottom
+
+The first part must uniquely name the PHY. This often means uniquely
+identifying the MDIO bus controller and the address on the bus.
diff --git a/Documentation/mm/zsmalloc.rst b/Documentation/mm/zsmalloc.rst
index 64d127bfc221..a3c26d587752 100644
--- a/Documentation/mm/zsmalloc.rst
+++ b/Documentation/mm/zsmalloc.rst
@@ -39,13 +39,12 @@ With CONFIG_ZSMALLOC_STAT, we could see zsmalloc internal information via
# cat /sys/kernel/debug/zsmalloc/zram0/classes
- class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage
+ class size 10% 20% 30% 40% 50% 60% 70% 80% 90% 99% 100% obj_allocated obj_used pages_used pages_per_zspage freeable
...
...
- 9 176 0 1 186 129 8 4
- 10 192 1 0 2880 2872 135 3
- 11 208 0 1 819 795 42 2
- 12 224 0 1 219 159 12 4
+ 30 512 0 12 4 1 0 1 0 0 1 0 414 3464 3346 433 1 14
+ 31 528 2 7 2 2 1 0 1 0 0 2 117 4154 3793 536 4 44
+ 32 544 6 3 4 1 2 1 0 0 0 1 260 4170 3965 556 2 26
...
...
@@ -54,10 +53,28 @@ class
index
size
object size zspage stores
-almost_empty
- the number of ZS_ALMOST_EMPTY zspages(see below)
-almost_full
- the number of ZS_ALMOST_FULL zspages(see below)
+10%
+ the number of zspages with usage ratio less than 10% (see below)
+20%
+ the number of zspages with usage ratio between 10% and 20%
+30%
+ the number of zspages with usage ratio between 20% and 30%
+40%
+ the number of zspages with usage ratio between 30% and 40%
+50%
+ the number of zspages with usage ratio between 40% and 50%
+60%
+ the number of zspages with usage ratio between 50% and 60%
+70%
+ the number of zspages with usage ratio between 60% and 70%
+80%
+ the number of zspages with usage ratio between 70% and 80%
+90%
+ the number of zspages with usage ratio between 80% and 90%
+99%
+ the number of zspages with usage ratio between 90% and 99%
+100%
+ the number of zspages with usage ratio 100%
obj_allocated
the number of objects allocated
obj_used
@@ -66,19 +83,14 @@ pages_used
the number of pages allocated for the class
pages_per_zspage
the number of 0-order pages to make a zspage
+freeable
+ the approximate number of pages class compaction can free
-We assign a zspage to ZS_ALMOST_EMPTY fullness group when n <= N / f, where
-
-* n = number of allocated objects
-* N = total number of objects zspage can store
-* f = fullness_threshold_frac(ie, 4 at the moment)
-
-Similarly, we assign zspage to:
-
-* ZS_ALMOST_FULL when n > N / f
-* ZS_EMPTY when n == 0
-* ZS_FULL when n == N
-
+Each zspage maintains an inuse counter, which keeps track of the number
+of objects stored in the zspage. The inuse counter determines the zspage's
+"fullness group", which is calculated as the ratio of "inuse" objects to
+the total number of objects the zspage can hold (objs_per_zspage). The
+closer the inuse counter is to objs_per_zspage, the better.
Internals
=========
@@ -94,10 +106,10 @@ of objects that each zspage can store.
For instance, consider the following size classes:::
- class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable
+ class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable
...
- 94 1536 0 0 0 0 0 3 0
- 100 1632 0 0 0 0 0 2 0
+ 94 1536 0 .... 0 0 0 0 3 0
+ 100 1632 0 .... 0 0 0 0 2 0
...
@@ -134,10 +146,11 @@ reduces memory wastage.
Let's take a closer look at the bottom of `/sys/kernel/debug/zsmalloc/zramX/classes`:::
- class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable
+ class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable
+
...
- 202 3264 0 0 0 0 0 4 0
- 254 4096 0 0 0 0 0 1 0
+ 202 3264 0 .. 0 0 0 0 4 0
+ 254 4096 0 .. 0 0 0 0 1 0
...
Size class #202 stores objects of size 3264 bytes and has a maximum of 4 pages
@@ -151,40 +164,42 @@ efficient storage of large objects.
For zspage chain size of 8, huge class watermark becomes 3632 bytes:::
- class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable
+ class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable
+
...
- 202 3264 0 0 0 0 0 4 0
- 211 3408 0 0 0 0 0 5 0
- 217 3504 0 0 0 0 0 6 0
- 222 3584 0 0 0 0 0 7 0
- 225 3632 0 0 0 0 0 8 0
- 254 4096 0 0 0 0 0 1 0
+ 202 3264 0 .. 0 0 0 0 4 0
+ 211 3408 0 .. 0 0 0 0 5 0
+ 217 3504 0 .. 0 0 0 0 6 0
+ 222 3584 0 .. 0 0 0 0 7 0
+ 225 3632 0 .. 0 0 0 0 8 0
+ 254 4096 0 .. 0 0 0 0 1 0
...
For zspage chain size of 16, huge class watermark becomes 3840 bytes:::
- class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable
+ class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable
+
...
- 202 3264 0 0 0 0 0 4 0
- 206 3328 0 0 0 0 0 13 0
- 207 3344 0 0 0 0 0 9 0
- 208 3360 0 0 0 0 0 14 0
- 211 3408 0 0 0 0 0 5 0
- 212 3424 0 0 0 0 0 16 0
- 214 3456 0 0 0 0 0 11 0
- 217 3504 0 0 0 0 0 6 0
- 219 3536 0 0 0 0 0 13 0
- 222 3584 0 0 0 0 0 7 0
- 223 3600 0 0 0 0 0 15 0
- 225 3632 0 0 0 0 0 8 0
- 228 3680 0 0 0 0 0 9 0
- 230 3712 0 0 0 0 0 10 0
- 232 3744 0 0 0 0 0 11 0
- 234 3776 0 0 0 0 0 12 0
- 235 3792 0 0 0 0 0 13 0
- 236 3808 0 0 0 0 0 14 0
- 238 3840 0 0 0 0 0 15 0
- 254 4096 0 0 0 0 0 1 0
+ 202 3264 0 .. 0 0 0 0 4 0
+ 206 3328 0 .. 0 0 0 0 13 0
+ 207 3344 0 .. 0 0 0 0 9 0
+ 208 3360 0 .. 0 0 0 0 14 0
+ 211 3408 0 .. 0 0 0 0 5 0
+ 212 3424 0 .. 0 0 0 0 16 0
+ 214 3456 0 .. 0 0 0 0 11 0
+ 217 3504 0 .. 0 0 0 0 6 0
+ 219 3536 0 .. 0 0 0 0 13 0
+ 222 3584 0 .. 0 0 0 0 7 0
+ 223 3600 0 .. 0 0 0 0 15 0
+ 225 3632 0 .. 0 0 0 0 8 0
+ 228 3680 0 .. 0 0 0 0 9 0
+ 230 3712 0 .. 0 0 0 0 10 0
+ 232 3744 0 .. 0 0 0 0 11 0
+ 234 3776 0 .. 0 0 0 0 12 0
+ 235 3792 0 .. 0 0 0 0 13 0
+ 236 3808 0 .. 0 0 0 0 14 0
+ 238 3840 0 .. 0 0 0 0 15 0
+ 254 4096 0 .. 0 0 0 0 1 0
...
Overall the combined zspage chain size effect on zsmalloc pool configuration:::
@@ -214,9 +229,10 @@ zram as a build artifacts storage (Linux kernel compilation).
zsmalloc classes stats:::
- class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable
+ class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable
+
...
- Total 13 51 413836 412973 159955 3
+ Total 13 .. 51 413836 412973 159955 3
zram mm_stat:::
@@ -227,9 +243,10 @@ zram as a build artifacts storage (Linux kernel compilation).
zsmalloc classes stats:::
- class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable
+ class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable
+
...
- Total 18 87 414852 412978 156666 0
+ Total 18 .. 87 414852 412978 156666 0
zram mm_stat:::
diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml
new file mode 100644
index 000000000000..614f1a585511
--- /dev/null
+++ b/Documentation/netlink/specs/handshake.yaml
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+#
+# Author: Chuck Lever <chuck.lever@oracle.com>
+#
+# Copyright (c) 2023, Oracle and/or its affiliates.
+#
+
+name: handshake
+
+protocol: genetlink
+
+doc: Netlink protocol to request a transport layer security handshake.
+
+definitions:
+ -
+ type: enum
+ name: handler-class
+ value-start: 0
+ entries: [ none, tlshd, max ]
+ -
+ type: enum
+ name: msg-type
+ value-start: 0
+ entries: [ unspec, clienthello, serverhello ]
+ -
+ type: enum
+ name: auth
+ value-start: 0
+ entries: [ unspec, unauth, psk, x509 ]
+
+attribute-sets:
+ -
+ name: x509
+ attributes:
+ -
+ name: cert
+ type: u32
+ -
+ name: privkey
+ type: u32
+ -
+ name: accept
+ attributes:
+ -
+ name: sockfd
+ type: u32
+ -
+ name: handler-class
+ type: u32
+ enum: handler-class
+ -
+ name: message-type
+ type: u32
+ enum: msg-type
+ -
+ name: timeout
+ type: u32
+ -
+ name: auth-mode
+ type: u32
+ enum: auth
+ -
+ name: peer-identity
+ type: u32
+ multi-attr: true
+ -
+ name: certificate
+ type: nest
+ nested-attributes: x509
+ multi-attr: true
+ -
+ name: done
+ attributes:
+ -
+ name: status
+ type: u32
+ -
+ name: sockfd
+ type: u32
+ -
+ name: remote-auth
+ type: u32
+ multi-attr: true
+
+operations:
+ list:
+ -
+ name: ready
+ doc: Notify handlers that a new handshake request is waiting
+ notify: accept
+ -
+ name: accept
+ doc: Handler retrieves next queued handshake request
+ attribute-set: accept
+ flags: [ admin-perm ]
+ do:
+ request:
+ attributes:
+ - handler-class
+ reply:
+ attributes:
+ - sockfd
+ - message-type
+ - timeout
+ - auth-mode
+ - peer-identity
+ - certificate
+ -
+ name: done
+ doc: Handler reports handshake completion
+ attribute-set: done
+ do:
+ request:
+ attributes:
+ - status
+ - sockfd
+ - remote-auth
+
+mcast-groups:
+ list:
+ -
+ name: none
+ -
+ name: tlshd
diff --git a/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
new file mode 100644
index 000000000000..9e8a16c44102
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
@@ -0,0 +1,139 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+========================================================
+Linux Driver for the AMD/Pensando(R) DSC adapter family
+========================================================
+
+Copyright(c) 2023 Advanced Micro Devices, Inc
+
+Identifying the Adapter
+=======================
+
+To find if one or more AMD/Pensando PCI Core devices are installed on the
+host, check for the PCI devices::
+
+ # lspci -d 1dd8:100c
+ b5:00.0 Processing accelerators: Pensando Systems Device 100c
+ b6:00.0 Processing accelerators: Pensando Systems Device 100c
+
+If such devices are listed as above, then the pds_core.ko driver should find
+and configure them for use. There should be log entries in the kernel
+messages such as these::
+
+ $ dmesg | grep pds_core
+ pds_core 0000:b5:00.0: 252.048 Gb/s available PCIe bandwidth (16.0 GT/s PCIe x16 link)
+ pds_core 0000:b5:00.0: FW: 1.60.0-73
+ pds_core 0000:b6:00.0: 252.048 Gb/s available PCIe bandwidth (16.0 GT/s PCIe x16 link)
+ pds_core 0000:b6:00.0: FW: 1.60.0-73
+
+Driver and firmware version information can be gathered with devlink::
+
+ $ devlink dev info pci/0000:b5:00.0
+ pci/0000:b5:00.0:
+ driver pds_core
+ serial_number FLM18420073
+ versions:
+ fixed:
+ asic.id 0x0
+ asic.rev 0x0
+ running:
+ fw 1.51.0-73
+ stored:
+ fw.goldfw 1.15.9-C-22
+ fw.mainfwa 1.60.0-73
+ fw.mainfwb 1.60.0-57
+
+Info versions
+=============
+
+The ``pds_core`` driver reports the following versions
+
+.. list-table:: devlink info versions implemented
+ :widths: 5 5 90
+
+ * - Name
+ - Type
+ - Description
+ * - ``fw``
+ - running
+ - Version of firmware running on the device
+ * - ``fw.goldfw``
+ - stored
+ - Version of firmware stored in the goldfw slot
+ * - ``fw.mainfwa``
+ - stored
+ - Version of firmware stored in the mainfwa slot
+ * - ``fw.mainfwb``
+ - stored
+ - Version of firmware stored in the mainfwb slot
+ * - ``asic.id``
+ - fixed
+ - The ASIC type for this device
+ * - ``asic.rev``
+ - fixed
+ - The revision of the ASIC for this device
+
+Parameters
+==========
+
+The ``pds_core`` driver implements the following generic
+parameters for controlling which functionality is made available
+as auxiliary_bus devices.
+
+.. list-table:: Generic parameters implemented
+ :widths: 5 5 8 82
+
+ * - Name
+ - Mode
+ - Type
+ - Description
+ * - ``enable_vnet``
+ - runtime
+ - Boolean
+ - Enables vDPA functionality through an auxiliary_bus device
+
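+For example, vDPA support might be enabled at runtime with a command
+along these lines (the PCI address here is illustrative)::
+
+  # devlink dev param set pci/0000:b5:00.0 \
+      name enable_vnet value true cmode runtime
+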
+Firmware Management
+===================
+
+The ``flash`` command can update the DSC firmware. The downloaded firmware
+will be saved into either firmware bank 1 or bank 2, whichever is not
+currently in use, and that bank will be used for the next boot::
+
+ # devlink dev flash pci/0000:b5:00.0 \
+ file pensando/dsc_fw_1.63.0-22.tar
+
+Health Reporters
+================
+
+The driver supports a devlink health reporter for FW status::
+
+ # devlink health show pci/0000:2b:00.0 reporter fw
+ pci/0000:2b:00.0:
+ reporter fw
+ state healthy error 0 recover 0
+ # devlink health diagnose pci/0000:2b:00.0 reporter fw
+ Status: healthy State: 1 Generation: 0 Recoveries: 0
+
+Enabling the driver
+===================
+
+The driver is enabled via the standard kernel configuration system,
+using the make command::
+
+ make oldconfig/menuconfig/etc.
+
+The driver is located in the menu structure at:
+
+ -> Device Drivers
+ -> Network device support (NETDEVICES [=y])
+ -> Ethernet driver support
+ -> AMD devices
+ -> AMD/Pensando Ethernet PDS_CORE Support
+
+Support
+=======
+
+For general Linux networking support, please use the netdev mailing
+list, which is monitored by AMD/Pensando personnel::
+
+ netdev@vger.kernel.org
diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst
index 6e9e7012d000..417ca514a4d0 100644
--- a/Documentation/networking/device_drivers/ethernet/index.rst
+++ b/Documentation/networking/device_drivers/ethernet/index.rst
@@ -14,6 +14,7 @@ Contents:
3com/vortex
amazon/ena
altera/altera_tse
+ amd/pds_core
aquantia/atlantic
chelsio/cxgb
cirrus/cs89x0
diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst
index 10f282c2117c..2f60e34ab926 100644
--- a/Documentation/networking/devlink/ice.rst
+++ b/Documentation/networking/devlink/ice.rst
@@ -7,6 +7,21 @@ ice devlink support
This document describes the devlink features implemented by the ``ice``
device driver.
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+ * - Name
+ - Mode
+ - Notes
+ * - ``enable_roce``
+ - runtime
+ - mutually exclusive with ``enable_iwarp``
+ * - ``enable_iwarp``
+ - runtime
+ - mutually exclusive with ``enable_roce``
+
Info versions
=============
diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst
index 64f7236ff10b..4f5dfa9c022e 100644
--- a/Documentation/networking/driver.rst
+++ b/Documentation/networking/driver.rst
@@ -4,94 +4,124 @@
Softnet Driver Issues
=====================
-Transmit path guidelines:
+Probing guidelines
+==================
-1) The ndo_start_xmit method must not return NETDEV_TX_BUSY under
- any normal circumstances. It is considered a hard error unless
- there is no way your device can tell ahead of time when its
- transmit function will become busy.
+Address validation
+------------------
- Instead it must maintain the queue properly. For example,
- for a driver implementing scatter-gather this means::
+Any hardware layer address you obtain for your device should
+be verified. For example, for ethernet check it with
+linux/etherdevice.h:is_valid_ether_addr().
+
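+A minimal sketch (how the @addr bytes are obtained, e.g. from device
+EEPROM, is driver-specific and assumed here):
+
+.. code-block:: c
+
+	/* Reject all-zero, broadcast, and multicast addresses */
+	if (!is_valid_ether_addr(addr))
+		return -EADDRNOTAVAIL;
+
+	eth_hw_addr_set(netdev, addr);
+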
+Close/stop guidelines
+=====================
+
+Quiescence
+----------
+
+After the ndo_stop routine has been called, the hardware must
+not receive or transmit any data. All in flight packets must
+be aborted. If necessary, poll or wait for completion of
+any reset commands.
+
+Auto-close
+----------
+
+The ndo_stop routine will be called by unregister_netdevice
+if the device is still UP.
+
+Transmit path guidelines
+========================
+
+Stop queues in advance
+----------------------
+
+The ndo_start_xmit method must not return NETDEV_TX_BUSY under
+any normal circumstances. It is considered a hard error unless
+there is no way your device can tell ahead of time when its
+transmit function will become busy.
+
+Instead it must maintain the queue properly. For example,
+for a driver implementing scatter-gather this means:
+
+.. code-block:: c
+
+ static u32 drv_tx_avail(struct drv_ring *dr)
+ {
+ u32 used = READ_ONCE(dr->prod) - READ_ONCE(dr->cons);
+
+         return dr->tx_ring_size - (used & dr->tx_ring_mask);
+ }
static netdev_tx_t drv_hard_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct drv *dp = netdev_priv(dev);
+ struct netdev_queue *txq;
+ struct drv_ring *dr;
+ int idx;
- lock_tx(dp);
- ...
- /* This is a hard error log it. */
- if (TX_BUFFS_AVAIL(dp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+ idx = skb_get_queue_mapping(skb);
+ dr = dp->tx_rings[idx];
+ txq = netdev_get_tx_queue(dev, idx);
+
+ //...
+ /* This should be a very rare race - log it. */
+ if (drv_tx_avail(dr) <= skb_shinfo(skb)->nr_frags + 1) {
netif_stop_queue(dev);
- unlock_tx(dp);
- printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
- dev->name);
+ netdev_warn(dev, "Tx Ring full when queue awake!\n");
return NETDEV_TX_BUSY;
}
- ... queue packet to card ...
- ... update tx consumer index ...
-
- if (TX_BUFFS_AVAIL(dp) <= (MAX_SKB_FRAGS + 1))
- netif_stop_queue(dev);
-
- ...
- unlock_tx(dp);
- ...
- return NETDEV_TX_OK;
- }
-
- And then at the end of your TX reclamation event handling::
+ //... queue packet to card ...
- if (netif_queue_stopped(dp->dev) &&
- TX_BUFFS_AVAIL(dp) > (MAX_SKB_FRAGS + 1))
- netif_wake_queue(dp->dev);
+ netdev_tx_sent_queue(txq, skb->len);
- For a non-scatter-gather supporting card, the three tests simply become::
+ //... update tx producer index using WRITE_ONCE() ...
- /* This is a hard error log it. */
- if (TX_BUFFS_AVAIL(dp) <= 0)
+ if (!netif_txq_maybe_stop(txq, drv_tx_avail(dr),
+ MAX_SKB_FRAGS + 1, 2 * MAX_SKB_FRAGS))
+ dr->stats.stopped++;
- and::
+ //...
+ return NETDEV_TX_OK;
+ }
- if (TX_BUFFS_AVAIL(dp) == 0)
+And then at the end of your TX reclamation event handling:
- and::
+.. code-block:: c
- if (netif_queue_stopped(dp->dev) &&
- TX_BUFFS_AVAIL(dp) > 0)
- netif_wake_queue(dp->dev);
+ //... update tx consumer index using WRITE_ONCE() ...
-2) An ndo_start_xmit method must not modify the shared parts of a
- cloned SKB.
+ netif_txq_completed_wake(txq, cmpl_pkts, cmpl_bytes,
+ drv_tx_avail(dr), 2 * MAX_SKB_FRAGS);
-3) Do not forget that once you return NETDEV_TX_OK from your
- ndo_start_xmit method, it is your driver's responsibility to free
- up the SKB and in some finite amount of time.
+Lockless queue stop / wake helper macros
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- For example, this means that it is not allowed for your TX
- mitigation scheme to let TX packets "hang out" in the TX
- ring unreclaimed forever if no new TX packets are sent.
- This error can deadlock sockets waiting for send buffer room
- to be freed up.
+.. kernel-doc:: include/net/netdev_queues.h
+ :doc: Lockless queue stopping / waking helpers.
- If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you
- must not keep any reference to that SKB and you must not attempt
- to free it up.
+No exclusive ownership
+----------------------
-Probing guidelines:
+An ndo_start_xmit method must not modify the shared parts of a
+cloned SKB.
-1) Any hardware layer address you obtain for your device should
- be verified. For example, for ethernet check it with
- linux/etherdevice.h:is_valid_ether_addr()
+Timely completions
+------------------
-Close/stop guidelines:
+Do not forget that once you return NETDEV_TX_OK from your
+ndo_start_xmit method, it is your driver's responsibility to free
+up the SKB and in some finite amount of time.
-1) After the ndo_stop routine has been called, the hardware must
- not receive or transmit any data. All in flight packets must
- be aborted. If necessary, poll or wait for completion of
- any reset commands.
+For example, this means that it is not allowed for your TX
+mitigation scheme to let TX packets "hang out" in the TX
+ring unreclaimed forever if no new TX packets are sent.
+This error can deadlock sockets waiting for send buffer room
+to be freed up.
-2) The ndo_stop routine will be called by unregister_netdevice
- if device is still UP.
+If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you
+must not keep any reference to that SKB and you must not attempt
+to free it up.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 24bb256d6d53..a164ff074356 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -36,6 +36,7 @@ Contents:
scaling
tls
tls-offload
+ tls-handshake
nfc
6lowpan
6pack
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 87dd1c5283e6..6ec06a33688a 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -340,6 +340,8 @@ tcp_app_win - INTEGER
Reserve max(window/2^tcp_app_win, mss) of window for application
buffer. Value 0 is special, it means that nothing is reserved.
+ Possible values are [0, 31], inclusive.
+
Default: 31
tcp_autocorking - BOOLEAN
@@ -2719,6 +2721,13 @@ echo_ignore_anycast - BOOLEAN
Default: 0
+error_anycast_as_unicast - BOOLEAN
+ If set to 1, then the kernel will respond with ICMP Errors
+ resulting from requests sent to it over the IPv6 protocol destined
+	to an anycast address, essentially treating anycast as unicast.
+
+ Default: 0
+
xfrm6_gc_thresh - INTEGER
(Obsolete since linux-4.14)
The threshold at which we will start garbage collecting for IPv6
diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst
index 30f1344e7cca..873efd97f822 100644
--- a/Documentation/networking/page_pool.rst
+++ b/Documentation/networking/page_pool.rst
@@ -165,6 +165,7 @@ Registration
pp_params.pool_size = DESC_NUM;
pp_params.nid = NUMA_NO_NODE;
pp_params.dev = priv->dev;
+ pp_params.napi = napi; /* only if locking is tied to NAPI */
pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
page_pool = page_pool_create(&pp_params);
diff --git a/Documentation/networking/tls-handshake.rst b/Documentation/networking/tls-handshake.rst
new file mode 100644
index 000000000000..a2817a88e905
--- /dev/null
+++ b/Documentation/networking/tls-handshake.rst
@@ -0,0 +1,217 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+In-Kernel TLS Handshake
+=======================
+
+Overview
+========
+
+Transport Layer Security (TLS) is an Upper Layer Protocol (ULP) that runs
+over TCP. TLS provides end-to-end data integrity and confidentiality in
+addition to peer authentication.
+
+The kernel's kTLS implementation handles the TLS record subprotocol, but
+does not handle the TLS handshake subprotocol which is used to establish
+a TLS session. Kernel consumers can use the API described here to
+request TLS session establishment.
+
+There are several possible ways to provide a handshake service in the
+kernel. The API described here is designed to hide the details of those
+implementations so that in-kernel TLS consumers do not need to be
+aware of how the handshake gets done.
+
+
+User handshake agent
+====================
+
+As of this writing, there is no TLS handshake implementation in the
+Linux kernel. To provide a handshake service, a handshake agent
+(typically in user space) is started in each network namespace where a
+kernel consumer might require a TLS handshake. Handshake agents listen
+for events sent from the kernel that indicate a handshake request is
+waiting.
+
+An open socket is passed to a handshake agent via a netlink operation,
+which creates a socket descriptor in the agent's file descriptor table.
+If the handshake completes successfully, the handshake agent promotes
+the socket to use the TLS ULP and sets the session information using the
+SOL_TLS socket options. The handshake agent returns the socket to the
+kernel via a second netlink operation.
+
+
+Kernel Handshake API
+====================
+
+A kernel TLS consumer initiates a client-side TLS handshake on an open
+socket by invoking one of the tls_client_hello() functions. First, it
+fills in a structure that contains the parameters of the request:
+
+.. code-block:: c
+
+ struct tls_handshake_args {
+ struct socket *ta_sock;
+ tls_done_func_t ta_done;
+ void *ta_data;
+ unsigned int ta_timeout_ms;
+ key_serial_t ta_keyring;
+ key_serial_t ta_my_cert;
+ key_serial_t ta_my_privkey;
+ unsigned int ta_num_peerids;
+ key_serial_t ta_my_peerids[5];
+ };
+
+The @ta_sock field references an open and connected socket. The consumer
+must hold a reference on the socket to prevent it from being destroyed
+while the handshake is in progress. The consumer must also have
+instantiated a struct file in sock->file.
+
+
+@ta_done contains a callback function that is invoked when the handshake
+has completed. Further explanation of this function is in the "Handshake
+Completion" sesction below.
+
+The consumer can fill in the @ta_timeout_ms field to force the servicing
+handshake agent to exit after a number of milliseconds. This enables the
+socket to be fully closed once both the kernel and the handshake agent
+have closed their endpoints.
+
+Authentication material such as x.509 certificates, private certificate
+keys, and pre-shared keys are provided to the handshake agent in keys
+that are instantiated by the consumer before making the handshake
+request. The consumer can provide a private keyring that is linked into
+the handshake agent's process keyring in the @ta_keyring field to prevent
+access of those keys by other subsystems.
+
+To request an x.509-authenticated TLS session, the consumer fills in
+the @ta_my_cert and @ta_my_privkey fields with the serial numbers of
+keys containing an x.509 certificate and the private key for that
+certificate. Then, it invokes this function:
+
+.. code-block:: c
+
+ ret = tls_client_hello_x509(args, gfp_flags);
+
+The function returns zero when the handshake request is under way. A
+zero return guarantees the callback function @ta_done will be invoked
+for this socket. The function returns a negative errno if the handshake
+could not be started. A negative errno guarantees the callback function
+@ta_done will not be invoked on this socket.
+
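+A minimal consumer sketch (struct my_consumer, its fields, and the
+completion-based waiting are illustrative assumptions, not part of the
+API):
+
+.. code-block:: c
+
+	/* Hypothetical completion callback: record the result and
+	 * wake up the thread waiting for the handshake to finish.
+	 */
+	static void my_done(void *data, int status, key_serial_t peerid)
+	{
+		struct my_consumer *cons = data;
+
+		cons->hs_status = status;
+		complete(&cons->hs_done);
+	}
+
+	static int my_start_tls(struct my_consumer *cons)
+	{
+		struct tls_handshake_args args = {
+			.ta_sock	= cons->sock,
+			.ta_done	= my_done,
+			.ta_data	= cons,
+			.ta_timeout_ms	= 3000,
+			.ta_my_cert	= cons->cert_serial,
+			.ta_my_privkey	= cons->privkey_serial,
+		};
+
+		return tls_client_hello_x509(&args, GFP_KERNEL);
+	}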
+
+To initiate a client-side TLS handshake with a pre-shared key, use:
+
+.. code-block:: c
+
+ ret = tls_client_hello_psk(args, gfp_flags);
+
+However, in this case, the consumer fills in the @ta_my_peerids array
+with serial numbers of keys containing the peer identities it wishes
+to offer, and the @ta_num_peerids field with the number of array
+entries it has filled in. The other fields are filled in as above.
+
+
+To initiate an anonymous client-side TLS handshake use:
+
+.. code-block:: c
+
+ ret = tls_client_hello_anon(args, gfp_flags);
+
+The handshake agent presents no peer identity information to the remote
+during this type of handshake. Only server authentication (i.e. the client
+verifies the server's identity) is performed during the handshake. Thus
+the established session uses encryption only.
+
+
+Consumers that are in-kernel servers use:
+
+.. code-block:: c
+
+ ret = tls_server_hello_x509(args, gfp_flags);
+
+or
+
+.. code-block:: c
+
+ ret = tls_server_hello_psk(args, gfp_flags);
+
+The argument structure is filled in as above.
+
+
+If the consumer needs to cancel the handshake request, say, due to a ^C
+or other exigent event, the consumer can invoke:
+
+.. code-block:: c
+
+ bool tls_handshake_cancel(sock);
+
+This function returns true if the handshake request associated with
+@sock has been canceled. The consumer's handshake completion callback
+will not be invoked. If this function returns false, then the consumer's
+completion callback has already been invoked.
+
+
+Handshake Completion
+====================
+
+When the handshake agent has completed processing, it notifies the
+kernel that the socket may be used by the consumer again. At this point,
+the consumer's handshake completion callback, provided in the @ta_done
+field in the tls_handshake_args structure, is invoked.
+
+The synopsis of this function is:
+
+.. code-block:: c
+
+ typedef void (*tls_done_func_t)(void *data, int status,
+ key_serial_t peerid);
+
+The consumer provides a cookie in the @ta_data field of the
+tls_handshake_args structure that is returned in the @data parameter of
+this callback. The consumer uses the cookie to match the callback to the
+thread waiting for the handshake to complete.
+
+The success status of the handshake is returned via the @status
+parameter:
+
++------------+----------------------------------------------+
+| status | meaning |
++============+==============================================+
+| 0 | TLS session established successfully |
++------------+----------------------------------------------+
+| -EACCES    | Remote peer rejected the handshake or        |
+| | authentication failed |
++------------+----------------------------------------------+
+| -ENOMEM | Temporary resource allocation failure |
++------------+----------------------------------------------+
+| -EINVAL | Consumer provided an invalid argument |
++------------+----------------------------------------------+
+| -ENOKEY | Missing authentication material |
++------------+----------------------------------------------+
+| -EIO | An unexpected fault occurred |
++------------+----------------------------------------------+
+
+The @peerid parameter contains the serial number of a key containing the
+remote peer's identity or the value TLS_NO_PEERID if the session is not
+authenticated.
+
+A best practice is to close and destroy the socket immediately if the
+handshake failed.
+
+
+Other considerations
+--------------------
+
+While a handshake is under way, the kernel consumer must alter the
+socket's sk_data_ready callback function to ignore all incoming data.
+Once the handshake completion callback function has been invoked, normal
+receive operation can be resumed.
+
+Once a TLS session is established, the consumer must provide a buffer
+for and then examine the control message (CMSG) that is part of every
+subsequent sock_recvmsg(). Each control message indicates whether the
+received message data is TLS record data or session metadata.
+
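+A minimal sketch of that examination (declarations of @buf, @len, and
+@ret are elided; this is illustrative, not a required pattern):
+
+.. code-block:: c
+
+	unsigned char record_type = TLS_RECORD_TYPE_DATA;
+	char cbuf[CMSG_SPACE(sizeof(record_type))];
+	struct kvec iov = { .iov_base = buf, .iov_len = len };
+	struct msghdr msg = {
+		.msg_control	= cbuf,
+		.msg_controllen	= sizeof(cbuf),
+	};
+	struct cmsghdr *cmsg;
+
+	ret = kernel_recvmsg(sock, &msg, &iov, 1, iov.iov_len, 0);
+
+	cmsg = (struct cmsghdr *)cbuf;
+	if (cmsg->cmsg_level == SOL_TLS &&
+	    cmsg->cmsg_type == TLS_GET_RECORD_TYPE)
+		record_type = *(unsigned char *)CMSG_DATA(cmsg);
+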
+See tls.rst for details on how a kTLS consumer recognizes incoming
+(decrypted) application data, alerts, and handshake packets once the
+socket has been promoted to use the TLS ULP.
diff --git a/Documentation/process/howto.rst b/Documentation/process/howto.rst
index cb6abcb2b6d0..deb8235e20ff 100644
--- a/Documentation/process/howto.rst
+++ b/Documentation/process/howto.rst
@@ -138,7 +138,7 @@ required reading:
philosophy and is very important for people moving to Linux from
development on other Operating Systems.
- :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+ :ref:`Documentation/process/security-bugs.rst <securitybugs>`
If you feel you have found a security problem in the Linux kernel,
please follow the steps in this document to help notify the kernel
developers, and help solve the issue.
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index d4b6217472b0..565df595152e 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -35,6 +35,14 @@ Below are the essential guides that every developer should read.
kernel-enforcement-statement
kernel-driver-statement
+For security issues, see:
+
+.. toctree::
+ :maxdepth: 1
+
+ security-bugs
+ embargoed-hardware-issues
+
Other guides to the community that are of interest to most developers are:
.. toctree::
@@ -47,7 +55,6 @@ Other guides to the community that are of interest to most developers are:
submit-checklist
kernel-docs
deprecated
- embargoed-hardware-issues
maintainers
researcher-guidelines
diff --git a/Documentation/process/researcher-guidelines.rst b/Documentation/process/researcher-guidelines.rst
index afc944e0e898..9fcfed3c350b 100644
--- a/Documentation/process/researcher-guidelines.rst
+++ b/Documentation/process/researcher-guidelines.rst
@@ -68,7 +68,7 @@ Before contributing, carefully read the appropriate documentation:
* Documentation/process/development-process.rst
* Documentation/process/submitting-patches.rst
* Documentation/admin-guide/reporting-issues.rst
-* Documentation/admin-guide/security-bugs.rst
+* Documentation/process/security-bugs.rst
Then send a patch (including a commit log with all the details listed
below) and follow up on any feedback from other developers.
diff --git a/Documentation/admin-guide/security-bugs.rst b/Documentation/process/security-bugs.rst
index 82e29837d589..82e29837d589 100644
--- a/Documentation/admin-guide/security-bugs.rst
+++ b/Documentation/process/security-bugs.rst
diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst
index 2fd8aa593a28..51df1197d5ab 100644
--- a/Documentation/process/stable-kernel-rules.rst
+++ b/Documentation/process/stable-kernel-rules.rst
@@ -39,7 +39,7 @@ Procedure for submitting patches to the -stable tree
Security patches should not be handled (solely) by the -stable review
process but should follow the procedures in
- :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`.
+ :ref:`Documentation/process/security-bugs.rst <securitybugs>`.
For all other submissions, choose one of the following procedures
-----------------------------------------------------------------
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index 69ce64e03c70..828997bc9ff9 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -254,7 +254,7 @@ If you have a patch that fixes an exploitable security bug, send that patch
to security@kernel.org. For severe bugs, a short embargo may be considered
to allow distributors to get the patch out to users; in such cases,
obviously, the patch should not be sent to any public lists. See also
-Documentation/admin-guide/security-bugs.rst.
+Documentation/process/security-bugs.rst.
Patches that fix a severe bug in a released kernel should be directed
toward the stable maintainers by putting a line like this::
diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst
index 3be44e74ec5d..5462c84f4723 100644
--- a/Documentation/riscv/vm-layout.rst
+++ b/Documentation/riscv/vm-layout.rst
@@ -47,7 +47,7 @@ RISC-V Linux Kernel SV39
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
| | | |
- ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap
+ ffffffc6fea00000 | -228 GB | ffffffc6feffffff | 6 MB | fixmap
ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io
ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap
ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space
@@ -83,7 +83,7 @@ RISC-V Linux Kernel SV48
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
| | | |
- ffff8d7ffee00000 | -114.5 TB | ffff8d7ffeffffff | 2 MB | fixmap
+ ffff8d7ffea00000 | -114.5 TB | ffff8d7ffeffffff | 6 MB | fixmap
ffff8d7fff000000 | -114.5 TB | ffff8d7fffffffff | 16 MB | PCI io
ffff8d8000000000 | -114.5 TB | ffff8f7fffffffff | 2 TB | vmemmap
ffff8f8000000000 | -112.5 TB | ffffaf7fffffffff | 32 TB | vmalloc/ioremap space
@@ -119,7 +119,7 @@ RISC-V Linux Kernel SV57
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
| | | |
- ff1bfffffee00000 | -57 PB | ff1bfffffeffffff | 2 MB | fixmap
+ ff1bfffffea00000 | -57 PB | ff1bfffffeffffff | 6 MB | fixmap
ff1bffffff000000 | -57 PB | ff1bffffffffffff | 16 MB | PCI io
ff1c000000000000 | -57 PB | ff1fffffffffffff | 1 PB | vmemmap
ff20000000000000 | -56 PB | ff5fffffffffffff | 16 PB | vmalloc/ioremap space
diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst
index ed7f4f5b3cf1..b91e9ef4d0c2 100644
--- a/Documentation/rust/arch-support.rst
+++ b/Documentation/rust/arch-support.rst
@@ -15,7 +15,7 @@ support corresponds to ``S`` values in the ``MAINTAINERS`` file.
============ ================ ==============================================
Architecture Level of support Constraints
============ ================ ==============================================
-``x86`` Maintained ``x86_64`` only.
``um`` Maintained ``x86_64`` only.
+``x86`` Maintained ``x86_64`` only.
============ ================ ==============================================
diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst
index 9b52f50a6854..120430450014 100644
--- a/Documentation/sound/hd-audio/models.rst
+++ b/Documentation/sound/hd-audio/models.rst
@@ -704,7 +704,7 @@ ref
no-jd
BIOS setup but without jack-detection
intel
- Intel DG45* mobos
+ Intel D*45* mobos
dell-m6-amic
Dell desktops/laptops with analog mics
dell-m6-dmic
diff --git a/Documentation/translations/it_IT/admin-guide/security-bugs.rst b/Documentation/translations/it_IT/admin-guide/security-bugs.rst
index 18a5822c7d9a..20994f4bfa31 100644
--- a/Documentation/translations/it_IT/admin-guide/security-bugs.rst
+++ b/Documentation/translations/it_IT/admin-guide/security-bugs.rst
@@ -1,6 +1,6 @@
.. include:: ../disclaimer-ita.rst
-:Original: :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+:Original: :ref:`Documentation/process/security-bugs.rst <securitybugs>`
.. _it_securitybugs:
diff --git a/Documentation/translations/it_IT/process/submitting-patches.rst b/Documentation/translations/it_IT/process/submitting-patches.rst
index c2cfa0948b2b..167fce813032 100644
--- a/Documentation/translations/it_IT/process/submitting-patches.rst
+++ b/Documentation/translations/it_IT/process/submitting-patches.rst
@@ -272,7 +272,7 @@ embargo potrebbe essere preso in considerazione per dare il tempo alle
distribuzioni di prendere la patch e renderla disponibile ai loro utenti;
in questo caso, ovviamente, la patch non dovrebbe essere inviata su alcuna
lista di discussione pubblica. Leggete anche
-Documentation/admin-guide/security-bugs.rst.
+Documentation/process/security-bugs.rst.
Patch che correggono bachi importanti su un kernel già rilasciato, dovrebbero
essere inviate ai manutentori dei kernel stabili aggiungendo la seguente riga::
diff --git a/Documentation/translations/ja_JP/howto.rst b/Documentation/translations/ja_JP/howto.rst
index 9b0b3436dfcf..8d856ebe873c 100644
--- a/Documentation/translations/ja_JP/howto.rst
+++ b/Documentation/translations/ja_JP/howto.rst
@@ -167,7 +167,7 @@ linux-api@vger.kernel.org に送ることを勧めます。
このドキュメントは Linux 開発の思想を理解するのに非常に重要です。
そして、他のOSでの開発者が Linux に移る時にとても重要です。
- :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+ :ref:`Documentation/process/security-bugs.rst <securitybugs>`
もし Linux カーネルでセキュリティ問題を発見したように思ったら、こ
のドキュメントのステップに従ってカーネル開発者に連絡し、問題解決を
支援してください。
diff --git a/Documentation/translations/ko_KR/howto.rst b/Documentation/translations/ko_KR/howto.rst
index 969e91a95bb0..34f14899c155 100644
--- a/Documentation/translations/ko_KR/howto.rst
+++ b/Documentation/translations/ko_KR/howto.rst
@@ -157,7 +157,7 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다.
리눅스로 전향하는 사람들에게는 매우 중요하다.
- :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+ :ref:`Documentation/process/security-bugs.rst <securitybugs>`
여러분들이 리눅스 커널의 보안 문제를 발견했다고 생각한다면 이 문서에
나온 단계에 따라서 커널 개발자들에게 알리고 그 문제를 해결할 수 있도록
도와 달라.
diff --git a/Documentation/translations/sp_SP/howto.rst b/Documentation/translations/sp_SP/howto.rst
index f9818d687b54..f1629738b49d 100644
--- a/Documentation/translations/sp_SP/howto.rst
+++ b/Documentation/translations/sp_SP/howto.rst
@@ -135,7 +135,7 @@ de obligada lectura:
de Linux y es muy importante para las personas que se mudan a Linux
tras desarrollar otros sistemas operativos.
- :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+ :ref:`Documentation/process/security-bugs.rst <securitybugs>`
Si cree que ha encontrado un problema de seguridad en el kernel de
Linux, siga los pasos de este documento para ayudar a notificar a los
desarrolladores del kernel y ayudar a resolver el problema.
diff --git a/Documentation/translations/sp_SP/process/submitting-patches.rst b/Documentation/translations/sp_SP/process/submitting-patches.rst
index bf95ceb5e865..c2757d9ab216 100644
--- a/Documentation/translations/sp_SP/process/submitting-patches.rst
+++ b/Documentation/translations/sp_SP/process/submitting-patches.rst
@@ -276,7 +276,7 @@ parche a security@kernel.org. Para errores graves, se debe mantener un
poco de discreción y permitir que los distribuidores entreguen el parche a
los usuarios; en esos casos, obviamente, el parche no debe enviarse a
ninguna lista pública. Revise también
-Documentation/admin-guide/security-bugs.rst.
+Documentation/process/security-bugs.rst.
Los parches que corrigen un error grave en un kernel en uso deben dirigirse
hacia los maintainers estables poniendo una línea como esta::
diff --git a/Documentation/translations/zh_CN/admin-guide/security-bugs.rst b/Documentation/translations/zh_CN/admin-guide/security-bugs.rst
index b8120391755d..d6b8f8a4e7f6 100644
--- a/Documentation/translations/zh_CN/admin-guide/security-bugs.rst
+++ b/Documentation/translations/zh_CN/admin-guide/security-bugs.rst
@@ -1,6 +1,6 @@
.. include:: ../disclaimer-zh_CN.rst
-:Original: :doc:`../../../admin-guide/security-bugs`
+:Original: :doc:`../../../process/security-bugs`
:译者:
diff --git a/Documentation/translations/zh_CN/process/howto.rst b/Documentation/translations/zh_CN/process/howto.rst
index 10254751df6a..cc47be356dd3 100644
--- a/Documentation/translations/zh_CN/process/howto.rst
+++ b/Documentation/translations/zh_CN/process/howto.rst
@@ -125,7 +125,7 @@ Linux内核代码中包含有大量的文档。这些文档对于学习如何与
这篇文档对于理解Linux的开发哲学至关重要。对于将开发平台从其他操作系
统转移到Linux的人来说也很重要。
- :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+ :ref:`Documentation/process/security-bugs.rst <securitybugs>`
如果你认为自己发现了Linux内核的安全性问题,请根据这篇文档中的步骤来
提醒其他内核开发者并帮助解决这个问题。
diff --git a/Documentation/translations/zh_TW/admin-guide/security-bugs.rst b/Documentation/translations/zh_TW/admin-guide/security-bugs.rst
index eed260ef0c37..15f8e9005071 100644
--- a/Documentation/translations/zh_TW/admin-guide/security-bugs.rst
+++ b/Documentation/translations/zh_TW/admin-guide/security-bugs.rst
@@ -2,7 +2,7 @@
.. include:: ../disclaimer-zh_TW.rst
-:Original: :doc:`../../../admin-guide/security-bugs`
+:Original: :doc:`../../../process/security-bugs`
:譯者:
diff --git a/Documentation/translations/zh_TW/process/howto.rst b/Documentation/translations/zh_TW/process/howto.rst
index 8fb8edcaee66..ea2f468d3e58 100644
--- a/Documentation/translations/zh_TW/process/howto.rst
+++ b/Documentation/translations/zh_TW/process/howto.rst
@@ -128,7 +128,7 @@ Linux內核代碼中包含有大量的文檔。這些文檔對於學習如何與
這篇文檔對於理解Linux的開發哲學至關重要。對於將開發平台從其他操作系
統轉移到Linux的人來說也很重要。
- :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+ :ref:`Documentation/process/security-bugs.rst <securitybugs>`
如果你認爲自己發現了Linux內核的安全性問題,請根據這篇文檔中的步驟來
提醒其他內核開發者並幫助解決這個問題。
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 62de0768d6aa..a5c803f39832 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8296,11 +8296,11 @@ ENOSYS for the others.
8.35 KVM_CAP_PMU_CAPABILITY
---------------------------
-:Capability KVM_CAP_PMU_CAPABILITY
+:Capability: KVM_CAP_PMU_CAPABILITY
:Architectures: x86
:Type: vm
:Parameters: arg[0] is bitmask of PMU virtualization capabilities.
-:Returns 0 on success, -EINVAL when arg[0] contains invalid bits
+:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits
This capability alters PMU virtualization in KVM.
diff --git a/MAINTAINERS b/MAINTAINERS
index abecba7138f1..6ac562e0381e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -73,7 +73,7 @@ Tips for patch submitters
and ideally, should come with a patch proposal. Please do not send
automated reports to this list either. Such bugs will be handled
better and faster in the usual public places. See
- Documentation/admin-guide/security-bugs.rst for details.
+ Documentation/process/security-bugs.rst for details.
8. Happy hacking.
@@ -224,13 +224,13 @@ S: Orphan / Obsolete
F: drivers/net/ethernet/8390/
9P FILE SYSTEM
-M: Eric Van Hensbergen <ericvh@gmail.com>
+M: Eric Van Hensbergen <ericvh@kernel.org>
M: Latchesar Ionkov <lucho@ionkov.net>
M: Dominique Martinet <asmadeus@codewreck.org>
R: Christian Schoenebeck <linux_oss@crudebyte.com>
-L: v9fs-developer@lists.sourceforge.net
+L: v9fs@lists.linux.dev
S: Maintained
-W: http://swik.net/v9fs
+W: http://github.com/v9fs
Q: http://patchwork.kernel.org/project/v9fs-devel/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs.git
T: git git://github.com/martinetd/linux.git
@@ -1041,6 +1041,15 @@ F: drivers/gpu/drm/amd/include/vi_structs.h
F: include/uapi/linux/kfd_ioctl.h
F: include/uapi/linux/kfd_sysfs.h
+AMD PDS CORE DRIVER
+M: Shannon Nelson <shannon.nelson@amd.com>
+M: Brett Creeley <brett.creeley@amd.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
+F: drivers/net/ethernet/amd/pds_core/
+F: include/linux/pds/
+
AMD SPI DRIVER
M: Sanjay R Mehta <sanju.mehta@amd.com>
S: Maintained
@@ -4468,14 +4477,14 @@ F: Documentation/devicetree/bindings/net/ieee802154/ca8210.txt
F: drivers/net/ieee802154/ca8210.c
CANAAN/KENDRYTE K210 SOC FPIOA DRIVER
-M: Damien Le Moal <damien.lemoal@wdc.com>
+M: Damien Le Moal <dlemoal@kernel.org>
L: linux-riscv@lists.infradead.org
L: linux-gpio@vger.kernel.org (pinctrl driver)
F: Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml
F: drivers/pinctrl/pinctrl-k210.c
CANAAN/KENDRYTE K210 SOC RESET CONTROLLER DRIVER
-M: Damien Le Moal <damien.lemoal@wdc.com>
+M: Damien Le Moal <dlemoal@kernel.org>
L: linux-kernel@vger.kernel.org
L: linux-riscv@lists.infradead.org
S: Maintained
@@ -4483,7 +4492,7 @@ F: Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml
F: drivers/reset/reset-k210.c
CANAAN/KENDRYTE K210 SOC SYSTEM CONTROLLER DRIVER
-M: Damien Le Moal <damien.lemoal@wdc.com>
+M: Damien Le Moal <dlemoal@kernel.org>
L: linux-riscv@lists.infradead.org
S: Maintained
F: Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml
@@ -8947,6 +8956,17 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/
T: git git://linuxtv.org/anttip/media_tree.git
F: drivers/media/usb/hackrf/
+HANDSHAKE UPCALL FOR TRANSPORT LAYER SECURITY
+M: Chuck Lever <chuck.lever@oracle.com>
+L: kernel-tls-handshake@lists.linux.dev
+L: netdev@vger.kernel.org
+S: Maintained
+F: Documentation/netlink/specs/handshake.yaml
+F: Documentation/networking/tls-handshake.rst
+F: include/net/handshake.h
+F: include/trace/events/handshake.h
+F: net/handshake/
+
HANTRO VPU CODEC DRIVER
M: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
M: Philipp Zabel <p.zabel@pengutronix.de>
@@ -11765,7 +11785,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
F: drivers/ata/sata_promise.*
LIBATA SUBSYSTEM (Serial and Parallel ATA drivers)
-M: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+M: Damien Le Moal <dlemoal@kernel.org>
L: linux-ide@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata.git
@@ -14612,11 +14632,14 @@ F: net/netlabel/
NETWORKING [MPTCP]
M: Matthieu Baerts <matthieu.baerts@tessares.net>
+M: Mat Martineau <martineau@kernel.org>
L: netdev@vger.kernel.org
L: mptcp@lists.linux.dev
S: Maintained
W: https://github.com/multipath-tcp/mptcp_net-next/wiki
B: https://github.com/multipath-tcp/mptcp_net-next/issues
+T: git https://github.com/multipath-tcp/mptcp_net-next.git export-net
+T: git https://github.com/multipath-tcp/mptcp_net-next.git export
F: Documentation/networking/mptcp-sysctl.rst
F: include/net/mptcp.h
F: include/trace/events/mptcp.h
@@ -17304,7 +17327,7 @@ M: Vinod Koul <vkoul@kernel.org>
R: Bhupesh Sharma <bhupesh.sharma@linaro.org>
L: netdev@vger.kernel.org
S: Maintained
-F: Documentation/devicetree/bindings/net/qcom,ethqos.txt
+F: Documentation/devicetree/bindings/net/qcom,ethqos.yaml
F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
QUALCOMM FASTRPC DRIVER
@@ -18312,8 +18335,9 @@ F: drivers/s390/block/dasd*
F: include/linux/dasd_mod.h
S390 IOMMU (PCI)
+M: Niklas Schnelle <schnelle@linux.ibm.com>
M: Matthew Rosato <mjrosato@linux.ibm.com>
-M: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+R: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
L: linux-s390@vger.kernel.org
S: Supported
F: drivers/iommu/s390-iommu.c
@@ -18822,7 +18846,7 @@ F: include/uapi/linux/sed*
SECURITY CONTACT
M: Security Officers <security@kernel.org>
S: Supported
-F: Documentation/admin-guide/security-bugs.rst
+F: Documentation/process/security-bugs.rst
SECURITY SUBSYSTEM
M: Paul Moore <paul@paul-moore.com>
@@ -19925,6 +19949,13 @@ M: Emil Renner Berthing <kernel@esmil.dk>
S: Maintained
F: arch/riscv/boot/dts/starfive/
+STARFIVE DWMAC GLUE LAYER
+M: Emil Renner Berthing <kernel@esmil.dk>
+M: Samin Guo <samin.guo@starfivetech.com>
+S: Maintained
+F: Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
+F: drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
+
STARFIVE JH7100 CLOCK DRIVERS
M: Emil Renner Berthing <kernel@esmil.dk>
S: Maintained
@@ -23138,7 +23169,7 @@ S: Maintained
F: arch/x86/kernel/cpu/zhaoxin.c
ZONEFS FILESYSTEM
-M: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+M: Damien Le Moal <dlemoal@kernel.org>
M: Naohiro Aota <naohiro.aota@wdc.com>
R: Johannes Thumshirn <jth@kernel.org>
L: linux-fsdevel@vger.kernel.org
diff --git a/Makefile b/Makefile
index da2586d4c728..b5c48e3c935a 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 3
SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc7
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/armada-370-rd.dts b/arch/arm/boot/dts/armada-370-rd.dts
index be005c9f42ef..2586f32a3e21 100644
--- a/arch/arm/boot/dts/armada-370-rd.dts
+++ b/arch/arm/boot/dts/armada-370-rd.dts
@@ -20,6 +20,7 @@
/dts-v1/;
#include <dt-bindings/input/input.h>
#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/leds/common.h>
#include <dt-bindings/gpio/gpio.h>
#include "armada-370.dtsi"
@@ -135,6 +136,17 @@
pinctrl-names = "default";
phy0: ethernet-phy@0 {
reg = <0>;
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_WHITE>;
+ function = LED_FUNCTION_WAN;
+ default-state = "keep";
+ };
+ };
};
switch: switch@10 {
diff --git a/arch/arm/boot/dts/imx6ull-colibri.dtsi b/arch/arm/boot/dts/imx6ull-colibri.dtsi
index bf64ba84b358..fde8a19aac0f 100644
--- a/arch/arm/boot/dts/imx6ull-colibri.dtsi
+++ b/arch/arm/boot/dts/imx6ull-colibri.dtsi
@@ -33,15 +33,9 @@
self-powered;
type = "micro";
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- usb_dr_connector: endpoint {
- remote-endpoint = <&usb1_drd_sw>;
- };
+ port {
+ usb_dr_connector: endpoint {
+ remote-endpoint = <&usb1_drd_sw>;
};
};
};
diff --git a/arch/arm/boot/dts/imx7d-remarkable2.dts b/arch/arm/boot/dts/imx7d-remarkable2.dts
index 8b2f11e85e05..427f8d04ec89 100644
--- a/arch/arm/boot/dts/imx7d-remarkable2.dts
+++ b/arch/arm/boot/dts/imx7d-remarkable2.dts
@@ -118,8 +118,6 @@
reg = <0x62>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_epdpmic>;
- #address-cells = <1>;
- #size-cells = <0>;
#thermal-sensor-cells = <0>;
epd-pwr-good-gpios = <&gpio6 21 GPIO_ACTIVE_HIGH>;
diff --git a/arch/arm/boot/dts/qcom-ipq8064-rb3011.dts b/arch/arm/boot/dts/qcom-ipq8064-rb3011.dts
index f908889c4f95..4d509876294b 100644
--- a/arch/arm/boot/dts/qcom-ipq8064-rb3011.dts
+++ b/arch/arm/boot/dts/qcom-ipq8064-rb3011.dts
@@ -38,8 +38,6 @@
switch0: switch@10 {
compatible = "qca,qca8337";
- #address-cells = <1>;
- #size-cells = <0>;
dsa,member = <0 0>;
@@ -67,26 +65,86 @@
port@1 {
reg = <1>;
label = "sw1";
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
};
port@2 {
reg = <2>;
label = "sw2";
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
};
port@3 {
reg = <3>;
label = "sw3";
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
};
port@4 {
reg = <4>;
label = "sw4";
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
};
port@5 {
reg = <5>;
label = "sw5";
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
};
};
};
@@ -105,8 +163,6 @@
switch1: switch@14 {
compatible = "qca,qca8337";
- #address-cells = <1>;
- #size-cells = <0>;
dsa,member = <1 0>;
@@ -134,26 +190,86 @@
port@1 {
reg = <1>;
label = "sw6";
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
};
port@2 {
reg = <2>;
label = "sw7";
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
};
port@3 {
reg = <3>;
label = "sw8";
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
};
port@4 {
reg = <4>;
label = "sw9";
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
};
port@5 {
reg = <5>;
label = "sw10";
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_LAN;
+ default-state = "keep";
+ };
+ };
};
};
};
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index 2ca76b69add7..511ca864c1b2 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -942,7 +942,7 @@
status = "disabled";
};
- spdif: sound@ff88b0000 {
+ spdif: sound@ff8b0000 {
compatible = "rockchip,rk3288-spdif", "rockchip,rk3066-spdif";
reg = <0x0 0xff8b0000 0x0 0x10000>;
#sound-dai-cells = <0>;
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 6dc6fed12af8..8d002c6e6cb3 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -76,7 +76,7 @@ CONFIG_RFKILL=y
CONFIG_RFKILL_INPUT=y
CONFIG_PCI=y
CONFIG_PCI_MSI=y
-CONFIG_PCI_IMX6=y
+CONFIG_PCI_IMX6_HOST=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_STANDALONE is not set
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
index 123a56f7f818..feb27a0ccfb4 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
@@ -1571,15 +1571,20 @@
dmc: bus@38000 {
compatible = "simple-bus";
- reg = <0x0 0x38000 0x0 0x400>;
#address-cells = <2>;
#size-cells = <2>;
- ranges = <0x0 0x0 0x0 0x38000 0x0 0x400>;
+ ranges = <0x0 0x0 0x0 0x38000 0x0 0x2000>;
canvas: video-lut@48 {
compatible = "amlogic,canvas";
reg = <0x0 0x48 0x0 0x14>;
};
+
+ pmu: pmu@80 {
+ reg = <0x0 0x80 0x0 0x40>,
+ <0x0 0xc00 0x0 0x40>;
+ interrupts = <GIC_SPI 52 IRQ_TYPE_EDGE_RISING>;
+ };
};
usb2_phy1: phy@3a000 {
@@ -1705,12 +1710,6 @@
};
};
- pmu: pmu@ff638000 {
- reg = <0x0 0xff638000 0x0 0x100>,
- <0x0 0xff638c00 0x0 0x100>;
- interrupts = <GIC_SPI 52 IRQ_TYPE_EDGE_RISING>;
- };
-
aobus: bus@ff800000 {
compatible = "simple-bus";
reg = <0x0 0xff800000 0x0 0x100000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
index d1a6390976a9..3f9dfd4d3884 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
@@ -194,7 +194,7 @@
rohm,reset-snvs-powered;
#clock-cells = <0>;
- clocks = <&osc_32k 0>;
+ clocks = <&osc_32k>;
clock-output-names = "clk-32k-out";
regulators {
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
index 88321b5b0693..6f0811587142 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
@@ -99,7 +99,7 @@
compatible = "regulator-fixed";
enable-active-high;
gpio = <&gpio2 20 GPIO_ACTIVE_HIGH>; /* PMIC_EN_ETH */
- off-on-delay = <500000>;
+ off-on-delay-us = <500000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_reg_eth>;
regulator-always-on;
@@ -139,7 +139,7 @@
enable-active-high;
/* Verdin SD_1_PWR_EN (SODIMM 76) */
gpio = <&gpio3 5 GPIO_ACTIVE_HIGH>;
- off-on-delay = <100000>;
+ off-on-delay-us = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_usdhc2_pwr_en>;
regulator-max-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-verdin-dev.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-verdin-dev.dtsi
index 361426c0da0a..c29622529200 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-verdin-dev.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-verdin-dev.dtsi
@@ -10,7 +10,7 @@
compatible = "regulator-fixed";
enable-active-high;
gpio = <&gpio_expander_21 4 GPIO_ACTIVE_HIGH>; /* ETH_PWR_EN */
- off-on-delay = <500000>;
+ off-on-delay-us = <500000>;
regulator-max-microvolt = <3300000>;
regulator-min-microvolt = <3300000>;
regulator-name = "+V3.3_ETH";
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi
index 0dd6180a8e39..1608775da0ad 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi
@@ -87,7 +87,7 @@
compatible = "regulator-fixed";
enable-active-high;
gpio = <&gpio2 20 GPIO_ACTIVE_HIGH>; /* PMIC_EN_ETH */
- off-on-delay = <500000>;
+ off-on-delay-us = <500000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_reg_eth>;
regulator-always-on;
@@ -128,7 +128,7 @@
enable-active-high;
/* Verdin SD_1_PWR_EN (SODIMM 76) */
gpio = <&gpio4 22 GPIO_ACTIVE_HIGH>;
- off-on-delay = <100000>;
+ off-on-delay-us = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_usdhc2_pwr_en>;
regulator-max-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index 2dd60e3252f3..a237275ee017 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -1128,7 +1128,7 @@
lcdif2: display-controller@32e90000 {
compatible = "fsl,imx8mp-lcdif";
- reg = <0x32e90000 0x238>;
+ reg = <0x32e90000 0x10000>;
interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MP_CLK_MEDIA_DISP2_PIX_ROOT>,
<&clk IMX8MP_CLK_MEDIA_APB_ROOT>,
diff --git a/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts b/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts
index ca3f96646b90..5cf07caf4103 100644
--- a/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts
+++ b/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts
@@ -62,11 +62,11 @@
perst-gpios = <&tlmm 58 GPIO_ACTIVE_LOW>;
};
-&pcie_phy0 {
+&pcie_qmp0 {
status = "okay";
};
-&pcie_phy1 {
+&pcie_qmp1 {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/ipq8074-hk10.dtsi b/arch/arm64/boot/dts/qcom/ipq8074-hk10.dtsi
index 651a231554e0..1b8379ba87f9 100644
--- a/arch/arm64/boot/dts/qcom/ipq8074-hk10.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq8074-hk10.dtsi
@@ -48,11 +48,11 @@
perst-gpios = <&tlmm 61 GPIO_ACTIVE_LOW>;
};
-&pcie_phy0 {
+&pcie_qmp0 {
status = "okay";
};
-&pcie_phy1 {
+&pcie_qmp1 {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
index aa0a7bd7307c..dd924331b0ee 100644
--- a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
+++ b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
@@ -1012,7 +1012,7 @@
left_spkr: speaker@0,3 {
compatible = "sdw10217211000";
reg = <0 3>;
- powerdown-gpios = <&tlmm 130 GPIO_ACTIVE_HIGH>;
+ powerdown-gpios = <&tlmm 130 GPIO_ACTIVE_LOW>;
#thermal-sensor-cells = <0>;
sound-name-prefix = "SpkrLeft";
#sound-dai-cells = <0>;
@@ -1021,7 +1021,7 @@
right_spkr: speaker@0,4 {
compatible = "sdw10217211000";
reg = <0 4>;
- powerdown-gpios = <&tlmm 130 GPIO_ACTIVE_HIGH>;
+ powerdown-gpios = <&tlmm 130 GPIO_ACTIVE_LOW>;
#thermal-sensor-cells = <0>;
sound-name-prefix = "SpkrRight";
#sound-dai-cells = <0>;
diff --git a/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi b/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi
index b6137816f2f3..313083ec1f39 100644
--- a/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi
@@ -464,7 +464,7 @@ ap_i2c_tpm: &i2c14 {
&mdss_dp_out {
data-lanes = <0 1>;
- link-frequencies = /bits/ 64 <1620000000 2700000000 5400000000 8100000000>;
+ link-frequencies = /bits/ 64 <1620000000 2700000000 5400000000>;
};
&mdss_mdp {
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-pmics.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp-pmics.dtsi
index df7d28f7ae60..be446eba4fa7 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-pmics.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-pmics.dtsi
@@ -59,8 +59,9 @@
#size-cells = <0>;
pmk8280_pon: pon@1300 {
- compatible = "qcom,pm8998-pon";
- reg = <0x1300>;
+ compatible = "qcom,pmk8350-pon";
+ reg = <0x1300>, <0x800>;
+ reg-names = "hlos", "pbs";
pmk8280_pon_pwrkey: pwrkey {
compatible = "qcom,pmk8350-pwrkey";
diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
index 67d2a663ce75..5c688cb6a7ce 100644
--- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
+++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
@@ -753,7 +753,7 @@
left_spkr: speaker@0,3 {
compatible = "sdw10217211000";
reg = <0 3>;
- powerdown-gpios = <&wcdgpio 1 GPIO_ACTIVE_HIGH>;
+ powerdown-gpios = <&wcdgpio 1 GPIO_ACTIVE_LOW>;
#thermal-sensor-cells = <0>;
sound-name-prefix = "SpkrLeft";
#sound-dai-cells = <0>;
@@ -761,7 +761,7 @@
right_spkr: speaker@0,4 {
compatible = "sdw10217211000";
- powerdown-gpios = <&wcdgpio 2 GPIO_ACTIVE_HIGH>;
+ powerdown-gpios = <&wcdgpio 2 GPIO_ACTIVE_LOW>;
reg = <0 4>;
#thermal-sensor-cells = <0>;
sound-name-prefix = "SpkrRight";
diff --git a/arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts b/arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts
index 9850140514ba..41f59e32af64 100644
--- a/arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts
+++ b/arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts
@@ -662,7 +662,7 @@
left_spkr: speaker@0,3 {
compatible = "sdw10217211000";
reg = <0 3>;
- powerdown-gpios = <&wcdgpio 1 GPIO_ACTIVE_HIGH>;
+ powerdown-gpios = <&wcdgpio 1 GPIO_ACTIVE_LOW>;
#thermal-sensor-cells = <0>;
sound-name-prefix = "SpkrLeft";
#sound-dai-cells = <0>;
@@ -670,7 +670,7 @@
right_spkr: speaker@0,4 {
compatible = "sdw10217211000";
- powerdown-gpios = <&wcdgpio 2 GPIO_ACTIVE_HIGH>;
+ powerdown-gpios = <&wcdgpio 2 GPIO_ACTIVE_LOW>;
reg = <0 4>;
#thermal-sensor-cells = <0>;
sound-name-prefix = "SpkrRight";
diff --git a/arch/arm64/boot/dts/qcom/sm8250-mtp.dts b/arch/arm64/boot/dts/qcom/sm8250-mtp.dts
index e54cdc8bc31f..4c9de236676d 100644
--- a/arch/arm64/boot/dts/qcom/sm8250-mtp.dts
+++ b/arch/arm64/boot/dts/qcom/sm8250-mtp.dts
@@ -764,7 +764,7 @@
left_spkr: speaker@0,3 {
compatible = "sdw10217211000";
reg = <0 3>;
- powerdown-gpios = <&tlmm 26 GPIO_ACTIVE_HIGH>;
+ powerdown-gpios = <&tlmm 26 GPIO_ACTIVE_LOW>;
#thermal-sensor-cells = <0>;
sound-name-prefix = "SpkrLeft";
#sound-dai-cells = <0>;
@@ -773,7 +773,7 @@
right_spkr: speaker@0,4 {
compatible = "sdw10217211000";
reg = <0 4>;
- powerdown-gpios = <&tlmm 127 GPIO_ACTIVE_HIGH>;
+ powerdown-gpios = <&tlmm 127 GPIO_ACTIVE_LOW>;
#thermal-sensor-cells = <0>;
sound-name-prefix = "SpkrRight";
#sound-dai-cells = <0>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3326-anbernic-rg351m.dts b/arch/arm64/boot/dts/rockchip/rk3326-anbernic-rg351m.dts
index 61b31688b469..ce318e05f0a6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3326-anbernic-rg351m.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3326-anbernic-rg351m.dts
@@ -24,6 +24,8 @@
&internal_display {
compatible = "elida,kd35t133";
+ iovcc-supply = <&vcc_lcd>;
+ vdd-supply = <&vcc_lcd>;
};
&pwm0 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go.dtsi b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go.dtsi
index 04eba432fb0e..80fc53c807a4 100644
--- a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go.dtsi
@@ -235,10 +235,8 @@
internal_display: panel@0 {
reg = <0>;
backlight = <&backlight>;
- iovcc-supply = <&vcc_lcd>;
reset-gpios = <&gpio3 RK_PC0 GPIO_ACTIVE_LOW>;
rotation = <270>;
- vdd-supply = <&vcc_lcd>;
port {
mipi_in_panel: endpoint {
diff --git a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2-v11.dts b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2-v11.dts
index 139c898e590e..d94ac81eb4e6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2-v11.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2-v11.dts
@@ -83,6 +83,8 @@
&internal_display {
compatible = "elida,kd35t133";
+ iovcc-supply = <&vcc_lcd>;
+ vdd-supply = <&vcc_lcd>;
};
&rk817 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts
index 4702183b673c..aa6f5b12206b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts
@@ -59,6 +59,8 @@
&internal_display {
compatible = "elida,kd35t133";
+ iovcc-supply = <&vcc_lcd>;
+ vdd-supply = <&vcc_lcd>;
};
&rk817_charger {
diff --git a/arch/arm64/boot/dts/rockchip/rk3368-evb.dtsi b/arch/arm64/boot/dts/rockchip/rk3368-evb.dtsi
index 083452c67711..e47d1398aeca 100644
--- a/arch/arm64/boot/dts/rockchip/rk3368-evb.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3368-evb.dtsi
@@ -61,7 +61,6 @@
pinctrl-names = "default";
pinctrl-0 = <&bl_en>;
pwms = <&pwm0 0 1000000 PWM_POLARITY_INVERTED>;
- pwm-delay-us = <10000>;
};
emmc_pwrseq: emmc-pwrseq {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
index ee6095baba4d..5c1929d41cc0 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
@@ -198,7 +198,6 @@
power-supply = <&pp3300_disp>;
pinctrl-names = "default";
pinctrl-0 = <&bl_en>;
- pwm-delay-us = <10000>;
};
gpio_keys: gpio-keys {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
index a47d9f758611..c5e7de60c121 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
@@ -167,7 +167,6 @@
pinctrl-names = "default";
pinctrl-0 = <&bl_en>;
pwms = <&pwm1 0 1000000 0>;
- pwm-delay-us = <10000>;
};
dmic: dmic {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts
index 194e48c755f6..ddd45de97950 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts
@@ -50,19 +50,9 @@
pinctrl-0 = <&panel_en_pin>;
power-supply = <&vcc3v3_panel>;
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- panel_in_edp: endpoint@0 {
- reg = <0>;
- remote-endpoint = <&edp_out_panel>;
- };
+ port {
+ panel_in_edp: endpoint {
+ remote-endpoint = <&edp_out_panel>;
};
};
};
@@ -943,7 +933,7 @@
disable-wp;
pinctrl-names = "default";
pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_bus4>;
- sd-uhs-sdr104;
+ sd-uhs-sdr50;
vmmc-supply = <&vcc3v0_sd>;
vqmmc-supply = <&vcc_sdio>;
status = "okay";
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dtsi
index 78157521e944..bca2b50e0a93 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dtsi
@@ -647,16 +647,10 @@
avdd-supply = <&avdd>;
backlight = <&backlight>;
dvdd-supply = <&vcc3v3_s0>;
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
- port@0 {
- reg = <0>;
-
- mipi_in_panel: endpoint {
- remote-endpoint = <&mipi_out_panel>;
- };
+ port {
+ mipi_in_panel: endpoint {
+ remote-endpoint = <&mipi_out_panel>;
};
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index 1881b4b71f91..40e7c4a70055 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -552,7 +552,7 @@
<0x0 0xfff10000 0 0x10000>, /* GICH */
<0x0 0xfff20000 0 0x10000>; /* GICV */
interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH 0>;
- its: interrupt-controller@fee20000 {
+ its: msi-controller@fee20000 {
compatible = "arm,gic-v3-its";
msi-controller;
#msi-cells = <1>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353x.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353x.dtsi
index 65a80d1f6d91..9a0e217f069f 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353x.dtsi
@@ -16,8 +16,10 @@
};
&cru {
- assigned-clocks = <&cru PLL_GPLL>, <&pmucru PLL_PPLL>, <&cru PLL_VPLL>;
- assigned-clock-rates = <1200000000>, <200000000>, <241500000>;
+ assigned-clocks = <&pmucru CLK_RTC_32K>, <&cru PLL_GPLL>,
+ <&pmucru PLL_PPLL>, <&cru PLL_VPLL>;
+ assigned-clock-rates = <32768>, <1200000000>,
+ <200000000>, <241500000>;
};
&gpio_keys_control {
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg503.dts b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg503.dts
index b4b2df821cba..c763c7f3b1b3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg503.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg503.dts
@@ -105,8 +105,10 @@
};
&cru {
- assigned-clocks = <&cru PLL_GPLL>, <&pmucru PLL_PPLL>, <&cru PLL_VPLL>;
- assigned-clock-rates = <1200000000>, <200000000>, <500000000>;
+ assigned-clocks = <&pmucru CLK_RTC_32K>, <&cru PLL_GPLL>,
+ <&pmucru PLL_PPLL>, <&cru PLL_VPLL>;
+ assigned-clock-rates = <32768>, <1200000000>,
+ <200000000>, <500000000>;
};
&dsi_dphy0 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
index ce7165d7f1a1..102e448bc026 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
@@ -598,7 +598,7 @@
non-removable;
pinctrl-names = "default";
pinctrl-0 = <&sdmmc1_bus4 &sdmmc1_cmd &sdmmc1_clk>;
- sd-uhs-sdr104;
+ sd-uhs-sdr50;
vmmc-supply = <&vcc3v3_sys>;
vqmmc-supply = <&vcc_1v8>;
status = "okay";
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 005cde61b4b2..a506948b5572 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -222,6 +222,7 @@
cache-size = <131072>;
cache-line-size = <64>;
cache-sets = <512>;
+ cache-level = <2>;
next-level-cache = <&l3_cache>;
};
@@ -230,6 +231,7 @@
cache-size = <131072>;
cache-line-size = <64>;
cache-sets = <512>;
+ cache-level = <2>;
next-level-cache = <&l3_cache>;
};
@@ -238,6 +240,7 @@
cache-size = <131072>;
cache-line-size = <64>;
cache-sets = <512>;
+ cache-level = <2>;
next-level-cache = <&l3_cache>;
};
@@ -246,6 +249,7 @@
cache-size = <131072>;
cache-line-size = <64>;
cache-sets = <512>;
+ cache-level = <2>;
next-level-cache = <&l3_cache>;
};
@@ -254,6 +258,7 @@
cache-size = <524288>;
cache-line-size = <64>;
cache-sets = <1024>;
+ cache-level = <2>;
next-level-cache = <&l3_cache>;
};
@@ -262,6 +267,7 @@
cache-size = <524288>;
cache-line-size = <64>;
cache-sets = <1024>;
+ cache-level = <2>;
next-level-cache = <&l3_cache>;
};
@@ -270,6 +276,7 @@
cache-size = <524288>;
cache-line-size = <64>;
cache-sets = <1024>;
+ cache-level = <2>;
next-level-cache = <&l3_cache>;
};
@@ -278,6 +285,7 @@
cache-size = <524288>;
cache-line-size = <64>;
cache-sets = <1024>;
+ cache-level = <2>;
next-level-cache = <&l3_cache>;
};
@@ -286,6 +294,7 @@
cache-size = <3145728>;
cache-line-size = <64>;
cache-sets = <4096>;
+ cache-level = <3>;
};
};
diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c
index 5edec2f49ec9..deff21bfa680 100644
--- a/arch/arm64/kernel/compat_alignment.c
+++ b/arch/arm64/kernel/compat_alignment.c
@@ -314,36 +314,32 @@ int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs)
int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs);
unsigned int type;
u32 instr = 0;
- u16 tinstr = 0;
int isize = 4;
int thumb2_32b = 0;
- int fault;
instrptr = instruction_pointer(regs);
if (compat_thumb_mode(regs)) {
__le16 __user *ptr = (__le16 __user *)(instrptr & ~1);
+ u16 tinstr, tinst2;
- fault = alignment_get_thumb(regs, ptr, &tinstr);
- if (!fault) {
- if (IS_T32(tinstr)) {
- /* Thumb-2 32-bit */
- u16 tinst2;
- fault = alignment_get_thumb(regs, ptr + 1, &tinst2);
- instr = ((u32)tinstr << 16) | tinst2;
- thumb2_32b = 1;
- } else {
- isize = 2;
- instr = thumb2arm(tinstr);
- }
+ if (alignment_get_thumb(regs, ptr, &tinstr))
+ return 1;
+
+ if (IS_T32(tinstr)) { /* Thumb-2 32-bit */
+ if (alignment_get_thumb(regs, ptr + 1, &tinst2))
+ return 1;
+ instr = ((u32)tinstr << 16) | tinst2;
+ thumb2_32b = 1;
+ } else {
+ isize = 2;
+ instr = thumb2arm(tinstr);
}
} else {
- fault = alignment_get_arm(regs, (__le32 __user *)instrptr, &instr);
+ if (alignment_get_arm(regs, (__le32 __user *)instrptr, &instr))
+ return 1;
}
- if (fault)
- return 1;
-
switch (CODING_BITS(instr)) {
case 0x00000000: /* 3.13.4 load/store instruction extensions */
if (LDSTHD_I_BIT(instr))
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 3bd732eaf087..4b2e16e696a8 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -220,6 +220,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_VCPU_ATTRIBUTES:
case KVM_CAP_PTP_KVM:
case KVM_CAP_ARM_SYSTEM_SUSPEND:
+ case KVM_CAP_IRQFD_RESAMPLE:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -1889,9 +1890,33 @@ static int __init do_pkvm_init(u32 hyp_va_bits)
return ret;
}
+static u64 get_hyp_id_aa64pfr0_el1(void)
+{
+ /*
+ * Track whether the system isn't affected by spectre/meltdown in the
+ * hypervisor's view of id_aa64pfr0_el1, used for protected VMs.
+ * Although this is per-CPU, we make it global for simplicity, e.g., not
+ * to have to worry about vcpu migration.
+ *
+ * Unlike for non-protected VMs, userspace cannot override this for
+ * protected VMs.
+ */
+ u64 val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) |
+ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3));
+
+ val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2),
+ arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED);
+ val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3),
+ arm64_get_meltdown_state() == SPECTRE_UNAFFECTED);
+
+ return val;
+}
+
static void kvm_hyp_init_symbols(void)
{
- kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+ kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = get_hyp_id_aa64pfr0_el1();
kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index 07edfc7524c9..37440e1dda93 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -33,11 +33,14 @@
* Allow for protected VMs:
* - Floating-point and Advanced SIMD
* - Data Independent Timing
+ * - Spectre/Meltdown Mitigation
*/
#define PVM_ID_AA64PFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3) \
)
/*
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 08d2b004f4b7..edd969a1f36b 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -85,19 +85,12 @@ static u64 get_restricted_features_unsigned(u64 sys_reg_val,
static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
{
- const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
u64 set_mask = 0;
u64 allow_mask = PVM_ID_AA64PFR0_ALLOW;
set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
- /* Spectre and Meltdown mitigation in KVM */
- set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2),
- (u64)kvm->arch.pfr0_csv2);
- set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3),
- (u64)kvm->arch.pfr0_csv3);
-
return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index c243b10f3e15..5eca0cdd961d 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -558,6 +558,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
for_each_set_bit(i, &mask, 32)
kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
}
+ kvm_vcpu_pmu_restore_guest(vcpu);
}
static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1b2c161120be..34688918c811 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -794,7 +794,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (!kvm_supports_32bit_el0())
val |= ARMV8_PMU_PMCR_LC;
kvm_pmu_handle_pmcr(vcpu, val);
- kvm_vcpu_pmu_restore_guest(vcpu);
} else {
/* PMCR.P & PMCR.C are RAZ */
val = __vcpu_sys_reg(vcpu, PMCR_EL0)
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index a6acb94ea3d6..c2edadb8ec6a 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -281,4 +281,8 @@
/* DMB */
#define A64_DMB_ISH aarch64_insn_gen_dmb(AARCH64_INSN_MB_ISH)
+/* ADR */
+#define A64_ADR(Rd, offset) \
+ aarch64_insn_gen_adr(0, offset, Rd, AARCH64_INSN_ADR_TYPE_ADR)
+
#endif /* _BPF_JIT_H */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 62f805f427b7..b26da8efa616 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1900,7 +1900,8 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
restore_args(ctx, args_off, nargs);
/* call original func */
emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
- emit(A64_BLR(A64_R(10)), ctx);
+ emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
+ emit(A64_RET(A64_R(10)), ctx);
/* store return value */
emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
/* reserve a nop for bpf_tramp_image_put */
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 7fd51257e0ed..3ddde336e6a5 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -447,6 +447,22 @@ config ARCH_IOREMAP
protection support. However, you can enable LoongArch DMW-based
ioremap() for better performance.
+config ARCH_WRITECOMBINE
+ bool "Enable WriteCombine (WUC) for ioremap()"
+ help
+ LoongArch maintains cache coherency in hardware, but when paired
+ with LS7A chipsets the WUC attribute (Weak-ordered UnCached, which
+ is similar to WriteCombine) is outside the scope of the cache
+ coherency mechanism for PCIe devices (this is a PCIe protocol
+ violation, which may be fixed in newer chipsets).
+
+ This means WUC can currently only be used for write-only memory
+ regions, so this option is disabled by default, making WUC silently
+ fall back to SUC for ioremap(). You can enable this option if the
+ kernel is guaranteed to run on hardware without this bug.
+
+ You can override this setting via the writecombine=on/off boot
+ parameter.
+
config ARCH_STRICT_ALIGN
bool "Enable -mstrict-align to prevent unaligned accesses" if EXPERT
default y
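
A note on the ARCH_WRITECOMBINE help text above: from a driver's point of
view nothing changes, since the policy is applied inside ioremap_wc()
itself. A minimal kernel-side sketch (map_scanout() and the write-only
scanout buffer are illustrative, not from this patch):

#include <linux/io.h>

/*
 * Hedged sketch: whether this mapping ends up WUC or SUC is now decided
 * by ARCH_WRITECOMBINE (or the writecombine= boot parameter), not by
 * the caller. Write-only regions are safe under either attribute.
 */
static void __iomem *map_scanout(phys_addr_t base, size_t len)
{
	return ioremap_wc(base, len);
}
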
diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h
index 4198753aa1d0..976a810352c6 100644
--- a/arch/loongarch/include/asm/acpi.h
+++ b/arch/loongarch/include/asm/acpi.h
@@ -41,8 +41,11 @@ extern void loongarch_suspend_enter(void);
static inline unsigned long acpi_get_wakeup_address(void)
{
+#ifdef CONFIG_SUSPEND
extern void loongarch_wakeup_start(void);
return (unsigned long)loongarch_wakeup_start;
+#endif
+ return 0UL;
}
#endif /* _ASM_LOONGARCH_ACPI_H */
diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h
index 8fb699b4d40a..5c9c03bdf915 100644
--- a/arch/loongarch/include/asm/addrspace.h
+++ b/arch/loongarch/include/asm/addrspace.h
@@ -71,9 +71,9 @@ extern unsigned long vm_map_base;
#define _ATYPE32_ int
#define _ATYPE64_ __s64
#ifdef CONFIG_64BIT
-#define _CONST64_(x) x ## L
+#define _CONST64_(x) x ## UL
#else
-#define _CONST64_(x) x ## LL
+#define _CONST64_(x) x ## ULL
#endif
#endif
diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h
index 0051b526ac6d..c60796869b2b 100644
--- a/arch/loongarch/include/asm/bootinfo.h
+++ b/arch/loongarch/include/asm/bootinfo.h
@@ -13,7 +13,6 @@ const char *get_system_type(void);
extern void init_environ(void);
extern void memblock_init(void);
extern void platform_init(void);
-extern void plat_swiotlb_setup(void);
extern int __init init_numa_memory(void);
struct loongson_board_info {
diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h
index b07974218393..f6177f133477 100644
--- a/arch/loongarch/include/asm/cpu-features.h
+++ b/arch/loongarch/include/asm/cpu-features.h
@@ -42,6 +42,7 @@
#define cpu_has_fpu cpu_opt(LOONGARCH_CPU_FPU)
#define cpu_has_lsx cpu_opt(LOONGARCH_CPU_LSX)
#define cpu_has_lasx cpu_opt(LOONGARCH_CPU_LASX)
+#define cpu_has_crc32 cpu_opt(LOONGARCH_CPU_CRC32)
#define cpu_has_complex cpu_opt(LOONGARCH_CPU_COMPLEX)
#define cpu_has_crypto cpu_opt(LOONGARCH_CPU_CRYPTO)
#define cpu_has_lvz cpu_opt(LOONGARCH_CPU_LVZ)
diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h
index c3da91759472..88773d849e33 100644
--- a/arch/loongarch/include/asm/cpu.h
+++ b/arch/loongarch/include/asm/cpu.h
@@ -78,25 +78,26 @@ enum cpu_type_enum {
#define CPU_FEATURE_FPU 3 /* CPU has FPU */
#define CPU_FEATURE_LSX 4 /* CPU has LSX (128-bit SIMD) */
#define CPU_FEATURE_LASX 5 /* CPU has LASX (256-bit SIMD) */
-#define CPU_FEATURE_COMPLEX 6 /* CPU has Complex instructions */
-#define CPU_FEATURE_CRYPTO 7 /* CPU has Crypto instructions */
-#define CPU_FEATURE_LVZ 8 /* CPU has Virtualization extension */
-#define CPU_FEATURE_LBT_X86 9 /* CPU has X86 Binary Translation */
-#define CPU_FEATURE_LBT_ARM 10 /* CPU has ARM Binary Translation */
-#define CPU_FEATURE_LBT_MIPS 11 /* CPU has MIPS Binary Translation */
-#define CPU_FEATURE_TLB 12 /* CPU has TLB */
-#define CPU_FEATURE_CSR 13 /* CPU has CSR */
-#define CPU_FEATURE_WATCH 14 /* CPU has watchpoint registers */
-#define CPU_FEATURE_VINT 15 /* CPU has vectored interrupts */
-#define CPU_FEATURE_CSRIPI 16 /* CPU has CSR-IPI */
-#define CPU_FEATURE_EXTIOI 17 /* CPU has EXT-IOI */
-#define CPU_FEATURE_PREFETCH 18 /* CPU has prefetch instructions */
-#define CPU_FEATURE_PMP 19 /* CPU has perfermance counter */
-#define CPU_FEATURE_SCALEFREQ 20 /* CPU supports cpufreq scaling */
-#define CPU_FEATURE_FLATMODE 21 /* CPU has flat mode */
-#define CPU_FEATURE_EIODECODE 22 /* CPU has EXTIOI interrupt pin decode mode */
-#define CPU_FEATURE_GUESTID 23 /* CPU has GuestID feature */
-#define CPU_FEATURE_HYPERVISOR 24 /* CPU has hypervisor (running in VM) */
+#define CPU_FEATURE_CRC32 6 /* CPU has CRC32 instructions */
+#define CPU_FEATURE_COMPLEX 7 /* CPU has Complex instructions */
+#define CPU_FEATURE_CRYPTO 8 /* CPU has Crypto instructions */
+#define CPU_FEATURE_LVZ 9 /* CPU has Virtualization extension */
+#define CPU_FEATURE_LBT_X86 10 /* CPU has X86 Binary Translation */
+#define CPU_FEATURE_LBT_ARM 11 /* CPU has ARM Binary Translation */
+#define CPU_FEATURE_LBT_MIPS 12 /* CPU has MIPS Binary Translation */
+#define CPU_FEATURE_TLB 13 /* CPU has TLB */
+#define CPU_FEATURE_CSR 14 /* CPU has CSR */
+#define CPU_FEATURE_WATCH 15 /* CPU has watchpoint registers */
+#define CPU_FEATURE_VINT 16 /* CPU has vectored interrupts */
+#define CPU_FEATURE_CSRIPI 17 /* CPU has CSR-IPI */
+#define CPU_FEATURE_EXTIOI 18 /* CPU has EXT-IOI */
+#define CPU_FEATURE_PREFETCH 19 /* CPU has prefetch instructions */
+#define CPU_FEATURE_PMP 20 /* CPU has performance counter */
+#define CPU_FEATURE_SCALEFREQ 21 /* CPU supports cpufreq scaling */
+#define CPU_FEATURE_FLATMODE 22 /* CPU has flat mode */
+#define CPU_FEATURE_EIODECODE 23 /* CPU has EXTIOI interrupt pin decode mode */
+#define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */
+#define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */
#define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG)
#define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM)
@@ -104,6 +105,7 @@ enum cpu_type_enum {
#define LOONGARCH_CPU_FPU BIT_ULL(CPU_FEATURE_FPU)
#define LOONGARCH_CPU_LSX BIT_ULL(CPU_FEATURE_LSX)
#define LOONGARCH_CPU_LASX BIT_ULL(CPU_FEATURE_LASX)
+#define LOONGARCH_CPU_CRC32 BIT_ULL(CPU_FEATURE_CRC32)
#define LOONGARCH_CPU_COMPLEX BIT_ULL(CPU_FEATURE_COMPLEX)
#define LOONGARCH_CPU_CRYPTO BIT_ULL(CPU_FEATURE_CRYPTO)
#define LOONGARCH_CPU_LVZ BIT_ULL(CPU_FEATURE_LVZ)
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index 402a7d9e3a53..545e2708fbf7 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -54,8 +54,10 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
* @offset: bus address of the memory
* @size: size of the resource to map
*/
+extern pgprot_t pgprot_wc;
+
#define ioremap_wc(offset, size) \
- ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_WUC))
+ ioremap_prot((offset), (size), pgprot_val(pgprot_wc))
#define ioremap_cache(offset, size) \
ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL))
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 65b7dcdea16d..83da5d29e2d1 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -117,7 +117,7 @@ static inline u32 read_cpucfg(u32 reg)
#define CPUCFG1_EP BIT(22)
#define CPUCFG1_RPLV BIT(23)
#define CPUCFG1_HUGEPG BIT(24)
-#define CPUCFG1_IOCSRBRD BIT(25)
+#define CPUCFG1_CRC32 BIT(25)
#define CPUCFG1_MSGINT BIT(26)
#define LOONGARCH_CPUCFG2 0x2
@@ -423,9 +423,9 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
#define CSR_ASID_ASID_WIDTH 10
#define CSR_ASID_ASID (_ULCAST_(0x3ff) << CSR_ASID_ASID_SHIFT)
-#define LOONGARCH_CSR_PGDL 0x19 /* Page table base address when VA[47] = 0 */
+#define LOONGARCH_CSR_PGDL 0x19 /* Page table base address when VA[VALEN-1] = 0 */
-#define LOONGARCH_CSR_PGDH 0x1a /* Page table base address when VA[47] = 1 */
+#define LOONGARCH_CSR_PGDH 0x1a /* Page table base address when VA[VALEN-1] = 1 */
#define LOONGARCH_CSR_PGD 0x1b /* Page table base */
diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h
index 438f09d4ccf4..88554f92e010 100644
--- a/arch/loongarch/include/asm/module.lds.h
+++ b/arch/loongarch/include/asm/module.lds.h
@@ -2,8 +2,8 @@
/* Copyright (C) 2020-2022 Loongson Technology Corporation Limited */
SECTIONS {
. = ALIGN(4);
- .got : { BYTE(0) }
- .plt : { BYTE(0) }
- .plt.idx : { BYTE(0) }
- .ftrace_trampoline : { BYTE(0) }
+ .got 0 : { BYTE(0) }
+ .plt 0 : { BYTE(0) }
+ .plt.idx 0 : { BYTE(0) }
+ .ftrace_trampoline 0 : { BYTE(0) }
}
diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h
index cc48ed262021..82d811b5c6e9 100644
--- a/arch/loongarch/include/uapi/asm/ptrace.h
+++ b/arch/loongarch/include/uapi/asm/ptrace.h
@@ -47,11 +47,12 @@ struct user_fp_state {
};
struct user_watch_state {
- uint16_t dbg_info;
+ uint64_t dbg_info;
struct {
uint64_t addr;
uint64_t mask;
uint32_t ctrl;
+ uint32_t pad;
} dbg_regs[8];
};
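
The uapi change above widens dbg_info to 64 bits and adds explicit padding
so each dbg_regs entry stays 8-byte aligned. A minimal userspace sketch of
fetching this regset, assuming NT_LOONGARCH_HW_WATCH from <linux/elf.h>
(the struct is mirrored locally for illustration; tracee attach/stop
handling is elided):

#include <stdint.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <linux/elf.h>		/* NT_LOONGARCH_HW_WATCH */

/* Local mirror of the updated layout from asm/ptrace.h (sketch). */
struct user_watch_state {
	uint64_t dbg_info;
	struct {
		uint64_t addr;
		uint64_t mask;
		uint32_t ctrl;
		uint32_t pad;	/* the new explicit padding */
	} dbg_regs[8];
};

static int read_watchpoints(pid_t pid)
{
	struct user_watch_state ws;
	struct iovec iov = { .iov_base = &ws, .iov_len = sizeof(ws) };

	if (ptrace(PTRACE_GETREGSET, pid, NT_LOONGARCH_HW_WATCH, &iov) < 0)
		return -1;

	/* The low bits of dbg_info report the number of watchpoints. */
	printf("%llu watchpoints\n", (unsigned long long)(ws.dbg_info & 0xff));
	return 0;
}
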
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index 3a3fce2d7846..5adf0f736c6d 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -60,7 +60,7 @@ static inline void set_elf_platform(int cpu, const char *plat)
/* MAP BASE */
unsigned long vm_map_base;
-EXPORT_SYMBOL_GPL(vm_map_base);
+EXPORT_SYMBOL(vm_map_base);
static void cpu_probe_addrbits(struct cpuinfo_loongarch *c)
{
@@ -94,13 +94,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR |
LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH;
- elf_hwcap = HWCAP_LOONGARCH_CPUCFG | HWCAP_LOONGARCH_CRC32;
+ elf_hwcap = HWCAP_LOONGARCH_CPUCFG;
config = read_cpucfg(LOONGARCH_CPUCFG1);
if (config & CPUCFG1_UAL) {
c->options |= LOONGARCH_CPU_UAL;
elf_hwcap |= HWCAP_LOONGARCH_UAL;
}
+ if (config & CPUCFG1_CRC32) {
+ c->options |= LOONGARCH_CPU_CRC32;
+ elf_hwcap |= HWCAP_LOONGARCH_CRC32;
+ }
+
config = read_cpucfg(LOONGARCH_CPUCFG2);
if (config & CPUCFG2_LAM) {
diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c
index 5c67cc4fd56d..0d82907b5404 100644
--- a/arch/loongarch/kernel/proc.c
+++ b/arch/loongarch/kernel/proc.c
@@ -76,6 +76,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (cpu_has_fpu) seq_printf(m, " fpu");
if (cpu_has_lsx) seq_printf(m, " lsx");
if (cpu_has_lasx) seq_printf(m, " lasx");
+ if (cpu_has_crc32) seq_printf(m, " crc32");
if (cpu_has_complex) seq_printf(m, " complex");
if (cpu_has_crypto) seq_printf(m, " crypto");
if (cpu_has_lvz) seq_printf(m, " lvz");
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index 06bceae7d104..5fcffb452367 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -391,10 +391,10 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
return 0;
}
-static int ptrace_hbp_get_resource_info(unsigned int note_type, u16 *info)
+static int ptrace_hbp_get_resource_info(unsigned int note_type, u64 *info)
{
u8 num;
- u16 reg = 0;
+ u64 reg = 0;
switch (note_type) {
case NT_LOONGARCH_HW_BREAK:
@@ -524,15 +524,16 @@ static int ptrace_hbp_set_addr(unsigned int note_type,
return modify_user_hw_breakpoint(bp, &attr);
}
-#define PTRACE_HBP_CTRL_SZ sizeof(u32)
#define PTRACE_HBP_ADDR_SZ sizeof(u64)
#define PTRACE_HBP_MASK_SZ sizeof(u64)
+#define PTRACE_HBP_CTRL_SZ sizeof(u32)
+#define PTRACE_HBP_PAD_SZ sizeof(u32)
static int hw_break_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
{
- u16 info;
+ u64 info;
u32 ctrl;
u64 addr, mask;
int ret, idx = 0;
@@ -545,7 +546,7 @@ static int hw_break_get(struct task_struct *target,
membuf_write(&to, &info, sizeof(info));
- /* (address, ctrl) registers */
+ /* (address, mask, ctrl) registers */
while (to.left) {
ret = ptrace_hbp_get_addr(note_type, target, idx, &addr);
if (ret)
@@ -562,6 +563,7 @@ static int hw_break_get(struct task_struct *target,
membuf_store(&to, addr);
membuf_store(&to, mask);
membuf_store(&to, ctrl);
+ membuf_zero(&to, sizeof(u32));
idx++;
}
@@ -582,7 +584,7 @@ static int hw_break_set(struct task_struct *target,
offset = offsetof(struct user_watch_state, dbg_regs);
user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, offset);
- /* (address, ctrl) registers */
+ /* (address, mask, ctrl) registers */
limit = regset->n * regset->size;
while (count && offset < limit) {
if (count < PTRACE_HBP_ADDR_SZ)
@@ -602,7 +604,7 @@ static int hw_break_set(struct task_struct *target,
break;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &mask,
- offset, offset + PTRACE_HBP_ADDR_SZ);
+ offset, offset + PTRACE_HBP_MASK_SZ);
if (ret)
return ret;
@@ -611,8 +613,8 @@ static int hw_break_set(struct task_struct *target,
return ret;
offset += PTRACE_HBP_MASK_SZ;
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &mask,
- offset, offset + PTRACE_HBP_MASK_SZ);
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl,
+ offset, offset + PTRACE_HBP_CTRL_SZ);
if (ret)
return ret;
@@ -620,6 +622,11 @@ static int hw_break_set(struct task_struct *target,
if (ret)
return ret;
offset += PTRACE_HBP_CTRL_SZ;
+
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ offset, offset + PTRACE_HBP_PAD_SZ);
+ offset += PTRACE_HBP_PAD_SZ;
+
idx++;
}
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index bae84ccf6d36..4444b13418f0 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -160,6 +160,27 @@ static void __init smbios_parse(void)
dmi_walk(find_tokens, NULL);
}
+#ifdef CONFIG_ARCH_WRITECOMBINE
+pgprot_t pgprot_wc = PAGE_KERNEL_WUC;
+#else
+pgprot_t pgprot_wc = PAGE_KERNEL_SUC;
+#endif
+
+EXPORT_SYMBOL(pgprot_wc);
+
+static int __init setup_writecombine(char *p)
+{
+ if (!strcmp(p, "on"))
+ pgprot_wc = PAGE_KERNEL_WUC;
+ else if (!strcmp(p, "off"))
+ pgprot_wc = PAGE_KERNEL_SUC;
+ else
+ pr_warn("Unknown writecombine setting \"%s\".\n", p);
+
+ return 0;
+}
+early_param("writecombine", setup_writecombine);
+
static int usermem __initdata;
static int __init early_parse_mem(char *p)
@@ -368,8 +389,8 @@ static void __init arch_mem_init(char **cmdline_p)
/*
* In order to reduce the possibility of kernel panic when failed to
* get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
- * low memory as small as possible before plat_swiotlb_setup(), so
- * make sparse_init() using top-down allocation.
+ * low memory as small as possible before swiotlb_init(), so make
+ * sparse_init() use top-down allocation.
*/
memblock_set_bottom_up(false);
sparse_init();
diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c
index 3a690f96f00c..2463d2fea21f 100644
--- a/arch/loongarch/kernel/stacktrace.c
+++ b/arch/loongarch/kernel/stacktrace.c
@@ -30,7 +30,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
regs->regs[1] = 0;
for (unwind_start(&state, task, regs);
- !unwind_done(&state); unwind_next_frame(&state)) {
+ !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
if (!addr || !consume_entry(cookie, addr))
break;
diff --git a/arch/loongarch/kernel/unwind.c b/arch/loongarch/kernel/unwind.c
index a463d6961344..ba324ba76fa1 100644
--- a/arch/loongarch/kernel/unwind.c
+++ b/arch/loongarch/kernel/unwind.c
@@ -28,5 +28,6 @@ bool default_next_frame(struct unwind_state *state)
} while (!get_stack_info(state->sp, state->task, info));
+ state->error = true;
return false;
}
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 9095fde8e55d..55afc27320e1 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -211,7 +211,7 @@ static bool next_frame(struct unwind_state *state)
pc = regs->csr_era;
if (user_mode(regs) || !__kernel_text_address(pc))
- return false;
+ goto out;
state->first = true;
state->pc = pc;
@@ -226,6 +226,8 @@ static bool next_frame(struct unwind_state *state)
} while (!get_stack_info(state->sp, state->task, info));
+out:
+ state->error = true;
return false;
}
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index e018aed34586..3b7d8129570b 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -41,7 +41,7 @@
* don't have to care about aliases on other CPUs.
*/
unsigned long empty_zero_page, zero_page_mask;
-EXPORT_SYMBOL_GPL(empty_zero_page);
+EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(zero_page_mask);
void setup_zero_pages(void)
@@ -270,7 +270,7 @@ pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
#endif
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
-EXPORT_SYMBOL_GPL(invalid_pmd_table);
+EXPORT_SYMBOL(invalid_pmd_table);
#endif
pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
EXPORT_SYMBOL(invalid_pte_table);
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index e70c846efaa1..db9342b2d0e6 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1022,6 +1022,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
emit_atomic(insn, ctx);
break;
+ /* Speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ break;
+
default:
pr_err("bpf_jit: unknown opcode %02x\n", code);
return -EINVAL;
diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S
index 90da899c06a1..e2fc3b4e31f0 100644
--- a/arch/loongarch/power/suspend_asm.S
+++ b/arch/loongarch/power/suspend_asm.S
@@ -80,6 +80,10 @@ SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL)
JUMP_VIRT_ADDR t0, t1
+ /* Enable PG */
+ li.w t0, 0xb0 # PLV=0, IE=0, PG=1
+ csrwr t0, LOONGARCH_CSR_CRMD
+
la.pcrel t0, acpi_saved_sp
ld.d sp, t0, 0
SETUP_WAKEUP
diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c
index 33788668cbdb..3779e7855bd7 100644
--- a/arch/mips/bmips/dma.c
+++ b/arch/mips/bmips/dma.c
@@ -5,6 +5,8 @@
#include <asm/bmips.h>
#include <asm/io.h>
+bool bmips_rac_flush_disable;
+
void arch_sync_dma_for_cpu_all(void)
{
void __iomem *cbr = BMIPS_GET_CBR();
@@ -15,6 +17,9 @@ void arch_sync_dma_for_cpu_all(void)
boot_cpu_type() != CPU_BMIPS4380)
return;
+ if (unlikely(bmips_rac_flush_disable))
+ return;
+
/* Flush stale data out of the readahead cache */
cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
__raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG);
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
index e95b3f78e7cd..549a6392a3d2 100644
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -35,6 +35,8 @@
#define REG_BCM6328_OTP ((void __iomem *)CKSEG1ADDR(0x1000062c))
#define BCM6328_TP1_DISABLED BIT(9)
+extern bool bmips_rac_flush_disable;
+
static const unsigned long kbase = VMLINUX_LOAD_ADDRESS & 0xfff00000;
struct bmips_quirk {
@@ -104,6 +106,12 @@ static void bcm6358_quirks(void)
* disable SMP for now
*/
bmips_smp_enabled = 0;
+
+ /*
+ * RAC flush causes kernel panics on BCM6358 when booting from TP1
+ * because the bootloader is not initializing it properly.
+ */
+ bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31));
}
static void bcm6368_quirks(void)
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 2bbc0fcce04a..5e26c7f2c25a 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -148,6 +148,11 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
*/
}
+static inline bool __pte_protnone(unsigned long pte)
+{
+ return (pte & (pgprot_val(PAGE_NONE) | _PAGE_RWX)) == pgprot_val(PAGE_NONE);
+}
+
static inline bool __pte_flags_need_flush(unsigned long oldval,
unsigned long newval)
{
@@ -164,8 +169,8 @@ static inline bool __pte_flags_need_flush(unsigned long oldval,
/*
* We do not expect kernel mappings or non-PTEs or not-present PTEs.
*/
- VM_WARN_ON_ONCE(oldval & _PAGE_PRIVILEGED);
- VM_WARN_ON_ONCE(newval & _PAGE_PRIVILEGED);
+ VM_WARN_ON_ONCE(!__pte_protnone(oldval) && oldval & _PAGE_PRIVILEGED);
+ VM_WARN_ON_ONCE(!__pte_protnone(newval) && newval & _PAGE_PRIVILEGED);
VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE));
VM_WARN_ON_ONCE(!(newval & _PAGE_PTE));
VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT));
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
index 2087a785f05f..5fff0d04b23f 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -290,6 +290,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
static int ppr_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
+ if (!target->thread.regs)
+ return -EINVAL;
+
return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64));
}
@@ -297,6 +300,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count, const void *kbuf,
const void __user *ubuf)
{
+ if (!target->thread.regs)
+ return -EINVAL;
+
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.regs->ppr, 0, sizeof(u64));
}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4c5405fc5538..d23e25e8432d 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -576,6 +576,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
+ case KVM_CAP_IRQFD_RESAMPLE:
+ r = !xive_enabled();
+ break;
+#endif
+
case KVM_CAP_PPC_ALLOC_HTAB:
r = hv_enabled;
break;
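
Both the arm64 hunk earlier and the powerpc hunk above advertise
KVM_CAP_IRQFD_RESAMPLE (on powerpc only when XIVE is not in use). A hedged
userspace sketch of how a VMM would probe it; KVM_CHECK_EXTENSION is the
standard KVM ioctl, and error handling is trimmed:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0)
		return 1;

	/* Returns > 0 when resampling irqfds are usable on this host. */
	int r = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IRQFD_RESAMPLE);
	printf("irqfd resample: %s\n", r > 0 ? "supported" : "unsupported");
	return 0;
}
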
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b44ce71917d7..16cfe56be05b 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -366,6 +366,7 @@ void update_numa_distance(struct device_node *node)
WARN(numa_distance_table[nid][nid] == -1,
"NUMA distance details for node %d not provided\n", nid);
}
+EXPORT_SYMBOL_GPL(update_numa_distance);
/*
* ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 2f8385523a13..1a53e048ceb7 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -1428,6 +1428,13 @@ static int papr_scm_probe(struct platform_device *pdev)
return -ENODEV;
}
+ /*
+ * Open Firmware platform device creation won't update the NUMA
+ * distance table. For PAPR SCM devices we use numa_map_to_online_node()
+ * to find the nearest online NUMA node, and that requires correct
+ * distance table information.
+ */
+ update_numa_distance(dn);
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 559112312810..513180467562 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -856,6 +856,13 @@ int pseries_vas_dlpar_cpu(void)
{
int new_nr_creds, rc;
+ /*
+ * NX-GZIP is not enabled. Nothing to do for DLPAR event.
+ */
+ if (!copypaste_feat)
+ return 0;
+
rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
(u64)virt_to_phys(&hv_cop_caps));
@@ -1012,6 +1019,7 @@ static int __init pseries_vas_init(void)
* Linux supports user space COPY/PASTE only with Radix
*/
if (!radix_enabled()) {
+ copypaste_feat = false;
pr_err("API is supported only with radix page tables\n");
return -ENOTSUPP;
}
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 5b182d1c196c..eb7f29a412f8 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -126,6 +126,7 @@ config RISCV
select OF_IRQ
select PCI_DOMAINS_GENERIC if PCI
select PCI_MSI if PCI
+ select RISCV_ALTERNATIVE if !XIP_KERNEL
select RISCV_INTC
select RISCV_TIMER if RISCV_SBI
select SIFIVE_PLIC
@@ -401,9 +402,8 @@ config RISCV_ISA_C
config RISCV_ISA_SVPBMT
bool "SVPBMT extension support"
depends on 64BIT && MMU
- depends on !XIP_KERNEL
+ depends on RISCV_ALTERNATIVE
default y
- select RISCV_ALTERNATIVE
help
Adds support to dynamically detect the presence of the SVPBMT
ISA-extension (Supervisor-mode: page-based memory types) and
@@ -428,8 +428,8 @@ config TOOLCHAIN_HAS_ZBB
config RISCV_ISA_ZBB
bool "Zbb extension support for bit manipulation instructions"
depends on TOOLCHAIN_HAS_ZBB
- depends on !XIP_KERNEL && MMU
- select RISCV_ALTERNATIVE
+ depends on MMU
+ depends on RISCV_ALTERNATIVE
default y
help
Adds support to dynamically detect the presence of the ZBB
@@ -443,9 +443,9 @@ config RISCV_ISA_ZBB
config RISCV_ISA_ZICBOM
bool "Zicbom extension support for non-coherent DMA operation"
- depends on !XIP_KERNEL && MMU
+ depends on MMU
+ depends on RISCV_ALTERNATIVE
default y
- select RISCV_ALTERNATIVE
select RISCV_DMA_NONCOHERENT
help
Adds support to dynamically detect the presence of the ZICBOM
diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
index 69621ae6d647..0c8f4652cd82 100644
--- a/arch/riscv/Kconfig.erratas
+++ b/arch/riscv/Kconfig.erratas
@@ -2,8 +2,7 @@ menu "CPU errata selection"
config ERRATA_SIFIVE
bool "SiFive errata"
- depends on !XIP_KERNEL
- select RISCV_ALTERNATIVE
+ depends on RISCV_ALTERNATIVE
help
All SiFive errata Kconfig depend on this Kconfig. Disabling
this Kconfig will disable all SiFive errata. Please say "Y"
@@ -35,8 +34,7 @@ config ERRATA_SIFIVE_CIP_1200
config ERRATA_THEAD
bool "T-HEAD errata"
- depends on !XIP_KERNEL
- select RISCV_ALTERNATIVE
+ depends on RISCV_ALTERNATIVE
help
All T-HEAD errata Kconfig depend on this Kconfig. Disabling
this Kconfig will disable all T-HEAD errata. Please say "Y"
diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi
index 07e2e2649604..f87c5164d9cf 100644
--- a/arch/riscv/boot/dts/canaan/k210.dtsi
+++ b/arch/riscv/boot/dts/canaan/k210.dtsi
@@ -259,7 +259,6 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ssi_clk", "pclk";
resets = <&sysrst K210_RST_SPI2>;
- spi-max-frequency = <25000000>;
};
i2s0: i2s@50250000 {
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 5c3e7b97fcc6..0a55099bb734 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -22,6 +22,14 @@
*/
enum fixed_addresses {
FIX_HOLE,
+ /*
+ * The fdt fixmap mapping must be PMD aligned and will be mapped
+ * using PMD entries in fixmap_pmd in 64-bit and a PGD entry in 32-bit.
+ */
+ FIX_FDT_END,
+ FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
+
+ /* Below fixmaps will be mapped using fixmap_pte */
FIX_PTE,
FIX_PMD,
FIX_PUD,
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index e3021b2590de..6263a0de1c6a 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -57,18 +57,31 @@ struct riscv_isa_ext_data {
unsigned int isa_ext_id;
};
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
+
+#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
+
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
+#define riscv_isa_extension_available(isa_bitmap, ext) \
+ __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
+
static __always_inline bool
riscv_has_extension_likely(const unsigned long ext)
{
compiletime_assert(ext < RISCV_ISA_EXT_MAX,
"ext must be < RISCV_ISA_EXT_MAX");
- asm_volatile_goto(
- ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1)
- :
- : [ext] "i" (ext)
- :
- : l_no);
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+ asm_volatile_goto(
+ ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1)
+ :
+ : [ext] "i" (ext)
+ :
+ : l_no);
+ } else {
+ if (!__riscv_isa_extension_available(NULL, ext))
+ goto l_no;
+ }
return true;
l_no:
@@ -81,26 +94,23 @@ riscv_has_extension_unlikely(const unsigned long ext)
compiletime_assert(ext < RISCV_ISA_EXT_MAX,
"ext must be < RISCV_ISA_EXT_MAX");
- asm_volatile_goto(
- ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1)
- :
- : [ext] "i" (ext)
- :
- : l_yes);
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+ asm_volatile_goto(
+ ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1)
+ :
+ : [ext] "i" (ext)
+ :
+ : l_yes);
+ } else {
+ if (__riscv_isa_extension_available(NULL, ext))
+ goto l_yes;
+ }
return false;
l_yes:
return true;
}
-unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
-
-#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
-
-bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
-#define riscv_isa_extension_available(isa_bitmap, ext) \
- __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
-
#endif
#endif /* _ASM_RISCV_HWCAP_H */
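
With the reordering above, riscv_has_extension_likely()/_unlikely()
degrade gracefully to a bitmap lookup when RISCV_ALTERNATIVE is compiled
out (e.g. XIP kernels). An illustrative call site, assuming hypothetical
orc_b_fast()/orc_b_generic() helpers:

#include <asm/hwcap.h>

static unsigned long find_zeroes(unsigned long word)
{
	/*
	 * Patched to a static branch when alternatives are available,
	 * otherwise falls back to __riscv_isa_extension_available().
	 */
	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB))
		return orc_b_fast(word);	/* hypothetical Zbb path */

	return orc_b_generic(word);		/* hypothetical fallback */
}
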
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index ab05f892d317..f641837ccf31 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -87,9 +87,13 @@
#define FIXADDR_TOP PCI_IO_START
#ifdef CONFIG_64BIT
-#define FIXADDR_SIZE PMD_SIZE
+#define MAX_FDT_SIZE PMD_SIZE
+#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M)
+#define FIXADDR_SIZE (PMD_SIZE + FIX_FDT_SIZE)
#else
-#define FIXADDR_SIZE PGDIR_SIZE
+#define MAX_FDT_SIZE PGDIR_SIZE
+#define FIX_FDT_SIZE MAX_FDT_SIZE
+#define FIXADDR_SIZE (PGDIR_SIZE + FIX_FDT_SIZE)
#endif
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
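
Assuming the usual RV64 constants (PMD_SIZE = 2 MiB, PAGE_SIZE = 4 KiB), the new definitions reserve a 4 MiB fixmap window for the FDT, i.e. 1024 page slots, so a maximally sized 2 MiB FDT still fits even when it straddles a PMD boundary. A quick arithmetic check:

#include <stdio.h>

int main(void)
{
        unsigned long page_size = 4096UL;
        unsigned long pmd_size  = 2UL << 20;               /* 2 MiB */
        unsigned long sz_2m     = 2UL << 20;

        unsigned long max_fdt_size = pmd_size;             /* 64-bit case */
        unsigned long fix_fdt_size = max_fdt_size + sz_2m; /* 4 MiB */

        printf("FDT fixmap slots: %lu\n", fix_fdt_size / page_size); /* 1024 */
        return 0;
}
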
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 376d2827e736..a059b73f4ddb 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -278,12 +278,8 @@ void __init setup_arch(char **cmdline_p)
#if IS_ENABLED(CONFIG_BUILTIN_DTB)
unflatten_and_copy_device_tree();
#else
- if (early_init_dt_verify(__va(XIP_FIXUP(dtb_early_pa))))
- unflatten_device_tree();
- else
- pr_err("No DTB found in kernel mappings\n");
+ unflatten_device_tree();
#endif
- early_init_fdt_scan_reserved_mem();
misc_mem_init();
init_resources();
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index bfb2afa4135f..dee66c9290cc 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -19,6 +19,7 @@
#include <asm/signal32.h>
#include <asm/switch_to.h>
#include <asm/csr.h>
+#include <asm/cacheflush.h>
extern u32 __user_rt_sigreturn[2];
@@ -181,6 +182,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
{
struct rt_sigframe __user *frame;
long err = 0;
+ unsigned long __maybe_unused addr;
frame = get_sigframe(ksig, regs, sizeof(*frame));
if (!access_ok(frame, sizeof(*frame)))
@@ -209,7 +211,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
if (copy_to_user(&frame->sigreturn_code, __user_rt_sigreturn,
sizeof(frame->sigreturn_code)))
return -EFAULT;
- regs->ra = (unsigned long)&frame->sigreturn_code;
+
+ addr = (unsigned long)&frame->sigreturn_code;
+	/* Make sure the two instructions are visible to instruction fetch. */
+ flush_icache_range(addr, addr + sizeof(frame->sigreturn_code));
+
+ regs->ra = addr;
#endif /* CONFIG_MMU */
/*
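
The flush added above follows the standard publish-instructions ordering: write the trampoline, synchronize the instruction cache, and only then expose the address in regs->ra. User-space JITs have the same obligation; a sketch using the compiler builtin (a userspace analogue, not the kernel API):

#include <stddef.h>
#include <string.h>

/* Copy code into buf and make it visible to instruction fetch
 * before anyone branches there. */
void publish_code(char *buf, const char *insns, size_t len)
{
        memcpy(buf, insns, len);                  /* write the trampoline */
        __builtin___clear_cache(buf, buf + len);  /* sync I-cache with D-cache */
}
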
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 478d6763a01a..0f14f4a8d179 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -57,7 +57,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
EXPORT_SYMBOL(empty_zero_page);
extern char _start[];
-#define DTB_EARLY_BASE_VA PGDIR_SIZE
void *_dtb_early_va __initdata;
uintptr_t _dtb_early_pa __initdata;
@@ -236,31 +235,22 @@ static void __init setup_bootmem(void)
set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
reserve_initrd_mem();
+
+ /*
+ * No allocation should be done before reserving the memory as defined
+ * in the device tree, otherwise the allocation could end up in a
+ * reserved region.
+ */
+ early_init_fdt_scan_reserved_mem();
+
/*
* If DTB is built in, no need to reserve its memblock.
* Otherwise, do reserve it but avoid using
* early_init_fdt_reserve_self() since __pa() does
* not work for DTB pointers that are fixmap addresses
*/
- if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) {
- /*
- * In case the DTB is not located in a memory region we won't
- * be able to locate it later on via the linear mapping and
- * get a segfault when accessing it via __va(dtb_early_pa).
- * To avoid this situation copy DTB to a memory region.
- * Note that memblock_phys_alloc will also reserve DTB region.
- */
- if (!memblock_is_memory(dtb_early_pa)) {
- size_t fdt_size = fdt_totalsize(dtb_early_va);
- phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE);
- void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size);
-
- memcpy(new_dtb_early_va, dtb_early_va, fdt_size);
- early_memunmap(new_dtb_early_va, fdt_size);
- _dtb_early_pa = new_dtb_early_pa;
- } else
- memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
- }
+ if (!IS_ENABLED(CONFIG_BUILTIN_DTB))
+ memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
dma_contiguous_reserve(dma32_phys_limit);
if (IS_ENABLED(CONFIG_64BIT))
@@ -279,9 +269,6 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
-static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
-static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
-static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
@@ -626,9 +613,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp,
#define trampoline_pgd_next (pgtable_l5_enabled ? \
(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \
(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
-#define early_dtb_pgd_next (pgtable_l5_enabled ? \
- (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \
- (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
#else
#define pgd_next_t pte_t
#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
@@ -636,7 +620,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp,
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next ((uintptr_t)fixmap_pte)
-#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd)
#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
@@ -860,32 +843,28 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
* this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
* entry.
*/
-static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
+static void __init create_fdt_early_page_table(pgd_t *pgdir,
+ uintptr_t fix_fdt_va,
+ uintptr_t dtb_pa)
{
-#ifndef CONFIG_BUILTIN_DTB
uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
- create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
- IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa,
- PGDIR_SIZE,
- IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
-
- if (pgtable_l5_enabled)
- create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
- (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
-
- if (pgtable_l4_enabled)
- create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
- (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
+#ifndef CONFIG_BUILTIN_DTB
+ /* Make sure the fdt fixmap address is always aligned on PMD size */
+ BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE));
- if (IS_ENABLED(CONFIG_64BIT)) {
- create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
+ /* In 32-bit only, the fdt lies in its own PGD */
+ if (!IS_ENABLED(CONFIG_64BIT)) {
+ create_pgd_mapping(early_pg_dir, fix_fdt_va,
+ pa, MAX_FDT_SIZE, PAGE_KERNEL);
+ } else {
+ create_pmd_mapping(fixmap_pmd, fix_fdt_va,
pa, PMD_SIZE, PAGE_KERNEL);
- create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
+ create_pmd_mapping(fixmap_pmd, fix_fdt_va + PMD_SIZE,
pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
}
- dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
+ dtb_early_va = (void *)fix_fdt_va + (dtb_pa & (PMD_SIZE - 1));
#else
/*
* For 64-bit kernel, __va can't be used since it would return a linear
@@ -1055,7 +1034,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
create_kernel_page_table(early_pg_dir, true);
/* Setup early mapping for FDT early scan */
- create_fdt_early_page_table(early_pg_dir, dtb_pa);
+ create_fdt_early_page_table(early_pg_dir,
+ __fix_to_virt(FIX_FDT), dtb_pa);
/*
	 * Boot-time fixmap can only handle PMD_SIZE mappings. Thus, boot-ioremap
@@ -1097,6 +1077,16 @@ static void __init setup_vm_final(void)
u64 i;
/* Setup swapper PGD for fixmap */
+#if !defined(CONFIG_64BIT)
+ /*
+ * In 32-bit, the device tree lies in a pgd entry, so it must be copied
+	 * directly into swapper_pg_dir in addition to the pgd entry that points
+ * to fixmap_pte.
+ */
+ unsigned long idx = pgd_index(__fix_to_virt(FIX_FDT));
+
+ set_pgd(&swapper_pg_dir[idx], early_pg_dir[idx]);
+#endif
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
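
The BUILD_BUG_ON in create_fdt_early_page_table() encodes the invariant that makes the two-PMD mapping work: the FIX_FDT slot index must be a multiple of the pages-per-PMD count so that __fix_to_virt(FIX_FDT) comes out PMD-aligned. A standalone model of that check (the slot index here is made up):

#include <assert.h>

int main(void)
{
        unsigned long page_size = 4096UL;
        unsigned long pmd_size  = 2UL << 20;
        unsigned long pages_per_pmd = pmd_size / page_size;  /* 512 */

        unsigned long fix_fdt_slot = 1024;  /* hypothetical enum value */

        /* mirrors BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE)) */
        assert(fix_fdt_slot % pages_per_pmd == 0);
        return 0;
}
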
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index d16bf715a586..5730797a6b40 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -84,12 +84,7 @@ CFLAGS_string.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_ctype.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 0ee02dae14b2..2cda8d9d7c6e 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -271,10 +271,18 @@ static int handle_prog(struct kvm_vcpu *vcpu)
* handle_external_interrupt - used for external interruption interceptions
* @vcpu: virtual cpu
*
- * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
- * the new PSW does not have external interrupts disabled. In the first case,
- * we've got to deliver the interrupt manually, and in the second case, we
- * drop to userspace to handle the situation there.
+ * This interception occurs if:
+ * - the CPUSTAT_EXT_INT bit was already set when the external interrupt
+ * occurred. In this case, the interrupt needs to be injected manually to
+ * preserve interrupt priority.
+ * - the external new PSW has external interrupts enabled, which will cause an
+ * interruption loop. We drop to userspace in this case.
+ *
+ * The latter case can be detected by inspecting the external mask bit in the
+ * external new PSW.
+ *
+ * Under PV, only the latter case can occur, since interrupt priorities are
+ * handled in the ultravisor.
*/
static int handle_external_interrupt(struct kvm_vcpu *vcpu)
{
@@ -285,10 +293,18 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
vcpu->stat.exit_external_interrupt++;
- rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
- if (rc)
- return rc;
- /* We can not handle clock comparator or timer interrupt with bad PSW */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ newpsw = vcpu->arch.sie_block->gpsw;
+ } else {
+ rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
+ if (rc)
+ return rc;
+ }
+
+ /*
+ * Clock comparator or timer interrupt with external interrupt enabled
+	 * will cause an interrupt loop. Drop to userspace.
+ */
if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
(newpsw.mask & PSW_MASK_EXT))
return -EOPNOTSUPP;
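
The check above is a loop detector: if the external new PSW re-enables external interrupts, re-delivering a still-pending clock-comparator or CPU-timer interrupt would re-intercept immediately, forever. A compact model of the test (constants illustrative, not taken from the s390 headers):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define PSW_MASK_EXT (1ULL << 56)   /* illustrative external-mask bit */

enum { EXT_IRQ_CLK_COMP = 0x1004, EXT_IRQ_CPU_TIMER = 0x1005 };

static bool would_loop(uint16_t eic, uint64_t new_psw_mask)
{
        return (eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
               (new_psw_mask & PSW_MASK_EXT);
}

int main(void)
{
        assert(would_loop(EXT_IRQ_CLK_COMP, PSW_MASK_EXT)); /* -EOPNOTSUPP */
        assert(!would_loop(EXT_IRQ_CLK_COMP, 0));           /* safe to handle */
        return 0;
}
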
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 39b36562c043..1eeb9ae57879 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -573,6 +573,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_VCPU_RESETS:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_S390_DIAG318:
+ case KVM_CAP_IRQFD_RESAMPLE:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index d0846ba818ee..6b1876e4ad3f 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -539,7 +539,7 @@ static void bpf_jit_plt(void *plt, void *ret, void *target)
{
memcpy(plt, bpf_plt, BPF_PLT_SIZE);
*(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
- *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target;
+ *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret;
}
/*
@@ -2010,7 +2010,9 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
} __packed insn;
char expected_plt[BPF_PLT_SIZE];
char current_plt[BPF_PLT_SIZE];
+ char new_plt[BPF_PLT_SIZE];
char *plt;
+ char *ret;
int err;
/* Verify the branch to be patched. */
@@ -2032,12 +2034,15 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
if (err < 0)
return err;
- bpf_jit_plt(expected_plt, (char *)ip + 6, old_addr);
+ ret = (char *)ip + 6;
+ bpf_jit_plt(expected_plt, ret, old_addr);
if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
return -EINVAL;
/* Adjust the call address. */
+ bpf_jit_plt(new_plt, ret, new_addr);
s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
- &new_addr, sizeof(void *));
+ new_plt + (bpf_plt_target - bpf_plt),
+ sizeof(void *));
}
/* Adjust the mask of the branch. */
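
Two details in this hunk are easy to miss: "target ?: ret" is GCC's binary conditional (use target unless it is NULL, otherwise fall back to ret, so an unpatched PLT branches straight to the return address), and the write now copies the slot out of a freshly generated scratch PLT so the patched bytes are exactly what bpf_jit_plt() would emit. A portable model of the fallback operator:

#include <assert.h>
#include <stddef.h>

static const void *plt_target(const void *target, const void *ret)
{
        return target ? target : ret;  /* spelled "target ?: ret" in GNU C */
}

int main(void)
{
        int fn;
        assert(plt_target(NULL, &fn) == &fn);  /* NULL degrades to ret */
        assert(plt_target(&fn, NULL) == &fn);  /* real target wins */
        return 0;
}
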
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index b70559b821df..2106a2bd152b 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -3,9 +3,14 @@ core-y += arch/x86/crypto/
#
# Disable SSE and other FP/SIMD instructions to match normal x86
+# This is required to work around issues in older LLVM versions, but breaks
+# GCC versions < 11. See:
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99652
#
+ifeq ($(CONFIG_CC_IS_CLANG),y)
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
+endif
ifeq ($(CONFIG_X86_32),y)
START := 0x8048000
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index cbaf174d8efd..b3af2d45bbbb 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -125,6 +125,8 @@
#define INTEL_FAM6_LUNARLAKE_M 0xBD
+#define INTEL_FAM6_ARROWLAKE 0xC6
+
/* "Small Core" Processors (Atom/E-Core) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 1c38174b5f01..0dac4ab5b55b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -146,7 +146,11 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
pr_debug("Local APIC address 0x%08x\n", madt->address);
}
- if (madt->header.revision >= 5)
+
+ /* ACPI 6.3 and newer support the online capable bit. */
+ if (acpi_gbl_FADT.header.revision > 6 ||
+ (acpi_gbl_FADT.header.revision == 6 &&
+ acpi_gbl_FADT.minor_revision >= 3))
acpi_support_online_capable = true;
default_acpi_madt_oem_check(madt->header.oem_id,
@@ -193,7 +197,8 @@ static bool __init acpi_is_processor_usable(u32 lapic_flags)
if (lapic_flags & ACPI_MADT_ENABLED)
return true;
- if (acpi_support_online_capable && (lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
+ if (!acpi_support_online_capable ||
+ (lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
return true;
return false;
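
The online-capable flag was defined in ACPI 6.3, so the fix keys the feature off the FADT major/minor revision instead of the MADT table revision. A minimal model of the comparison:

#include <assert.h>
#include <stdbool.h>

static bool supports_online_capable(unsigned int major, unsigned int minor)
{
        return major > 6 || (major == 6 && minor >= 3);
}

int main(void)
{
        assert(!supports_online_capable(6, 2));  /* ACPI 6.2: no */
        assert(supports_online_capable(6, 3));   /* ACPI 6.3: yes */
        assert(supports_online_capable(7, 0));   /* anything newer: yes */
        return 0;
}
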
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f36dc2f796c5..f1197366a97d 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -358,12 +358,16 @@ static void __init ms_hyperv_init_platform(void)
* To mirror what Windows does we should extract CPU management
* features and use the ReservedIdentityBit to detect if Linux is the
* root partition. But that requires negotiating CPU management
- * interface (a process to be finalized).
+ * interface (a process to be finalized). For now, use the privilege
+ * flag as the indicator for running as root.
*
- * For now, use the privilege flag as the indicator for running as
- * root.
+ * Hyper-V should never specify running as root and as a Confidential
+ * VM. But to protect against a compromised/malicious Hyper-V trying
+ * to exploit root behavior to expose Confidential VM memory, ignore
+ * the root partition setting if also a Confidential VM.
*/
- if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) {
+ if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
+ !(ms_hyperv.priv_high & HV_ISOLATION)) {
hv_root_partition = true;
pr_info("Hyper-V: running as root partition\n");
}
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ef80d361b463..10622cf2b30f 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -33,8 +33,8 @@ static int __init iommu_init_noop(void) { return 0; }
static void iommu_shutdown_noop(void) { }
bool __init bool_x86_init_noop(void) { return false; }
void x86_op_int_noop(int cpu) { }
-static __init int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
-static __init void get_rtc_noop(struct timespec64 *now) { }
+static int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
+static void get_rtc_noop(struct timespec64 *now) { }
static __initconst const struct of_device_id of_cmos_match[] = {
{ .compatible = "motorola,mc146818" },
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 042dee556125..995eb5054360 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -368,9 +368,39 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
mask_after = e->fields.mask;
if (mask_before != mask_after)
kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
- if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
- && ioapic->irr & (1 << index))
- ioapic_service(ioapic, index, false);
+ if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG &&
+ ioapic->irr & (1 << index) && !e->fields.mask && !e->fields.remote_irr) {
+ /*
+ * Pending status in irr may be outdated: the IRQ line may have
+ * already been deasserted by a device while the IRQ was masked.
+ * This occurs, for instance, if the interrupt is handled in a
+ * Linux guest as a oneshot interrupt (IRQF_ONESHOT). In this
+ * case the guest acknowledges the interrupt to the device in
+ * its threaded irq handler, i.e. after the EOI but before
+ * unmasking, so at the time of unmasking the IRQ line is
+ * already down but our pending irr bit is still set. In such
+ * cases, injecting this pending interrupt to the guest is
+ * buggy: the guest will receive an extra unwanted interrupt.
+ *
+ * So we need to check here if the IRQ is actually still pending.
+ * As we are generally not able to probe the IRQ line status
+ * directly, we do it through irqfd resampler. Namely, we clear
+ * the pending status and notify the resampler that this interrupt
+ * is done, without actually injecting it into the guest. If the
+ * IRQ line is actually already deasserted, we are done. If it is
+ * still asserted, a new interrupt will be shortly triggered
+ * through irqfd and injected into the guest.
+ *
+ * If, however, it's not possible to resample (no irqfd resampler
+ * registered for this irq), then unconditionally inject this
+ * pending interrupt into the guest, so the guest will not miss
+	 * an interrupt, although it may get an extra unwanted interrupt.
+ */
+ if (kvm_notify_irqfd_resampler(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index))
+ ioapic->irr &= ~(1 << index);
+ else
+ ioapic_service(ioapic, index, false);
+ }
if (e->fields.delivery_mode == APIC_DM_FIXED) {
struct kvm_lapic_irq irq;
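
The new unmask path reduces to a small decision table, restated here outside of KVM (a plain C model, not kernel code): only a level-triggered, pending, now-unmasked pin with remote_irr clear is reconsidered, and a registered irqfd resampler is preferred over a possibly spurious injection.

#include <assert.h>
#include <stdbool.h>

enum action { DO_NOTHING, NOTIFY_RESAMPLER, INJECT };

static enum action on_unmask(bool level, bool pending, bool masked,
                             bool remote_irr, bool has_resampler)
{
        if (!level || !pending || masked || remote_irr)
                return DO_NOTHING;
        return has_resampler ? NOTIFY_RESAMPLER : INJECT;
}

int main(void)
{
        /* oneshot case: resampler present, so clear irr and notify */
        assert(on_unmask(true, true, false, false, true) == NOTIFY_RESAMPLER);
        /* no resampler: inject, possibly spuriously, rather than lose it */
        assert(on_unmask(true, true, false, false, false) == INJECT);
        return 0;
}
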
diff --git a/arch/x86/kvm/kvm_onhyperv.h b/arch/x86/kvm/kvm_onhyperv.h
index 287e98ef9df3..6272dabec02d 100644
--- a/arch/x86/kvm/kvm_onhyperv.h
+++ b/arch/x86/kvm/kvm_onhyperv.h
@@ -12,6 +12,11 @@ int hv_remote_flush_tlb_with_range(struct kvm *kvm,
int hv_remote_flush_tlb(struct kvm *kvm);
void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp);
#else /* !CONFIG_HYPERV */
+static inline int hv_remote_flush_tlb(struct kvm *kvm)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp)
{
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 252e7f37e4e2..f25bc3cbb250 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3729,7 +3729,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
}
-static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
+static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3753,6 +3753,37 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
svm->current_vmcb->asid_generation--;
}
+static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
+{
+ hpa_t root_tdp = vcpu->arch.mmu->root.hpa;
+
+ /*
+ * When running on Hyper-V with EnlightenedNptTlb enabled, explicitly
+ * flush the NPT mappings via hypercall as flushing the ASID only
+ * affects virtual to physical mappings, it does not invalidate guest
+ * physical to host physical mappings.
+ */
+ if (svm_hv_is_enlightened_tlb_enabled(vcpu) && VALID_PAGE(root_tdp))
+ hyperv_flush_guest_mapping(root_tdp);
+
+ svm_flush_tlb_asid(vcpu);
+}
+
+static void svm_flush_tlb_all(struct kvm_vcpu *vcpu)
+{
+ /*
+ * When running on Hyper-V with EnlightenedNptTlb enabled, remote TLB
+ * flushes should be routed to hv_remote_flush_tlb() without requesting
+ * a "regular" remote flush. Reaching this point means either there's
+ * a KVM bug or a prior hv_remote_flush_tlb() call failed, both of
+ * which might be fatal to the guest. Yell, but try to recover.
+ */
+ if (WARN_ON_ONCE(svm_hv_is_enlightened_tlb_enabled(vcpu)))
+ hv_remote_flush_tlb(vcpu->kvm);
+
+ svm_flush_tlb_asid(vcpu);
+}
+
static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4745,10 +4776,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_rflags = svm_set_rflags,
.get_if_flag = svm_get_if_flag,
- .flush_tlb_all = svm_flush_tlb_current,
+ .flush_tlb_all = svm_flush_tlb_all,
.flush_tlb_current = svm_flush_tlb_current,
.flush_tlb_gva = svm_flush_tlb_gva,
- .flush_tlb_guest = svm_flush_tlb_current,
+ .flush_tlb_guest = svm_flush_tlb_asid,
.vcpu_pre_run = svm_vcpu_pre_run,
.vcpu_run = svm_vcpu_run,
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index cff838f15db5..786d46d73a8e 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -6,6 +6,8 @@
#ifndef __ARCH_X86_KVM_SVM_ONHYPERV_H__
#define __ARCH_X86_KVM_SVM_ONHYPERV_H__
+#include <asm/mshyperv.h>
+
#if IS_ENABLED(CONFIG_HYPERV)
#include "kvm_onhyperv.h"
@@ -15,6 +17,14 @@ static struct kvm_x86_ops svm_x86_ops;
int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu);
+static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu)
+{
+ struct hv_vmcb_enlightenments *hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments;
+
+ return ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB &&
+ !!hve->hv_enlightenments_control.enlightened_npt_tlb;
+}
+
static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
{
struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
@@ -80,6 +90,11 @@ static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu)
}
#else
+static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+
static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
{
}
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1bc2b80273c9..768487611db7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3868,7 +3868,12 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu)
exit_qual = 0;
}
- if (ex->has_error_code) {
+ /*
+ * Unlike AMD's Paged Real Mode, which reports an error code on #PF
+ * VM-Exits even if the CPU is in Real Mode, Intel VMX never sets the
+ * "has error code" flags on VM-Exit if the CPU is in Real Mode.
+ */
+ if (ex->has_error_code && is_protmode(vcpu)) {
/*
* Intel CPUs do not generate error codes with bits 31:16 set,
* and more importantly VMX disallows setting bits 31:16 in the
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7713420abab0..3d852ce84920 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4432,6 +4432,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_VAPIC:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
+ case KVM_CAP_IRQFD_RESAMPLE:
r = 1;
break;
case KVM_CAP_EXIT_HYPERCALL:
@@ -8903,6 +8904,8 @@ restart:
}
if (ctxt->have_exception) {
+ WARN_ON_ONCE(vcpu->mmio_needed && !vcpu->mmio_is_write);
+ vcpu->mmio_needed = false;
r = 1;
inject_emulated_exception(vcpu);
} else if (vcpu->arch.pio.count) {
@@ -9906,13 +9909,20 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu)
static void kvm_inject_exception(struct kvm_vcpu *vcpu)
{
+ /*
+ * Suppress the error code if the vCPU is in Real Mode, as Real Mode
+ * exceptions don't report error codes. The presence of an error code
+ * is carried with the exception and only stripped when the exception
+	 * is injected, as intercepted #PF VM-Exits for AMD's Paged Real Mode do
+ * report an error code despite the CPU being in Real Mode.
+ */
+ vcpu->arch.exception.has_error_code &= is_protmode(vcpu);
+
trace_kvm_inj_exception(vcpu->arch.exception.vector,
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code,
vcpu->arch.exception.injected);
- if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
- vcpu->arch.exception.error_code = false;
static_call(kvm_x86_inject_exception)(vcpu);
}
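
The one-line mask replaces a check that tested and cleared the wrong thing: the old code only acted when the error code's value happened to be non-zero, and then zeroed the value instead of dropping the has_error_code flag. A standalone contrast of the two behaviours for an exception carrying error code 0 in Real Mode:

#include <assert.h>
#include <stdbool.h>

struct ex { bool has_error_code; unsigned int error_code; };

static void old_inject(struct ex *e, bool protmode)
{
        if (e->error_code && !protmode)
                e->error_code = 0;       /* wrong field, wrong test */
}

static void new_inject(struct ex *e, bool protmode)
{
        e->has_error_code &= protmode;   /* the flag itself is suppressed */
}

int main(void)
{
        struct ex a = { true, 0 }, b = { true, 0 };
        old_inject(&a, false);
        new_inject(&b, false);
        assert(a.has_error_code);        /* old: still claims an error code */
        assert(!b.has_error_code);       /* new: correctly stripped */
        return 0;
}
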
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 615a76d70019..bf5161dcf89e 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -7,6 +7,7 @@
#include <linux/dmi.h>
#include <linux/pci.h>
#include <linux/vgaarb.h>
+#include <asm/amd_nb.h>
#include <asm/hpet.h>
#include <asm/pci_x86.h>
@@ -824,3 +825,23 @@ static void rs690_fix_64bit_dma(struct pci_dev *pdev)
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
#endif
+
+#ifdef CONFIG_AMD_NB
+
+#define AMD_15B8_RCC_DEV2_EPF0_STRAP2 0x10136008
+#define AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK 0x00000080L
+
+static void quirk_clear_strap_no_soft_reset_dev2_f0(struct pci_dev *dev)
+{
+ u32 data;
+
+ if (!amd_smn_read(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, &data)) {
+ data &= ~AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK;
+ if (amd_smn_write(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, data))
+ pci_err(dev, "Failed to write data 0x%x\n", data);
+ } else {
+ pci_err(dev, "Failed to read data\n");
+ }
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b8, quirk_clear_strap_no_soft_reset_dev2_f0);
+#endif
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 17f09dc26381..82fec66d46d2 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -69,8 +69,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_string.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index cf1a39adf9a5..f0ea9dcfb966 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1359,8 +1359,6 @@ bool blk_rq_is_poll(struct request *rq)
return false;
if (rq->mq_hctx->type != HCTX_TYPE_POLL)
return false;
- if (WARN_ON_ONCE(!rq->bio))
- return false;
return true;
}
EXPORT_SYMBOL_GPL(blk_rq_is_poll);
@@ -1368,7 +1366,7 @@ EXPORT_SYMBOL_GPL(blk_rq_is_poll);
static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
{
do {
- bio_poll(rq->bio, NULL, 0);
+ blk_mq_poll(rq->q, blk_rq_to_qc(rq), NULL, 0);
cond_resched();
} while (!completion_done(wait));
}
diff --git a/block/genhd.c b/block/genhd.c
index 02d9cfb9e077..7f874737af68 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -368,7 +368,6 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
if (disk->open_partitions)
return -EBUSY;
- set_bit(GD_NEED_PART_SCAN, &disk->state);
/*
* If the device is opened exclusively by current thread already, it's
* safe to scan partitons, otherwise, use bd_prepare_to_claim() to
@@ -381,12 +380,19 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
return ret;
}
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL);
if (IS_ERR(bdev))
ret = PTR_ERR(bdev);
else
blkdev_put(bdev, mode & ~FMODE_EXCL);
+ /*
+ * If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set,
+	 * and this would cause re-assembling a partitioned RAID device to
+	 * create partitions for the underlying disk.
+ */
+ clear_bit(GD_NEED_PART_SCAN, &disk->state);
if (!(mode & FMODE_EXCL))
bd_abort_claiming(disk->part0, disk_scan_partitions);
return ret;
diff --git a/drivers/accel/habanalabs/common/hwmon.c b/drivers/accel/habanalabs/common/hwmon.c
index 55eb0203817f..8598056216e7 100644
--- a/drivers/accel/habanalabs/common/hwmon.c
+++ b/drivers/accel/habanalabs/common/hwmon.c
@@ -914,7 +914,7 @@ void hl_hwmon_fini(struct hl_device *hdev)
void hl_hwmon_release_resources(struct hl_device *hdev)
{
- const struct hwmon_channel_info **channel_info_arr;
+ const struct hwmon_channel_info * const *channel_info_arr;
int i = 0;
if (!hdev->hl_chip_info->info)
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 910301fae435..3c6f1e16cf2f 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -461,26 +461,22 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset;
- ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count,
- &acquire_ctx);
+ ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, 1, &acquire_ctx);
if (ret) {
ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret);
return ret;
}
- for (i = 0; i < buf_count; i++) {
- ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1);
- if (ret) {
- ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
- goto unlock_reservations;
- }
+ ret = dma_resv_reserve_fences(bo->base.resv, 1);
+ if (ret) {
+ ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
+ goto unlock_reservations;
}
- for (i = 0; i < buf_count; i++)
- dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE);
+ dma_resv_add_fence(bo->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE);
unlock_reservations:
- drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx);
+ drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, 1, &acquire_ctx);
wmb(); /* Flush write combining buffers */
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 7df72fa8100f..bde42d6383da 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -140,32 +140,28 @@ int ivpu_pm_suspend_cb(struct device *dev)
{
struct drm_device *drm = dev_get_drvdata(dev);
struct ivpu_device *vdev = to_ivpu_device(drm);
- int ret;
+ unsigned long timeout;
ivpu_dbg(vdev, PM, "Suspend..\n");
- ret = ivpu_suspend(vdev);
- if (ret && vdev->pm->suspend_reschedule_counter) {
- ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n",
- vdev->pm->suspend_reschedule_counter);
- pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend);
- vdev->pm->suspend_reschedule_counter--;
- return -EBUSY;
- } else if (!vdev->pm->suspend_reschedule_counter) {
- ivpu_warn(vdev, "Failed to enter idle, force suspend\n");
- ivpu_pm_prepare_cold_boot(vdev);
- } else {
- ivpu_pm_prepare_warm_boot(vdev);
+ timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr);
+ while (!ivpu_hw_is_idle(vdev)) {
+ cond_resched();
+ if (time_after_eq(jiffies, timeout)) {
+ ivpu_err(vdev, "Failed to enter idle on system suspend\n");
+ return -EBUSY;
+ }
}
- vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
+ ivpu_suspend(vdev);
+ ivpu_pm_prepare_warm_boot(vdev);
pci_save_state(to_pci_dev(dev));
pci_set_power_state(to_pci_dev(dev), PCI_D3hot);
ivpu_dbg(vdev, PM, "Suspend done.\n");
- return ret;
+ return 0;
}
int ivpu_pm_resume_cb(struct device *dev)
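
The rewritten suspend callback drops the reschedule-and-retry dance for one bounded wait: poll for idle, yield between polls, and fail with -EBUSY once the TDR deadline passes. The same pattern in portable C (a userspace model; the kernel version uses jiffies and cond_resched()):

#include <sched.h>
#include <stdbool.h>
#include <time.h>

static bool wait_for_idle(bool (*is_idle)(void), long timeout_ms)
{
        struct timespec start, now;

        clock_gettime(CLOCK_MONOTONIC, &start);
        while (!is_idle()) {
                clock_gettime(CLOCK_MONOTONIC, &now);
                long elapsed_ms = (now.tv_sec - start.tv_sec) * 1000 +
                                  (now.tv_nsec - start.tv_nsec) / 1000000;
                if (elapsed_ms >= timeout_ms)
                        return false;   /* caller turns this into -EBUSY */
                sched_yield();          /* analogue of cond_resched() */
        }
        return true;
}

static bool always_idle(void) { return true; }

int main(void)
{
        return wait_for_idle(always_idle, 100) ? 0 : 1;
}
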
diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 97b711e57bff..c7a6d0b69dab 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -1984,6 +1984,7 @@ static int instance;
static int acpi_video_bus_add(struct acpi_device *device)
{
struct acpi_video_bus *video;
+ bool auto_detect;
int error;
acpi_status status;
@@ -2045,10 +2046,20 @@ static int acpi_video_bus_add(struct acpi_device *device)
mutex_unlock(&video_list_lock);
/*
- * The userspace visible backlight_device gets registered separately
- * from acpi_video_register_backlight().
+ * If backlight-type auto-detection is used then a native backlight may
+ * show up later and this may change the result from video to native.
+	 * Therefore, normally the userspace-visible /sys/class/backlight device
+ * gets registered separately by the GPU driver calling
+ * acpi_video_register_backlight() when an internal panel is detected.
+ * Register the backlight now when not using auto-detection, so that
+ * when the kernel cmdline or DMI-quirks are used the backlight will
+ * get registered even if acpi_video_register_backlight() is not called.
*/
acpi_video_run_bcl_for_osi(video);
+ if (__acpi_video_get_backlight_type(false, &auto_detect) == acpi_backlight_video &&
+ !auto_detect)
+ acpi_video_bus_register_backlight(video);
+
acpi_video_bus_add_notify_handler(video);
return 0;
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 9531dd0fef50..a96da65057b1 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -459,85 +459,67 @@ out_free:
Notification Handling
-------------------------------------------------------------------------- */
-/*
- * acpi_bus_notify
- * ---------------
- * Callback for all 'system-level' device notifications (values 0x00-0x7F).
+/**
+ * acpi_bus_notify - Global system-level (0x00-0x7F) notifications handler
+ * @handle: Target ACPI object.
+ * @type: Notification type.
+ * @data: Ignored.
+ *
+ * This only handles notifications related to device hotplug.
*/
static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
{
struct acpi_device *adev;
- u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
- bool hotplug_event = false;
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n");
- hotplug_event = true;
break;
case ACPI_NOTIFY_DEVICE_CHECK:
acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n");
- hotplug_event = true;
break;
case ACPI_NOTIFY_DEVICE_WAKE:
acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n");
- break;
+ return;
case ACPI_NOTIFY_EJECT_REQUEST:
acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
- hotplug_event = true;
break;
case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n");
/* TBD: Exactly what does 'light' mean? */
- break;
+ return;
case ACPI_NOTIFY_FREQUENCY_MISMATCH:
acpi_handle_err(handle, "Device cannot be configured due "
"to a frequency mismatch\n");
- break;
+ return;
case ACPI_NOTIFY_BUS_MODE_MISMATCH:
acpi_handle_err(handle, "Device cannot be configured due "
"to a bus mode mismatch\n");
- break;
+ return;
case ACPI_NOTIFY_POWER_FAULT:
acpi_handle_err(handle, "Device has suffered a power fault\n");
- break;
+ return;
default:
acpi_handle_debug(handle, "Unknown event type 0x%x\n", type);
- break;
+ return;
}
adev = acpi_get_acpi_dev(handle);
- if (!adev)
- goto err;
-
- if (adev->dev.driver) {
- struct acpi_driver *driver = to_acpi_driver(adev->dev.driver);
-
- if (driver && driver->ops.notify &&
- (driver->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS))
- driver->ops.notify(adev, type);
- }
-
- if (!hotplug_event) {
- acpi_put_acpi_dev(adev);
- return;
- }
- if (ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
+ if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
return;
acpi_put_acpi_dev(adev);
- err:
- acpi_evaluate_ost(handle, type, ost_code, NULL);
+ acpi_evaluate_ost(handle, type, ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL);
}
static void acpi_notify_device(acpi_handle handle, u32 event, void *data)
@@ -562,42 +544,51 @@ static u32 acpi_device_fixed_event(void *data)
return ACPI_INTERRUPT_HANDLED;
}
-static int acpi_device_install_notify_handler(struct acpi_device *device)
+static int acpi_device_install_notify_handler(struct acpi_device *device,
+ struct acpi_driver *acpi_drv)
{
acpi_status status;
- if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON)
+ if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) {
status =
acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
acpi_device_fixed_event,
device);
- else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON)
+ } else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) {
status =
acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON,
acpi_device_fixed_event,
device);
- else
- status = acpi_install_notify_handler(device->handle,
- ACPI_DEVICE_NOTIFY,
+ } else {
+ u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ?
+ ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY;
+
+ status = acpi_install_notify_handler(device->handle, type,
acpi_notify_device,
device);
+ }
if (ACPI_FAILURE(status))
return -EINVAL;
return 0;
}
-static void acpi_device_remove_notify_handler(struct acpi_device *device)
+static void acpi_device_remove_notify_handler(struct acpi_device *device,
+ struct acpi_driver *acpi_drv)
{
- if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON)
+ if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) {
acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
acpi_device_fixed_event);
- else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON)
+ } else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) {
acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON,
acpi_device_fixed_event);
- else
- acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
+ } else {
+ u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ?
+ ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY;
+
+ acpi_remove_notify_handler(device->handle, type,
acpi_notify_device);
+ }
}
/* Handle events targeting \_SB device (at present only graceful shutdown) */
@@ -1039,7 +1030,7 @@ static int acpi_device_probe(struct device *dev)
acpi_drv->name, acpi_dev->pnp.bus_id);
if (acpi_drv->ops.notify) {
- ret = acpi_device_install_notify_handler(acpi_dev);
+ ret = acpi_device_install_notify_handler(acpi_dev, acpi_drv);
if (ret) {
if (acpi_drv->ops.remove)
acpi_drv->ops.remove(acpi_dev);
@@ -1062,7 +1053,7 @@ static void acpi_device_remove(struct device *dev)
struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver);
if (acpi_drv->ops.notify)
- acpi_device_remove_notify_handler(acpi_dev);
+ acpi_device_remove_notify_handler(acpi_dev, acpi_drv);
if (acpi_drv->ops.remove)
acpi_drv->ops.remove(acpi_dev);
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 7b4801ce62d6..e8492b3a393a 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -440,6 +440,13 @@ static const struct dmi_system_id asus_laptop[] = {
},
},
{
+ .ident = "Asus ExpertBook B1502CBA",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "B1502CBA"),
+ },
+ },
+ {
.ident = "Asus ExpertBook B2402CBA",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index fd7cbce8076e..e85729fc481f 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -277,6 +277,43 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
},
/*
+ * Models which need acpi_video backlight control where the GPU drivers
+ * do not call acpi_video_register_backlight() because no internal panel
+	 * is detected. Typically these are all-in-ones (monitors with a built-in
+ * PC) where the panel connection shows up as regular DP instead of eDP.
+ */
+ {
+ .callback = video_detect_force_video,
+ /* Apple iMac14,1 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "iMac14,1"),
+ },
+ },
+ {
+ .callback = video_detect_force_video,
+ /* Apple iMac14,2 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "iMac14,2"),
+ },
+ },
+
+ /*
+ * Older models with nvidia GPU which need acpi_video backlight
+ * control and where the old nvidia binary driver series does not
+ * call acpi_video_register_backlight().
+ */
+ {
+ .callback = video_detect_force_video,
+ /* ThinkPad W530 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W530"),
+ },
+ },
+
+ /*
* These models have a working acpi_video backlight control, and using
* native backlight causes a regression where backlight does not work
* when userspace is not handling brightness key events. Disable
@@ -782,7 +819,7 @@ static bool prefer_native_over_acpi_video(void)
* Determine which type of backlight interface to use on this system,
* First check cmdline, then dmi quirks, then do autodetect.
*/
-static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native)
+enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto_detect)
{
static DEFINE_MUTEX(init_mutex);
static bool nvidia_wmi_ec_present;
@@ -807,6 +844,9 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native)
native_available = true;
mutex_unlock(&init_mutex);
+ if (auto_detect)
+ *auto_detect = false;
+
/*
* The below heuristics / detection steps are in order of descending
	 * precedence. The commandline takes precedence over anything else.
@@ -818,6 +858,9 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native)
if (acpi_backlight_dmi != acpi_backlight_undef)
return acpi_backlight_dmi;
+ if (auto_detect)
+ *auto_detect = true;
+
/* Special cases such as nvidia_wmi_ec and apple gmux. */
if (nvidia_wmi_ec_present)
return acpi_backlight_nvidia_wmi_ec;
@@ -837,15 +880,4 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native)
/* No ACPI video/native (old hw), use vendor specific fw methods. */
return acpi_backlight_vendor;
}
-
-enum acpi_backlight_type acpi_video_get_backlight_type(void)
-{
- return __acpi_video_get_backlight_type(false);
-}
-EXPORT_SYMBOL(acpi_video_get_backlight_type);
-
-bool acpi_video_backlight_use_native(void)
-{
- return __acpi_video_get_backlight_type(true) == acpi_backlight_native;
-}
-EXPORT_SYMBOL(acpi_video_backlight_use_native);
+EXPORT_SYMBOL(__acpi_video_get_backlight_type);
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index da5727069d85..ba420a28a4aa 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -213,6 +213,7 @@ bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *s
disk in the system.
*/
static const struct x86_cpu_id storage_d3_cpu_ids[] = {
+ X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 24, NULL), /* Picasso */
X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 96, NULL), /* Renoir */
X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 104, NULL), /* Lucienne */
X86_MATCH_VENDOR_FAM_MODEL(AMD, 25, 80, NULL), /* Cezanne */
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index f6573c335f4c..f3903d002819 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -474,12 +474,18 @@ int detect_cache_attributes(unsigned int cpu)
populate_leaves:
/*
- * populate_cache_leaves() may completely setup the cache leaves and
- * shared_cpu_map or it may leave it partially setup.
+	 * If the LLC is valid, the cache leaves were already populated, so just
+	 * update the cpu map.
*/
- ret = populate_cache_leaves(cpu);
- if (ret)
- goto free_ci;
+ if (!last_level_cache_is_valid(cpu)) {
+ /*
+		 * populate_cache_leaves() may completely set up the cache leaves and
+		 * shared_cpu_map, or it may leave them partially set up.
+ */
+ ret = populate_cache_leaves(cpu);
+ if (ret)
+ goto free_ci;
+ }
/*
* For systems using DT for cache hierarchy, fw_token
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 28eb59fd71ca..bc31bb7072a2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1010,9 +1010,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
- /* suppress uevents while reconfiguring the device */
- dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
-
/*
* If we don't hold exclusive handle for the device, upgrade to it
* here to avoid changing device under exclusive owner.
@@ -1067,6 +1064,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
}
}
+ /* suppress uevents while reconfiguring the device */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
@@ -1109,17 +1109,17 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
if (partscan)
clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
+ /* enable and uncork uevent now that we are done */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+
loop_global_unlock(lo, is_loop);
if (partscan)
loop_reread_partitions(lo);
+
if (!(mode & FMODE_EXCL))
bd_abort_claiming(bdev, loop_configure);
- error = 0;
-done:
- /* enable and uncork uevent now that we are done */
- dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
- return error;
+ return 0;
out_unlock:
loop_global_unlock(lo, is_loop);
@@ -1130,7 +1130,7 @@ out_putf:
fput(file);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
- goto done;
+ return error;
}
static void __loop_clr_fd(struct loop_device *lo, bool release)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index c73cc57ec547..604c1a13c76e 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -246,7 +246,7 @@ static int ublk_validate_params(const struct ublk_device *ub)
if (ub->params.types & UBLK_PARAM_TYPE_BASIC) {
const struct ublk_param_basic *p = &ub->params.basic;
- if (p->logical_bs_shift > PAGE_SHIFT)
+ if (p->logical_bs_shift > PAGE_SHIFT || p->logical_bs_shift < 9)
return -EINVAL;
if (p->logical_bs_shift > p->physical_bs_shift)
@@ -1261,9 +1261,10 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
ublk_queue_cmd(ubq, req);
}
-static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
+ unsigned int issue_flags,
+ struct ublksrv_io_cmd *ub_cmd)
{
- struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
struct ublk_device *ub = cmd->file->private_data;
struct ublk_queue *ubq;
struct ublk_io *io;
@@ -1362,6 +1363,23 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
return -EIOCBQUEUED;
}
+static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ struct ublksrv_io_cmd *ub_src = (struct ublksrv_io_cmd *) cmd->cmd;
+ struct ublksrv_io_cmd ub_cmd;
+
+ /*
+ * Not necessary for async retry, but let's keep it simple and always
+ * copy the values to avoid any potential reuse.
+ */
+ ub_cmd.q_id = READ_ONCE(ub_src->q_id);
+ ub_cmd.tag = READ_ONCE(ub_src->tag);
+ ub_cmd.result = READ_ONCE(ub_src->result);
+ ub_cmd.addr = READ_ONCE(ub_src->addr);
+
+ return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
+}
+
static const struct file_operations ublk_ch_fops = {
.owner = THIS_MODULE,
.open = ublk_ch_open,
@@ -1952,6 +1970,8 @@ static int ublk_ctrl_set_params(struct ublk_device *ub,
/* clear all we don't support yet */
ub->params.types &= UBLK_PARAM_TYPE_ALL;
ret = ublk_validate_params(ub);
+ if (ret)
+ ub->params.types = 0;
}
mutex_unlock(&ub->mutex);
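
The READ_ONCE() copies close a double-fetch window: the SQE payload sits in memory userspace can still modify, so each field must be read exactly once into a private copy before anything validates or uses it. A standalone illustration of the pattern (types simplified; volatile stands in for READ_ONCE()):

#include <stdint.h>

struct io_cmd { uint16_t q_id, tag; int32_t result; uint64_t addr; };

static struct io_cmd snapshot(const volatile struct io_cmd *src)
{
        struct io_cmd c;

        c.q_id   = src->q_id;   /* each field fetched exactly once */
        c.tag    = src->tag;
        c.result = src->result;
        c.addr   = src->addr;
        return c;               /* all later checks use this copy */
}

int main(void)
{
        struct io_cmd shared = { 1, 2, 0, 0x1000 };
        struct io_cmd local = snapshot(&shared);

        return local.tag == 2 ? 0 : 1;
}
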
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 2723eede6f21..2b918e28acaa 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -96,16 +96,14 @@ struct virtblk_req {
/*
* The zone append command has an extended in header.
- * The status field in zone_append_in_hdr must have
- * the same offset in virtblk_req as the non-zoned
- * status field above.
+ * The status field in zone_append_in_hdr must always
+ * be the last byte.
*/
struct {
+ __virtio64 sector;
u8 status;
- u8 reserved[7];
- __le64 append_sector;
- } zone_append_in_hdr;
- };
+ } zone_append;
+ } in_hdr;
size_t in_hdr_len;
@@ -154,7 +152,7 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
sgs[num_out + num_in++] = vbr->sg_table.sgl;
}
- sg_init_one(&in_hdr, &vbr->status, vbr->in_hdr_len);
+ sg_init_one(&in_hdr, &vbr->in_hdr.status, vbr->in_hdr_len);
sgs[num_out + num_in++] = &in_hdr;
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
@@ -242,11 +240,14 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
struct request *req,
struct virtblk_req *vbr)
{
- size_t in_hdr_len = sizeof(vbr->status);
+ size_t in_hdr_len = sizeof(vbr->in_hdr.status);
bool unmap = false;
u32 type;
u64 sector = 0;
+ if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && op_is_zone_mgmt(req_op(req)))
+ return BLK_STS_NOTSUPP;
+
/* Set fields for all request types */
vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));
@@ -287,7 +288,7 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
case REQ_OP_ZONE_APPEND:
type = VIRTIO_BLK_T_ZONE_APPEND;
sector = blk_rq_pos(req);
- in_hdr_len = sizeof(vbr->zone_append_in_hdr);
+ in_hdr_len = sizeof(vbr->in_hdr.zone_append);
break;
case REQ_OP_ZONE_RESET:
type = VIRTIO_BLK_T_ZONE_RESET;
@@ -297,7 +298,10 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
type = VIRTIO_BLK_T_ZONE_RESET_ALL;
break;
case REQ_OP_DRV_IN:
- /* Out header already filled in, nothing to do */
+ /*
+ * Out header has already been prepared by the caller (virtblk_get_id()
+ * or virtblk_submit_zone_report()), nothing to do here.
+ */
return 0;
default:
WARN_ON_ONCE(1);
@@ -318,16 +322,28 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
return 0;
}
+/*
+ * The status byte is always the last byte of the virtblk request
+ * in-header. This helper fetches its value for all in-header formats
+ * that are currently defined.
+ */
+static inline u8 virtblk_vbr_status(struct virtblk_req *vbr)
+{
+ return *((u8 *)&vbr->in_hdr + vbr->in_hdr_len - 1);
+}
+
static inline void virtblk_request_done(struct request *req)
{
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
- blk_status_t status = virtblk_result(vbr->status);
+ blk_status_t status = virtblk_result(virtblk_vbr_status(vbr));
+ struct virtio_blk *vblk = req->mq_hctx->queue->queuedata;
virtblk_unmap_data(req, vbr);
virtblk_cleanup_cmd(req);
if (req_op(req) == REQ_OP_ZONE_APPEND)
- req->__sector = le64_to_cpu(vbr->zone_append_in_hdr.append_sector);
+ req->__sector = virtio64_to_cpu(vblk->vdev,
+ vbr->in_hdr.zone_append.sector);
blk_mq_end_request(req, status);
}
@@ -355,7 +371,7 @@ static int virtblk_handle_req(struct virtio_blk_vq *vq,
if (likely(!blk_should_fake_timeout(req->q)) &&
!blk_mq_complete_request_remote(req) &&
- !blk_mq_add_to_batch(req, iob, vbr->status,
+ !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr),
virtblk_complete_batch))
virtblk_request_done(req);
req_done++;
@@ -550,7 +566,6 @@ static void virtio_queue_rqs(struct request **rqlist)
#ifdef CONFIG_BLK_DEV_ZONED
static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk,
unsigned int nr_zones,
- unsigned int zone_sectors,
size_t *buflen)
{
struct request_queue *q = vblk->disk->queue;
@@ -558,7 +573,7 @@ static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk,
void *buf;
nr_zones = min_t(unsigned int, nr_zones,
- get_capacity(vblk->disk) >> ilog2(zone_sectors));
+ get_capacity(vblk->disk) >> ilog2(vblk->zone_sectors));
bufsize = sizeof(struct virtio_blk_zone_report) +
nr_zones * sizeof(struct virtio_blk_zone_descriptor);
@@ -592,7 +607,7 @@ static int virtblk_submit_zone_report(struct virtio_blk *vblk,
return PTR_ERR(req);
vbr = blk_mq_rq_to_pdu(req);
- vbr->in_hdr_len = sizeof(vbr->status);
+ vbr->in_hdr_len = sizeof(vbr->in_hdr.status);
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT);
vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector);
@@ -601,7 +616,7 @@ static int virtblk_submit_zone_report(struct virtio_blk *vblk,
goto out;
blk_execute_rq(req, false);
- err = blk_status_to_errno(virtblk_result(vbr->status));
+ err = blk_status_to_errno(virtblk_result(vbr->in_hdr.status));
out:
blk_mq_free_request(req);
return err;
@@ -609,29 +624,72 @@ out:
static int virtblk_parse_zone(struct virtio_blk *vblk,
struct virtio_blk_zone_descriptor *entry,
- unsigned int idx, unsigned int zone_sectors,
- report_zones_cb cb, void *data)
+ unsigned int idx, report_zones_cb cb, void *data)
{
struct blk_zone zone = { };
- if (entry->z_type != VIRTIO_BLK_ZT_SWR &&
- entry->z_type != VIRTIO_BLK_ZT_SWP &&
- entry->z_type != VIRTIO_BLK_ZT_CONV) {
- dev_err(&vblk->vdev->dev, "invalid zone type %#x\n",
- entry->z_type);
- return -EINVAL;
+ zone.start = virtio64_to_cpu(vblk->vdev, entry->z_start);
+ if (zone.start + vblk->zone_sectors <= get_capacity(vblk->disk))
+ zone.len = vblk->zone_sectors;
+ else
+ zone.len = get_capacity(vblk->disk) - zone.start;
+ zone.capacity = virtio64_to_cpu(vblk->vdev, entry->z_cap);
+ zone.wp = virtio64_to_cpu(vblk->vdev, entry->z_wp);
+
+ switch (entry->z_type) {
+ case VIRTIO_BLK_ZT_SWR:
+ zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
+ break;
+ case VIRTIO_BLK_ZT_SWP:
+ zone.type = BLK_ZONE_TYPE_SEQWRITE_PREF;
+ break;
+ case VIRTIO_BLK_ZT_CONV:
+ zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
+ break;
+ default:
+ dev_err(&vblk->vdev->dev, "zone %llu: invalid type %#x\n",
+ zone.start, entry->z_type);
+ return -EIO;
}
- zone.type = entry->z_type;
- zone.cond = entry->z_state;
- zone.len = zone_sectors;
- zone.capacity = le64_to_cpu(entry->z_cap);
- zone.start = le64_to_cpu(entry->z_start);
- if (zone.cond == BLK_ZONE_COND_FULL)
+ switch (entry->z_state) {
+ case VIRTIO_BLK_ZS_EMPTY:
+ zone.cond = BLK_ZONE_COND_EMPTY;
+ break;
+ case VIRTIO_BLK_ZS_CLOSED:
+ zone.cond = BLK_ZONE_COND_CLOSED;
+ break;
+ case VIRTIO_BLK_ZS_FULL:
+ zone.cond = BLK_ZONE_COND_FULL;
zone.wp = zone.start + zone.len;
- else
- zone.wp = le64_to_cpu(entry->z_wp);
+ break;
+ case VIRTIO_BLK_ZS_EOPEN:
+ zone.cond = BLK_ZONE_COND_EXP_OPEN;
+ break;
+ case VIRTIO_BLK_ZS_IOPEN:
+ zone.cond = BLK_ZONE_COND_IMP_OPEN;
+ break;
+ case VIRTIO_BLK_ZS_NOT_WP:
+ zone.cond = BLK_ZONE_COND_NOT_WP;
+ break;
+ case VIRTIO_BLK_ZS_RDONLY:
+ zone.cond = BLK_ZONE_COND_READONLY;
+ zone.wp = ULONG_MAX;
+ break;
+ case VIRTIO_BLK_ZS_OFFLINE:
+ zone.cond = BLK_ZONE_COND_OFFLINE;
+ zone.wp = ULONG_MAX;
+ break;
+ default:
+ dev_err(&vblk->vdev->dev, "zone %llu: invalid condition %#x\n",
+ zone.start, entry->z_state);
+ return -EIO;
+ }
+ /*
+ * The callback below checks the validity of the reported
+	 * entry data; no need to validate it further here.
+ */
return cb(&zone, idx, data);
}
@@ -641,39 +699,47 @@ static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
{
struct virtio_blk *vblk = disk->private_data;
struct virtio_blk_zone_report *report;
- unsigned int zone_sectors = vblk->zone_sectors;
- unsigned int nz, i;
- int ret, zone_idx = 0;
+ unsigned long long nz, i;
size_t buflen;
+ unsigned int zone_idx = 0;
+ int ret;
if (WARN_ON_ONCE(!vblk->zone_sectors))
return -EOPNOTSUPP;
- report = virtblk_alloc_report_buffer(vblk, nr_zones,
- zone_sectors, &buflen);
+ report = virtblk_alloc_report_buffer(vblk, nr_zones, &buflen);
if (!report)
return -ENOMEM;
+ mutex_lock(&vblk->vdev_mutex);
+
+ if (!vblk->vdev) {
+ ret = -ENXIO;
+ goto fail_report;
+ }
+
while (zone_idx < nr_zones && sector < get_capacity(vblk->disk)) {
memset(report, 0, buflen);
ret = virtblk_submit_zone_report(vblk, (char *)report,
buflen, sector);
- if (ret) {
- if (ret > 0)
- ret = -EIO;
- goto out_free;
- }
- nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
+ if (ret)
+ goto fail_report;
+
+ nz = min_t(u64, virtio64_to_cpu(vblk->vdev, report->nr_zones),
+ nr_zones);
if (!nz)
break;
for (i = 0; i < nz && zone_idx < nr_zones; i++) {
ret = virtblk_parse_zone(vblk, &report->zones[i],
- zone_idx, zone_sectors, cb, data);
+ zone_idx, cb, data);
if (ret)
- goto out_free;
- sector = le64_to_cpu(report->zones[i].z_start) + zone_sectors;
+ goto fail_report;
+
+ sector = virtio64_to_cpu(vblk->vdev,
+ report->zones[i].z_start) +
+ vblk->zone_sectors;
zone_idx++;
}
}
@@ -682,7 +748,8 @@ static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
ret = zone_idx;
else
ret = -EINVAL;
-out_free:
+fail_report:
+ mutex_unlock(&vblk->vdev_mutex);
kvfree(report);
return ret;
}
@@ -691,20 +758,28 @@ static void virtblk_revalidate_zones(struct virtio_blk *vblk)
{
u8 model;
- if (!vblk->zone_sectors)
- return;
-
virtio_cread(vblk->vdev, struct virtio_blk_config,
zoned.model, &model);
- if (!blk_revalidate_disk_zones(vblk->disk, NULL))
- set_capacity_and_notify(vblk->disk, 0);
+ switch (model) {
+ default:
+ dev_err(&vblk->vdev->dev, "unknown zone model %d\n", model);
+ fallthrough;
+ case VIRTIO_BLK_Z_NONE:
+ case VIRTIO_BLK_Z_HA:
+ disk_set_zoned(vblk->disk, BLK_ZONED_NONE);
+ return;
+ case VIRTIO_BLK_Z_HM:
+ WARN_ON_ONCE(!vblk->zone_sectors);
+ if (!blk_revalidate_disk_zones(vblk->disk, NULL))
+ set_capacity_and_notify(vblk->disk, 0);
+ }
}
static int virtblk_probe_zoned_device(struct virtio_device *vdev,
struct virtio_blk *vblk,
struct request_queue *q)
{
- u32 v;
+ u32 v, wg;
u8 model;
int ret;
@@ -713,16 +788,11 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
switch (model) {
case VIRTIO_BLK_Z_NONE:
+ case VIRTIO_BLK_Z_HA:
+ /* Present the host-aware device as non-zoned */
return 0;
case VIRTIO_BLK_Z_HM:
break;
- case VIRTIO_BLK_Z_HA:
- /*
- * Present the host-aware device as a regular drive.
- * TODO It is possible to add an option to make it appear
- * in the system as a zoned drive.
- */
- return 0;
default:
dev_err(&vdev->dev, "unsupported zone model %d\n", model);
return -EINVAL;
@@ -735,32 +805,31 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
virtio_cread(vdev, struct virtio_blk_config,
zoned.max_open_zones, &v);
- disk_set_max_open_zones(vblk->disk, le32_to_cpu(v));
-
- dev_dbg(&vdev->dev, "max open zones = %u\n", le32_to_cpu(v));
+ disk_set_max_open_zones(vblk->disk, v);
+ dev_dbg(&vdev->dev, "max open zones = %u\n", v);
virtio_cread(vdev, struct virtio_blk_config,
zoned.max_active_zones, &v);
- disk_set_max_active_zones(vblk->disk, le32_to_cpu(v));
- dev_dbg(&vdev->dev, "max active zones = %u\n", le32_to_cpu(v));
+ disk_set_max_active_zones(vblk->disk, v);
+ dev_dbg(&vdev->dev, "max active zones = %u\n", v);
virtio_cread(vdev, struct virtio_blk_config,
- zoned.write_granularity, &v);
- if (!v) {
+ zoned.write_granularity, &wg);
+ if (!wg) {
dev_warn(&vdev->dev, "zero write granularity reported\n");
return -ENODEV;
}
- blk_queue_physical_block_size(q, le32_to_cpu(v));
- blk_queue_io_min(q, le32_to_cpu(v));
+ blk_queue_physical_block_size(q, wg);
+ blk_queue_io_min(q, wg);
- dev_dbg(&vdev->dev, "write granularity = %u\n", le32_to_cpu(v));
+ dev_dbg(&vdev->dev, "write granularity = %u\n", wg);
/*
* The virtio ZBD specification doesn't require zones to be a power of
* two sectors in size, but the code in this driver expects that.
*/
- virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors, &v);
- vblk->zone_sectors = le32_to_cpu(v);
+ virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors,
+ &vblk->zone_sectors);
if (vblk->zone_sectors == 0 || !is_power_of_2(vblk->zone_sectors)) {
dev_err(&vdev->dev,
"zoned device with non power of two zone size %u\n",
@@ -783,36 +852,46 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
dev_warn(&vdev->dev, "zero max_append_sectors reported\n");
return -ENODEV;
}
- blk_queue_max_zone_append_sectors(q, le32_to_cpu(v));
- dev_dbg(&vdev->dev, "max append sectors = %u\n", le32_to_cpu(v));
+ if ((v << SECTOR_SHIFT) < wg) {
+ dev_err(&vdev->dev,
+ "write granularity %u exceeds max_append_sectors %u limit\n",
+ wg, v);
+ return -ENODEV;
+ }
+
+ blk_queue_max_zone_append_sectors(q, v);
+ dev_dbg(&vdev->dev, "max append sectors = %u\n", v);
}
return ret;
}
-static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev)
-{
- return virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED);
-}
#else
/*
* Zoned block device support is not configured in this kernel.
- * We only need to define a few symbols to avoid compilation errors.
+ * Host-managed zoned devices can't be supported, but others are
+ * good to go as regular block devices.
*/
#define virtblk_report_zones NULL
+
static inline void virtblk_revalidate_zones(struct virtio_blk *vblk)
{
}
+
static inline int virtblk_probe_zoned_device(struct virtio_device *vdev,
struct virtio_blk *vblk, struct request_queue *q)
{
- return -EOPNOTSUPP;
-}
+ u8 model;
-static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev)
-{
- return false;
+ virtio_cread(vdev, struct virtio_blk_config, zoned.model, &model);
+ if (model == VIRTIO_BLK_Z_HM) {
+ dev_err(&vdev->dev,
+ "virtio_blk: zoned devices are not supported");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
}
#endif /* CONFIG_BLK_DEV_ZONED */
@@ -831,7 +910,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
return PTR_ERR(req);
vbr = blk_mq_rq_to_pdu(req);
- vbr->in_hdr_len = sizeof(vbr->status);
+ vbr->in_hdr_len = sizeof(vbr->in_hdr.status);
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
vbr->out_hdr.sector = 0;
@@ -840,7 +919,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
goto out;
blk_execute_rq(req, false);
- err = blk_status_to_errno(virtblk_result(vbr->status));
+ err = blk_status_to_errno(virtblk_result(vbr->in_hdr.status));
out:
blk_mq_free_request(req);
return err;
@@ -1498,15 +1577,16 @@ static int virtblk_probe(struct virtio_device *vdev)
virtblk_update_capacity(vblk, false);
virtio_device_ready(vdev);
- if (virtblk_has_zoned_feature(vdev)) {
+ /*
+	 * All steps that follow use the VQs, so they need to be
+ * placed after the virtio_device_ready() call above.
+ */
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) {
err = virtblk_probe_zoned_device(vdev, vblk, q);
if (err)
goto out_cleanup_disk;
}
- dev_info(&vdev->dev, "blk config size: %zu\n",
- sizeof(struct virtio_blk_config));
-
err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
if (err)
goto out_cleanup_disk;
@@ -1607,10 +1687,7 @@ static unsigned int features[] = {
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
- VIRTIO_BLK_F_SECURE_ERASE,
-#ifdef CONFIG_BLK_DEV_ZONED
- VIRTIO_BLK_F_ZONED,
-#endif /* CONFIG_BLK_DEV_ZONED */
+ VIRTIO_BLK_F_SECURE_ERASE, VIRTIO_BLK_F_ZONED,
};
static struct virtio_driver virtio_blk = {
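
The power-of-two constraint on zone_sectors enforced above is what lets
sector-to-zone arithmetic reduce to shifts and masks instead of divisions.
A minimal sketch of that arithmetic, with hypothetical helper names that
are not part of the driver:

	/* Sketch: sector-to-zone math, valid only when zone_sectors is a power of two */
	static inline unsigned int zone_index(sector_t sector, unsigned int zone_sectors)
	{
		return sector >> ilog2(zone_sectors);	/* divide by shifting */
	}

	static inline sector_t zone_start_sector(sector_t sector, unsigned int zone_sectors)
	{
		return sector & ~((sector_t)zone_sectors - 1);	/* round down by masking */
	}
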
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 3006e2a0f37e..43e98a598bd9 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -511,7 +511,7 @@ static const char *btbcm_get_board_name(struct device *dev)
len = strlen(tmp) + 1;
board_type = devm_kzalloc(dev, len, GFP_KERNEL);
strscpy(board_type, tmp, len);
- for (i = 0; i < board_type[i]; i++) {
+ for (i = 0; i < len; i++) {
if (board_type[i] == '/')
board_type[i] = '-';
}
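
The defect fixed here is a loop bound that read the string being rewritten
(board_type[i]) instead of its length. A standalone sketch of the corrected
pattern, with a hypothetical function name:

	/* Sketch: replace '/' with '-' across a NUL-terminated board name */
	static void sanitize_board_name(char *board_type, size_t len)
	{
		size_t i;

		for (i = 0; i < len && board_type[i]; i++) {
			if (board_type[i] == '/')
				board_type[i] = '-';
		}
	}
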
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
index 02893600db39..51000320e1ea 100644
--- a/drivers/bluetooth/btsdio.c
+++ b/drivers/bluetooth/btsdio.c
@@ -358,6 +358,7 @@ static void btsdio_remove(struct sdio_func *func)
if (!data)
return;
+ cancel_work_sync(&data->work);
hdev = data->hdev;
sdio_set_drvdata(func, NULL);
diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c
index 36d42484142a..cf463c1d2102 100644
--- a/drivers/bus/imx-weim.c
+++ b/drivers/bus/imx-weim.c
@@ -329,6 +329,12 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action,
"Failed to setup timing for '%pOF'\n", rd->dn);
if (!of_node_check_flag(rd->dn, OF_POPULATED)) {
+ /*
+ * Clear the flag before adding the device so that
+ * fw_devlink doesn't skip adding consumers to this
+ * device.
+ */
+ rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE;
if (!of_platform_device_create(rd->dn, NULL, &pdev->dev)) {
dev_err(&pdev->dev,
"Failed to create child device '%pOF'\n",
diff --git a/drivers/clk/clk-renesas-pcie.c b/drivers/clk/clk-renesas-pcie.c
index f91f30560820..ff3a52d48479 100644
--- a/drivers/clk/clk-renesas-pcie.c
+++ b/drivers/clk/clk-renesas-pcie.c
@@ -143,8 +143,9 @@ static int rs9_regmap_i2c_read(void *context,
static const struct regmap_config rs9_regmap_config = {
.reg_bits = 8,
.val_bits = 8,
- .cache_type = REGCACHE_NONE,
+ .cache_type = REGCACHE_FLAT,
.max_register = RS9_REG_BCP,
+ .num_reg_defaults_raw = 0x8,
.rd_table = &rs9_readable_table,
.wr_table = &rs9_writeable_table,
.reg_write = rs9_regmap_i2c_write,
diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
index 2836adb817b7..e3696a88b5a3 100644
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c
@@ -95,14 +95,16 @@ static const struct clk_div_table video_div_table[] = {
{ }
};
-static const char * enet1_ref_sels[] = { "enet1_ref_125m", "enet1_ref_pad", };
+static const char * enet1_ref_sels[] = { "enet1_ref_125m", "enet1_ref_pad", "dummy", "dummy", };
static const u32 enet1_ref_sels_table[] = { IMX6UL_GPR1_ENET1_TX_CLK_DIR,
- IMX6UL_GPR1_ENET1_CLK_SEL };
+ IMX6UL_GPR1_ENET1_CLK_SEL, 0,
+ IMX6UL_GPR1_ENET1_TX_CLK_DIR | IMX6UL_GPR1_ENET1_CLK_SEL };
static const u32 enet1_ref_sels_table_mask = IMX6UL_GPR1_ENET1_TX_CLK_DIR |
IMX6UL_GPR1_ENET1_CLK_SEL;
-static const char * enet2_ref_sels[] = { "enet2_ref_125m", "enet2_ref_pad", };
+static const char * enet2_ref_sels[] = { "enet2_ref_125m", "enet2_ref_pad", "dummy", "dummy", };
static const u32 enet2_ref_sels_table[] = { IMX6UL_GPR1_ENET2_TX_CLK_DIR,
- IMX6UL_GPR1_ENET2_CLK_SEL };
+ IMX6UL_GPR1_ENET2_CLK_SEL, 0,
+ IMX6UL_GPR1_ENET2_TX_CLK_DIR | IMX6UL_GPR1_ENET2_CLK_SEL };
static const u32 enet2_ref_sels_table_mask = IMX6UL_GPR1_ENET2_TX_CLK_DIR |
IMX6UL_GPR1_ENET2_CLK_SEL;
diff --git a/drivers/clk/sprd/common.c b/drivers/clk/sprd/common.c
index ce81e4087a8f..2bfbab8db94b 100644
--- a/drivers/clk/sprd/common.c
+++ b/drivers/clk/sprd/common.c
@@ -17,7 +17,6 @@ static const struct regmap_config sprdclk_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
- .max_register = 0xffff,
.fast_io = true,
};
@@ -43,6 +42,8 @@ int sprd_clk_regmap_init(struct platform_device *pdev,
struct device *dev = &pdev->dev;
struct device_node *node = dev->of_node, *np;
struct regmap *regmap;
+ struct resource *res;
+ struct regmap_config reg_config = sprdclk_regmap_config;
if (of_find_property(node, "sprd,syscon", NULL)) {
regmap = syscon_regmap_lookup_by_phandle(node, "sprd,syscon");
@@ -59,12 +60,14 @@ int sprd_clk_regmap_init(struct platform_device *pdev,
return PTR_ERR(regmap);
}
} else {
- base = devm_platform_ioremap_resource(pdev, 0);
+ base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(base))
return PTR_ERR(base);
+ reg_config.max_register = resource_size(res) - reg_config.reg_stride;
+
regmap = devm_regmap_init_mmio(&pdev->dev, base,
- &sprdclk_regmap_config);
+ &reg_config);
if (IS_ERR(regmap)) {
pr_err("failed to init regmap\n");
return PTR_ERR(regmap);
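
The max_register computed above is simply the offset of the last whole
register that fits in the mapped resource. A hedged restatement of the same
computation, with a hypothetical helper name:

	/* Sketch: highest valid register offset for an MMIO block;
	 * e.g. a 0x1000-byte region with a 4-byte stride yields 0xffc.
	 */
	static unsigned int last_reg_offset(const struct resource *res,
					    unsigned int reg_stride)
	{
		return resource_size(res) - reg_stride;
	}
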
diff --git a/drivers/counter/104-quad-8.c b/drivers/counter/104-quad-8.c
index deed4afadb29..d9cb937665cf 100644
--- a/drivers/counter/104-quad-8.c
+++ b/drivers/counter/104-quad-8.c
@@ -97,10 +97,6 @@ struct quad8 {
struct quad8_reg __iomem *reg;
};
-/* Borrow Toggle flip-flop */
-#define QUAD8_FLAG_BT BIT(0)
-/* Carry Toggle flip-flop */
-#define QUAD8_FLAG_CT BIT(1)
/* Error flag */
#define QUAD8_FLAG_E BIT(4)
/* Up/Down flag */
@@ -133,6 +129,9 @@ struct quad8 {
#define QUAD8_CMR_QUADRATURE_X2 0x10
#define QUAD8_CMR_QUADRATURE_X4 0x18
+/* Each Counter is 24 bits wide */
+#define LS7267_CNTR_MAX GENMASK(23, 0)
+
static int quad8_signal_read(struct counter_device *counter,
struct counter_signal *signal,
enum counter_signal_level *level)
@@ -156,18 +155,10 @@ static int quad8_count_read(struct counter_device *counter,
{
struct quad8 *const priv = counter_priv(counter);
struct channel_reg __iomem *const chan = priv->reg->channel + count->id;
- unsigned int flags;
- unsigned int borrow;
- unsigned int carry;
unsigned long irqflags;
int i;
- flags = ioread8(&chan->control);
- borrow = flags & QUAD8_FLAG_BT;
- carry = !!(flags & QUAD8_FLAG_CT);
-
- /* Borrow XOR Carry effectively doubles count range */
- *val = (unsigned long)(borrow ^ carry) << 24;
+ *val = 0;
spin_lock_irqsave(&priv->lock, irqflags);
@@ -191,8 +182,7 @@ static int quad8_count_write(struct counter_device *counter,
unsigned long irqflags;
int i;
- /* Only 24-bit values are supported */
- if (val > 0xFFFFFF)
+ if (val > LS7267_CNTR_MAX)
return -ERANGE;
spin_lock_irqsave(&priv->lock, irqflags);
@@ -378,7 +368,7 @@ static int quad8_action_read(struct counter_device *counter,
/* Handle Index signals */
if (synapse->signal->id >= 16) {
- if (priv->preset_enable[count->id])
+ if (!priv->preset_enable[count->id])
*action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
else
*action = COUNTER_SYNAPSE_ACTION_NONE;
@@ -806,8 +796,7 @@ static int quad8_count_preset_write(struct counter_device *counter,
struct quad8 *const priv = counter_priv(counter);
unsigned long irqflags;
- /* Only 24-bit values are supported */
- if (preset > 0xFFFFFF)
+ if (preset > LS7267_CNTR_MAX)
return -ERANGE;
spin_lock_irqsave(&priv->lock, irqflags);
@@ -834,8 +823,7 @@ static int quad8_count_ceiling_read(struct counter_device *counter,
*ceiling = priv->preset[count->id];
break;
default:
- /* By default 0x1FFFFFF (25 bits unsigned) is maximum count */
- *ceiling = 0x1FFFFFF;
+ *ceiling = LS7267_CNTR_MAX;
break;
}
@@ -850,8 +838,7 @@ static int quad8_count_ceiling_write(struct counter_device *counter,
struct quad8 *const priv = counter_priv(counter);
unsigned long irqflags;
- /* Only 24-bit values are supported */
- if (ceiling > 0xFFFFFF)
+ if (ceiling > LS7267_CNTR_MAX)
return -ERANGE;
spin_lock_irqsave(&priv->lock, irqflags);
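
GENMASK(23, 0) used for LS7267_CNTR_MAX expands to 0xFFFFFF, the full-scale
value of the 24-bit counter, so the count, preset and ceiling write paths
all share one limit. A minimal sketch of the shared range check, with a
hypothetical helper name (GENMASK() comes from <linux/bits.h>):

	/* Sketch: one range check for count, preset and ceiling writes */
	#define LS7267_CNTR_MAX GENMASK(23, 0)	/* == 0xFFFFFF */

	static int ls7267_check_value(u64 val)
	{
		return val > LS7267_CNTR_MAX ? -ERANGE : 0;
	}
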
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 73c7643b2697..8dd46fad151e 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -840,22 +840,20 @@ static int amd_pstate_update_status(const char *buf, size_t size)
switch(mode_idx) {
case AMD_PSTATE_DISABLE:
- if (!current_pstate_driver)
- return -EINVAL;
- if (cppc_state == AMD_PSTATE_ACTIVE)
- return -EBUSY;
- cpufreq_unregister_driver(current_pstate_driver);
- amd_pstate_driver_cleanup();
+ if (current_pstate_driver) {
+ cpufreq_unregister_driver(current_pstate_driver);
+ amd_pstate_driver_cleanup();
+ }
break;
case AMD_PSTATE_PASSIVE:
if (current_pstate_driver) {
if (current_pstate_driver == &amd_pstate_driver)
return 0;
cpufreq_unregister_driver(current_pstate_driver);
- cppc_state = AMD_PSTATE_PASSIVE;
- current_pstate_driver = &amd_pstate_driver;
}
+ current_pstate_driver = &amd_pstate_driver;
+ cppc_state = AMD_PSTATE_PASSIVE;
ret = cpufreq_register_driver(current_pstate_driver);
break;
case AMD_PSTATE_ACTIVE:
@@ -863,10 +861,10 @@ static int amd_pstate_update_status(const char *buf, size_t size)
if (current_pstate_driver == &amd_pstate_epp_driver)
return 0;
cpufreq_unregister_driver(current_pstate_driver);
- current_pstate_driver = &amd_pstate_epp_driver;
- cppc_state = AMD_PSTATE_ACTIVE;
}
+ current_pstate_driver = &amd_pstate_epp_driver;
+ cppc_state = AMD_PSTATE_ACTIVE;
ret = cpufreq_register_driver(current_pstate_driver);
break;
default:
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 45deda18ed32..02cc2c38b44b 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -101,25 +101,40 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
BIT(CXL_CM_CAP_CAP_ID_HDM));
}
-static struct cxl_hdm *devm_cxl_setup_emulated_hdm(struct cxl_port *port,
- struct cxl_endpoint_dvsec_info *info)
+static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
{
- struct device *dev = &port->dev;
struct cxl_hdm *cxlhdm;
+ void __iomem *hdm;
+ u32 ctrl;
+ int i;
- if (!info->mem_enabled)
- return ERR_PTR(-ENODEV);
+ if (!info)
+ return false;
- cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
- if (!cxlhdm)
- return ERR_PTR(-ENOMEM);
+ cxlhdm = dev_get_drvdata(&info->port->dev);
+ hdm = cxlhdm->regs.hdm_decoder;
- cxlhdm->port = port;
- cxlhdm->decoder_count = info->ranges;
- cxlhdm->target_count = info->ranges;
- dev_set_drvdata(&port->dev, cxlhdm);
+ if (!hdm)
+ return true;
- return cxlhdm;
+ /*
+ * If HDM decoders are present and the driver is in control of
+	 * Mem_Enable, skip DVSEC-based emulation.
+ */
+ if (!info->mem_enabled)
+ return false;
+
+ /*
+ * If any decoders are committed already, there should not be any
+ * emulated DVSEC decoders.
+ */
+ for (i = 0; i < cxlhdm->decoder_count; i++) {
+ ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+ if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
+ return false;
+ }
+
+ return true;
}
/**
@@ -138,13 +153,14 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
if (!cxlhdm)
return ERR_PTR(-ENOMEM);
-
cxlhdm->port = port;
- crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
- if (!crb) {
- if (info && info->mem_enabled)
- return devm_cxl_setup_emulated_hdm(port, info);
+ dev_set_drvdata(dev, cxlhdm);
+ crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
+ if (!crb && info && info->mem_enabled) {
+ cxlhdm->decoder_count = info->ranges;
+ return cxlhdm;
+ } else if (!crb) {
dev_err(dev, "No component registers mapped\n");
return ERR_PTR(-ENXIO);
}
@@ -160,7 +176,15 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
return ERR_PTR(-ENXIO);
}
- dev_set_drvdata(dev, cxlhdm);
+ /*
+ * Now that the hdm capability is parsed, decide if range
+	 * register emulation is needed and fix up cxlhdm accordingly.
+ */
+ if (should_emulate_decoders(info)) {
+ dev_dbg(dev, "Fallback map %d range register%s\n", info->ranges,
+ info->ranges > 1 ? "s" : "");
+ cxlhdm->decoder_count = info->ranges;
+ }
return cxlhdm;
}
@@ -714,14 +738,20 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld)
return 0;
}
-static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port,
- struct cxl_decoder *cxld, int which,
- struct cxl_endpoint_dvsec_info *info)
+static int cxl_setup_hdm_decoder_from_dvsec(
+ struct cxl_port *port, struct cxl_decoder *cxld, u64 *dpa_base,
+ int which, struct cxl_endpoint_dvsec_info *info)
{
+ struct cxl_endpoint_decoder *cxled;
+ u64 len;
+ int rc;
+
if (!is_cxl_endpoint(port))
return -EOPNOTSUPP;
- if (!range_len(&info->dvsec_range[which]))
+ cxled = to_cxl_endpoint_decoder(&cxld->dev);
+ len = range_len(&info->dvsec_range[which]);
+ if (!len)
return -ENOENT;
cxld->target_type = CXL_DECODER_EXPANDER;
@@ -736,40 +766,24 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port,
cxld->flags |= CXL_DECODER_F_ENABLE | CXL_DECODER_F_LOCK;
port->commit_end = cxld->id;
- return 0;
-}
-
-static bool should_emulate_decoders(struct cxl_port *port)
-{
- struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
- void __iomem *hdm = cxlhdm->regs.hdm_decoder;
- u32 ctrl;
- int i;
-
- if (!is_cxl_endpoint(cxlhdm->port))
- return false;
-
- if (!hdm)
- return true;
-
- /*
- * If any decoders are committed already, there should not be any
- * emulated DVSEC decoders.
- */
- for (i = 0; i < cxlhdm->decoder_count; i++) {
- ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
- if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
- return false;
+ rc = devm_cxl_dpa_reserve(cxled, *dpa_base, len, 0);
+ if (rc) {
+ dev_err(&port->dev,
+ "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)",
+ port->id, cxld->id, *dpa_base, *dpa_base + len - 1, rc);
+ return rc;
}
+ *dpa_base += len;
+ cxled->state = CXL_DECODER_STATE_AUTO;
- return true;
+ return 0;
}
static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
int *target_map, void __iomem *hdm, int which,
u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
{
- struct cxl_endpoint_decoder *cxled = NULL;
+ struct cxl_endpoint_decoder *cxled;
u64 size, base, skip, dpa_size;
bool committed;
u32 remainder;
@@ -780,11 +794,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
unsigned char target_id[8];
} target_list;
- if (should_emulate_decoders(port))
- return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info);
-
- if (is_endpoint_decoder(&cxld->dev))
- cxled = to_cxl_endpoint_decoder(&cxld->dev);
+ if (should_emulate_decoders(info))
+ return cxl_setup_hdm_decoder_from_dvsec(port, cxld, dpa_base,
+ which, info);
ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
@@ -806,9 +818,6 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
.end = base + size - 1,
};
- if (cxled && !committed && range_len(&info->dvsec_range[which]))
- return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info);
-
/* decoders are enabled if committed */
if (committed) {
cxld->flags |= CXL_DECODER_F_ENABLE;
@@ -846,7 +855,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
if (rc)
return rc;
- if (!cxled) {
+ if (!info) {
target_list.value =
ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
for (i = 0; i < cxld->interleave_ways; i++)
@@ -866,6 +875,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
return -ENXIO;
}
skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
+ cxled = to_cxl_endpoint_decoder(&cxld->dev);
rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
if (rc) {
dev_err(&port->dev,
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 7328a2552411..523d5b9fd7fc 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -462,7 +462,7 @@ static struct pci_doe_mb *find_cdat_doe(struct device *uport)
return NULL;
}
-#define CDAT_DOE_REQ(entry_handle) \
+#define CDAT_DOE_REQ(entry_handle) cpu_to_le32 \
(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE, \
CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) | \
FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE, \
@@ -475,8 +475,8 @@ static void cxl_doe_task_complete(struct pci_doe_task *task)
}
struct cdat_doe_task {
- u32 request_pl;
- u32 response_pl[32];
+ __le32 request_pl;
+ __le32 response_pl[32];
struct completion c;
struct pci_doe_task task;
};
@@ -510,10 +510,10 @@ static int cxl_cdat_get_length(struct device *dev,
return rc;
}
wait_for_completion(&t.c);
- if (t.task.rv < sizeof(u32))
+ if (t.task.rv < 2 * sizeof(__le32))
return -EIO;
- *length = t.response_pl[1];
+ *length = le32_to_cpu(t.response_pl[1]);
dev_dbg(dev, "CDAT length %zu\n", *length);
return 0;
@@ -524,13 +524,13 @@ static int cxl_cdat_read_table(struct device *dev,
struct cxl_cdat *cdat)
{
size_t length = cdat->length;
- u32 *data = cdat->table;
+ __le32 *data = cdat->table;
int entry_handle = 0;
do {
DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
+ struct cdat_entry_header *entry;
size_t entry_dw;
- u32 *entry;
int rc;
rc = pci_doe_submit_task(cdat_doe, &t.task);
@@ -539,26 +539,34 @@ static int cxl_cdat_read_table(struct device *dev,
return rc;
}
wait_for_completion(&t.c);
- /* 1 DW header + 1 DW data min */
- if (t.task.rv < (2 * sizeof(u32)))
+
+ /* 1 DW Table Access Response Header + CDAT entry */
+ entry = (struct cdat_entry_header *)(t.response_pl + 1);
+ if ((entry_handle == 0 &&
+ t.task.rv != sizeof(__le32) + sizeof(struct cdat_header)) ||
+ (entry_handle > 0 &&
+ (t.task.rv < sizeof(__le32) + sizeof(*entry) ||
+ t.task.rv != sizeof(__le32) + le16_to_cpu(entry->length))))
return -EIO;
/* Get the CXL table access header entry handle */
entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
- t.response_pl[0]);
- entry = t.response_pl + 1;
- entry_dw = t.task.rv / sizeof(u32);
+ le32_to_cpu(t.response_pl[0]));
+ entry_dw = t.task.rv / sizeof(__le32);
/* Skip Header */
entry_dw -= 1;
- entry_dw = min(length / sizeof(u32), entry_dw);
+ entry_dw = min(length / sizeof(__le32), entry_dw);
/* Prevent length < 1 DW from causing a buffer overflow */
if (entry_dw) {
- memcpy(data, entry, entry_dw * sizeof(u32));
- length -= entry_dw * sizeof(u32);
+ memcpy(data, entry, entry_dw * sizeof(__le32));
+ length -= entry_dw * sizeof(__le32);
data += entry_dw;
}
} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
+ /* Length in CDAT header may exceed concatenation of CDAT entries */
+ cdat->length -= length;
+
return 0;
}
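
The stricter response-size checks above rely on the cdat_entry_header
layout introduced in the cxlpci.h hunk below: each entry advertises its own
little-endian length, which also bounds the walk across the concatenated
table. A hedged sketch of such a walk, with a hypothetical function name
and assuming a well-formed table:

	/* Sketch: walk concatenated CDAT entries by their little-endian lengths */
	static void walk_cdat_entries(void *table, size_t length)
	{
		struct cdat_entry_header *entry = table;

		while (length >= sizeof(*entry)) {
			size_t entry_len = le16_to_cpu(entry->length);

			if (entry_len < sizeof(*entry) || entry_len > length)
				break;		/* malformed entry, stop */
			/* ... consume entry->type here ... */
			length -= entry_len;
			entry = (void *)entry + entry_len;
		}
	}
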
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index c2e4b1093788..f8c38d997252 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -62,9 +62,9 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
return is_cxl_nvdimm_bridge(dev);
}
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *start)
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd)
{
- struct cxl_port *port = find_cxl_root(start);
+ struct cxl_port *port = find_cxl_root(dev_get_drvdata(&cxlmd->dev));
struct device *dev;
if (!port)
@@ -253,7 +253,7 @@ int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
struct device *dev;
int rc;
- cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
+ cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
if (!cxl_nvb)
return -ENODEV;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 8ee6b6e2e2a4..4d1f9c5b5029 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -823,41 +823,17 @@ static bool dev_is_cxl_root_child(struct device *dev)
return false;
}
-/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */
-static int match_root_child(struct device *dev, const void *match)
+struct cxl_port *find_cxl_root(struct cxl_port *port)
{
- const struct device *iter = NULL;
- struct cxl_dport *dport;
- struct cxl_port *port;
-
- if (!dev_is_cxl_root_child(dev))
- return 0;
-
- port = to_cxl_port(dev);
- iter = match;
- while (iter) {
- dport = cxl_find_dport_by_dev(port, iter);
- if (dport)
- break;
- iter = iter->parent;
- }
-
- return !!iter;
-}
+ struct cxl_port *iter = port;
-struct cxl_port *find_cxl_root(struct device *dev)
-{
- struct device *port_dev;
- struct cxl_port *root;
+ while (iter && !is_cxl_root(iter))
+ iter = to_cxl_port(iter->dev.parent);
- port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child);
- if (!port_dev)
+ if (!iter)
return NULL;
-
- root = to_cxl_port(port_dev->parent);
- get_device(&root->dev);
- put_device(port_dev);
- return root;
+ get_device(&iter->dev);
+ return iter;
}
EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL);
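
The reworked find_cxl_root() still returns its result with a reference
held (note the get_device() above), so the caller contract is unchanged;
a usage sketch under that assumption:

	/* Sketch: callers drop the reference taken by find_cxl_root() */
	struct cxl_port *root = find_cxl_root(port);

	if (root) {
		/* ... use root ... */
		put_device(&root->dev);
	}
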
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index f29028148806..b2fd67fcebfb 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -134,9 +134,13 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
struct cxl_endpoint_decoder *cxled = p->targets[i];
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_port *iter = cxled_to_port(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_ep *ep;
int rc = 0;
+ if (cxlds->rcd)
+ goto endpoint_reset;
+
while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
iter = to_cxl_port(iter->dev.parent);
@@ -153,6 +157,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
return rc;
}
+endpoint_reset:
rc = cxled->cxld.reset(&cxled->cxld);
if (rc)
return rc;
@@ -1199,6 +1204,7 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
{
struct cxl_region_params *p = &cxlr->params;
struct cxl_endpoint_decoder *cxled;
+ struct cxl_dev_state *cxlds;
struct cxl_memdev *cxlmd;
struct cxl_port *iter;
struct cxl_ep *ep;
@@ -1214,6 +1220,10 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
for (i = 0; i < p->nr_targets; i++) {
cxled = p->targets[i];
cxlmd = cxled_to_memdev(cxled);
+ cxlds = cxlmd->cxlds;
+
+ if (cxlds->rcd)
+ continue;
iter = cxled_to_port(cxled);
while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
@@ -1229,14 +1239,24 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
{
struct cxl_region_params *p = &cxlr->params;
struct cxl_endpoint_decoder *cxled;
+ struct cxl_dev_state *cxlds;
+ int i, rc, rch = 0, vh = 0;
struct cxl_memdev *cxlmd;
struct cxl_port *iter;
struct cxl_ep *ep;
- int i, rc;
for (i = 0; i < p->nr_targets; i++) {
cxled = p->targets[i];
cxlmd = cxled_to_memdev(cxled);
+ cxlds = cxlmd->cxlds;
+
+ /* validate that all targets agree on topology */
+ if (!cxlds->rcd) {
+ vh++;
+ } else {
+ rch++;
+ continue;
+ }
iter = cxled_to_port(cxled);
while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
@@ -1256,6 +1276,12 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
}
}
+ if (rch && vh) {
+ dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
+ cxl_region_teardown_targets(cxlr);
+ return -ENXIO;
+ }
+
return 0;
}
@@ -1648,6 +1674,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
if (rc)
goto err_decrement;
p->state = CXL_CONFIG_ACTIVE;
+ set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
}
cxled->cxld.interleave_ways = p->interleave_ways;
@@ -1749,8 +1776,6 @@ static int attach_target(struct cxl_region *cxlr,
down_read(&cxl_dpa_rwsem);
rc = cxl_region_attach(cxlr, cxled, pos);
- if (rc == 0)
- set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
up_read(&cxl_dpa_rwsem);
up_write(&cxl_region_rwsem);
return rc;
@@ -2251,7 +2276,7 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
* bridge for one device is the same for all.
*/
if (i == 0) {
- cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
+ cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
if (!cxl_nvb) {
cxlr_pmem = ERR_PTR(-ENODEV);
goto out;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index f2b0962a552d..044a92d9813e 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -658,7 +658,7 @@ struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port);
struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
resource_size_t component_reg_phys,
struct cxl_dport *parent_dport);
-struct cxl_port *find_cxl_root(struct device *dev);
+struct cxl_port *find_cxl_root(struct cxl_port *port);
int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
void cxl_bus_rescan(void);
void cxl_bus_drain(void);
@@ -695,13 +695,15 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
/**
* struct cxl_endpoint_dvsec_info - Cached DVSEC info
- * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE
+ * @mem_enabled: cached value of mem_enabled in the DVSEC at init time
* @ranges: Number of active HDM ranges this device uses.
+ * @port: endpoint port associated with this info instance
* @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE
*/
struct cxl_endpoint_dvsec_info {
bool mem_enabled;
int ranges;
+ struct cxl_port *port;
struct range dvsec_range[2];
};
@@ -758,7 +760,7 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm_bridge(struct device *dev);
int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
#ifdef CONFIG_CXL_REGION
bool is_cxl_pmem_region(struct device *dev);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index be6a2ef3cce3..0465ef963cd6 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -68,6 +68,20 @@ enum cxl_regloc_type {
CXL_REGLOC_RBI_TYPES
};
+struct cdat_header {
+ __le32 length;
+ u8 revision;
+ u8 checksum;
+ u8 reserved[6];
+ __le32 sequence;
+} __packed;
+
+struct cdat_entry_header {
+ u8 type;
+ u8 reserved;
+ __le16 length;
+} __packed;
+
int devm_cxl_port_enumerate_dports(struct cxl_port *port);
struct cxl_dev_state;
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 1049bb5ea496..22a7ab2bae7c 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -78,8 +78,8 @@ static int cxl_switch_port_probe(struct cxl_port *port)
static int cxl_endpoint_port_probe(struct cxl_port *port)
{
+ struct cxl_endpoint_dvsec_info info = { .port = port };
struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
- struct cxl_endpoint_dvsec_info info = { 0 };
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_hdm *cxlhdm;
struct cxl_port *root;
@@ -119,7 +119,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
* This can't fail in practice as CXL root exit unregisters all
* descendant ports and that in turn synchronizes with cxl_port_probe()
*/
- root = find_cxl_root(&cxlmd->dev);
+ root = find_cxl_root(port);
/*
* Now that all endpoint decoders are successfully enumerated, try to
diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c
index 90f28bda29c8..4cf8da77bdd9 100644
--- a/drivers/dma/apple-admac.c
+++ b/drivers/dma/apple-admac.c
@@ -75,6 +75,7 @@
#define REG_TX_INTSTATE(idx) (0x0030 + (idx) * 4)
#define REG_RX_INTSTATE(idx) (0x0040 + (idx) * 4)
+#define REG_GLOBAL_INTSTATE(idx) (0x0050 + (idx) * 4)
#define REG_CHAN_INTSTATUS(ch, idx) (0x8010 + (ch) * 0x200 + (idx) * 4)
#define REG_CHAN_INTMASK(ch, idx) (0x8020 + (ch) * 0x200 + (idx) * 4)
@@ -511,7 +512,10 @@ static int admac_terminate_all(struct dma_chan *chan)
admac_stop_chan(adchan);
admac_reset_rings(adchan);
- adchan->current_tx = NULL;
+ if (adchan->current_tx) {
+ list_add_tail(&adchan->current_tx->node, &adchan->to_free);
+ adchan->current_tx = NULL;
+ }
/*
* Descriptors can only be freed after the tasklet
* has been killed (in admac_synchronize).
@@ -672,13 +676,14 @@ static void admac_handle_chan_int(struct admac_data *ad, int no)
static irqreturn_t admac_interrupt(int irq, void *devid)
{
struct admac_data *ad = devid;
- u32 rx_intstate, tx_intstate;
+ u32 rx_intstate, tx_intstate, global_intstate;
int i;
rx_intstate = readl_relaxed(ad->base + REG_RX_INTSTATE(ad->irq_index));
tx_intstate = readl_relaxed(ad->base + REG_TX_INTSTATE(ad->irq_index));
+ global_intstate = readl_relaxed(ad->base + REG_GLOBAL_INTSTATE(ad->irq_index));
- if (!tx_intstate && !rx_intstate)
+ if (!tx_intstate && !rx_intstate && !global_intstate)
return IRQ_NONE;
for (i = 0; i < ad->nchannels; i += 2) {
@@ -693,6 +698,12 @@ static irqreturn_t admac_interrupt(int irq, void *devid)
rx_intstate >>= 1;
}
+ if (global_intstate) {
+ dev_warn(ad->dev, "clearing unknown global interrupt flag: %x\n",
+ global_intstate);
+ writel_relaxed(~(u32) 0, ad->base + REG_GLOBAL_INTSTATE(ad->irq_index));
+ }
+
return IRQ_HANDLED;
}
@@ -850,6 +861,9 @@ static int admac_probe(struct platform_device *pdev)
dma->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
dma->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ dma->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
dma->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index c24bca210104..826b98284fa1 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1342,7 +1342,7 @@ int dmaenginem_async_device_register(struct dma_device *device)
if (ret)
return ret;
- return devm_add_action(device->dev, dmaenginem_async_device_unregister, device);
+ return devm_add_action_or_reset(device->dev, dmaenginem_async_device_unregister, device);
}
EXPORT_SYMBOL(dmaenginem_async_device_register);
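
devm_add_action_or_reset() differs from devm_add_action() only in its
failure path: if registering the action fails, the action runs immediately
instead of being silently dropped, so the unregister step cannot leak.
Roughly, and only as a sketch of the semantics:

	/* Sketch: semantics of devm_add_action_or_reset() */
	ret = devm_add_action(dev, action, data);
	if (ret)
		action(data);	/* undo right away rather than leak the cleanup */
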
diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c
index 462109c61653..93ee298d52b8 100644
--- a/drivers/dma/xilinx/xdma.c
+++ b/drivers/dma/xilinx/xdma.c
@@ -277,7 +277,7 @@ failed:
/**
* xdma_xfer_start - Start DMA transfer
- * @xdma_chan: DMA channel pointer
+ * @xchan: DMA channel pointer
*/
static int xdma_xfer_start(struct xdma_chan *xchan)
{
diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c
index 29619f49873a..d9629ff87861 100644
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -167,7 +167,8 @@ int psci_set_osi_mode(bool enable)
err = invoke_psci_fn(PSCI_1_0_FN_SET_SUSPEND_MODE, suspend_mode, 0, 0);
if (err < 0)
- pr_warn("failed to set %s mode: %d\n", enable ? "OSI" : "PC", err);
+ pr_info(FW_BUG "failed to set %s mode: %d\n",
+ enable ? "OSI" : "PC", err);
return psci_to_linux_errno(err);
}
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 13be729710f2..badbe0582318 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -100,7 +100,7 @@ config GPIO_GENERIC
tristate
config GPIO_REGMAP
- depends on REGMAP
+ select REGMAP
tristate
# put drivers in the right section, in alphabetical order
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 26b1f7465e09..43b2dc8821e6 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -324,7 +324,7 @@ static struct irq_chip gpio_irqchip = {
.irq_enable = gpio_irq_enable,
.irq_disable = gpio_irq_disable,
.irq_set_type = gpio_irq_type,
- .flags = IRQCHIP_SET_TYPE_MASKED,
+ .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE,
};
static void gpio_irq_handler(struct irq_desc *desc)
@@ -641,9 +641,6 @@ static void davinci_gpio_save_context(struct davinci_gpio_controller *chips,
context->set_falling = readl_relaxed(&g->set_falling);
}
- /* Clear Bank interrupt enable bit */
- writel_relaxed(0, base + BINTEN);
-
/* Clear all interrupt status registers */
writel_relaxed(GENMASK(31, 0), &g->intstat);
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 1583157da355..efd025d8961e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -177,6 +177,40 @@ void dm_helpers_dp_update_branch_info(
const struct dc_link *link)
{}
+static void dm_helpers_construct_old_payload(
+ struct dc_link *link,
+ int pbn_per_slot,
+ struct drm_dp_mst_atomic_payload *new_payload,
+ struct drm_dp_mst_atomic_payload *old_payload)
+{
+ struct link_mst_stream_allocation_table current_link_table =
+ link->mst_stream_alloc_table;
+ struct link_mst_stream_allocation *dc_alloc;
+ int i;
+
+ *old_payload = *new_payload;
+
+	/* Set the correct time_slots/PBN on the old payload. The other fields
+	 * (delete & dsc_enabled) in struct drm_dp_mst_atomic_payload are
+	 * don't-care fields when calling drm_dp_remove_payload().
+ */
+ for (i = 0; i < current_link_table.stream_count; i++) {
+ dc_alloc =
+ &current_link_table.stream_allocations[i];
+
+ if (dc_alloc->vcp_id == new_payload->vcpi) {
+ old_payload->time_slots = dc_alloc->slot_count;
+ old_payload->pbn = dc_alloc->slot_count * pbn_per_slot;
+ break;
+ }
+ }
+
+	/* make sure there is an old payload */
+	ASSERT(i != current_link_table.stream_count);
+}
+
/*
* Writes payload allocation table in immediate downstream device.
*/
@@ -188,7 +222,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
{
struct amdgpu_dm_connector *aconnector;
struct drm_dp_mst_topology_state *mst_state;
- struct drm_dp_mst_atomic_payload *payload;
+ struct drm_dp_mst_atomic_payload *target_payload, *new_payload, old_payload;
struct drm_dp_mst_topology_mgr *mst_mgr;
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
@@ -204,17 +238,26 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
/* It's OK for this to fail */
- payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
- if (enable)
- drm_dp_add_payload_part1(mst_mgr, mst_state, payload);
- else
- drm_dp_remove_payload(mst_mgr, mst_state, payload, payload);
+ new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
+
+ if (enable) {
+ target_payload = new_payload;
+
+ drm_dp_add_payload_part1(mst_mgr, mst_state, new_payload);
+ } else {
+		/* construct the old payload by VCPI */
+ dm_helpers_construct_old_payload(stream->link, mst_state->pbn_div,
+ new_payload, &old_payload);
+ target_payload = &old_payload;
+
+ drm_dp_remove_payload(mst_mgr, mst_state, &old_payload, new_payload);
+ }
/* mst_mgr->payloads holds the VC payloads used to notify the MST branch
 * via DPCD or AUX messages. Slots 1-63 are allocated in sequence for each
 * stream, and the AMD ASIC stream slot allocation should follow the same
 * sequence. Copy the DRM MST allocation to dc. */
- fill_dc_mst_payload_table_from_drm(stream->link, enable, payload, proposed_table);
+ fill_dc_mst_payload_table_from_drm(stream->link, enable, target_payload, proposed_table);
return true;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index f085cb97a620..85a090b9e3d9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -61,6 +61,12 @@
#define CTF_OFFSET_HOTSPOT 5
#define CTF_OFFSET_MEM 5
+static const int pmfw_decoded_link_speed[5] = {1, 2, 3, 4, 5};
+static const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16};
+
+#define DECODE_GEN_SPEED(gen_speed_idx) (pmfw_decoded_link_speed[gen_speed_idx])
+#define DECODE_LANE_WIDTH(lane_width_idx) (pmfw_decoded_link_width[lane_width_idx])
+
struct smu_13_0_max_sustainable_clocks {
uint32_t display_clock;
uint32_t phy_clock;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 27448ffe60a4..a5c97d61e92a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -1144,8 +1144,8 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
(pcie_table->pcie_lane[i] == 5) ? "x12" :
(pcie_table->pcie_lane[i] == 6) ? "x16" : "",
pcie_table->clk_freq[i],
- ((gen_speed - 1) == pcie_table->pcie_gen[i]) &&
- (lane_width == link_width[pcie_table->pcie_lane[i]]) ?
+ (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) &&
+ (lane_width == DECODE_LANE_WIDTH(link_width[pcie_table->pcie_lane[i]])) ?
"*" : "");
break;
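
The DECODE_GEN_SPEED()/DECODE_LANE_WIDTH() macros index the lookup tables
added in smu_v13_0.h above, translating raw PMFW indices into actual gen
numbers and lane counts. For example, under those tables:

	/* Sketch: PMFW index decoding via the new lookup tables */
	int gen   = DECODE_GEN_SPEED(3);	/* index 3 -> Gen 4 */
	int lanes = DECODE_LANE_WIDTH(4);	/* index 4 -> x8    */
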
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 9e1967d8049e..4399416dd9b8 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -575,6 +575,14 @@ static int smu_v13_0_7_set_default_dpm_table(struct smu_context *smu)
dpm_table);
if (ret)
return ret;
+
+ if (skutable->DriverReportedClocks.GameClockAc &&
+ (dpm_table->dpm_levels[dpm_table->count - 1].value >
+ skutable->DriverReportedClocks.GameClockAc)) {
+ dpm_table->dpm_levels[dpm_table->count - 1].value =
+ skutable->DriverReportedClocks.GameClockAc;
+ dpm_table->max = skutable->DriverReportedClocks.GameClockAc;
+ }
} else {
dpm_table->count = 1;
dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100;
@@ -828,6 +836,57 @@ static int smu_v13_0_7_get_smu_metrics_data(struct smu_context *smu,
return ret;
}
+static int smu_v13_0_7_get_dpm_ultimate_freq(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *min,
+ uint32_t *max)
+{
+ struct smu_13_0_dpm_context *dpm_context =
+ smu->smu_dpm.dpm_context;
+ struct smu_13_0_dpm_table *dpm_table;
+
+ switch (clk_type) {
+ case SMU_MCLK:
+ case SMU_UCLK:
+ /* uclk dpm table */
+ dpm_table = &dpm_context->dpm_tables.uclk_table;
+ break;
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ /* gfxclk dpm table */
+ dpm_table = &dpm_context->dpm_tables.gfx_table;
+ break;
+ case SMU_SOCCLK:
+ /* socclk dpm table */
+ dpm_table = &dpm_context->dpm_tables.soc_table;
+ break;
+ case SMU_FCLK:
+ /* fclk dpm table */
+ dpm_table = &dpm_context->dpm_tables.fclk_table;
+ break;
+ case SMU_VCLK:
+ case SMU_VCLK1:
+ /* vclk dpm table */
+ dpm_table = &dpm_context->dpm_tables.vclk_table;
+ break;
+ case SMU_DCLK:
+ case SMU_DCLK1:
+ /* dclk dpm table */
+ dpm_table = &dpm_context->dpm_tables.dclk_table;
+ break;
+ default:
+ dev_err(smu->adev->dev, "Unsupported clock type!\n");
+ return -EINVAL;
+ }
+
+ if (min)
+ *min = dpm_table->min;
+ if (max)
+ *max = dpm_table->max;
+
+ return 0;
+}
+
static int smu_v13_0_7_read_sensor(struct smu_context *smu,
enum amd_pp_sensors sensor,
void *data,
@@ -1074,8 +1133,8 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu,
(pcie_table->pcie_lane[i] == 5) ? "x12" :
(pcie_table->pcie_lane[i] == 6) ? "x16" : "",
pcie_table->clk_freq[i],
- (gen_speed == pcie_table->pcie_gen[i]) &&
- (lane_width == pcie_table->pcie_lane[i]) ?
+ (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) &&
+ (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ?
"*" : "");
break;
@@ -1329,9 +1388,17 @@ static int smu_v13_0_7_populate_umd_state_clk(struct smu_context *smu)
&dpm_context->dpm_tables.fclk_table;
struct smu_umd_pstate_table *pstate_table =
&smu->pstate_table;
+ struct smu_table_context *table_context = &smu->smu_table;
+ PPTable_t *pptable = table_context->driver_pptable;
+ DriverReportedClocks_t driver_clocks =
+ pptable->SkuTable.DriverReportedClocks;
pstate_table->gfxclk_pstate.min = gfx_table->min;
- pstate_table->gfxclk_pstate.peak = gfx_table->max;
+ if (driver_clocks.GameClockAc &&
+ (driver_clocks.GameClockAc < gfx_table->max))
+ pstate_table->gfxclk_pstate.peak = driver_clocks.GameClockAc;
+ else
+ pstate_table->gfxclk_pstate.peak = gfx_table->max;
pstate_table->uclk_pstate.min = mem_table->min;
pstate_table->uclk_pstate.peak = mem_table->max;
@@ -1348,12 +1415,12 @@ static int smu_v13_0_7_populate_umd_state_clk(struct smu_context *smu)
pstate_table->fclk_pstate.min = fclk_table->min;
pstate_table->fclk_pstate.peak = fclk_table->max;
- /*
- * For now, just use the mininum clock frequency.
- * TODO: update them when the real pstate settings available
- */
- pstate_table->gfxclk_pstate.standard = gfx_table->min;
- pstate_table->uclk_pstate.standard = mem_table->min;
+ if (driver_clocks.BaseClockAc &&
+ driver_clocks.BaseClockAc < gfx_table->max)
+ pstate_table->gfxclk_pstate.standard = driver_clocks.BaseClockAc;
+ else
+ pstate_table->gfxclk_pstate.standard = gfx_table->max;
+ pstate_table->uclk_pstate.standard = mem_table->max;
pstate_table->socclk_pstate.standard = soc_table->min;
pstate_table->vclk_pstate.standard = vclk_table->min;
pstate_table->dclk_pstate.standard = dclk_table->min;
@@ -1676,7 +1743,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.dpm_set_jpeg_enable = smu_v13_0_set_jpeg_enable,
.init_pptable_microcode = smu_v13_0_init_pptable_microcode,
.populate_umd_state_clk = smu_v13_0_7_populate_umd_state_clk,
- .get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq,
+ .get_dpm_ultimate_freq = smu_v13_0_7_get_dpm_ultimate_freq,
.get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values,
.read_sensor = smu_v13_0_7_read_sensor,
.feature_is_enabled = smu_cmn_feature_is_enabled,
diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c
index 0643887800b4..142668cd6d7c 100644
--- a/drivers/gpu/drm/armada/armada_drv.c
+++ b/drivers/gpu/drm/armada/armada_drv.c
@@ -99,7 +99,6 @@ static int armada_drm_bind(struct device *dev)
if (ret) {
dev_err(dev, "[" DRM_NAME ":%s] can't kick out simple-fb: %d\n",
__func__, ret);
- kfree(priv);
return ret;
}
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index 468a792e6a40..fc0eaf40dc94 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -300,9 +300,21 @@ static void configure_dual_link_mode(struct intel_encoder *encoder,
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
+ i915_reg_t dss_ctl1_reg, dss_ctl2_reg;
u32 dss_ctl1;
- dss_ctl1 = intel_de_read(dev_priv, DSS_CTL1);
+ /* FIXME: Move all DSS handling to intel_vdsc.c */
+ if (DISPLAY_VER(dev_priv) >= 12) {
+ struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
+
+ dss_ctl1_reg = ICL_PIPE_DSS_CTL1(crtc->pipe);
+ dss_ctl2_reg = ICL_PIPE_DSS_CTL2(crtc->pipe);
+ } else {
+ dss_ctl1_reg = DSS_CTL1;
+ dss_ctl2_reg = DSS_CTL2;
+ }
+
+ dss_ctl1 = intel_de_read(dev_priv, dss_ctl1_reg);
dss_ctl1 |= SPLITTER_ENABLE;
dss_ctl1 &= ~OVERLAP_PIXELS_MASK;
dss_ctl1 |= OVERLAP_PIXELS(intel_dsi->pixel_overlap);
@@ -323,16 +335,16 @@ static void configure_dual_link_mode(struct intel_encoder *encoder,
dss_ctl1 &= ~LEFT_DL_BUF_TARGET_DEPTH_MASK;
dss_ctl1 |= LEFT_DL_BUF_TARGET_DEPTH(dl_buffer_depth);
- dss_ctl2 = intel_de_read(dev_priv, DSS_CTL2);
+ dss_ctl2 = intel_de_read(dev_priv, dss_ctl2_reg);
dss_ctl2 &= ~RIGHT_DL_BUF_TARGET_DEPTH_MASK;
dss_ctl2 |= RIGHT_DL_BUF_TARGET_DEPTH(dl_buffer_depth);
- intel_de_write(dev_priv, DSS_CTL2, dss_ctl2);
+ intel_de_write(dev_priv, dss_ctl2_reg, dss_ctl2);
} else {
/* Interleave */
dss_ctl1 |= DUAL_LINK_MODE_INTERLEAVE;
}
- intel_de_write(dev_priv, DSS_CTL1, dss_ctl1);
+ intel_de_write(dev_priv, dss_ctl1_reg, dss_ctl1);
}
/* aka DSI 8X clock */
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 2106b3de225a..7c9b328bc2d7 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -232,7 +232,7 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder,
return slots;
}
- intel_link_compute_m_n(crtc_state->pipe_bpp,
+ intel_link_compute_m_n(crtc_state->dsc.compressed_bpp,
crtc_state->lane_count,
adjusted_mode->crtc_clock,
crtc_state->port_clock,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 7420276827a5..4758f21c91e1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1067,11 +1067,12 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
.interruptible = true,
.no_wait_gpu = true, /* should be idle already */
};
+ int err;
GEM_BUG_ON(!bo->ttm || !(bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED));
- ret = ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx);
- if (ret) {
+ err = ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx);
+ if (err) {
dma_resv_unlock(bo->base.resv);
return VM_FAULT_SIGBUS;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 1bbe6708d0a7..750326434677 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2018,6 +2018,8 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
* inspecting the queue to see if we need to resubmit.
*/
if (*prev != *execlists->active) { /* elide lite-restores */
+ struct intel_context *prev_ce = NULL, *active_ce = NULL;
+
/*
* Note the inherent discrepancy between the HW runtime,
* recorded as part of the context switch, and the CPU
@@ -2029,9 +2031,15 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
* and correct ourselves later when updating from HW.
*/
if (*prev)
- lrc_runtime_stop((*prev)->context);
+ prev_ce = (*prev)->context;
if (*execlists->active)
- lrc_runtime_start((*execlists->active)->context);
+ active_ce = (*execlists->active)->context;
+ if (prev_ce != active_ce) {
+ if (prev_ce)
+ lrc_runtime_stop(prev_ce);
+ if (active_ce)
+ lrc_runtime_start(active_ce);
+ }
new_timeslice(execlists);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 410905da8e97..0c103ca160d1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -235,6 +235,13 @@ static void delayed_huc_load_fini(struct intel_huc *huc)
i915_sw_fence_fini(&huc->delayed_load.fence);
}
+int intel_huc_sanitize(struct intel_huc *huc)
+{
+ delayed_huc_load_complete(huc);
+ intel_uc_fw_sanitize(&huc->fw);
+ return 0;
+}
+
static bool vcs_supported(struct intel_gt *gt)
{
intel_engine_mask_t mask = gt->info.engine_mask;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index 52db03620c60..db555b3c1f56 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -41,6 +41,7 @@ struct intel_huc {
} delayed_load;
};
+int intel_huc_sanitize(struct intel_huc *huc);
void intel_huc_init_early(struct intel_huc *huc);
int intel_huc_init(struct intel_huc *huc);
void intel_huc_fini(struct intel_huc *huc);
@@ -54,12 +55,6 @@ bool intel_huc_is_authenticated(struct intel_huc *huc);
void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
-static inline int intel_huc_sanitize(struct intel_huc *huc)
-{
- intel_uc_fw_sanitize(&huc->fw);
- return 0;
-}
-
static inline bool intel_huc_is_supported(struct intel_huc *huc)
{
return intel_uc_fw_is_supported(&huc->fw);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 283a4a3c6862..004074936300 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -4638,13 +4638,13 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
err = oa_config->id;
goto sysfs_err;
}
-
- mutex_unlock(&perf->metrics_lock);
+ id = oa_config->id;
drm_dbg(&perf->i915->drm,
"Added config %s id=%i\n", oa_config->uuid, oa_config->id);
+ mutex_unlock(&perf->metrics_lock);
- return oa_config->id;
+ return id;
sysfs_err:
mutex_unlock(&perf->metrics_lock);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index ed9d374147b8..5bb777ff1313 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -363,6 +363,35 @@ nv50_outp_atomic_check_view(struct drm_encoder *encoder,
return 0;
}
+static void
+nv50_outp_atomic_fix_depth(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state)
+{
+ struct nv50_head_atom *asyh = nv50_head_atom(crtc_state);
+ struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+ struct drm_display_mode *mode = &asyh->state.adjusted_mode;
+ unsigned int max_rate, mode_rate;
+
+ switch (nv_encoder->dcb->type) {
+ case DCB_OUTPUT_DP:
+ max_rate = nv_encoder->dp.link_nr * nv_encoder->dp.link_bw;
+
+ /* we don't support more than 10 anyway */
+ asyh->or.bpc = min_t(u8, asyh->or.bpc, 10);
+
+ /* reduce the bpc until it works out */
+ while (asyh->or.bpc > 6) {
+ mode_rate = DIV_ROUND_UP(mode->clock * asyh->or.bpc * 3, 8);
+ if (mode_rate <= max_rate)
+ break;
+
+ asyh->or.bpc -= 2;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
static int
nv50_outp_atomic_check(struct drm_encoder *encoder,
struct drm_crtc_state *crtc_state,
@@ -381,6 +410,9 @@ nv50_outp_atomic_check(struct drm_encoder *encoder,
if (crtc_state->mode_changed || crtc_state->connectors_changed)
asyh->or.bpc = connector->display_info.bpc;
+ /* We might have to reduce the bpc */
+ nv50_outp_atomic_fix_depth(encoder, crtc_state);
+
return 0;
}
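
The bpc-reduction loop above is a bandwidth fit: the mode's payload rate at
a given depth is clock * bpc * 3 components / 8 bits per byte, compared
against the per-lane link bandwidth times the lane count. A worked
restatement with a hypothetical helper name, assuming mode->clock in kHz
and per-lane link bandwidth in kB/s so the units match:

	/* Sketch: reduce bpc until the mode fits the DP link (units: kB/s) */
	static unsigned int fit_bpc(unsigned int clock_khz, unsigned int max_rate,
				    unsigned int bpc)
	{
		while (bpc > 6 &&
		       DIV_ROUND_UP(clock_khz * bpc * 3, 8) > max_rate)
			bpc -= 2;	/* e.g. 10 -> 8 -> 6 */
		return bpc;
	}
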
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index e00876f92aee..d49b4875fc3c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -263,8 +263,6 @@ nouveau_dp_irq(struct work_struct *work)
}
/* TODO:
- * - Use the minimum possible BPC here, once we add support for the max bpc
- * property.
* - Validate against the DP caps advertised by the GPU (we don't check these
* yet)
*/
@@ -276,7 +274,11 @@ nv50_dp_mode_valid(struct drm_connector *connector,
{
const unsigned int min_clock = 25000;
unsigned int max_rate, mode_rate, ds_max_dotclock, clock = mode->clock;
- const u8 bpp = connector->display_info.bpc * 3;
+	/* Always check with the minimum bpc, so we can advertise better modes.
+	 * In particular, not doing this causes modes to be dropped on HDR
+	 * displays, since we might otherwise check with a bpc as high as 16.
+	 */
+ const u8 bpp = 6 * 3;
if (mode->flags & DRM_MODE_FLAG_INTERLACE && !outp->caps.dp_interlace)
return MODE_NO_INTERLACE;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c
index 76678dd60f93..c4c6f67af7cc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c
@@ -31,6 +31,7 @@ gf108_fb = {
.init = gf100_fb_init,
.init_page = gf100_fb_init_page,
.intr = gf100_fb_intr,
+ .sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
.ram_new = gf108_ram_new,
.default_bigpage = 17,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c
index f73442ccb424..433fa966ba23 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c
@@ -77,6 +77,7 @@ gk104_fb = {
.init = gf100_fb_init,
.init_page = gf100_fb_init_page,
.intr = gf100_fb_intr,
+ .sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
.ram_new = gk104_ram_new,
.default_bigpage = 17,
.clkgate_pack = gk104_fb_clkgate_pack,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c
index 45d6cdffafee..4dc283dedf8b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c
@@ -59,6 +59,7 @@ gk110_fb = {
.init = gf100_fb_init,
.init_page = gf100_fb_init_page,
.intr = gf100_fb_intr,
+ .sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
.ram_new = gk104_ram_new,
.default_bigpage = 17,
.clkgate_pack = gk110_fb_clkgate_pack,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c
index de52462a92bf..90bfff616d35 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c
@@ -31,6 +31,7 @@ gm107_fb = {
.init = gf100_fb_init,
.init_page = gf100_fb_init_page,
.intr = gf100_fb_intr,
+ .sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
.ram_new = gm107_ram_new,
.default_bigpage = 17,
};
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index 666a5e53fe19..e961fa27702c 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -504,6 +504,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
if (IS_ERR(pages[i])) {
mutex_unlock(&bo->base.pages_lock);
ret = PTR_ERR(pages[i]);
+ pages[i] = NULL;
goto err_pages;
}
}
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 15d04a0ec623..e0a8890a62e2 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -507,12 +507,19 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
{
struct drm_sched_entity *entity = sched_job->entity;
bool first;
+ ktime_t submit_ts;
trace_drm_sched_job(sched_job, entity);
atomic_inc(entity->rq->sched->score);
WRITE_ONCE(entity->last_user, current->group_leader);
+
+ /*
+ * After the sched_job is pushed into the entity queue, it may be
+ * completed and freed up at any time. We can no longer access it.
+ * Make sure to set the submit_ts first, to avoid a race.
+ */
+ sched_job->submit_ts = submit_ts = ktime_get();
first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
- sched_job->submit_ts = ktime_get();
/* first job wakes up scheduler */
if (first) {
@@ -529,7 +536,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
spin_unlock(&entity->rq_lock);
if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
- drm_sched_rq_update_fifo(entity, sched_job->submit_ts);
+ drm_sched_rq_update_fifo(entity, submit_ts);
drm_sched_wakeup(entity->rq->sched);
}
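The scheduler change is an ordering fix: spsc_queue_push() publishes the job to the consumer, which may complete and free it immediately, so submit_ts must be written (and cached in a local for the later FIFO update) before the push. A minimal userspace sketch of the publish-after-write pattern, with a hypothetical job type and a stubbed queue:

    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    struct job {
        long long submit_ts;
    };

    /* Stub: a real queue would hand the job to a consumer thread that
     * may complete and free it as soon as this returns. */
    static bool queue_push(struct job *job)
    {
        (void)job;
        return true;
    }

    static long long now_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
    }

    int main(void)
    {
        struct job j = { 0 };
        long long submit_ts;

        /* Write the field and keep a local copy *before* publishing. */
        j.submit_ts = submit_ts = now_ns();

        queue_push(&j);

        /* From here on, only the local copy is safe to use. */
        printf("submitted at %lld ns\n", submit_ts);
        return 0;
    }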
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 82f64fb31fda..4ce012f83253 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -1122,7 +1122,7 @@ config HID_TOPRE
tristate "Topre REALFORCE keyboards"
depends on HID
help
- Say Y for N-key rollover support on Topre REALFORCE R2 108 key keyboards.
+ Say Y for N-key rollover support on Topre REALFORCE R2 108/87 key keyboards.
config HID_THINGM
tristate "ThingM blink(1) USB RGB LED"
diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c
index 8a034a555d4c..d9ef45fcaeab 100644
--- a/drivers/hid/bpf/hid_bpf_dispatch.c
+++ b/drivers/hid/bpf/hid_bpf_dispatch.c
@@ -342,9 +342,6 @@ hid_bpf_release_context(struct hid_bpf_ctx *ctx)
{
struct hid_bpf_ctx_kern *ctx_kern;
- if (!ctx)
- return;
-
ctx_kern = container_of(ctx, struct hid_bpf_ctx_kern, ctx);
kfree(ctx_kern);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 63545cd307e5..c2e9b6d1fd7d 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -420,6 +420,9 @@
#define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN 0x261A
#define I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN 0x2A1C
#define I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN 0x279F
+#define I2C_DEVICE_ID_HP_SPECTRE_X360_13T_AW100 0x29F5
+#define I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V1 0x2BED
+#define I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V2 0x2BEE
#define USB_VENDOR_ID_ELECOM 0x056e
#define USB_DEVICE_ID_ELECOM_BM084 0x0061
@@ -1249,6 +1252,7 @@
#define USB_VENDOR_ID_TOPRE 0x0853
#define USB_DEVICE_ID_TOPRE_REALFORCE_R2_108 0x0148
+#define USB_DEVICE_ID_TOPRE_REALFORCE_R2_87 0x0146
#define USB_VENDOR_ID_TOPSEED 0x0766
#define USB_DEVICE_ID_TOPSEED_CYBERLINK 0x0204
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 7fc967964dd8..5c65a584b3fa 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -398,6 +398,12 @@ static const struct hid_device_id hid_battery_quirks[] = {
HID_BATTERY_QUIRK_IGNORE },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN),
HID_BATTERY_QUIRK_IGNORE },
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_13T_AW100),
+ HID_BATTERY_QUIRK_IGNORE },
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V1),
+ HID_BATTERY_QUIRK_IGNORE },
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V2),
+ HID_BATTERY_QUIRK_IGNORE },
{}
};
diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c
index 3e3f89e01d81..d85398721659 100644
--- a/drivers/hid/hid-sensor-custom.c
+++ b/drivers/hid/hid-sensor-custom.c
@@ -940,7 +940,7 @@ hid_sensor_register_platform_device(struct platform_device *pdev,
struct hid_sensor_hub_device *hsdev,
const struct hid_sensor_custom_match *match)
{
- char real_usage[HID_SENSOR_USAGE_LENGTH];
+ char real_usage[HID_SENSOR_USAGE_LENGTH] = { 0 };
struct platform_device *custom_pdev;
const char *dev_name;
char *c;
diff --git a/drivers/hid/hid-topre.c b/drivers/hid/hid-topre.c
index 88a91cdad5f8..d1d5ca310ead 100644
--- a/drivers/hid/hid-topre.c
+++ b/drivers/hid/hid-topre.c
@@ -36,6 +36,8 @@ static __u8 *topre_report_fixup(struct hid_device *hdev, __u8 *rdesc,
static const struct hid_device_id topre_id_table[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_TOPRE,
USB_DEVICE_ID_TOPRE_REALFORCE_R2_108) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_TOPRE,
+ USB_DEVICE_ID_TOPRE_REALFORCE_R2_87) },
{ }
};
MODULE_DEVICE_TABLE(hid, topre_id_table);
diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index 81385ab37fa9..7fc738a22375 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -241,8 +241,8 @@ static int ishtp_cl_bus_match(struct device *dev, struct device_driver *drv)
struct ishtp_cl_device *device = to_ishtp_cl_device(dev);
struct ishtp_cl_driver *driver = to_ishtp_cl_driver(drv);
- return guid_equal(&driver->id[0].guid,
- &device->fw_client->props.protocol_name);
+ return(device->fw_client ? guid_equal(&driver->id[0].guid,
+ &device->fw_client->props.protocol_name) : 0);
}
/**
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 9dc27e5d367a..da51b50787df 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -409,6 +409,10 @@ void vmbus_disconnect(void)
*/
struct vmbus_channel *relid2channel(u32 relid)
{
+ if (vmbus_connection.channels == NULL) {
+ pr_warn_once("relid2channel: relid=%d: No channels mapped!\n", relid);
+ return NULL;
+ }
if (WARN_ON(relid >= MAX_CHANNEL_RELIDS))
return NULL;
return READ_ONCE(vmbus_connection.channels[relid]);
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index d193ed3cb35e..c7ebef1990c8 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -174,7 +174,7 @@ static int hwmon_thermal_set_trips(struct thermal_zone_device *tz, int low, int
struct hwmon_thermal_data *tdata = tz->devdata;
struct hwmon_device *hwdev = to_hwmon_device(tdata->dev);
const struct hwmon_chip_info *chip = hwdev->chip;
- const struct hwmon_channel_info **info = chip->info;
+ const struct hwmon_channel_info * const *info = chip->info;
unsigned int i;
int err;
@@ -253,7 +253,7 @@ static int hwmon_thermal_register_sensors(struct device *dev)
{
struct hwmon_device *hwdev = to_hwmon_device(dev);
const struct hwmon_chip_info *chip = hwdev->chip;
- const struct hwmon_channel_info **info = chip->info;
+ const struct hwmon_channel_info * const *info = chip->info;
void *drvdata = dev_get_drvdata(dev);
int i;
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 1ea8f173cca0..4c15fae534f3 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -472,7 +472,7 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
if (etm4x_sspcicrn_present(drvdata, i))
etm4x_relaxed_write32(csa, config->ss_pe_cmp[i], TRCSSPCICRn(i));
}
- for (i = 0; i < drvdata->nr_addr_cmp; i++) {
+ for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
}
@@ -1070,25 +1070,21 @@ static bool etm4_init_iomem_access(struct etmv4_drvdata *drvdata,
struct csdev_access *csa)
{
u32 devarch = readl_relaxed(drvdata->base + TRCDEVARCH);
- u32 idr1 = readl_relaxed(drvdata->base + TRCIDR1);
/*
* All ETMs must implement TRCDEVARCH to indicate that
- * the component is an ETMv4. To support any broken
- * implementations we fall back to TRCIDR1 check, which
- * is not really reliable.
+ * the component is an ETMv4. Even though TRCIDR1 also
+ * contains the information, it is part of the "Trace"
+ * register and, over MMIO, must be accessed with the OSLK
+ * cleared. But we cannot touch the OSLK until we are
+ * sure this is an ETM. So rely only on TRCDEVARCH.
*/
- if ((devarch & ETM_DEVARCH_ID_MASK) == ETM_DEVARCH_ETMv4x_ARCH) {
- drvdata->arch = etm_devarch_to_arch(devarch);
- } else {
- pr_warn("CPU%d: ETM4x incompatible TRCDEVARCH: %x, falling back to TRCIDR1\n",
- smp_processor_id(), devarch);
-
- if (ETM_TRCIDR1_ARCH_MAJOR(idr1) != ETM_TRCIDR1_ARCH_ETMv4)
- return false;
- drvdata->arch = etm_trcidr_to_arch(idr1);
+ if ((devarch & ETM_DEVARCH_ID_MASK) != ETM_DEVARCH_ETMv4x_ARCH) {
+ pr_warn_once("TRCDEVARCH doesn't match ETMv4 architecture\n");
+ return false;
}
+ drvdata->arch = etm_devarch_to_arch(devarch);
*csa = CSDEV_ACCESS_IOMEM(drvdata->base);
return true;
}
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
index 434f4e95ee17..27c8a9901868 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -753,14 +753,12 @@
* TRCDEVARCH - CoreSight architected register
* - Bits[15:12] - Major version
* - Bits[19:16] - Minor version
- * TRCIDR1 - ETM architected register
- * - Bits[11:8] - Major version
- * - Bits[7:4] - Minor version
- * We must rely on TRCDEVARCH for the version information,
- * however we don't want to break the support for potential
- * old implementations which might not implement it. Thus
- * we fall back to TRCIDR1 if TRCDEVARCH is not implemented
- * for memory mapped components.
+ *
+ * We must rely only on TRCDEVARCH for the version information. Even though
+ * TRCIDR1 also provides the architecture version, it is a "Trace" register
+ * and as such must be accessed only with Trace power domain ON. This may
+ * not be available at probe time.
+ *
* Now to make certain decisions easier based on the version
* we use an internal representation of the version in the
* driver, as follows :
@@ -786,12 +784,6 @@ static inline u8 etm_devarch_to_arch(u32 devarch)
ETM_DEVARCH_REVISION(devarch));
}
-static inline u8 etm_trcidr_to_arch(u32 trcidr1)
-{
- return ETM_ARCH_VERSION(ETM_TRCIDR1_ARCH_MAJOR(trcidr1),
- ETM_TRCIDR1_ARCH_MINOR(trcidr1));
-}
-
enum etm_impdef_type {
ETM4_IMPDEF_HISI_CORE_COMMIT,
ETM4_IMPDEF_FEATURE_MAX,
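Per the comment above, TRCDEVARCH keeps the major version in bits [15:12] and the minor version in bits [19:16]. A small sketch of the field extraction with locally defined masks (the driver's own ETM_DEVARCH_* macros differ in detail):

    #include <stdint.h>
    #include <stdio.h>

    /* Local illustrative masks; the driver defines its own macros. */
    #define DEVARCH_MAJOR_SHIFT 12
    #define DEVARCH_MAJOR_MASK  (0xfu << DEVARCH_MAJOR_SHIFT)
    #define DEVARCH_MINOR_SHIFT 16
    #define DEVARCH_MINOR_MASK  (0xfu << DEVARCH_MINOR_SHIFT)

    int main(void)
    {
        uint32_t devarch = 0x47704a13; /* hypothetical register value */
        unsigned int major = (devarch & DEVARCH_MAJOR_MASK) >> DEVARCH_MAJOR_SHIFT;
        unsigned int minor = (devarch & DEVARCH_MINOR_MASK) >> DEVARCH_MINOR_SHIFT;

        printf("ETM architecture: v%u.%u\n", major, minor);
        return 0;
    }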
diff --git a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c
index 09af75921147..b21ffd6df927 100644
--- a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c
+++ b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c
@@ -48,9 +48,9 @@
* SR_HOLD_TIME_XK_TICKS field will indicate the number of ticks of the
* baud clock required to program 'Hold Time' at X KHz.
*/
-#define SR_HOLD_TIME_100K_TICKS 133
-#define SR_HOLD_TIME_400K_TICKS 20
-#define SR_HOLD_TIME_1000K_TICKS 11
+#define SR_HOLD_TIME_100K_TICKS 150
+#define SR_HOLD_TIME_400K_TICKS 20
+#define SR_HOLD_TIME_1000K_TICKS 12
#define SMB_CORE_COMPLETION_REG_OFF3 (SMBUS_MAST_CORE_ADDR_BASE + 0x23)
@@ -65,17 +65,17 @@
* the baud clock required to program 'fair idle delay' at X KHz. Fair idle
* delay establishes the MCTP T(IDLE_DELAY) period.
*/
-#define FAIR_BUS_IDLE_MIN_100K_TICKS 969
-#define FAIR_BUS_IDLE_MIN_400K_TICKS 157
-#define FAIR_BUS_IDLE_MIN_1000K_TICKS 157
+#define FAIR_BUS_IDLE_MIN_100K_TICKS 992
+#define FAIR_BUS_IDLE_MIN_400K_TICKS 500
+#define FAIR_BUS_IDLE_MIN_1000K_TICKS 500
/*
* FAIR_IDLE_DELAY_XK_TICKS field will indicate the number of ticks of the
* baud clock required to satisfy the fairness protocol at X KHz.
*/
-#define FAIR_IDLE_DELAY_100K_TICKS 1000
-#define FAIR_IDLE_DELAY_400K_TICKS 500
-#define FAIR_IDLE_DELAY_1000K_TICKS 500
+#define FAIR_IDLE_DELAY_100K_TICKS 963
+#define FAIR_IDLE_DELAY_400K_TICKS 156
+#define FAIR_IDLE_DELAY_1000K_TICKS 156
#define SMB_IDLE_SCALING_100K \
((FAIR_IDLE_DELAY_100K_TICKS << 16) | FAIR_BUS_IDLE_MIN_100K_TICKS)
@@ -105,7 +105,7 @@
*/
#define BUS_CLK_100K_LOW_PERIOD_TICKS 156
#define BUS_CLK_400K_LOW_PERIOD_TICKS 41
-#define BUS_CLK_1000K_LOW_PERIOD_TICKS 15
+#define BUS_CLK_1000K_LOW_PERIOD_TICKS 15
/*
* BUS_CLK_XK_HIGH_PERIOD_TICKS field defines the number of I2C Baud Clock
@@ -131,7 +131,7 @@
*/
#define CLK_SYNC_100K 4
#define CLK_SYNC_400K 4
-#define CLK_SYNC_1000K 4
+#define CLK_SYNC_1000K 4
#define SMB_CORE_DATA_TIMING_REG_OFF (SMBUS_MAST_CORE_ADDR_BASE + 0x40)
@@ -142,25 +142,25 @@
* determines the SCLK hold time following SDAT driven low during the first
* START bit in a transfer.
*/
-#define FIRST_START_HOLD_100K_TICKS 22
-#define FIRST_START_HOLD_400K_TICKS 16
-#define FIRST_START_HOLD_1000K_TICKS 6
+#define FIRST_START_HOLD_100K_TICKS 23
+#define FIRST_START_HOLD_400K_TICKS 8
+#define FIRST_START_HOLD_1000K_TICKS 12
/*
* STOP_SETUP_XK_TICKS will indicate the number of ticks of the baud clock
* required to program 'STOP_SETUP' timer at X KHz. This timer determines the
* SDAT setup time from the rising edge of SCLK for a STOP condition.
*/
-#define STOP_SETUP_100K_TICKS 157
+#define STOP_SETUP_100K_TICKS 150
#define STOP_SETUP_400K_TICKS 20
-#define STOP_SETUP_1000K_TICKS 12
+#define STOP_SETUP_1000K_TICKS 12
/*
* RESTART_SETUP_XK_TICKS will indicate the number of ticks of the baud clock
* required to program 'RESTART_SETUP' timer at X KHz. This timer determines the
* SDAT setup time from the rising edge of SCLK for a repeated START condition.
*/
-#define RESTART_SETUP_100K_TICKS 157
+#define RESTART_SETUP_100K_TICKS 156
#define RESTART_SETUP_400K_TICKS 20
#define RESTART_SETUP_1000K_TICKS 12
@@ -169,7 +169,7 @@
* required to program 'DATA_HOLD' timer at X KHz. This timer determines the
* SDAT hold time following SCLK driven low.
*/
-#define DATA_HOLD_100K_TICKS 2
+#define DATA_HOLD_100K_TICKS 12
#define DATA_HOLD_400K_TICKS 2
#define DATA_HOLD_1000K_TICKS 2
@@ -190,35 +190,35 @@
* Bus Idle Minimum time = BUS_IDLE_MIN[7:0] x Baud_Clock_Period x
* (BUS_IDLE_MIN_XK_TICKS[7] ? 4 : 1)
*/
-#define BUS_IDLE_MIN_100K_TICKS 167UL
-#define BUS_IDLE_MIN_400K_TICKS 139UL
-#define BUS_IDLE_MIN_1000K_TICKS 133UL
+#define BUS_IDLE_MIN_100K_TICKS 36UL
+#define BUS_IDLE_MIN_400K_TICKS 10UL
+#define BUS_IDLE_MIN_1000K_TICKS 4UL
/*
* CTRL_CUM_TIME_OUT_XK_TICKS defines SMBus Controller Cumulative Time-Out.
* SMBus Controller Cumulative Time-Out duration =
* CTRL_CUM_TIME_OUT_XK_TICKS[7:0] x Baud_Clock_Period x 2048
*/
-#define CTRL_CUM_TIME_OUT_100K_TICKS 159
-#define CTRL_CUM_TIME_OUT_400K_TICKS 159
-#define CTRL_CUM_TIME_OUT_1000K_TICKS 159
+#define CTRL_CUM_TIME_OUT_100K_TICKS 76
+#define CTRL_CUM_TIME_OUT_400K_TICKS 76
+#define CTRL_CUM_TIME_OUT_1000K_TICKS 76
/*
* TARGET_CUM_TIME_OUT_XK_TICKS defines SMBus Target Cumulative Time-Out duration.
* SMBus Target Cumulative Time-Out duration = TARGET_CUM_TIME_OUT_XK_TICKS[7:0] x
* Baud_Clock_Period x 4096
*/
-#define TARGET_CUM_TIME_OUT_100K_TICKS 199
-#define TARGET_CUM_TIME_OUT_400K_TICKS 199
-#define TARGET_CUM_TIME_OUT_1000K_TICKS 199
+#define TARGET_CUM_TIME_OUT_100K_TICKS 95
+#define TARGET_CUM_TIME_OUT_400K_TICKS 95
+#define TARGET_CUM_TIME_OUT_1000K_TICKS 95
/*
* CLOCK_HIGH_TIME_OUT_XK defines Clock High time out period.
* Clock High time out period = CLOCK_HIGH_TIME_OUT_XK[7:0] x Baud_Clock_Period x 8
*/
-#define CLOCK_HIGH_TIME_OUT_100K_TICKS 204
-#define CLOCK_HIGH_TIME_OUT_400K_TICKS 204
-#define CLOCK_HIGH_TIME_OUT_1000K_TICKS 204
+#define CLOCK_HIGH_TIME_OUT_100K_TICKS 97
+#define CLOCK_HIGH_TIME_OUT_400K_TICKS 97
+#define CLOCK_HIGH_TIME_OUT_1000K_TICKS 97
#define TO_SCALING_100K \
((BUS_IDLE_MIN_100K_TICKS << 24) | (CTRL_CUM_TIME_OUT_100K_TICKS << 16) | \
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index a0af027db04c..2e575856c5cd 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -342,18 +342,18 @@ static int ocores_poll_wait(struct ocores_i2c *i2c)
* ocores_isr(), we just add our polling code around it.
*
* It can run in atomic context
+ *
+ * Return: 0 on success, -ETIMEDOUT on timeout
*/
-static void ocores_process_polling(struct ocores_i2c *i2c)
+static int ocores_process_polling(struct ocores_i2c *i2c)
{
- while (1) {
- irqreturn_t ret;
- int err;
+ irqreturn_t ret;
+ int err = 0;
+ while (1) {
err = ocores_poll_wait(i2c);
- if (err) {
- i2c->state = STATE_ERROR;
+ if (err)
break; /* timeout */
- }
ret = ocores_isr(-1, i2c);
if (ret == IRQ_NONE)
@@ -364,13 +364,15 @@ static void ocores_process_polling(struct ocores_i2c *i2c)
break;
}
}
+
+ return err;
}
static int ocores_xfer_core(struct ocores_i2c *i2c,
struct i2c_msg *msgs, int num,
bool polling)
{
- int ret;
+ int ret = 0;
u8 ctrl;
ctrl = oc_getreg(i2c, OCI2C_CONTROL);
@@ -388,15 +390,16 @@ static int ocores_xfer_core(struct ocores_i2c *i2c,
oc_setreg(i2c, OCI2C_CMD, OCI2C_CMD_START);
if (polling) {
- ocores_process_polling(i2c);
+ ret = ocores_process_polling(i2c);
} else {
- ret = wait_event_timeout(i2c->wait,
- (i2c->state == STATE_ERROR) ||
- (i2c->state == STATE_DONE), HZ);
- if (ret == 0) {
- ocores_process_timeout(i2c);
- return -ETIMEDOUT;
- }
+ if (wait_event_timeout(i2c->wait,
+ (i2c->state == STATE_ERROR) ||
+ (i2c->state == STATE_DONE), HZ) == 0)
+ ret = -ETIMEDOUT;
+ }
+ if (ret) {
+ ocores_process_timeout(i2c);
+ return ret;
}
return (i2c->state == STATE_DONE) ? num : -EIO;
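The ocores rework makes the polling helper return its own status instead of poking i2c->state, so the polled and interrupt-driven paths share one timeout cleanup in the caller. A generic sketch of that shape, with illustrative names rather than the driver's:

    #include <errno.h>
    #include <stdio.h>

    /* Hypothetical transfer state. */
    struct xfer { int done; int polls_left; };

    static int poll_once(struct xfer *x)
    {
        if (x->polls_left-- <= 0)
            return -ETIMEDOUT;
        if (x->polls_left == 0) /* pretend hw finishes on the last poll */
            x->done = 1;
        return 0;
    }

    /* Return 0 on success, -ETIMEDOUT on timeout - the caller decides
     * how to clean up, as ocores_xfer_core() now does. */
    static int process_polling(struct xfer *x)
    {
        int err = 0;

        while (!x->done) {
            err = poll_once(x);
            if (err)
                break; /* timeout */
        }
        return err;
    }

    int main(void)
    {
        struct xfer x = { .done = 0, .polls_left = 3 };

        printf("result: %d\n", process_polling(&x));
        return 0;
    }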
diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c
index bce6b796e04c..545436b7dd53 100644
--- a/drivers/i2c/i2c-core-of.c
+++ b/drivers/i2c/i2c-core-of.c
@@ -178,6 +178,11 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action,
return NOTIFY_OK;
}
+ /*
+ * Clear the flag before adding the device so that fw_devlink
+ * doesn't skip adding consumers to this device.
+ */
+ rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE;
client = of_i2c_register_device(adap, rd->dn);
if (IS_ERR(client)) {
dev_err(&adap->dev, "failed to create client for '%pOF'\n",
diff --git a/drivers/iio/accel/kionix-kx022a.c b/drivers/iio/accel/kionix-kx022a.c
index f866859855cd..1c3a72380fb8 100644
--- a/drivers/iio/accel/kionix-kx022a.c
+++ b/drivers/iio/accel/kionix-kx022a.c
@@ -864,7 +864,7 @@ static irqreturn_t kx022a_trigger_handler(int irq, void *p)
if (ret < 0)
goto err_read;
- iio_push_to_buffers_with_timestamp(idev, data->buffer, pf->timestamp);
+ iio_push_to_buffers_with_timestamp(idev, data->buffer, data->timestamp);
err_read:
iio_trigger_notify_done(idev->trig);
diff --git a/drivers/iio/adc/ad7791.c b/drivers/iio/adc/ad7791.c
index fee8d129a5f0..86effe8501b4 100644
--- a/drivers/iio/adc/ad7791.c
+++ b/drivers/iio/adc/ad7791.c
@@ -253,7 +253,7 @@ static const struct ad_sigma_delta_info ad7791_sigma_delta_info = {
.has_registers = true,
.addr_shift = 4,
.read_mask = BIT(3),
- .irq_flags = IRQF_TRIGGER_LOW,
+ .irq_flags = IRQF_TRIGGER_FALLING,
};
static int ad7791_read_raw(struct iio_dev *indio_dev,
diff --git a/drivers/iio/adc/ltc2497.c b/drivers/iio/adc/ltc2497.c
index 17370c5eb6fe..ec198c6f13d6 100644
--- a/drivers/iio/adc/ltc2497.c
+++ b/drivers/iio/adc/ltc2497.c
@@ -28,7 +28,6 @@ struct ltc2497_driverdata {
struct ltc2497core_driverdata common_ddata;
struct i2c_client *client;
u32 recv_size;
- u32 sub_lsb;
/*
* DMA (thus cache coherency maintenance) may require the
* transfer buffers to live in their own cache lines.
@@ -65,10 +64,10 @@ static int ltc2497_result_and_measure(struct ltc2497core_driverdata *ddata,
* equivalent to a sign extension.
*/
if (st->recv_size == 3) {
- *val = (get_unaligned_be24(st->data.d8) >> st->sub_lsb)
+ *val = (get_unaligned_be24(st->data.d8) >> 6)
- BIT(ddata->chip_info->resolution + 1);
} else {
- *val = (be32_to_cpu(st->data.d32) >> st->sub_lsb)
+ *val = (be32_to_cpu(st->data.d32) >> 6)
- BIT(ddata->chip_info->resolution + 1);
}
@@ -122,7 +121,6 @@ static int ltc2497_probe(struct i2c_client *client)
st->common_ddata.chip_info = chip_info;
resolution = chip_info->resolution;
- st->sub_lsb = 31 - (resolution + 1);
st->recv_size = BITS_TO_BYTES(resolution) + 1;
return ltc2497core_probe(dev, indio_dev);
diff --git a/drivers/iio/adc/max11410.c b/drivers/iio/adc/max11410.c
index b74b689ee7de..f6895bc9fc4b 100644
--- a/drivers/iio/adc/max11410.c
+++ b/drivers/iio/adc/max11410.c
@@ -414,13 +414,17 @@ static int max11410_sample(struct max11410_state *st, int *sample_raw,
if (!ret)
return -ETIMEDOUT;
} else {
+ int ret2;
+
/* Wait for status register Conversion Ready flag */
- ret = read_poll_timeout(max11410_read_reg, ret,
- ret || (val & MAX11410_STATUS_CONV_READY_BIT),
+ ret = read_poll_timeout(max11410_read_reg, ret2,
+ ret2 || (val & MAX11410_STATUS_CONV_READY_BIT),
5000, MAX11410_CONVERSION_TIMEOUT_MS * 1000,
true, st, MAX11410_REG_STATUS, &val);
if (ret)
return ret;
+ if (ret2)
+ return ret2;
}
/* Read ADC Data */
@@ -851,17 +855,21 @@ static int max11410_init_vref(struct device *dev,
static int max11410_calibrate(struct max11410_state *st, u32 cal_type)
{
- int ret, val;
+ int ret, ret2, val;
ret = max11410_write_reg(st, MAX11410_REG_CAL_START, cal_type);
if (ret)
return ret;
/* Wait for status register Calibration Ready flag */
- return read_poll_timeout(max11410_read_reg, ret,
- ret || (val & MAX11410_STATUS_CAL_READY_BIT),
- 50000, MAX11410_CALIB_TIMEOUT_MS * 1000, true,
- st, MAX11410_REG_STATUS, &val);
+ ret = read_poll_timeout(max11410_read_reg, ret2,
+ ret2 || (val & MAX11410_STATUS_CAL_READY_BIT),
+ 50000, MAX11410_CALIB_TIMEOUT_MS * 1000, true,
+ st, MAX11410_REG_STATUS, &val);
+ if (ret)
+ return ret;
+
+ return ret2;
}
static int max11410_self_calibrate(struct max11410_state *st)
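read_poll_timeout() can fail in two independent ways: the polled operation itself errors (captured in its second argument), or the deadline expires (the macro's return value). The max11410 fix keeps those in separate variables so neither masks the other; a sketch of the same split with a stand-in register reader:

    #include <errno.h>
    #include <stdio.h>

    /* Stand-in for a register read that can fail on the bus. */
    static int read_reg(int *val)
    {
        static int calls;

        if (++calls == 2)
            return -EIO; /* pretend the bus errors on the 2nd read */
        *val = 0;
        return 0;
    }

    int main(void)
    {
        int ret = 0, ret2 = 0, val = 0, tries = 5;

        /* Open-coded shape of read_poll_timeout(): 'ret2' carries the
         * op's own error, 'ret' the timeout status. */
        while (tries--) {
            ret2 = read_reg(&val);
            if (ret2 || (val & 0x1))
                break;
        }
        if (tries < 0)
            ret = -ETIMEDOUT;

        if (ret)
            printf("timed out: %d\n", ret);
        else if (ret2)
            printf("read failed: %d\n", ret2);
        else
            printf("ready, val=%d\n", val);
        return 0;
    }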
diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c
index fd000345ec5c..849a697a467e 100644
--- a/drivers/iio/adc/palmas_gpadc.c
+++ b/drivers/iio/adc/palmas_gpadc.c
@@ -639,7 +639,7 @@ out:
static int palmas_gpadc_remove(struct platform_device *pdev)
{
- struct iio_dev *indio_dev = dev_to_iio_dev(&pdev->dev);
+ struct iio_dev *indio_dev = dev_get_drvdata(&pdev->dev);
struct palmas_gpadc *adc = iio_priv(indio_dev);
if (adc->wakeup1_enable || adc->wakeup2_enable)
diff --git a/drivers/iio/adc/qcom-spmi-adc5.c b/drivers/iio/adc/qcom-spmi-adc5.c
index e90c299c913a..c2d5e06f137a 100644
--- a/drivers/iio/adc/qcom-spmi-adc5.c
+++ b/drivers/iio/adc/qcom-spmi-adc5.c
@@ -628,12 +628,20 @@ static int adc5_get_fw_channel_data(struct adc5_chip *adc,
struct fwnode_handle *fwnode,
const struct adc5_data *data)
{
- const char *name = fwnode_get_name(fwnode), *channel_name;
+ const char *channel_name;
+ char *name;
u32 chan, value, varr[2];
u32 sid = 0;
int ret;
struct device *dev = adc->dev;
+ name = devm_kasprintf(dev, GFP_KERNEL, "%pfwP", fwnode);
+ if (!name)
+ return -ENOMEM;
+
+ /* Cut the address part */
+ name[strchrnul(name, '@') - name] = '\0';
+
ret = fwnode_property_read_u32(fwnode, "reg", &chan);
if (ret) {
dev_err(dev, "invalid channel number %s\n", name);
diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c
index 2cc9a9bd9db6..263fc3a1b87e 100644
--- a/drivers/iio/adc/ti-ads7950.c
+++ b/drivers/iio/adc/ti-ads7950.c
@@ -634,6 +634,7 @@ static int ti_ads7950_probe(struct spi_device *spi)
st->chip.label = dev_name(&st->spi->dev);
st->chip.parent = &st->spi->dev;
st->chip.owner = THIS_MODULE;
+ st->chip.can_sleep = true;
st->chip.base = -1;
st->chip.ngpio = TI_ADS7950_NUM_GPIOS;
st->chip.get_direction = ti_ads7950_get_direction;
diff --git a/drivers/iio/dac/cio-dac.c b/drivers/iio/dac/cio-dac.c
index 791dd999cf29..18a64f72fc18 100644
--- a/drivers/iio/dac/cio-dac.c
+++ b/drivers/iio/dac/cio-dac.c
@@ -66,8 +66,8 @@ static int cio_dac_write_raw(struct iio_dev *indio_dev,
if (mask != IIO_CHAN_INFO_RAW)
return -EINVAL;
- /* DAC can only accept up to a 16-bit value */
- if ((unsigned int)val > 65535)
+ /* DAC can only accept up to a 12-bit value */
+ if ((unsigned int)val > 4095)
return -EINVAL;
priv->chan_out_states[chan->channel] = val;
diff --git a/drivers/iio/imu/Kconfig b/drivers/iio/imu/Kconfig
index f1d7d4b5e222..c2f97629e9cd 100644
--- a/drivers/iio/imu/Kconfig
+++ b/drivers/iio/imu/Kconfig
@@ -47,6 +47,7 @@ config ADIS16480
depends on SPI
select IIO_ADIS_LIB
select IIO_ADIS_LIB_BUFFER if IIO_BUFFER
+ select CRC32
help
Say yes here to build support for Analog Devices ADIS16375, ADIS16480,
ADIS16485, ADIS16488 inertial sensors.
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index 80c78bd6bbef..a7a080bed180 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -203,24 +203,27 @@ static ssize_t iio_buffer_write(struct file *filp, const char __user *buf,
break;
}
+ if (filp->f_flags & O_NONBLOCK) {
+ if (!written)
+ ret = -EAGAIN;
+ break;
+ }
+
wait_woken(&wait, TASK_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
continue;
}
ret = rb->access->write(rb, n - written, buf + written);
- if (ret == 0 && (filp->f_flags & O_NONBLOCK))
- ret = -EAGAIN;
+ if (ret < 0)
+ break;
- if (ret > 0) {
- written += ret;
- if (written != n && !(filp->f_flags & O_NONBLOCK))
- continue;
- }
- } while (ret == 0);
+ written += ret;
+
+ } while (written != n);
remove_wait_queue(&rb->pollq, &wait);
- return ret < 0 ? ret : n;
+ return ret < 0 ? ret : written;
}
/**
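The rewritten iio_buffer_write() loop gives the path conventional short-write semantics: non-blocking callers get -EAGAIN only when nothing at all was written, and otherwise the count of bytes actually consumed. A userspace sketch of that contract against a hypothetical bounded sink:

    #include <errno.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Hypothetical sink that accepts at most 'room' bytes per call. */
    static int sink_write(size_t n, size_t *room)
    {
        size_t take = n < *room ? n : *room;

        *room -= take;
        return (int)take;
    }

    /* Returns bytes written, or -EAGAIN if nothing could be written in
     * non-blocking mode - the shape iio_buffer_write() now follows. */
    static int buffered_write(size_t n, int nonblock, size_t *room)
    {
        size_t written = 0;
        int ret = 0;

        do {
            if (*room == 0) {
                if (nonblock) {
                    if (!written)
                        ret = -EAGAIN;
                    break;
                }
                *room = 16; /* pretend the reader drained some space */
                continue;
            }
            ret = sink_write(n - written, room);
            if (ret < 0)
                break;
            written += ret;
        } while (written != n);

        return ret < 0 ? ret : (int)written;
    }

    int main(void)
    {
        size_t room = 8;

        printf("%d\n", buffered_write(20, 1, &room)); /* -> 8 (short write) */
        room = 0;
        printf("%d\n", buffered_write(20, 1, &room)); /* -> -EAGAIN */
        return 0;
    }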
diff --git a/drivers/iio/light/cm32181.c b/drivers/iio/light/cm32181.c
index b1674a5bfa36..d4a34a3bf00d 100644
--- a/drivers/iio/light/cm32181.c
+++ b/drivers/iio/light/cm32181.c
@@ -429,6 +429,14 @@ static const struct iio_info cm32181_info = {
.attrs = &cm32181_attribute_group,
};
+static void cm32181_unregister_dummy_client(void *data)
+{
+ struct i2c_client *client = data;
+
+ /* Unregister the dummy client */
+ i2c_unregister_device(client);
+}
+
static int cm32181_probe(struct i2c_client *client)
{
struct device *dev = &client->dev;
@@ -460,6 +468,10 @@ static int cm32181_probe(struct i2c_client *client)
client = i2c_acpi_new_device(dev, 1, &board_info);
if (IS_ERR(client))
return PTR_ERR(client);
+
+ ret = devm_add_action_or_reset(dev, cm32181_unregister_dummy_client, client);
+ if (ret)
+ return ret;
}
cm32181 = iio_priv(indio_dev);
diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c
index 6bdfce9747f9..5c44a36ab5b3 100644
--- a/drivers/iio/light/vcnl4000.c
+++ b/drivers/iio/light/vcnl4000.c
@@ -208,7 +208,6 @@ static int vcnl4000_init(struct vcnl4000_data *data)
data->rev = ret & 0xf;
data->al_scale = 250000;
- mutex_init(&data->vcnl4000_lock);
return data->chip_spec->set_power_state(data, true);
};
@@ -1367,6 +1366,8 @@ static int vcnl4000_probe(struct i2c_client *client)
data->id = id->driver_data;
data->chip_spec = &vcnl4000_chip_spec_cfg[data->id];
+ mutex_init(&data->vcnl4000_lock);
+
ret = data->chip_spec->init(data);
if (ret < 0)
return ret;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 308155937713..6b9563d4f23c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -624,22 +624,11 @@ static inline unsigned short cma_family(struct rdma_id_private *id_priv)
return id_priv->id.route.addr.src_addr.ss_family;
}
-static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
+static int cma_set_default_qkey(struct rdma_id_private *id_priv)
{
struct ib_sa_mcmember_rec rec;
int ret = 0;
- if (id_priv->qkey) {
- if (qkey && id_priv->qkey != qkey)
- return -EINVAL;
- return 0;
- }
-
- if (qkey) {
- id_priv->qkey = qkey;
- return 0;
- }
-
switch (id_priv->id.ps) {
case RDMA_PS_UDP:
case RDMA_PS_IB:
@@ -659,6 +648,16 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
return ret;
}
+static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
+{
+ if (!qkey ||
+ (id_priv->qkey && (id_priv->qkey != qkey)))
+ return -EINVAL;
+
+ id_priv->qkey = qkey;
+ return 0;
+}
+
static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
dev_addr->dev_type = ARPHRD_INFINIBAND;
@@ -1229,7 +1228,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
if (id_priv->id.qp_type == IB_QPT_UD) {
- ret = cma_set_qkey(id_priv, 0);
+ ret = cma_set_default_qkey(id_priv);
if (ret)
return ret;
@@ -4569,7 +4568,10 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
memset(&rep, 0, sizeof rep);
rep.status = status;
if (status == IB_SIDR_SUCCESS) {
- ret = cma_set_qkey(id_priv, qkey);
+ if (qkey)
+ ret = cma_set_qkey(id_priv, qkey);
+ else
+ ret = cma_set_default_qkey(id_priv);
if (ret)
return ret;
rep.qp_num = id_priv->qp_num;
@@ -4774,9 +4776,7 @@ static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
enum ib_gid_type gid_type;
struct net_device *ndev;
- if (!status)
- status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
- else
+ if (status)
pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
status);
@@ -4804,7 +4804,7 @@ static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
}
event->param.ud.qp_num = 0xFFFFFF;
- event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+ event->param.ud.qkey = id_priv->qkey;
out:
if (ndev)
@@ -4823,8 +4823,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
goto out;
- cma_make_mc_event(status, id_priv, multicast, &event, mc);
- ret = cma_cm_event_handler(id_priv, &event);
+ ret = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
+ if (!ret) {
+ cma_make_mc_event(status, id_priv, multicast, &event, mc);
+ ret = cma_cm_event_handler(id_priv, &event);
+ }
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
WARN_ON(ret);
@@ -4877,9 +4880,11 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
if (ret)
return ret;
- ret = cma_set_qkey(id_priv, 0);
- if (ret)
- return ret;
+ if (!id_priv->qkey) {
+ ret = cma_set_default_qkey(id_priv);
+ if (ret)
+ return ret;
+ }
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
rec.qkey = cpu_to_be32(id_priv->qkey);
@@ -4956,9 +4961,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
ib.rec.pkey = cpu_to_be16(0xffff);
- if (id_priv->id.ps == RDMA_PS_UDP)
- ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
-
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!ndev)
@@ -4984,6 +4986,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (err || !ib.rec.mtu)
return err ?: -EINVAL;
+ if (!id_priv->qkey)
+ cma_set_default_qkey(id_priv);
+
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&ib.rec.port_gid);
INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler);
@@ -5009,6 +5014,9 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
return -EINVAL;
+ if (id_priv->id.qp_type != IB_QPT_UD)
+ return -EINVAL;
+
mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc)
return -ENOMEM;
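The cma.c rework separates two qkey policies: cma_set_default_qkey() derives the value from the port space, while cma_set_qkey() only accepts an explicit, non-zero value that does not conflict with one already chosen. A sketch of the decision logic with stand-in constants:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    #define DEFAULT_UDP_QKEY 0x01234567u /* stand-in, not the real RDMA_UDP_QKEY */

    struct id_priv { uint32_t qkey; };

    static int set_default_qkey(struct id_priv *id)
    {
        id->qkey = DEFAULT_UDP_QKEY;
        return 0;
    }

    static int set_qkey(struct id_priv *id, uint32_t qkey)
    {
        /* Reject zero and conflicts with an already-chosen qkey. */
        if (!qkey || (id->qkey && id->qkey != qkey))
            return -EINVAL;
        id->qkey = qkey;
        return 0;
    }

    int main(void)
    {
        struct id_priv id = { 0 };

        set_default_qkey(&id);
        printf("explicit mismatch: %d\n", set_qkey(&id, 0xdeadbeef));
        printf("explicit match:    %d\n", set_qkey(&id, DEFAULT_UDP_QKEY));
        return 0;
    }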
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 11b1c1603aeb..b99b3cc283b6 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -532,6 +532,8 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
else
ret = device->ops.create_ah(ah, &init_attr, NULL);
if (ret) {
+ if (ah->sgid_attr)
+ rdma_put_gid_attr(ah->sgid_attr);
kfree(ah);
return ERR_PTR(ret);
}
diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c
index cabd8678b355..7bc354273d4e 100644
--- a/drivers/infiniband/hw/erdma/erdma_cq.c
+++ b/drivers/infiniband/hw/erdma/erdma_cq.c
@@ -65,7 +65,7 @@ static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = {
[ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV,
[ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ,
[ERDMA_OP_ATOMIC_CAS] = IB_WC_COMP_SWAP,
- [ERDMA_OP_ATOMIC_FAD] = IB_WC_FETCH_ADD,
+ [ERDMA_OP_ATOMIC_FAA] = IB_WC_FETCH_ADD,
};
static const struct {
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index 4c38d99c73f1..37ad1bb1917c 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -441,7 +441,7 @@ struct erdma_reg_mr_sqe {
};
/* EQ related. */
-#define ERDMA_DEFAULT_EQ_DEPTH 256
+#define ERDMA_DEFAULT_EQ_DEPTH 4096
/* ceqe */
#define ERDMA_CEQE_HDR_DB_MASK BIT_ULL(63)
@@ -491,7 +491,7 @@ enum erdma_opcode {
ERDMA_OP_LOCAL_INV = 15,
ERDMA_OP_READ_WITH_INV = 16,
ERDMA_OP_ATOMIC_CAS = 17,
- ERDMA_OP_ATOMIC_FAD = 18,
+ ERDMA_OP_ATOMIC_FAA = 18,
ERDMA_NUM_OPCODES = 19,
ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1
};
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 5dc31e5df5cb..4a29a53a6652 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -56,7 +56,7 @@ done:
static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
{
struct net_device *netdev;
- int ret = -ENODEV;
+ int ret = -EPROBE_DEFER;
/* Already bound to a net_device, so we skip. */
if (dev->netdev)
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index d088d6bef431..44923c51a01b 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -405,7 +405,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
mr->mem.mtt_nents);
- if (mr->mem.mtt_nents < ERDMA_MAX_INLINE_MTT_ENTRIES) {
+ if (mr->mem.mtt_nents <= ERDMA_MAX_INLINE_MTT_ENTRIES) {
attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0);
/* Copy SGLs to SQE content to accelerate */
memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
@@ -439,7 +439,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
cpu_to_le64(atomic_wr(send_wr)->compare_add);
} else {
wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
- ERDMA_OP_ATOMIC_FAD);
+ ERDMA_OP_ATOMIC_FAA);
atomic_sqe->fetchadd_swap_data =
cpu_to_le64(atomic_wr(send_wr)->compare_add);
}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index e0a993bc032a..131cf5f40982 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -11,7 +11,7 @@
/* RDMA Capability. */
#define ERDMA_MAX_PD (128 * 1024)
-#define ERDMA_MAX_SEND_WR 4096
+#define ERDMA_MAX_SEND_WR 8192
#define ERDMA_MAX_ORD 128
#define ERDMA_MAX_IRD 128
#define ERDMA_MAX_SGE_RD 1
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index 195aa9ea18b6..8817864154af 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -1458,13 +1458,15 @@ static int irdma_send_fin(struct irdma_cm_node *cm_node)
* irdma_find_listener - find a cm node listening on this addr-port pair
* @cm_core: cm's core
* @dst_addr: listener ip addr
+ * @ipv4: flag indicating IPv4 when true
* @dst_port: listener tcp port num
* @vlan_id: virtual LAN ID
* @listener_state: state to match with listen node's
*/
static struct irdma_cm_listener *
-irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port,
- u16 vlan_id, enum irdma_cm_listener_state listener_state)
+irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, bool ipv4,
+ u16 dst_port, u16 vlan_id,
+ enum irdma_cm_listener_state listener_state)
{
struct irdma_cm_listener *listen_node;
static const u32 ip_zero[4] = { 0, 0, 0, 0 };
@@ -1477,7 +1479,7 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port,
list_for_each_entry (listen_node, &cm_core->listen_list, list) {
memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
listen_port = listen_node->loc_port;
- if (listen_port != dst_port ||
+ if (listen_node->ipv4 != ipv4 || listen_port != dst_port ||
!(listener_state & listen_node->listener_state))
continue;
/* compare node pair, return node handle if a match */
@@ -2902,9 +2904,10 @@ irdma_make_listen_node(struct irdma_cm_core *cm_core,
unsigned long flags;
/* cannot have multiple matching listeners */
- listener = irdma_find_listener(cm_core, cm_info->loc_addr,
- cm_info->loc_port, cm_info->vlan_id,
- IRDMA_CM_LISTENER_EITHER_STATE);
+ listener =
+ irdma_find_listener(cm_core, cm_info->loc_addr, cm_info->ipv4,
+ cm_info->loc_port, cm_info->vlan_id,
+ IRDMA_CM_LISTENER_EITHER_STATE);
if (listener &&
listener->listener_state == IRDMA_CM_LISTENER_ACTIVE_STATE) {
refcount_dec(&listener->refcnt);
@@ -3153,6 +3156,7 @@ void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf)
listener = irdma_find_listener(cm_core,
cm_info.loc_addr,
+ cm_info.ipv4,
cm_info.loc_port,
cm_info.vlan_id,
IRDMA_CM_LISTENER_ACTIVE_STATE);
diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h
index 19c284975fc7..7feadb3e1eda 100644
--- a/drivers/infiniband/hw/irdma/cm.h
+++ b/drivers/infiniband/hw/irdma/cm.h
@@ -41,7 +41,7 @@
#define TCP_OPTIONS_PADDING 3
#define IRDMA_DEFAULT_RETRYS 64
-#define IRDMA_DEFAULT_RETRANS 8
+#define IRDMA_DEFAULT_RETRANS 32
#define IRDMA_DEFAULT_TTL 0x40
#define IRDMA_DEFAULT_RTT_VAR 6
#define IRDMA_DEFAULT_SS_THRESH 0x3fffffff
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index 2e1e2bad0401..43dfa4761f06 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -41,6 +41,7 @@ static enum irdma_hmc_rsrc_type iw_hmc_obj_types[] = {
IRDMA_HMC_IW_XFFL,
IRDMA_HMC_IW_Q1,
IRDMA_HMC_IW_Q1FL,
+ IRDMA_HMC_IW_PBLE,
IRDMA_HMC_IW_TIMER,
IRDMA_HMC_IW_FSIMC,
IRDMA_HMC_IW_FSIAV,
@@ -827,6 +828,8 @@ static int irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged,
info.entry_type = rf->sd_type;
for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
+ if (iw_hmc_obj_types[i] == IRDMA_HMC_IW_PBLE)
+ continue;
if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt) {
info.rsrc_type = iw_hmc_obj_types[i];
info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt;
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index 445e69e86409..7887230c867b 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -2595,7 +2595,10 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
/* remove the SQ WR by moving SQ tail*/
IRDMA_RING_SET_TAIL(*sq_ring,
sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
-
+ if (cmpl->cpi.op_type == IRDMAQP_OP_NOP) {
+ kfree(cmpl);
+ continue;
+ }
ibdev_dbg(iwqp->iwscq->ibcq.device,
"DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n",
__func__, cmpl->cpi.wr_id, qp->qp_id);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 5b988db66b8f..5d45de223c43 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -442,6 +442,10 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
*active_width = IB_WIDTH_2X;
*active_speed = IB_SPEED_NDR;
break;
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_8):
+ *active_width = IB_WIDTH_8X;
+ *active_speed = IB_SPEED_HDR;
+ break;
case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4):
*active_width = IB_WIDTH_4X;
*active_speed = IB_SPEED_NDR;
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index f642ec8e92dd..29131f1a2f06 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -781,9 +781,6 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d
input_report_key(dev, BTN_C, data[8]);
input_report_key(dev, BTN_Z, data[9]);
- /* Profile button has a value of 0-3, so it is reported as an axis */
- if (xpad->mapping & MAP_PROFILE_BUTTON)
- input_report_abs(dev, ABS_PROFILE, data[34]);
input_sync(dev);
}
@@ -1061,6 +1058,10 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char
(__u16) le16_to_cpup((__le16 *)(data + 8)));
}
+ /* Profile button has a value of 0-3, so it is reported as an axis */
+ if (xpad->mapping & MAP_PROFILE_BUTTON)
+ input_report_abs(dev, ABS_PROFILE, data[34]);
+
/* paddle handling */
/* based on SDL's SDL_hidapi_xboxone.c */
if (xpad->mapping & MAP_PADDLES) {
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 989228b5a0a4..e2c11d9f3868 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -852,8 +852,8 @@ static void alps_process_packet_v6(struct psmouse *psmouse)
x = y = z = 0;
/* Divide 4 since trackpoint's speed is too fast */
- input_report_rel(dev2, REL_X, (char)x / 4);
- input_report_rel(dev2, REL_Y, -((char)y / 4));
+ input_report_rel(dev2, REL_X, (s8)x / 4);
+ input_report_rel(dev2, REL_Y, -((s8)y / 4));
psmouse_report_standard_buttons(dev2, packet[3]);
@@ -1104,8 +1104,8 @@ static void alps_process_trackstick_packet_v7(struct psmouse *psmouse)
((packet[3] & 0x20) << 1);
z = (packet[5] & 0x3f) | ((packet[3] & 0x80) >> 1);
- input_report_rel(dev2, REL_X, (char)x);
- input_report_rel(dev2, REL_Y, -((char)y));
+ input_report_rel(dev2, REL_X, (s8)x);
+ input_report_rel(dev2, REL_Y, -((s8)y));
input_report_abs(dev2, ABS_PRESSURE, z);
psmouse_report_standard_buttons(dev2, packet[1]);
@@ -2294,20 +2294,20 @@ static int alps_get_v3_v7_resolution(struct psmouse *psmouse, int reg_pitch)
if (reg < 0)
return reg;
- x_pitch = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */
+ x_pitch = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */
x_pitch = 50 + 2 * x_pitch; /* In 0.1 mm units */
- y_pitch = (char)reg >> 4; /* sign extend upper 4 bits */
+ y_pitch = (s8)reg >> 4; /* sign extend upper 4 bits */
y_pitch = 36 + 2 * y_pitch; /* In 0.1 mm units */
reg = alps_command_mode_read_reg(psmouse, reg_pitch + 1);
if (reg < 0)
return reg;
- x_electrode = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */
+ x_electrode = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */
x_electrode = 17 + x_electrode;
- y_electrode = (char)reg >> 4; /* sign extend upper 4 bits */
+ y_electrode = (s8)reg >> 4; /* sign extend upper 4 bits */
y_electrode = 13 + y_electrode;
x_phys = x_pitch * (x_electrode - 1); /* In 0.1 mm units */
diff --git a/drivers/input/mouse/focaltech.c b/drivers/input/mouse/focaltech.c
index 6fd5fff0cbff..c74b99077d16 100644
--- a/drivers/input/mouse/focaltech.c
+++ b/drivers/input/mouse/focaltech.c
@@ -202,8 +202,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse,
state->pressed = packet[0] >> 7;
finger1 = ((packet[0] >> 4) & 0x7) - 1;
if (finger1 < FOC_MAX_FINGERS) {
- state->fingers[finger1].x += (char)packet[1];
- state->fingers[finger1].y += (char)packet[2];
+ state->fingers[finger1].x += (s8)packet[1];
+ state->fingers[finger1].y += (s8)packet[2];
} else {
psmouse_err(psmouse, "First finger in rel packet invalid: %d\n",
finger1);
@@ -218,8 +218,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse,
*/
finger2 = ((packet[3] >> 4) & 0x7) - 1;
if (finger2 < FOC_MAX_FINGERS) {
- state->fingers[finger2].x += (char)packet[4];
- state->fingers[finger2].y += (char)packet[5];
+ state->fingers[finger2].x += (s8)packet[4];
+ state->fingers[finger2].y += (s8)packet[5];
}
}
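The alps and focaltech changes fix the same portability bug: plain char has implementation-defined signedness (it is unsigned on arm64, for example), so casting a raw byte through char to sign-extend relative motion is not reliable; s8 is. A short sketch of the difference using int8_t, the userspace spelling of s8:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t raw = 0xf0; /* hypothetical relative-motion byte, i.e. -16 */

        /* Portable: int8_t sign-extends regardless of target. */
        int dx_s8 = (int8_t)raw;

        /* Not portable: if char is unsigned, this yields 240, not -16. */
        int dx_char = (char)raw;

        printf("s8 cast:   %d\n", dx_s8);
        printf("char cast: %d (on unsigned-char targets: 240)\n", dx_char);
        return 0;
    }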
diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h
index efc61736099b..028e45bd050b 100644
--- a/drivers/input/serio/i8042-acpipnpio.h
+++ b/drivers/input/serio/i8042-acpipnpio.h
@@ -611,6 +611,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.driver_data = (void *)(SERIO_QUIRK_NOMUX)
},
{
+ /* Fujitsu Lifebook A574/H */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "FMVA0501PZ"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
/* Gigabyte M912 */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
@@ -1117,6 +1125,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
},
{
+ /*
+ * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes
+ * the keyboard very laggy for ~5 seconds after boot and
+ * sometimes also after resume.
+ * However both are required for the keyboard to not fail
+ * completely sometimes after boot or resume.
+ */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "N150CU"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"),
},
@@ -1124,6 +1146,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
},
{
+ /*
+ * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes
+ * the keyboard very laggy for ~5 seconds after boot and
+ * sometimes also after resume.
+ * However both are required for the keyboard to not fail
+ * completely sometimes after boot or resume.
+ */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "NHxxRZQ"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
},
diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
index b348172f19c3..d77f116680a0 100644
--- a/drivers/input/touchscreen/goodix.c
+++ b/drivers/input/touchscreen/goodix.c
@@ -124,10 +124,18 @@ static const unsigned long goodix_irq_flags[] = {
static const struct dmi_system_id nine_bytes_report[] = {
#if defined(CONFIG_DMI) && defined(CONFIG_X86)
{
- .ident = "Lenovo YogaBook",
- /* YB1-X91L/F and YB1-X90L/F */
+ /* Lenovo Yoga Book X90F / X90L */
.matches = {
- DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9")
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"),
+ }
+ },
+ {
+ /* Lenovo Yoga Book X91F / X91L */
+ .matches = {
/* Non-exact match, to match both F and L versions */
+ DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"),
}
},
#endif
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 483aaaeb6dae..1abd187c6075 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1415,23 +1415,26 @@ static struct iommu_device *exynos_iommu_probe_device(struct device *dev)
return &data->iommu;
}
-static void exynos_iommu_release_device(struct device *dev)
+static void exynos_iommu_set_platform_dma(struct device *dev)
{
struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
- struct sysmmu_drvdata *data;
if (owner->domain) {
struct iommu_group *group = iommu_group_get(dev);
if (group) {
-#ifndef CONFIG_ARM
- WARN_ON(owner->domain !=
- iommu_group_default_domain(group));
-#endif
exynos_iommu_detach_device(owner->domain, dev);
iommu_group_put(group);
}
}
+}
+
+static void exynos_iommu_release_device(struct device *dev)
+{
+ struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
+ struct sysmmu_drvdata *data;
+
+ exynos_iommu_set_platform_dma(dev);
list_for_each_entry(data, &owner->controllers, owner_node)
device_link_del(data->link);
@@ -1479,7 +1482,7 @@ static const struct iommu_ops exynos_iommu_ops = {
.domain_alloc = exynos_iommu_domain_alloc,
.device_group = generic_device_group,
#ifdef CONFIG_ARM
- .set_platform_dma_ops = exynos_iommu_release_device,
+ .set_platform_dma_ops = exynos_iommu_set_platform_dma,
#endif
.probe_device = exynos_iommu_probe_device,
.release_device = exynos_iommu_release_device,
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 6acfe879589c..23828d189c2a 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1071,7 +1071,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
}
err = -EINVAL;
- if (cap_sagaw(iommu->cap) == 0) {
+ if (!cap_sagaw(iommu->cap) &&
+ (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) {
pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
iommu->name);
drhd->ignored = 1;
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index d6df3b865812..694ab9b7d3e9 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -641,6 +641,8 @@ struct iommu_pmu {
DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX);
struct perf_event *event_list[IOMMU_PMU_IDX_MAX];
unsigned char irq_name[16];
+ struct hlist_node cpuhp_node;
+ int cpu;
};
#define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED)
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 6d01fa078c36..df9e261af0b5 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -311,14 +311,12 @@ static int set_ioapic_sid(struct irte *irte, int apic)
if (!irte)
return -1;
- down_read(&dmar_global_lock);
for (i = 0; i < MAX_IO_APICS; i++) {
if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) {
sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
break;
}
}
- up_read(&dmar_global_lock);
if (sid == 0) {
pr_warn("Failed to set source-id of IOAPIC (%d)\n", apic);
@@ -338,14 +336,12 @@ static int set_hpet_sid(struct irte *irte, u8 id)
if (!irte)
return -1;
- down_read(&dmar_global_lock);
for (i = 0; i < MAX_HPET_TBS; i++) {
if (ir_hpet[i].iommu && ir_hpet[i].id == id) {
sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
break;
}
}
- up_read(&dmar_global_lock);
if (sid == 0) {
pr_warn("Failed to set source-id of HPET block (%d)\n", id);
@@ -1339,9 +1335,7 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain,
if (!data)
goto out_free_parent;
- down_read(&dmar_global_lock);
index = alloc_irte(iommu, &data->irq_2_iommu, nr_irqs);
- up_read(&dmar_global_lock);
if (index < 0) {
pr_warn("Failed to allocate IRTE\n");
kfree(data);
diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c
index e17d9743a0d8..cf43e798eca4 100644
--- a/drivers/iommu/intel/perfmon.c
+++ b/drivers/iommu/intel/perfmon.c
@@ -773,19 +773,34 @@ static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu)
iommu->perf_irq = 0;
}
-static int iommu_pmu_cpu_online(unsigned int cpu)
+static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
+ struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
+
if (cpumask_empty(&iommu_pmu_cpu_mask))
cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);
+ if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask))
+ iommu_pmu->cpu = cpu;
+
return 0;
}
-static int iommu_pmu_cpu_offline(unsigned int cpu)
+static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- int target;
+ struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
+ int target = cpumask_first(&iommu_pmu_cpu_mask);
+
+ /*
+ * The iommu_pmu_cpu_mask has already been updated when offlining
+ * the CPU for the first iommu_pmu. Migrate the remaining iommu_pmu
+ * instances to the new target.
+ */
+ if (target < nr_cpu_ids && target != iommu_pmu->cpu) {
+ perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
+ iommu_pmu->cpu = target;
+ return 0;
+ }
if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
return 0;
@@ -795,45 +810,50 @@ static int iommu_pmu_cpu_offline(unsigned int cpu)
if (target < nr_cpu_ids)
cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
else
- target = -1;
+ return 0;
- rcu_read_lock();
-
- for_each_iommu(iommu, drhd) {
- if (!iommu->pmu)
- continue;
- perf_pmu_migrate_context(&iommu->pmu->pmu, cpu, target);
- }
- rcu_read_unlock();
+ perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
+ iommu_pmu->cpu = target;
return 0;
}
static int nr_iommu_pmu;
+static enum cpuhp_state iommu_cpuhp_slot;
static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
{
int ret;
- if (nr_iommu_pmu++)
- return 0;
+ if (!nr_iommu_pmu) {
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "driver/iommu/intel/perfmon:online",
+ iommu_pmu_cpu_online,
+ iommu_pmu_cpu_offline);
+ if (ret < 0)
+ return ret;
+ iommu_cpuhp_slot = ret;
+ }
- ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
- "driver/iommu/intel/perfmon:online",
- iommu_pmu_cpu_online,
- iommu_pmu_cpu_offline);
- if (ret)
- nr_iommu_pmu = 0;
+ ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
+ if (ret) {
+ if (!nr_iommu_pmu)
+ cpuhp_remove_multi_state(iommu_cpuhp_slot);
+ return ret;
+ }
+ nr_iommu_pmu++;
- return ret;
+ return 0;
}
static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
{
+ cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
+
if (--nr_iommu_pmu)
return;
- cpuhp_remove_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE);
+ cpuhp_remove_multi_state(iommu_cpuhp_slot);
}
void iommu_pmu_register(struct intel_iommu *iommu)
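Switching perfmon to cpuhp_setup_state_multi() registers the callbacks once and then attaches a per-PMU hlist node, so each instance gets its own online/offline notification instead of one global IOMMU walk under RCU. A kernel-style sketch of the pattern (not a buildable module; struct my_pmu and the slot bookkeeping are hypothetical):

    #include <linux/cpuhotplug.h>
    #include <linux/list.h>

    struct my_pmu {
        struct hlist_node cpuhp_node;
        int cpu;
    };

    static enum cpuhp_state my_cpuhp_slot;
    static int nr_instances;

    static int my_cpu_online(unsigned int cpu, struct hlist_node *node)
    {
        struct my_pmu *pmu = hlist_entry_safe(node, struct my_pmu, cpuhp_node);

        /* Called once per registered instance, not once globally. */
        if (pmu->cpu < 0)
            pmu->cpu = cpu;
        return 0;
    }

    static int my_cpu_offline(unsigned int cpu, struct hlist_node *node)
    {
        struct my_pmu *pmu = hlist_entry_safe(node, struct my_pmu, cpuhp_node);

        if (pmu->cpu == cpu)
            pmu->cpu = -1; /* a real driver would migrate contexts here */
        return 0;
    }

    static int my_pmu_cpuhp_setup(struct my_pmu *pmu)
    {
        int ret;

        if (!nr_instances) {
            /* Register the callbacks once; a dynamic slot comes back. */
            ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
                                          "driver/example:online",
                                          my_cpu_online, my_cpu_offline);
            if (ret < 0)
                return ret;
            my_cpuhp_slot = ret;
        }

        /* Attach this instance; callbacks now fire for its node too. */
        ret = cpuhp_state_add_instance(my_cpuhp_slot, &pmu->cpuhp_node);
        if (ret) {
            if (!nr_instances)
                cpuhp_remove_multi_state(my_cpuhp_slot);
            return ret;
        }
        nr_instances++;
        return 0;
    }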
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index f8d92c9bb65b..3c47846cc5ef 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -294,9 +294,9 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns)
batch->npfns[batch->end - 1] < keep_pfns);
batch->total_pfns = keep_pfns;
- batch->npfns[0] = keep_pfns;
batch->pfns[0] = batch->pfns[batch->end - 1] +
(batch->npfns[batch->end - 1] - keep_pfns);
+ batch->npfns[0] = keep_pfns;
batch->end = 0;
}
@@ -1142,6 +1142,7 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
bool writable)
{
struct iopt_pages *pages;
+ unsigned long end;
/*
* The iommu API uses size_t as the length, and protects the DIV_ROUND_UP
@@ -1150,6 +1151,9 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
if (length > SIZE_MAX - PAGE_SIZE || length == 0)
return ERR_PTR(-EINVAL);
+ if (check_add_overflow((unsigned long)uptr, length, &end))
+ return ERR_PTR(-EOVERFLOW);
+
pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT);
if (!pages)
return ERR_PTR(-ENOMEM);
@@ -1203,13 +1207,21 @@ iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area,
unsigned long start =
max(start_index, *unmapped_end_index);
+ if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
+ batch->total_pfns)
+ WARN_ON(*unmapped_end_index -
+ batch->total_pfns !=
+ start_index);
batch_from_domain(batch, domain, area, start,
last_index);
- batch_last_index = start + batch->total_pfns - 1;
+ batch_last_index = start_index + batch->total_pfns - 1;
} else {
batch_last_index = last_index;
}
+ if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+ WARN_ON(batch_last_index > real_last_index);
+
/*
* unmaps must always 'cut' at a place where the pfns are not
* contiguous to pair with the maps that always install
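The iommufd change validates uptr + length up front with check_add_overflow() rather than trusting the sum not to wrap. A userspace sketch using the GCC/Clang builtin that the kernel macro expands to:

    #include <errno.h>
    #include <stdio.h>

    static int validate_range(unsigned long uptr, unsigned long length)
    {
        unsigned long end;

        /* __builtin_add_overflow() is what check_add_overflow() uses
         * on GCC/Clang: it returns true if the sum wrapped. */
        if (__builtin_add_overflow(uptr, length, &end))
            return -EOVERFLOW;
        return 0;
    }

    int main(void)
    {
        printf("ok:   %d\n", validate_range(0x1000, 0x1000));
        printf("wrap: %d\n", validate_range(~0ul - 10, 4096));
        return 0;
    }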
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 39e49e5d7182..13321dbb5fbc 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6260,7 +6260,6 @@ static void __md_stop(struct mddev *mddev)
module_put(pers->owner);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- percpu_ref_exit(&mddev->writes_pending);
percpu_ref_exit(&mddev->active_io);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
@@ -6273,6 +6272,7 @@ void md_stop(struct mddev *mddev)
*/
__md_stop_writes(mddev);
__md_stop(mddev);
+ percpu_ref_exit(&mddev->writes_pending);
}
EXPORT_SYMBOL_GPL(md_stop);
@@ -7843,6 +7843,7 @@ static void md_free_disk(struct gendisk *disk)
{
struct mddev *mddev = disk->private_data;
+ percpu_ref_exit(&mddev->writes_pending);
mddev_free(mddev);
}
diff --git a/drivers/media/i2c/imx290.c b/drivers/media/i2c/imx290.c
index 49d6c8bdec41..48ae2e0adf9e 100644
--- a/drivers/media/i2c/imx290.c
+++ b/drivers/media/i2c/imx290.c
@@ -1098,7 +1098,7 @@ static int imx290_runtime_suspend(struct device *dev)
}
static const struct dev_pm_ops imx290_pm_ops = {
- SET_RUNTIME_PM_OPS(imx290_runtime_suspend, imx290_runtime_resume, NULL)
+ RUNTIME_PM_OPS(imx290_runtime_suspend, imx290_runtime_resume, NULL)
};
/* ----------------------------------------------------------------------------
@@ -1362,8 +1362,8 @@ static struct i2c_driver imx290_i2c_driver = {
.remove = imx290_remove,
.driver = {
.name = "imx290",
- .pm = &imx290_pm_ops,
- .of_match_table = of_match_ptr(imx290_of_match),
+ .pm = pm_ptr(&imx290_pm_ops),
+ .of_match_table = imx290_of_match,
},
};
diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c
index 61ff20a7e935..cfb11c551167 100644
--- a/drivers/media/platform/qcom/venus/firmware.c
+++ b/drivers/media/platform/qcom/venus/firmware.c
@@ -38,8 +38,8 @@ static void venus_reset_cpu(struct venus_core *core)
writel(fw_size, wrapper_base + WRAPPER_FW_END_ADDR);
writel(0, wrapper_base + WRAPPER_CPA_START_ADDR);
writel(fw_size, wrapper_base + WRAPPER_CPA_END_ADDR);
- writel(0, wrapper_base + WRAPPER_NONPIX_START_ADDR);
- writel(0, wrapper_base + WRAPPER_NONPIX_END_ADDR);
+ writel(fw_size, wrapper_base + WRAPPER_NONPIX_START_ADDR);
+ writel(fw_size, wrapper_base + WRAPPER_NONPIX_END_ADDR);
if (IS_V6(core)) {
/* Bring XTSS out of reset */
diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
index bf7667845459..bbfaf6536903 100644
--- a/drivers/memstick/core/memstick.c
+++ b/drivers/memstick/core/memstick.c
@@ -410,6 +410,7 @@ static struct memstick_dev *memstick_alloc_card(struct memstick_host *host)
return card;
err_out:
host->card = old_card;
+ kfree_const(card->dev.kobj.name);
kfree(card);
return NULL;
}
@@ -468,8 +469,10 @@ static void memstick_check(struct work_struct *work)
put_device(&card->dev);
host->card = NULL;
}
- } else
+ } else {
+ kfree_const(card->dev.kobj.name);
kfree(card);
+ }
}
out_power_off:
diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
index 89953093e20c..672d37ea98d0 100644
--- a/drivers/mmc/host/sdhci_am654.c
+++ b/drivers/mmc/host/sdhci_am654.c
@@ -351,8 +351,6 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg)
*/
case MMC_TIMING_SD_HS:
case MMC_TIMING_MMC_HS:
- case MMC_TIMING_UHS_SDR12:
- case MMC_TIMING_UHS_SDR25:
val &= ~SDHCI_CTRL_HISPD;
}
}
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 1e94e7d10b8b..a0a1194dc1d9 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -153,7 +153,7 @@ static int do_cached_write (struct mtdblk_dev *mtdblk, unsigned long pos,
mtdblk->cache_state = STATE_EMPTY;
ret = mtd_read(mtd, sect_start, sect_size,
&retlen, mtdblk->cache_data);
- if (ret)
+ if (ret && !mtd_is_bitflip(ret))
return ret;
if (retlen != sect_size)
return -EIO;
@@ -188,8 +188,12 @@ static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos,
pr_debug("mtdblock: read on \"%s\" at 0x%lx, size 0x%x\n",
mtd->name, pos, len);
- if (!sect_size)
- return mtd_read(mtd, pos, len, &retlen, buf);
+ if (!sect_size) {
+ ret = mtd_read(mtd, pos, len, &retlen, buf);
+ if (ret && !mtd_is_bitflip(ret))
+ return ret;
+ return 0;
+ }
while (len > 0) {
unsigned long sect_start = (pos/sect_size)*sect_size;
@@ -209,7 +213,7 @@ static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos,
memcpy (buf, mtdblk->cache_data + offset, size);
} else {
ret = mtd_read(mtd, pos, size, &retlen, buf);
- if (ret)
+ if (ret && !mtd_is_bitflip(ret))
return ret;
if (retlen != size)
return -EIO;
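
The pattern repeated in the hunks above treats -EUCLEAN specially: mtd_read() returns it when ECC corrected bitflips, so the data is valid and the block layer must not see the read as a failure. A small userspace sketch of the filtering, with mtd_is_bitflip() re-stated from its one-line kernel definition and errno values standing in for the kernel's:

	/* Userspace sketch of the error policy adopted above: -EUCLEAN means
	 * corrected bitflips with valid data, so only other nonzero codes
	 * are real failures. */
	#include <errno.h>
	#include <stdio.h>

	#ifndef EUCLEAN
	#define EUCLEAN 117	/* Linux value; not every libc defines it */
	#endif

	static int mtd_is_bitflip(int err)
	{
		return err == -EUCLEAN;	/* matches the kernel helper */
	}

	static int filter_read_err(int ret)
	{
		if (ret && !mtd_is_bitflip(ret))
			return ret;	/* hard error: propagate */
		return 0;		/* clean read or corrected bitflip */
	}

	int main(void)
	{
		printf("%d\n", filter_read_err(0));		/* 0  */
		printf("%d\n", filter_read_err(-EUCLEAN));	/* 0  */
		printf("%d\n", filter_read_err(-EBADMSG));	/* -74 on Linux */
		return 0;
	}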
diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c
index a28574c00900..074e14225c06 100644
--- a/drivers/mtd/nand/raw/meson_nand.c
+++ b/drivers/mtd/nand/raw/meson_nand.c
@@ -280,7 +280,7 @@ static void meson_nfc_cmd_access(struct nand_chip *nand, int raw, bool dir,
if (raw) {
len = mtd->writesize + mtd->oobsize;
- cmd = (len & GENMASK(5, 0)) | scrambler | DMA_DIR(dir);
+ cmd = (len & GENMASK(13, 0)) | scrambler | DMA_DIR(dir);
writel(cmd, nfc->reg_base + NFC_REG_CMD);
return;
}
@@ -544,7 +544,7 @@ static int meson_nfc_read_buf(struct nand_chip *nand, u8 *buf, int len)
if (ret)
goto out;
- cmd = NFC_CMD_N2M | (len & GENMASK(5, 0));
+ cmd = NFC_CMD_N2M | (len & GENMASK(13, 0));
writel(cmd, nfc->reg_base + NFC_REG_CMD);
meson_nfc_drain_cmd(nfc);
@@ -568,7 +568,7 @@ static int meson_nfc_write_buf(struct nand_chip *nand, u8 *buf, int len)
if (ret)
return ret;
- cmd = NFC_CMD_M2N | (len & GENMASK(5, 0));
+ cmd = NFC_CMD_M2N | (len & GENMASK(13, 0));
writel(cmd, nfc->reg_base + NFC_REG_CMD);
meson_nfc_drain_cmd(nfc);
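
The mask widening above matters because a raw access covers writesize + oobsize bytes: with a common 2048+64 geometry that is 2112 bytes, which does not fit in the 6 bits GENMASK(5, 0) keeps, so the old code programmed a truncated length into the command register. A userspace sketch of the arithmetic, with a simplified 32-bit-only GENMASK() re-implementation:

	/* Userspace sketch of the truncation the wider mask fixes. */
	#include <stdio.h>

	#define GENMASK(h, l)	((~0u << (l)) & (~0u >> (31 - (h))))

	int main(void)
	{
		unsigned int len = 2048 + 64;	/* writesize + oobsize */

		printf("old mask: %u\n", len & GENMASK(5, 0));	/* 0: length lost  */
		printf("new mask: %u\n", len & GENMASK(13, 0));	/* 2112: preserved */
		return 0;
	}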
diff --git a/drivers/mtd/nand/raw/stm32_fmc2_nand.c b/drivers/mtd/nand/raw/stm32_fmc2_nand.c
index 5d627048c420..9e74bcd90aaa 100644
--- a/drivers/mtd/nand/raw/stm32_fmc2_nand.c
+++ b/drivers/mtd/nand/raw/stm32_fmc2_nand.c
@@ -1531,6 +1531,9 @@ static int stm32_fmc2_nfc_setup_interface(struct nand_chip *chip, int chipnr,
if (IS_ERR(sdrt))
return PTR_ERR(sdrt);
+ if (conf->timings.mode > 3)
+ return -EOPNOTSUPP;
+
if (chipnr == NAND_DATA_IFACE_CHECK_ONLY)
return 0;
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 0a78045ca1d9..522d375aeccf 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -3343,7 +3343,19 @@ static struct spi_mem_driver spi_nor_driver = {
.remove = spi_nor_remove,
.shutdown = spi_nor_shutdown,
};
-module_spi_mem_driver(spi_nor_driver);
+
+static int __init spi_nor_module_init(void)
+{
+ return spi_mem_driver_register(&spi_nor_driver);
+}
+module_init(spi_nor_module_init);
+
+static void __exit spi_nor_module_exit(void)
+{
+ spi_mem_driver_unregister(&spi_nor_driver);
+ spi_nor_debugfs_shutdown();
+}
+module_exit(spi_nor_module_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Huang Shijie <shijie8@gmail.com>");
diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h
index 25423225c29d..e0cc42a4a0c8 100644
--- a/drivers/mtd/spi-nor/core.h
+++ b/drivers/mtd/spi-nor/core.h
@@ -711,8 +711,10 @@ static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd)
#ifdef CONFIG_DEBUG_FS
void spi_nor_debugfs_register(struct spi_nor *nor);
+void spi_nor_debugfs_shutdown(void);
#else
static inline void spi_nor_debugfs_register(struct spi_nor *nor) {}
+static inline void spi_nor_debugfs_shutdown(void) {}
#endif
#endif /* __LINUX_MTD_SPI_NOR_INTERNAL_H */
diff --git a/drivers/mtd/spi-nor/debugfs.c b/drivers/mtd/spi-nor/debugfs.c
index 845b78c7ecc7..fc7ad203df12 100644
--- a/drivers/mtd/spi-nor/debugfs.c
+++ b/drivers/mtd/spi-nor/debugfs.c
@@ -226,13 +226,13 @@ static void spi_nor_debugfs_unregister(void *data)
nor->debugfs_root = NULL;
}
+static struct dentry *rootdir;
+
void spi_nor_debugfs_register(struct spi_nor *nor)
{
- struct dentry *rootdir, *d;
+ struct dentry *d;
int ret;
- /* Create rootdir once. Will never be deleted again. */
- rootdir = debugfs_lookup(SPI_NOR_DEBUGFS_ROOT, NULL);
if (!rootdir)
rootdir = debugfs_create_dir(SPI_NOR_DEBUGFS_ROOT, NULL);
@@ -247,3 +247,8 @@ void spi_nor_debugfs_register(struct spi_nor *nor)
debugfs_create_file("capabilities", 0444, d, nor,
&spi_nor_capabilities_fops);
}
+
+void spi_nor_debugfs_shutdown(void)
+{
+ debugfs_remove(rootdir);
+}
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 0904eb40c95f..ad025b2ee417 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -666,12 +666,6 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024)
ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size);
ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size);
- if (ubi->vid_hdr_offset && ((ubi->vid_hdr_offset + UBI_VID_HDR_SIZE) >
- ubi->vid_hdr_alsize)) {
- ubi_err(ubi, "VID header offset %d too large.", ubi->vid_hdr_offset);
- return -EINVAL;
- }
-
dbg_gen("min_io_size %d", ubi->min_io_size);
dbg_gen("max_write_size %d", ubi->max_write_size);
dbg_gen("hdrs_min_io_size %d", ubi->hdrs_min_io_size);
@@ -689,6 +683,21 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024)
ubi->vid_hdr_aloffset;
}
+ /*
+ * Memory allocation for VID header is ubi->vid_hdr_alsize
+ * which is described in comments in io.c.
+	 * Make sure VID header shift + UBI_VID_HDR_SIZE does not exceed
+	 * ubi->vid_hdr_alsize, so that all VID header operations
+	 * won't access memory out of bounds.
+ */
+ if ((ubi->vid_hdr_shift + UBI_VID_HDR_SIZE) > ubi->vid_hdr_alsize) {
+ ubi_err(ubi, "Invalid VID header offset %d, VID header shift(%d)"
+ " + VID header size(%zu) > VID header aligned size(%d).",
+ ubi->vid_hdr_offset, ubi->vid_hdr_shift,
+ UBI_VID_HDR_SIZE, ubi->vid_hdr_alsize);
+ return -EINVAL;
+ }
+
/* Similar for the data offset */
ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE;
ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size);
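
To see what the relocated check catches: the VID header buffer is vid_hdr_alsize bytes (UBI_VID_HDR_SIZE aligned up to hdrs_min_io_size), while the header itself is read at vid_hdr_shift bytes into that buffer, so an offset near the end of an aligned block leaves too little room. A userspace sketch with a hypothetical offset (UBI_VID_HDR_SIZE is 64 bytes in the kernel layout, and ALIGN() mirrors the kernel's power-of-two macro):

	#include <stdio.h>

	#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))
	#define UBI_VID_HDR_SIZE	64

	int main(void)
	{
		int hdrs_min_io_size = 512;
		int vid_hdr_offset = 2000;	/* hypothetical user-supplied value */
		int vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, hdrs_min_io_size);
		int vid_hdr_aloffset = vid_hdr_offset & ~(hdrs_min_io_size - 1);
		int vid_hdr_shift = vid_hdr_offset - vid_hdr_aloffset;

		/* The buffer is vid_hdr_alsize bytes; the header is read at
		 * vid_hdr_shift into it, so the sum must fit. */
		if (vid_hdr_shift + UBI_VID_HDR_SIZE > vid_hdr_alsize)
			printf("invalid: shift %d + %d > alsize %d\n",
			       vid_hdr_shift, UBI_VID_HDR_SIZE, vid_hdr_alsize);
		else
			printf("ok: shift %d fits in alsize %d\n",
			       vid_hdr_shift, vid_hdr_alsize);
		return 0;
	}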
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 40f39e5d6dfc..26a214f016c1 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -575,7 +575,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
* @vol_id: the volume ID that last used this PEB
* @lnum: the last used logical eraseblock number for the PEB
* @torture: if the physical eraseblock has to be tortured
- * @nested: denotes whether the work_sem is already held in read mode
+ * @nested: denotes whether the work_sem is already held
*
* This function returns zero in case of success and a %-ENOMEM in case of
* failure.
@@ -1131,7 +1131,7 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
int err1;
/* Re-schedule the LEB for erasure */
- err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false);
+ err1 = schedule_erase(ubi, e, vol_id, lnum, 0, true);
if (err1) {
spin_lock(&ubi->wl_lock);
wl_entry_destroy(ubi, e);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 236e5219c811..710548dbd0c1 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1777,14 +1777,15 @@ void bond_lower_state_changed(struct slave *slave)
/* The bonding driver uses ether_setup() to convert a master bond device
* to ARPHRD_ETHER, that resets the target netdevice's flags so we always
- * have to restore the IFF_MASTER flag, and only restore IFF_SLAVE if it was set
+ * have to restore the IFF_MASTER flag, and only restore IFF_SLAVE and IFF_UP
+ * if they were set
*/
static void bond_ether_setup(struct net_device *bond_dev)
{
- unsigned int slave_flag = bond_dev->flags & IFF_SLAVE;
+ unsigned int flags = bond_dev->flags & (IFF_SLAVE | IFF_UP);
ether_setup(bond_dev);
- bond_dev->flags |= IFF_MASTER | slave_flag;
+ bond_dev->flags |= IFF_MASTER | flags;
bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
}
@@ -3269,7 +3270,8 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond,
combined = skb_header_pointer(skb, 0, sizeof(_combined), &_combined);
if (!combined || combined->ip6.nexthdr != NEXTHDR_ICMP ||
- combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT)
+ (combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION &&
+ combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT))
goto out;
saddr = &combined->ip6.saddr;
@@ -3291,7 +3293,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond,
else if (curr_active_slave &&
time_after(slave_last_rx(bond, curr_active_slave),
curr_active_slave->last_link_up))
- bond_validate_na(bond, slave, saddr, daddr);
+ bond_validate_na(bond, slave, daddr, saddr);
else if (curr_arp_slave &&
bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1))
bond_validate_na(bond, slave, saddr, daddr);
@@ -5695,9 +5697,13 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
struct ethtool_ts_info *info)
{
struct bonding *bond = netdev_priv(bond_dev);
+ struct ethtool_ts_info ts_info;
const struct ethtool_ops *ops;
struct net_device *real_dev;
+ bool sw_tx_support = false;
struct phy_device *phydev;
+ struct list_head *iter;
+ struct slave *slave;
int ret = 0;
rcu_read_lock();
@@ -5716,10 +5722,36 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
ret = ops->get_ts_info(real_dev, info);
goto out;
}
+ } else {
+ /* Check if all slaves support software tx timestamping */
+ rcu_read_lock();
+ bond_for_each_slave_rcu(bond, slave, iter) {
+ ret = -1;
+ ops = slave->dev->ethtool_ops;
+ phydev = slave->dev->phydev;
+
+ if (phy_has_tsinfo(phydev))
+ ret = phy_ts_info(phydev, &ts_info);
+ else if (ops->get_ts_info)
+ ret = ops->get_ts_info(slave->dev, &ts_info);
+
+ if (!ret && (ts_info.so_timestamping & SOF_TIMESTAMPING_TX_SOFTWARE)) {
+ sw_tx_support = true;
+ continue;
+ }
+
+ sw_tx_support = false;
+ break;
+ }
+ rcu_read_unlock();
}
+ ret = 0;
info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
SOF_TIMESTAMPING_SOFTWARE;
+ if (sw_tx_support)
+ info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE;
+
info->phc_index = -1;
out:
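
The new else branch above computes an all-of reduction over the slaves: SOF_TIMESTAMPING_TX_SOFTWARE is advertised for the bond only when every slave reports it, presumably because a packet may egress through any of them. A userspace sketch of the reduction, assuming the uapi bit value from linux/net_tstamp.h:

	#include <stdbool.h>
	#include <stdio.h>

	#define SOF_TIMESTAMPING_TX_SOFTWARE (1 << 1)

	static bool all_slaves_sw_tx(const unsigned int *so_timestamping, int n)
	{
		for (int i = 0; i < n; i++)
			if (!(so_timestamping[i] & SOF_TIMESTAMPING_TX_SOFTWARE))
				return false;	/* one slave lacking it clears the bond's bit */
		return n > 0;			/* an empty bond advertises nothing extra */
	}

	int main(void)
	{
		unsigned int slaves[] = { SOF_TIMESTAMPING_TX_SOFTWARE,
					  SOF_TIMESTAMPING_TX_SOFTWARE };

		printf("%d\n", all_slaves_sw_tx(slaves, 2));	/* 1 */
		slaves[1] = 0;
		printf("%d\n", all_slaves_sw_tx(slaves, 2));	/* 0 */
		return 0;
	}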
diff --git a/drivers/net/dsa/microchip/ksz8.h b/drivers/net/dsa/microchip/ksz8.h
index ea05abfbd51d..e68465fdf6b9 100644
--- a/drivers/net/dsa/microchip/ksz8.h
+++ b/drivers/net/dsa/microchip/ksz8.h
@@ -21,10 +21,6 @@ int ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val);
int ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val);
int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr,
u8 *fid, u8 *src_port, u8 *timestamp, u16 *entries);
-int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
- struct alu_struct *alu);
-void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
- struct alu_struct *alu);
void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt);
void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
u64 *dropped, u64 *cnt);
@@ -32,6 +28,10 @@ void ksz8_freeze_mib(struct ksz_device *dev, int port, bool freeze);
void ksz8_port_init_cnt(struct ksz_device *dev, int port);
int ksz8_fdb_dump(struct ksz_device *dev, int port,
dsa_fdb_dump_cb_t *cb, void *data);
+int ksz8_fdb_add(struct ksz_device *dev, int port, const unsigned char *addr,
+ u16 vid, struct dsa_db db);
+int ksz8_fdb_del(struct ksz_device *dev, int port, const unsigned char *addr,
+ u16 vid, struct dsa_db db);
int ksz8_mdb_add(struct ksz_device *dev, int port,
const struct switchdev_obj_port_mdb *mdb, struct dsa_db db);
int ksz8_mdb_del(struct ksz_device *dev, int port,
diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 3fffd5da8d3b..f56fca1b1a22 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -96,7 +96,7 @@ static int ksz8795_change_mtu(struct ksz_device *dev, int frame_size)
if (frame_size > KSZ8_LEGAL_PACKET_SIZE)
ctrl2 |= SW_LEGAL_PACKET_DISABLE;
- else if (frame_size > KSZ8863_NORMAL_PACKET_SIZE)
+ if (frame_size > KSZ8863_NORMAL_PACKET_SIZE)
ctrl1 |= SW_HUGE_PACKET;
ret = ksz_rmw8(dev, REG_SW_CTRL_1, SW_HUGE_PACKET, ctrl1);
@@ -336,34 +336,48 @@ void ksz8_port_init_cnt(struct ksz_device *dev, int port)
}
}
-static void ksz8_r_table(struct ksz_device *dev, int table, u16 addr, u64 *data)
+static int ksz8_r_table(struct ksz_device *dev, int table, u16 addr, u64 *data)
{
const u16 *regs;
u16 ctrl_addr;
+ int ret;
regs = dev->info->regs;
ctrl_addr = IND_ACC_TABLE(table | TABLE_READ) | addr;
mutex_lock(&dev->alu_mutex);
- ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr);
- ksz_read64(dev, regs[REG_IND_DATA_HI], data);
+ ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr);
+ if (ret)
+ goto unlock_alu;
+
+ ret = ksz_read64(dev, regs[REG_IND_DATA_HI], data);
+unlock_alu:
mutex_unlock(&dev->alu_mutex);
+
+ return ret;
}
-static void ksz8_w_table(struct ksz_device *dev, int table, u16 addr, u64 data)
+static int ksz8_w_table(struct ksz_device *dev, int table, u16 addr, u64 data)
{
const u16 *regs;
u16 ctrl_addr;
+ int ret;
regs = dev->info->regs;
ctrl_addr = IND_ACC_TABLE(table) | addr;
mutex_lock(&dev->alu_mutex);
- ksz_write64(dev, regs[REG_IND_DATA_HI], data);
- ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr);
+ ret = ksz_write64(dev, regs[REG_IND_DATA_HI], data);
+ if (ret)
+ goto unlock_alu;
+
+ ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr);
+unlock_alu:
mutex_unlock(&dev->alu_mutex);
+
+ return ret;
}
static int ksz8_valid_dyn_entry(struct ksz_device *dev, u8 *data)
@@ -457,46 +471,54 @@ int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr,
return rc;
}
-int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
- struct alu_struct *alu)
+static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+ struct alu_struct *alu, bool *valid)
{
u32 data_hi, data_lo;
const u8 *shifts;
const u32 *masks;
u64 data;
+ int ret;
shifts = dev->info->shifts;
masks = dev->info->masks;
- ksz8_r_table(dev, TABLE_STATIC_MAC, addr, &data);
+ ret = ksz8_r_table(dev, TABLE_STATIC_MAC, addr, &data);
+ if (ret)
+ return ret;
+
data_hi = data >> 32;
data_lo = (u32)data;
- if (data_hi & (masks[STATIC_MAC_TABLE_VALID] |
- masks[STATIC_MAC_TABLE_OVERRIDE])) {
- alu->mac[5] = (u8)data_lo;
- alu->mac[4] = (u8)(data_lo >> 8);
- alu->mac[3] = (u8)(data_lo >> 16);
- alu->mac[2] = (u8)(data_lo >> 24);
- alu->mac[1] = (u8)data_hi;
- alu->mac[0] = (u8)(data_hi >> 8);
- alu->port_forward =
- (data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >>
- shifts[STATIC_MAC_FWD_PORTS];
- alu->is_override =
- (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0;
- data_hi >>= 1;
- alu->is_static = true;
- alu->is_use_fid =
- (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 1 : 0;
- alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >>
- shifts[STATIC_MAC_FID];
+
+ if (!(data_hi & (masks[STATIC_MAC_TABLE_VALID] |
+ masks[STATIC_MAC_TABLE_OVERRIDE]))) {
+ *valid = false;
return 0;
}
- return -ENXIO;
+
+ alu->mac[5] = (u8)data_lo;
+ alu->mac[4] = (u8)(data_lo >> 8);
+ alu->mac[3] = (u8)(data_lo >> 16);
+ alu->mac[2] = (u8)(data_lo >> 24);
+ alu->mac[1] = (u8)data_hi;
+ alu->mac[0] = (u8)(data_hi >> 8);
+ alu->port_forward =
+ (data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >>
+ shifts[STATIC_MAC_FWD_PORTS];
+ alu->is_override = (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0;
+ data_hi >>= 1;
+ alu->is_static = true;
+ alu->is_use_fid = (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 1 : 0;
+ alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >>
+ shifts[STATIC_MAC_FID];
+
+ *valid = true;
+
+ return 0;
}
-void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
- struct alu_struct *alu)
+static int ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
+ struct alu_struct *alu)
{
u32 data_hi, data_lo;
const u8 *shifts;
@@ -524,7 +546,8 @@ void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
data_hi &= ~masks[STATIC_MAC_TABLE_OVERRIDE];
data = (u64)data_hi << 32 | data_lo;
- ksz8_w_table(dev, TABLE_STATIC_MAC, addr, data);
+
+ return ksz8_w_table(dev, TABLE_STATIC_MAC, addr, data);
}
static void ksz8_from_vlan(struct ksz_device *dev, u32 vlan, u8 *fid,
@@ -977,24 +1000,29 @@ int ksz8_fdb_dump(struct ksz_device *dev, int port,
return ret;
}
-int ksz8_mdb_add(struct ksz_device *dev, int port,
- const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
+static int ksz8_add_sta_mac(struct ksz_device *dev, int port,
+ const unsigned char *addr, u16 vid)
{
struct alu_struct alu;
- int index;
+ int index, ret;
int empty = 0;
alu.port_forward = 0;
for (index = 0; index < dev->info->num_statics; index++) {
- if (!ksz8_r_sta_mac_table(dev, index, &alu)) {
- /* Found one already in static MAC table. */
- if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
- alu.fid == mdb->vid)
- break;
- /* Remember the first empty entry. */
- } else if (!empty) {
- empty = index + 1;
+ bool valid;
+
+ ret = ksz8_r_sta_mac_table(dev, index, &alu, &valid);
+ if (ret)
+ return ret;
+ if (!valid) {
+ /* Remember the first empty entry. */
+ if (!empty)
+ empty = index + 1;
+ continue;
}
+
+ if (!memcmp(alu.mac, addr, ETH_ALEN) && alu.fid == vid)
+ break;
}
/* no available entry */
@@ -1005,48 +1033,73 @@ int ksz8_mdb_add(struct ksz_device *dev, int port,
if (index == dev->info->num_statics) {
index = empty - 1;
memset(&alu, 0, sizeof(alu));
- memcpy(alu.mac, mdb->addr, ETH_ALEN);
+ memcpy(alu.mac, addr, ETH_ALEN);
alu.is_static = true;
}
alu.port_forward |= BIT(port);
- if (mdb->vid) {
+ if (vid) {
alu.is_use_fid = true;
/* Need a way to map VID to FID. */
- alu.fid = mdb->vid;
+ alu.fid = vid;
}
- ksz8_w_sta_mac_table(dev, index, &alu);
- return 0;
+ return ksz8_w_sta_mac_table(dev, index, &alu);
}
-int ksz8_mdb_del(struct ksz_device *dev, int port,
- const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
+static int ksz8_del_sta_mac(struct ksz_device *dev, int port,
+ const unsigned char *addr, u16 vid)
{
struct alu_struct alu;
- int index;
+ int index, ret;
for (index = 0; index < dev->info->num_statics; index++) {
- if (!ksz8_r_sta_mac_table(dev, index, &alu)) {
- /* Found one already in static MAC table. */
- if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
- alu.fid == mdb->vid)
- break;
- }
+ bool valid;
+
+ ret = ksz8_r_sta_mac_table(dev, index, &alu, &valid);
+ if (ret)
+ return ret;
+ if (!valid)
+ continue;
+
+ if (!memcmp(alu.mac, addr, ETH_ALEN) && alu.fid == vid)
+ break;
}
/* no available entry */
if (index == dev->info->num_statics)
- goto exit;
+ return 0;
/* clear port */
alu.port_forward &= ~BIT(port);
if (!alu.port_forward)
alu.is_static = false;
- ksz8_w_sta_mac_table(dev, index, &alu);
-exit:
- return 0;
+ return ksz8_w_sta_mac_table(dev, index, &alu);
+}
+
+int ksz8_mdb_add(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
+{
+ return ksz8_add_sta_mac(dev, port, mdb->addr, mdb->vid);
+}
+
+int ksz8_mdb_del(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
+{
+ return ksz8_del_sta_mac(dev, port, mdb->addr, mdb->vid);
+}
+
+int ksz8_fdb_add(struct ksz_device *dev, int port, const unsigned char *addr,
+ u16 vid, struct dsa_db db)
+{
+ return ksz8_add_sta_mac(dev, port, addr, vid);
+}
+
+int ksz8_fdb_del(struct ksz_device *dev, int port, const unsigned char *addr,
+ u16 vid, struct dsa_db db)
+{
+ return ksz8_del_sta_mac(dev, port, addr, vid);
}
int ksz8_port_vlan_filtering(struct ksz_device *dev, int port, bool flag,
@@ -1346,9 +1399,7 @@ int ksz8_enable_stp_addr(struct ksz_device *dev)
alu.is_override = true;
alu.port_forward = dev->info->cpu_ports;
- ksz8_w_sta_mac_table(dev, 0, &alu);
-
- return 0;
+ return ksz8_w_sta_mac_table(dev, 0, &alu);
}
int ksz8_setup(struct dsa_switch *ds)
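
The accessor change above separates two conditions the old code folded into one return value: -ENXIO used to mean both "bus error" and "slot empty". The refactored reader returns errors only and reports emptiness through a bool out-parameter, so callers can remember the first free slot while still aborting on real I/O failures. A userspace sketch of the calling convention, with simplified types and a stand-in error code:

	#include <stdbool.h>
	#include <stdio.h>

	struct entry { bool in_use; int data; };

	static struct entry table[4] = { [1] = { true, 42 } };

	static int read_entry(int idx, int *data, bool *valid)
	{
		if (idx < 0 || idx >= 4)
			return -5;	/* stand-in for a bus/IO error (-EIO) */
		*valid = table[idx].in_use;
		if (*valid)
			*data = table[idx].data;
		return 0;
	}

	int main(void)
	{
		for (int i = 0; i < 5; i++) {
			bool valid;
			int data, ret = read_entry(i, &data, &valid);

			if (ret)
				printf("slot %d: error %d\n", i, ret);
			else if (!valid)
				printf("slot %d: empty\n", i);
			else
				printf("slot %d: %d\n", i, data);
		}
		return 0;
	}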
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 5568d7b22135..a4428be5f483 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -200,6 +200,8 @@ static const struct ksz_dev_ops ksz8_dev_ops = {
.freeze_mib = ksz8_freeze_mib,
.port_init_cnt = ksz8_port_init_cnt,
.fdb_dump = ksz8_fdb_dump,
+ .fdb_add = ksz8_fdb_add,
+ .fdb_del = ksz8_fdb_del,
.mdb_add = ksz8_mdb_add,
.mdb_del = ksz8_mdb_del,
.vlan_filtering = ksz8_port_vlan_filtering,
diff --git a/drivers/net/dsa/mt7530-mdio.c b/drivers/net/dsa/mt7530-mdio.c
index 34a547b88e49..088533663b83 100644
--- a/drivers/net/dsa/mt7530-mdio.c
+++ b/drivers/net/dsa/mt7530-mdio.c
@@ -81,14 +81,17 @@ static const struct regmap_bus mt7530_regmap_bus = {
};
static int
-mt7531_create_sgmii(struct mt7530_priv *priv)
+mt7531_create_sgmii(struct mt7530_priv *priv, bool dual_sgmii)
{
- struct regmap_config *mt7531_pcs_config[2];
+ struct regmap_config *mt7531_pcs_config[2] = {};
struct phylink_pcs *pcs;
struct regmap *regmap;
int i, ret = 0;
- for (i = 0; i < 2; i++) {
+	/* MT7531AE has two SGMII units, for port 5 and port 6;
+	 * MT7531BE has only one SGMII unit, for port 6.
+ */
+ for (i = dual_sgmii ? 0 : 1; i < 2; i++) {
mt7531_pcs_config[i] = devm_kzalloc(priv->dev,
sizeof(struct regmap_config),
GFP_KERNEL);
@@ -208,11 +211,8 @@ mt7530_probe(struct mdio_device *mdiodev)
if (IS_ERR(priv->regmap))
return PTR_ERR(priv->regmap);
- if (priv->id == ID_MT7531) {
- ret = mt7531_create_sgmii(priv);
- if (ret)
- return ret;
- }
+ if (priv->id == ID_MT7531)
+ priv->create_sgmii = mt7531_create_sgmii;
return dsa_register_switch(priv->ds);
}
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index e4bb5037d352..c680873819b0 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -3018,6 +3018,12 @@ mt753x_setup(struct dsa_switch *ds)
if (ret && priv->irq)
mt7530_free_irq_common(priv);
+ if (priv->create_sgmii) {
+ ret = priv->create_sgmii(priv, mt7531_dual_sgmii_supported(priv));
+ if (ret && priv->irq)
+ mt7530_free_irq(priv);
+ }
+
return ret;
}
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 01db5c9724fa..5084f48a8869 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -748,10 +748,10 @@ struct mt753x_info {
* registers
* @p6_interface Holding the current port 6 interface
* @p5_intf_sel: Holding the current port 5 interface select
- *
* @irq: IRQ number of the switch
* @irq_domain: IRQ domain of the switch irq_chip
* @irq_enable: IRQ enable bits, synced to SYS_INT_EN
+ * @create_sgmii: Pointer to function creating SGMII PCS instance(s)
*/
struct mt7530_priv {
struct device *dev;
@@ -770,7 +770,6 @@ struct mt7530_priv {
unsigned int p5_intf_sel;
u8 mirror_rx;
u8 mirror_tx;
-
struct mt7530_port ports[MT7530_NUM_PORTS];
struct mt753x_pcs pcs[MT7530_NUM_PORTS];
/* protect among processes for registers access*/
@@ -778,6 +777,7 @@ struct mt7530_priv {
int irq;
struct irq_domain *irq_domain;
u32 irq_enable;
+ int (*create_sgmii)(struct mt7530_priv *priv, bool dual_sgmii);
};
struct mt7530_hw_vlan_entry {
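
The probe/setup split above replaces an eager call with a stored callback: probe cannot yet tell an MT7531AE (SGMII units on ports 5 and 6) from an MT7531BE (port 6 only), so it records mt7531_create_sgmii() in priv and mt753x_setup() invokes it once mt7531_dual_sgmii_supported() can be evaluated. A userspace sketch of the deferral pattern, with illustrative stand-in names:

	#include <stdbool.h>
	#include <stdio.h>

	struct priv {
		int (*create_sgmii)(struct priv *priv, bool dual_sgmii);
	};

	static int create_sgmii(struct priv *priv, bool dual)
	{
		/* dual: units for ports 5 and 6; otherwise port 6 only */
		for (int i = dual ? 0 : 1; i < 2; i++)
			printf("creating SGMII PCS unit %d\n", i);
		return 0;
	}

	static void probe(struct priv *priv)
	{
		priv->create_sgmii = create_sgmii;	/* defer, don't call yet */
	}

	static int setup(struct priv *priv, bool dual)
	{
		return priv->create_sgmii ? priv->create_sgmii(priv, dual) : 0;
	}

	int main(void)
	{
		struct priv priv = {};

		probe(&priv);
		return setup(&priv, false);	/* single-SGMII variant */
	}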
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 62a126402983..dc263cea205f 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -611,10 +611,10 @@ static void mv88e6185_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
}
static const u8 mv88e6xxx_phy_interface_modes[] = {
- [MV88E6XXX_PORT_STS_CMODE_MII_PHY] = PHY_INTERFACE_MODE_MII,
+ [MV88E6XXX_PORT_STS_CMODE_MII_PHY] = PHY_INTERFACE_MODE_REVMII,
[MV88E6XXX_PORT_STS_CMODE_MII] = PHY_INTERFACE_MODE_MII,
[MV88E6XXX_PORT_STS_CMODE_GMII] = PHY_INTERFACE_MODE_GMII,
- [MV88E6XXX_PORT_STS_CMODE_RMII_PHY] = PHY_INTERFACE_MODE_RMII,
+ [MV88E6XXX_PORT_STS_CMODE_RMII_PHY] = PHY_INTERFACE_MODE_REVRMII,
[MV88E6XXX_PORT_STS_CMODE_RMII] = PHY_INTERFACE_MODE_RMII,
[MV88E6XXX_PORT_STS_CMODE_100BASEX] = PHY_INTERFACE_MODE_100BASEX,
[MV88E6XXX_PORT_STS_CMODE_1000BASEX] = PHY_INTERFACE_MODE_1000BASEX,
@@ -5613,7 +5613,7 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = {
* .port_set_upstream_port method.
*/
.set_egress_port = mv88e6393x_set_egress_port,
- .watchdog_ops = &mv88e6390_watchdog_ops,
+ .watchdog_ops = &mv88e6393x_watchdog_ops,
.mgmt_rsvd2cpu = mv88e6393x_port_mgmt_rsvd2cpu,
.pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index a26546d3d7b5..615896893076 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -943,6 +943,26 @@ const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {
.irq_free = mv88e6390_watchdog_free,
};
+static int mv88e6393x_watchdog_action(struct mv88e6xxx_chip *chip, int irq)
+{
+ mv88e6390_watchdog_action(chip, irq);
+
+	/* Workaround for an unreleased erratum on mv88e6393x:
+	 * the force WD event bit needs to be cleared explicitly.
+	 */
+ mv88e6xxx_g2_write(chip, MV88E6390_G2_WDOG_CTL,
+ MV88E6390_G2_WDOG_CTL_UPDATE |
+ MV88E6390_G2_WDOG_CTL_PTR_EVENT);
+
+ return IRQ_HANDLED;
+}
+
+const struct mv88e6xxx_irq_ops mv88e6393x_watchdog_ops = {
+ .irq_action = mv88e6393x_watchdog_action,
+ .irq_setup = mv88e6390_watchdog_setup,
+ .irq_free = mv88e6390_watchdog_free,
+};
+
static irqreturn_t mv88e6xxx_g2_watchdog_thread_fn(int irq, void *dev_id)
{
struct mv88e6xxx_chip *chip = dev_id;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index e973114d6890..7e091965582b 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -369,6 +369,7 @@ int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target,
extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
extern const struct mv88e6xxx_irq_ops mv88e6250_watchdog_ops;
extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
+extern const struct mv88e6xxx_irq_ops mv88e6393x_watchdog_ops;
extern const struct mv88e6xxx_avb_ops mv88e6165_avb_ops;
extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops;
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 6dcebcfd71e7..80861ac090ae 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1550,11 +1550,6 @@ static int felix_connect_tag_protocol(struct dsa_switch *ds,
}
}
-/* Hardware initialization done here so that we can allocate structures with
- * devm without fear of dsa_register_switch returning -EPROBE_DEFER and causing
- * us to allocate structures twice (leak memory) and map PCI memory twice
- * (which will not work).
- */
static int felix_setup(struct dsa_switch *ds)
{
struct ocelot *ocelot = ds->priv;
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index dddb28984bdf..cfb3faeaa5bf 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1424,6 +1424,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
mutex_lock(&ocelot->tas_lock);
if (!taprio->enable) {
+ ocelot_port_mqprio(ocelot, port, &taprio->mqprio);
ocelot_rmw_rix(ocelot, 0, QSYS_TAG_CONFIG_ENABLE,
QSYS_TAG_CONFIG, port);
@@ -1436,15 +1437,19 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
return 0;
}
+ ret = ocelot_port_mqprio(ocelot, port, &taprio->mqprio);
+ if (ret)
+ goto err_unlock;
+
if (taprio->cycle_time > NSEC_PER_SEC ||
taprio->cycle_time_extension >= NSEC_PER_SEC) {
ret = -EINVAL;
- goto err;
+ goto err_reset_tc;
}
if (taprio->num_entries > VSC9959_TAS_GCL_ENTRY_MAX) {
ret = -ERANGE;
- goto err;
+ goto err_reset_tc;
}
/* Enable guard band. The switch will schedule frames without taking
@@ -1468,7 +1473,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8);
if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) {
ret = -EBUSY;
- goto err;
+ goto err_reset_tc;
}
ocelot_rmw_rix(ocelot,
@@ -1503,12 +1508,19 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
!(val & QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE),
10, 100000);
if (ret)
- goto err;
+ goto err_reset_tc;
ocelot_port->taprio = taprio_offload_get(taprio);
vsc9959_tas_guard_bands_update(ocelot, port);
-err:
+ mutex_unlock(&ocelot->tas_lock);
+
+ return 0;
+
+err_reset_tc:
+ taprio->mqprio.qopt.num_tc = 0;
+ ocelot_port_mqprio(ocelot, port, &taprio->mqprio);
+err_unlock:
mutex_unlock(&ocelot->tas_lock);
return ret;
@@ -1612,6 +1624,13 @@ static int vsc9959_qos_port_cbs_set(struct dsa_switch *ds, int port,
static int vsc9959_qos_query_caps(struct tc_query_caps_base *base)
{
switch (base->type) {
+ case TC_SETUP_QDISC_MQPRIO: {
+ struct tc_mqprio_caps *caps = base->caps;
+
+ caps->validate_queue_counts = true;
+
+ return 0;
+ }
case TC_SETUP_QDISC_TAPRIO: {
struct tc_taprio_caps *caps = base->caps;
@@ -1635,6 +1654,8 @@ static int vsc9959_port_setup_tc(struct dsa_switch *ds, int port,
return vsc9959_qos_query_caps(type_data);
case TC_SETUP_QDISC_TAPRIO:
return vsc9959_qos_port_tas_set(ocelot, port, type_data);
+ case TC_SETUP_QDISC_MQPRIO:
+ return ocelot_port_mqprio(ocelot, port, type_data);
case TC_SETUP_QDISC_CBS:
return vsc9959_qos_port_cbs_set(ds, port, type_data);
default:
@@ -2498,6 +2519,7 @@ static void vsc9959_cut_through_fwd(struct ocelot *ocelot)
for (port = 0; port < ocelot->num_phys_ports; port++) {
struct ocelot_port *ocelot_port = ocelot->ports[port];
+ struct ocelot_mm_state *mm = &ocelot->mm[port];
int min_speed = ocelot_port->speed;
unsigned long mask = 0;
u32 tmp, val = 0;
@@ -2538,10 +2560,12 @@ static void vsc9959_cut_through_fwd(struct ocelot *ocelot)
/* Enable cut-through forwarding for all traffic classes that
* don't have oversized dropping enabled, since this check is
- * bypassed in cut-through mode.
+ * bypassed in cut-through mode. Also exclude preemptible
+ * traffic classes, since these would hang the port for some
+ * reason, if sent as cut-through.
*/
if (ocelot_port->speed == min_speed) {
- val = GENMASK(7, 0);
+ val = GENMASK(7, 0) & ~mm->active_preemptible_tcs;
for (tc = 0; tc < OCELOT_NUM_TC; tc++)
if (vsc9959_port_qmaxsdu_get(ocelot, port, tc))
@@ -2610,12 +2634,9 @@ static const struct felix_info felix_info_vsc9959 = {
static irqreturn_t felix_irq_handler(int irq, void *data)
{
struct ocelot *ocelot = (struct ocelot *)data;
- int port;
ocelot_get_txtstamp(ocelot);
-
- for (port = 0; port < ocelot->num_phys_ports; port++)
- ocelot_port_mm_irq(ocelot, port);
+ ocelot_mm_irq(ocelot);
return IRQ_HANDLED;
}
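
The relabelled error paths above encode an unwind order: failures after ocelot_port_mqprio() succeeded must reset the traffic classes (err_reset_tc) before dropping tas_lock (err_unlock), while the earlier failure skips the undo. A userspace sketch of the two-label goto pattern, using a pthread mutex and stand-in functions (-22 models -EINVAL):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t tas_lock = PTHREAD_MUTEX_INITIALIZER;

	static int apply_mqprio(int num_tc)
	{
		printf("mqprio: %d TCs\n", num_tc);
		return 0;
	}

	static int validate_schedule(long cycle_ns)
	{
		return cycle_ns > 1000000000L ? -22 : 0;
	}

	static int tas_set(long cycle_ns, int num_tc)
	{
		int ret;

		pthread_mutex_lock(&tas_lock);

		ret = apply_mqprio(num_tc);
		if (ret)
			goto err_unlock;	/* nothing to undo yet */

		ret = validate_schedule(cycle_ns);
		if (ret)
			goto err_reset_tc;	/* undo the TC setup first */

		pthread_mutex_unlock(&tas_lock);
		return 0;

	err_reset_tc:
		apply_mqprio(0);		/* reset to no traffic classes */
	err_unlock:
		pthread_mutex_unlock(&tas_lock);
		return ret;
	}

	int main(void)
	{
		printf("ok: %d\n", tas_set(500000000L, 4));
		printf("fail: %d\n", tas_set(2000000000L, 4));
		return 0;
	}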
diff --git a/drivers/net/dsa/qca/Kconfig b/drivers/net/dsa/qca/Kconfig
index ba339747362c..7a86d6d6a246 100644
--- a/drivers/net/dsa/qca/Kconfig
+++ b/drivers/net/dsa/qca/Kconfig
@@ -15,3 +15,11 @@ config NET_DSA_QCA8K
help
This enables support for the Qualcomm Atheros QCA8K Ethernet
switch chips.
+
+config NET_DSA_QCA8K_LEDS_SUPPORT
+ bool "Qualcomm Atheros QCA8K Ethernet switch family LEDs support"
+ depends on NET_DSA_QCA8K
+ depends on LEDS_CLASS
+ help
+	  This enables support for LEDs present on the Qualcomm Atheros
+	  QCA8K Ethernet switch chips.
diff --git a/drivers/net/dsa/qca/Makefile b/drivers/net/dsa/qca/Makefile
index 701f1d199e93..ce66b1984e5f 100644
--- a/drivers/net/dsa/qca/Makefile
+++ b/drivers/net/dsa/qca/Makefile
@@ -2,3 +2,6 @@
obj-$(CONFIG_NET_DSA_AR9331) += ar9331.o
obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o
qca8k-y += qca8k-common.o qca8k-8xxx.o
+ifdef CONFIG_NET_DSA_QCA8K_LEDS_SUPPORT
+qca8k-y += qca8k-leds.o
+endif
diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c
index 62810903f1b3..6d5ac7588a69 100644
--- a/drivers/net/dsa/qca/qca8k-8xxx.c
+++ b/drivers/net/dsa/qca/qca8k-8xxx.c
@@ -22,6 +22,7 @@
#include <linux/dsa/tag_qca.h>
#include "qca8k.h"
+#include "qca8k_leds.h"
static void
qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page)
@@ -772,21 +773,6 @@ err_clear_skb:
return ret;
}
-static u32
-qca8k_port_to_phy(int port)
-{
- /* From Andrew Lunn:
- * Port 0 has no internal phy.
- * Port 1 has an internal PHY at MDIO address 0.
- * Port 2 has an internal PHY at MDIO address 1.
- * ...
- * Port 5 has an internal PHY at MDIO address 4.
- * Port 6 has no internal PHY.
- */
-
- return port - 1;
-}
-
static int
qca8k_mdio_busy_wait(struct mii_bus *bus, u32 reg, u32 mask)
{
@@ -1797,6 +1783,10 @@ qca8k_setup(struct dsa_switch *ds)
if (ret)
return ret;
+ ret = qca8k_setup_led_ctrl(priv);
+ if (ret)
+ return ret;
+
qca8k_setup_pcs(priv, &priv->pcs_port_0, 0);
qca8k_setup_pcs(priv, &priv->pcs_port_6, 6);
diff --git a/drivers/net/dsa/qca/qca8k-leds.c b/drivers/net/dsa/qca/qca8k-leds.c
new file mode 100644
index 000000000000..b883692b7d86
--- /dev/null
+++ b/drivers/net/dsa/qca/qca8k-leds.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/regmap.h>
+#include <net/dsa.h>
+
+#include "qca8k.h"
+#include "qca8k_leds.h"
+
+static int
+qca8k_get_enable_led_reg(int port_num, int led_num, struct qca8k_led_pattern_en *reg_info)
+{
+ switch (port_num) {
+ case 0:
+ reg_info->reg = QCA8K_LED_CTRL_REG(led_num);
+ reg_info->shift = QCA8K_LED_PHY0123_CONTROL_RULE_SHIFT;
+ break;
+ case 1:
+ case 2:
+ case 3:
+		/* Ports 1, 2 and 3 are controlled on a different reg */
+ reg_info->reg = QCA8K_LED_CTRL3_REG;
+ reg_info->shift = QCA8K_LED_PHY123_PATTERN_EN_SHIFT(port_num, led_num);
+ break;
+ case 4:
+ reg_info->reg = QCA8K_LED_CTRL_REG(led_num);
+ reg_info->shift = QCA8K_LED_PHY4_CONTROL_RULE_SHIFT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+qca8k_led_brightness_set(struct qca8k_led *led,
+ enum led_brightness brightness)
+{
+ struct qca8k_led_pattern_en reg_info;
+ struct qca8k_priv *priv = led->priv;
+ u32 mask, val;
+
+ qca8k_get_enable_led_reg(led->port_num, led->led_num, &reg_info);
+
+ val = QCA8K_LED_ALWAYS_OFF;
+ if (brightness)
+ val = QCA8K_LED_ALWAYS_ON;
+
+	/* The HW regs to control brightness are special and ports 1-2-3
+	 * are placed in a different reg.
+ *
+ * To control port 0 brightness:
+ * - the 2 bit (15, 14) of:
+ * - QCA8K_LED_CTRL0_REG for led1
+ * - QCA8K_LED_CTRL1_REG for led2
+ * - QCA8K_LED_CTRL2_REG for led3
+ *
+ * To control port 4:
+ * - the 2 bit (31, 30) of:
+ * - QCA8K_LED_CTRL0_REG for led1
+ * - QCA8K_LED_CTRL1_REG for led2
+ * - QCA8K_LED_CTRL2_REG for led3
+ *
+ * To control port 1:
+ * - the 2 bit at (9, 8) of QCA8K_LED_CTRL3_REG are used for led1
+ * - the 2 bit at (11, 10) of QCA8K_LED_CTRL3_REG are used for led2
+ * - the 2 bit at (13, 12) of QCA8K_LED_CTRL3_REG are used for led3
+ *
+ * To control port 2:
+ * - the 2 bit at (15, 14) of QCA8K_LED_CTRL3_REG are used for led1
+ * - the 2 bit at (17, 16) of QCA8K_LED_CTRL3_REG are used for led2
+ * - the 2 bit at (19, 18) of QCA8K_LED_CTRL3_REG are used for led3
+ *
+ * To control port 3:
+ * - the 2 bit at (21, 20) of QCA8K_LED_CTRL3_REG are used for led1
+ * - the 2 bit at (23, 22) of QCA8K_LED_CTRL3_REG are used for led2
+ * - the 2 bit at (25, 24) of QCA8K_LED_CTRL3_REG are used for led3
+ *
+	 * To abstract this and have less code, we use the port and led num
+	 * to calculate the shift and the correct reg, since there is no
+	 * 1:1 mapping of LEDs to the regs.
+ */
+ if (led->port_num == 0 || led->port_num == 4) {
+ mask = QCA8K_LED_PATTERN_EN_MASK;
+ val <<= QCA8K_LED_PATTERN_EN_SHIFT;
+ } else {
+ mask = QCA8K_LED_PHY123_PATTERN_EN_MASK;
+ }
+
+ return regmap_update_bits(priv->regmap, reg_info.reg,
+ mask << reg_info.shift,
+ val << reg_info.shift);
+}
+
+static int
+qca8k_cled_brightness_set_blocking(struct led_classdev *ldev,
+ enum led_brightness brightness)
+{
+ struct qca8k_led *led = container_of(ldev, struct qca8k_led, cdev);
+
+ return qca8k_led_brightness_set(led, brightness);
+}
+
+static enum led_brightness
+qca8k_led_brightness_get(struct qca8k_led *led)
+{
+ struct qca8k_led_pattern_en reg_info;
+ struct qca8k_priv *priv = led->priv;
+ u32 val;
+ int ret;
+
+ qca8k_get_enable_led_reg(led->port_num, led->led_num, &reg_info);
+
+ ret = regmap_read(priv->regmap, reg_info.reg, &val);
+ if (ret)
+ return 0;
+
+ val >>= reg_info.shift;
+
+ if (led->port_num == 0 || led->port_num == 4) {
+ val &= QCA8K_LED_PATTERN_EN_MASK;
+ val >>= QCA8K_LED_PATTERN_EN_SHIFT;
+ } else {
+ val &= QCA8K_LED_PHY123_PATTERN_EN_MASK;
+ }
+
+ /* Assume brightness ON only when the LED is set to always ON */
+ return val == QCA8K_LED_ALWAYS_ON;
+}
+
+static int
+qca8k_cled_blink_set(struct led_classdev *ldev,
+ unsigned long *delay_on,
+ unsigned long *delay_off)
+{
+ struct qca8k_led *led = container_of(ldev, struct qca8k_led, cdev);
+ u32 mask, val = QCA8K_LED_ALWAYS_BLINK_4HZ;
+ struct qca8k_led_pattern_en reg_info;
+ struct qca8k_priv *priv = led->priv;
+
+ if (*delay_on == 0 && *delay_off == 0) {
+ *delay_on = 125;
+ *delay_off = 125;
+ }
+
+ if (*delay_on != 125 || *delay_off != 125) {
+ /* The hardware only supports blinking at 4Hz. Fall back
+ * to software implementation in other cases.
+ */
+ return -EINVAL;
+ }
+
+ qca8k_get_enable_led_reg(led->port_num, led->led_num, &reg_info);
+
+ if (led->port_num == 0 || led->port_num == 4) {
+ mask = QCA8K_LED_PATTERN_EN_MASK;
+ val <<= QCA8K_LED_PATTERN_EN_SHIFT;
+ } else {
+ mask = QCA8K_LED_PHY123_PATTERN_EN_MASK;
+ }
+
+ regmap_update_bits(priv->regmap, reg_info.reg, mask << reg_info.shift,
+ val << reg_info.shift);
+
+ return 0;
+}
+
+static int
+qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int port_num)
+{
+ struct fwnode_handle *led = NULL, *leds = NULL;
+ struct led_init_data init_data = { };
+ struct dsa_switch *ds = priv->ds;
+ enum led_default_state state;
+ struct qca8k_led *port_led;
+ int led_num, led_index;
+ int ret;
+
+ leds = fwnode_get_named_child_node(port, "leds");
+ if (!leds) {
+ dev_dbg(priv->dev, "No Leds node specified in device tree for port %d!\n",
+ port_num);
+ return 0;
+ }
+
+ fwnode_for_each_child_node(leds, led) {
+		/* Reg represents the LED number of the port.
+		 * Each port can have at most 3 LEDs attached.
+		 * Commonly:
+		 * 1. is the gigabit LED
+		 * 2. is the mbit LED
+		 * 3. is an additional status LED
+		 */
+ if (fwnode_property_read_u32(led, "reg", &led_num))
+ continue;
+
+ if (led_num >= QCA8K_LED_PORT_COUNT) {
+ dev_warn(priv->dev, "Invalid LED reg %d defined for port %d",
+ led_num, port_num);
+ continue;
+ }
+
+ led_index = QCA8K_LED_PORT_INDEX(port_num, led_num);
+
+ port_led = &priv->ports_led[led_index];
+ port_led->port_num = port_num;
+ port_led->led_num = led_num;
+ port_led->priv = priv;
+
+ state = led_init_default_state_get(led);
+ switch (state) {
+ case LEDS_DEFSTATE_ON:
+ port_led->cdev.brightness = 1;
+ qca8k_led_brightness_set(port_led, 1);
+ break;
+ case LEDS_DEFSTATE_KEEP:
+ port_led->cdev.brightness =
+ qca8k_led_brightness_get(port_led);
+ break;
+ default:
+ port_led->cdev.brightness = 0;
+ qca8k_led_brightness_set(port_led, 0);
+ }
+
+ port_led->cdev.max_brightness = 1;
+ port_led->cdev.brightness_set_blocking = qca8k_cled_brightness_set_blocking;
+ port_led->cdev.blink_set = qca8k_cled_blink_set;
+ init_data.default_label = ":port";
+ init_data.fwnode = led;
+ init_data.devname_mandatory = true;
+ init_data.devicename = kasprintf(GFP_KERNEL, "%s:0%d", ds->slave_mii_bus->id,
+ port_num);
+ if (!init_data.devicename)
+ return -ENOMEM;
+
+ ret = devm_led_classdev_register_ext(priv->dev, &port_led->cdev, &init_data);
+ if (ret)
+ dev_warn(priv->dev, "Failed to init LED %d for port %d", led_num, port_num);
+
+ kfree(init_data.devicename);
+ }
+
+ return 0;
+}
+
+int
+qca8k_setup_led_ctrl(struct qca8k_priv *priv)
+{
+ struct fwnode_handle *ports, *port;
+ int port_num;
+ int ret;
+
+ ports = device_get_named_child_node(priv->dev, "ports");
+ if (!ports) {
+ dev_info(priv->dev, "No ports node specified in device tree!");
+ return 0;
+ }
+
+ fwnode_for_each_child_node(ports, port) {
+ if (fwnode_property_read_u32(port, "reg", &port_num))
+ continue;
+
+		/* Skip CPU port 0 and CPU port 6, LEDs are not supported there */
+ if (port_num == 0 || port_num == 6)
+ continue;
+
+		/* Each port can have at most 3 different LEDs attached.
+		 * Switch ports are numbered 0 to 6, but ports 0 and 6 are
+		 * CPU ports. The port index needs to be decreased by one
+		 * to identify the correct port for LED setup.
+ */
+ ret = qca8k_parse_port_leds(priv, port, qca8k_port_to_phy(port_num));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
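
The long bit-layout comment in qca8k_led_brightness_set() can be checked against the shift macro directly: ports 1-3 each occupy six bits of QCA8K_LED_CTRL3_REG starting at bit 8, two bits per LED. A userspace sketch that prints the bit pairs the macro selects (LED numbers are the 0-based DT reg values):

	#include <stdio.h>

	#define QCA8K_LED_PHY123_PATTERN_EN_SHIFT(_phy, _led) \
		((((_phy) - 1) * 6) + 8 + (2 * (_led)))

	int main(void)
	{
		for (int port = 1; port <= 3; port++)
			for (int led = 0; led < 3; led++)
				printf("port %d led %d -> bits (%d, %d)\n",
				       port, led,
				       QCA8K_LED_PHY123_PATTERN_EN_SHIFT(port, led) + 1,
				       QCA8K_LED_PHY123_PATTERN_EN_SHIFT(port, led));
		return 0;
	}

Running it reproduces the table in the comment, e.g. port 1 led 0 lands on bits (9, 8) and port 3 led 2 on bits (25, 24).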
diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h
index 7996975d29d3..c5cc8a172d65 100644
--- a/drivers/net/dsa/qca/qca8k.h
+++ b/drivers/net/dsa/qca/qca8k.h
@@ -11,6 +11,7 @@
#include <linux/delay.h>
#include <linux/regmap.h>
#include <linux/gpio.h>
+#include <linux/leds.h>
#include <linux/dsa/tag_qca.h>
#define QCA8K_ETHERNET_MDIO_PRIORITY 7
@@ -85,6 +86,51 @@
#define QCA8K_MDIO_MASTER_DATA(x) FIELD_PREP(QCA8K_MDIO_MASTER_DATA_MASK, x)
#define QCA8K_MDIO_MASTER_MAX_PORTS 5
#define QCA8K_MDIO_MASTER_MAX_REG 32
+
+/* LED control register */
+#define QCA8K_LED_PORT_COUNT 3
+#define QCA8K_LED_COUNT ((QCA8K_NUM_PORTS - QCA8K_NUM_CPU_PORTS) * QCA8K_LED_PORT_COUNT)
+#define QCA8K_LED_RULE_COUNT 6
+#define QCA8K_LED_RULE_MAX 11
+#define QCA8K_LED_PORT_INDEX(_phy, _led) (((_phy) * QCA8K_LED_PORT_COUNT) + (_led))
+
+#define QCA8K_LED_PHY123_PATTERN_EN_SHIFT(_phy, _led) ((((_phy) - 1) * 6) + 8 + (2 * (_led)))
+#define QCA8K_LED_PHY123_PATTERN_EN_MASK GENMASK(1, 0)
+
+#define QCA8K_LED_PHY0123_CONTROL_RULE_SHIFT 0
+#define QCA8K_LED_PHY4_CONTROL_RULE_SHIFT 16
+
+#define QCA8K_LED_CTRL_REG(_i) (0x050 + (_i) * 4)
+#define QCA8K_LED_CTRL0_REG 0x50
+#define QCA8K_LED_CTRL1_REG 0x54
+#define QCA8K_LED_CTRL2_REG 0x58
+#define QCA8K_LED_CTRL3_REG 0x5C
+#define QCA8K_LED_CTRL_SHIFT(_i) (((_i) % 2) * 16)
+#define QCA8K_LED_CTRL_MASK GENMASK(15, 0)
+#define QCA8K_LED_RULE_MASK GENMASK(13, 0)
+#define QCA8K_LED_BLINK_FREQ_MASK GENMASK(1, 0)
+#define QCA8K_LED_BLINK_FREQ_SHIFT			0
+#define QCA8K_LED_BLINK_2HZ 0
+#define QCA8K_LED_BLINK_4HZ 1
+#define QCA8K_LED_BLINK_8HZ 2
+#define QCA8K_LED_BLINK_AUTO 3
+#define QCA8K_LED_LINKUP_OVER_MASK BIT(2)
+#define QCA8K_LED_TX_BLINK_MASK BIT(4)
+#define QCA8K_LED_RX_BLINK_MASK BIT(5)
+#define QCA8K_LED_COL_BLINK_MASK BIT(7)
+#define QCA8K_LED_LINK_10M_EN_MASK BIT(8)
+#define QCA8K_LED_LINK_100M_EN_MASK BIT(9)
+#define QCA8K_LED_LINK_1000M_EN_MASK BIT(10)
+#define QCA8K_LED_POWER_ON_LIGHT_MASK BIT(11)
+#define QCA8K_LED_HALF_DUPLEX_MASK BIT(12)
+#define QCA8K_LED_FULL_DUPLEX_MASK BIT(13)
+#define QCA8K_LED_PATTERN_EN_MASK GENMASK(15, 14)
+#define QCA8K_LED_PATTERN_EN_SHIFT 14
+#define QCA8K_LED_ALWAYS_OFF 0
+#define QCA8K_LED_ALWAYS_BLINK_4HZ 1
+#define QCA8K_LED_ALWAYS_ON 2
+#define QCA8K_LED_RULE_CONTROLLED 3
+
#define QCA8K_GOL_MAC_ADDR0 0x60
#define QCA8K_GOL_MAC_ADDR1 0x64
#define QCA8K_MAX_FRAME_SIZE 0x78
@@ -382,6 +428,19 @@ struct qca8k_pcs {
int port;
};
+struct qca8k_led_pattern_en {
+ u32 reg;
+ u8 shift;
+};
+
+struct qca8k_led {
+ u8 port_num;
+ u8 led_num;
+ u16 old_rule;
+ struct qca8k_priv *priv;
+ struct led_classdev cdev;
+};
+
struct qca8k_priv {
u8 switch_id;
u8 switch_revision;
@@ -406,6 +465,7 @@ struct qca8k_priv {
struct qca8k_pcs pcs_port_0;
struct qca8k_pcs pcs_port_6;
const struct qca8k_match_data *info;
+ struct qca8k_led ports_led[QCA8K_LED_COUNT];
};
struct qca8k_mib_desc {
@@ -421,6 +481,20 @@ struct qca8k_fdb {
u8 mac[6];
};
+static inline u32 qca8k_port_to_phy(int port)
+{
+ /* From Andrew Lunn:
+ * Port 0 has no internal phy.
+ * Port 1 has an internal PHY at MDIO address 0.
+ * Port 2 has an internal PHY at MDIO address 1.
+ * ...
+ * Port 5 has an internal PHY at MDIO address 4.
+ * Port 6 has no internal PHY.
+ */
+
+ return port - 1;
+}
+
/* Common setup function */
extern const struct qca8k_mib_desc ar8327_mib[];
extern const struct regmap_access_table qca8k_readable_table;
diff --git a/drivers/net/dsa/qca/qca8k_leds.h b/drivers/net/dsa/qca/qca8k_leds.h
new file mode 100644
index 000000000000..ab367f05b173
--- /dev/null
+++ b/drivers/net/dsa/qca/qca8k_leds.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __QCA8K_LEDS_H
+#define __QCA8K_LEDS_H
+
+/* Leds Support function */
+#ifdef CONFIG_NET_DSA_QCA8K_LEDS_SUPPORT
+int qca8k_setup_led_ctrl(struct qca8k_priv *priv);
+#else
+static inline int qca8k_setup_led_ctrl(struct qca8k_priv *priv)
+{
+ return 0;
+}
+#endif
+
+#endif /* __QCA8K_LEDS_H */
diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
index ab42f75b9413..235fcacef5c5 100644
--- a/drivers/net/ethernet/amd/Kconfig
+++ b/drivers/net/ethernet/amd/Kconfig
@@ -186,4 +186,16 @@ config AMD_XGBE_HAVE_ECC
bool
default n
+config PDS_CORE
+ tristate "AMD/Pensando Data Systems Core Device Support"
+ depends on 64BIT && PCI
+ help
+	  This enables support for the AMD/Pensando Core device family of
+ adapters. More specific information on this driver can be
+ found in
+ <file:Documentation/networking/device_drivers/ethernet/amd/pds_core.rst>.
+
+ To compile this driver as a module, choose M here. The module
+ will be called pds_core.
+
endif # NET_VENDOR_AMD
diff --git a/drivers/net/ethernet/amd/Makefile b/drivers/net/ethernet/amd/Makefile
index 42742afe9115..2dcfb84731e1 100644
--- a/drivers/net/ethernet/amd/Makefile
+++ b/drivers/net/ethernet/amd/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_PCNET32) += pcnet32.o
obj-$(CONFIG_SUN3LANCE) += sun3lance.o
obj-$(CONFIG_SUNLANCE) += sunlance.o
obj-$(CONFIG_AMD_XGBE) += xgbe/
+obj-$(CONFIG_PDS_CORE) += pds_core/
diff --git a/drivers/net/ethernet/amd/pds_core/Makefile b/drivers/net/ethernet/amd/pds_core/Makefile
new file mode 100644
index 000000000000..0abc33ce826c
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2023 Advanced Micro Devices, Inc.
+
+obj-$(CONFIG_PDS_CORE) := pds_core.o
+
+pds_core-y := main.o \
+ devlink.o \
+ auxbus.o \
+ dev.o \
+ adminq.o \
+ core.o \
+ fw.o
+
+pds_core-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c
new file mode 100644
index 000000000000..045fe133f6ee
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/adminq.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include <linux/dynamic_debug.h>
+
+#include "core.h"
+
+struct pdsc_wait_context {
+ struct pdsc_qcq *qcq;
+ struct completion wait_completion;
+};
+
+static int pdsc_process_notifyq(struct pdsc_qcq *qcq)
+{
+ union pds_core_notifyq_comp *comp;
+ struct pdsc *pdsc = qcq->pdsc;
+ struct pdsc_cq *cq = &qcq->cq;
+ struct pdsc_cq_info *cq_info;
+ int nq_work = 0;
+ u64 eid;
+
+ cq_info = &cq->info[cq->tail_idx];
+ comp = cq_info->comp;
+ eid = le64_to_cpu(comp->event.eid);
+ while (eid > pdsc->last_eid) {
+ u16 ecode = le16_to_cpu(comp->event.ecode);
+
+ switch (ecode) {
+ case PDS_EVENT_LINK_CHANGE:
+ dev_info(pdsc->dev, "NotifyQ LINK_CHANGE ecode %d eid %lld\n",
+ ecode, eid);
+ pdsc_notify(PDS_EVENT_LINK_CHANGE, comp);
+ break;
+
+ case PDS_EVENT_RESET:
+ dev_info(pdsc->dev, "NotifyQ RESET ecode %d eid %lld\n",
+ ecode, eid);
+ pdsc_notify(PDS_EVENT_RESET, comp);
+ break;
+
+ case PDS_EVENT_XCVR:
+ dev_info(pdsc->dev, "NotifyQ XCVR ecode %d eid %lld\n",
+ ecode, eid);
+ break;
+
+ default:
+ dev_info(pdsc->dev, "NotifyQ ecode %d eid %lld\n",
+ ecode, eid);
+ break;
+ }
+
+ pdsc->last_eid = eid;
+ cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1);
+ cq_info = &cq->info[cq->tail_idx];
+ comp = cq_info->comp;
+ eid = le64_to_cpu(comp->event.eid);
+
+ nq_work++;
+ }
+
+ qcq->accum_work += nq_work;
+
+ return nq_work;
+}
+
+void pdsc_process_adminq(struct pdsc_qcq *qcq)
+{
+ union pds_core_adminq_comp *comp;
+ struct pdsc_queue *q = &qcq->q;
+ struct pdsc *pdsc = qcq->pdsc;
+ struct pdsc_cq *cq = &qcq->cq;
+ struct pdsc_q_info *q_info;
+ unsigned long irqflags;
+ int nq_work = 0;
+ int aq_work = 0;
+ int credits;
+
+ /* Don't process AdminQ when shutting down */
+ if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) {
+ dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n",
+ __func__);
+ return;
+ }
+
+ /* Check for NotifyQ event */
+ nq_work = pdsc_process_notifyq(&pdsc->notifyqcq);
+
+ /* Check for empty queue, which can happen if the interrupt was
+ * for a NotifyQ event and there are no new AdminQ completions.
+ */
+ if (q->tail_idx == q->head_idx)
+ goto credits;
+
+ /* Find the first completion to clean,
+ * run the callback in the related q_info,
+ * and continue while we still match done color
+ */
+ spin_lock_irqsave(&pdsc->adminq_lock, irqflags);
+ comp = cq->info[cq->tail_idx].comp;
+ while (pdsc_color_match(comp->color, cq->done_color)) {
+ q_info = &q->info[q->tail_idx];
+ q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
+
+ /* Copy out the completion data */
+ memcpy(q_info->dest, comp, sizeof(*comp));
+
+ complete_all(&q_info->wc->wait_completion);
+
+ if (cq->tail_idx == cq->num_descs - 1)
+ cq->done_color = !cq->done_color;
+ cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1);
+ comp = cq->info[cq->tail_idx].comp;
+
+ aq_work++;
+ }
+ spin_unlock_irqrestore(&pdsc->adminq_lock, irqflags);
+
+ qcq->accum_work += aq_work;
+
+credits:
+ /* Return the interrupt credits, one for each completion */
+ credits = nq_work + aq_work;
+ if (credits)
+ pds_core_intr_credits(&pdsc->intr_ctrl[qcq->intx],
+ credits,
+ PDS_CORE_INTR_CRED_REARM);
+}
+
+void pdsc_work_thread(struct work_struct *work)
+{
+ struct pdsc_qcq *qcq = container_of(work, struct pdsc_qcq, work);
+
+ pdsc_process_adminq(qcq);
+}
+
+irqreturn_t pdsc_adminq_isr(int irq, void *data)
+{
+ struct pdsc_qcq *qcq = data;
+ struct pdsc *pdsc = qcq->pdsc;
+
+ /* Don't process AdminQ when shutting down */
+ if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) {
+ dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n",
+ __func__);
+ return IRQ_HANDLED;
+ }
+
+ queue_work(pdsc->wq, &qcq->work);
+ pds_core_intr_mask(&pdsc->intr_ctrl[irq], PDS_CORE_INTR_MASK_CLEAR);
+
+ return IRQ_HANDLED;
+}
+
+static int __pdsc_adminq_post(struct pdsc *pdsc,
+ struct pdsc_qcq *qcq,
+ union pds_core_adminq_cmd *cmd,
+ union pds_core_adminq_comp *comp,
+ struct pdsc_wait_context *wc)
+{
+ struct pdsc_queue *q = &qcq->q;
+ struct pdsc_q_info *q_info;
+ unsigned long irqflags;
+ unsigned int avail;
+ int index;
+ int ret;
+
+ spin_lock_irqsave(&pdsc->adminq_lock, irqflags);
+
+ /* Check for space in the queue */
+ avail = q->tail_idx;
+ if (q->head_idx >= avail)
+ avail += q->num_descs - q->head_idx - 1;
+ else
+ avail -= q->head_idx + 1;
+ if (!avail) {
+ ret = -ENOSPC;
+ goto err_out_unlock;
+ }
+
+ /* Check that the FW is running */
+ if (!pdsc_is_fw_running(pdsc)) {
+ u8 fw_status = ioread8(&pdsc->info_regs->fw_status);
+
+ dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n",
+ __func__, fw_status);
+ ret = -ENXIO;
+
+ goto err_out_unlock;
+ }
+
+ /* Post the request */
+ index = q->head_idx;
+ q_info = &q->info[index];
+ q_info->wc = wc;
+ q_info->dest = comp;
+ memcpy(q_info->desc, cmd, sizeof(*cmd));
+
+ dev_dbg(pdsc->dev, "head_idx %d tail_idx %d\n",
+ q->head_idx, q->tail_idx);
+ dev_dbg(pdsc->dev, "post admin queue command:\n");
+ dynamic_hex_dump("cmd ", DUMP_PREFIX_OFFSET, 16, 1,
+ cmd, sizeof(*cmd), true);
+
+ q->head_idx = (q->head_idx + 1) & (q->num_descs - 1);
+
+ pds_core_dbell_ring(pdsc->kern_dbpage,
+ q->hw_type, q->dbval | q->head_idx);
+ ret = index;
+
+err_out_unlock:
+ spin_unlock_irqrestore(&pdsc->adminq_lock, irqflags);
+ return ret;
+}
+
+int pdsc_adminq_post(struct pdsc *pdsc,
+ union pds_core_adminq_cmd *cmd,
+ union pds_core_adminq_comp *comp,
+ bool fast_poll)
+{
+ struct pdsc_wait_context wc = {
+ .wait_completion =
+ COMPLETION_INITIALIZER_ONSTACK(wc.wait_completion),
+ };
+ unsigned long poll_interval = 1;
+ unsigned long poll_jiffies;
+ unsigned long time_limit;
+ unsigned long time_start;
+ unsigned long time_done;
+ unsigned long remaining;
+ int err = 0;
+ int index;
+
+ wc.qcq = &pdsc->adminqcq;
+ index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp, &wc);
+ if (index < 0) {
+ err = index;
+ goto err_out;
+ }
+
+ time_start = jiffies;
+ time_limit = time_start + HZ * pdsc->devcmd_timeout;
+ do {
+ /* Timeslice the actual wait to catch IO errors etc early */
+ poll_jiffies = msecs_to_jiffies(poll_interval);
+ remaining = wait_for_completion_timeout(&wc.wait_completion,
+ poll_jiffies);
+ if (remaining)
+ break;
+
+ if (!pdsc_is_fw_running(pdsc)) {
+ u8 fw_status = ioread8(&pdsc->info_regs->fw_status);
+
+ dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n",
+ __func__, fw_status);
+ err = -ENXIO;
+ break;
+ }
+
+ /* When fast_poll is not requested, prevent aggressive polling
+ * on failures due to timeouts by doing exponential back off.
+ */
+ if (!fast_poll && poll_interval < PDSC_ADMINQ_MAX_POLL_INTERVAL)
+ poll_interval <<= 1;
+ } while (time_before(jiffies, time_limit));
+ time_done = jiffies;
+ dev_dbg(pdsc->dev, "%s: elapsed %d msecs\n",
+ __func__, jiffies_to_msecs(time_done - time_start));
+
+ /* Check the results */
+ if (time_after_eq(time_done, time_limit))
+ err = -ETIMEDOUT;
+
+ dev_dbg(pdsc->dev, "read admin queue completion idx %d:\n", index);
+ dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1,
+ comp, sizeof(*comp), true);
+
+ if (remaining && comp->status)
+ err = pdsc_err_to_errno(comp->status);
+
+err_out:
+ if (err) {
+ dev_dbg(pdsc->dev, "%s: opcode %d status %d err %pe\n",
+ __func__, cmd->opcode, comp->status, ERR_PTR(err));
+ if (err == -ENXIO || err == -ETIMEDOUT)
+ queue_work(pdsc->wq, &pdsc->health_work);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(pdsc_adminq_post);
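
The completion loop in pdsc_process_adminq() relies on a "done color" convention: each completion is written with a color bit that flips every time the ring wraps, and the consumer flips its expected color at the same point, so a color mismatch marks the first not-yet-written entry without any shared index register. A userspace sketch of the scheme with a single-threaded producer and consumer:

	#include <stdbool.h>
	#include <stdio.h>

	#define NUM_DESCS 4	/* power of two, like the kernel ring */

	struct comp { bool color; int data; };

	static struct comp ring[NUM_DESCS];
	static bool prod_color = true, done_color = true;
	static int head, tail;

	static void post(int data)
	{
		ring[head].data = data;
		ring[head].color = prod_color;
		if (head == NUM_DESCS - 1)
			prod_color = !prod_color;	/* flip on wrap */
		head = (head + 1) & (NUM_DESCS - 1);
	}

	static void drain(void)
	{
		/* consume while the color matches, as the kernel loop does */
		while (ring[tail].color == done_color) {
			printf("comp %d\n", ring[tail].data);
			if (tail == NUM_DESCS - 1)
				done_color = !done_color;	/* flip on wrap */
			tail = (tail + 1) & (NUM_DESCS - 1);
		}
	}

	int main(void)
	{
		post(0); post(1); post(2);
		drain();		/* comp 0, 1, 2 */
		post(3); post(4);	/* wraps; new completions carry the flipped color */
		drain();		/* comp 3, 4 */
		return 0;
	}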
diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c
new file mode 100644
index 000000000000..561af8e5b3ea
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/auxbus.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include <linux/pci.h>
+
+#include "core.h"
+#include <linux/pds/pds_auxbus.h>
+
+/**
+ * pds_client_register - Link the client to the firmware
+ * @pf_pdev: ptr to the PF driver struct
+ * @devname: name that includes service info, e.g. pds_core.vDPA
+ *
+ * Return: 0 on success, or
+ * negative for error
+ */
+int pds_client_register(struct pci_dev *pf_pdev, char *devname)
+{
+ union pds_core_adminq_comp comp = {};
+ union pds_core_adminq_cmd cmd = {};
+ struct pdsc *pf;
+ int err;
+ u16 ci;
+
+ pf = pci_get_drvdata(pf_pdev);
+ if (pf->state)
+ return -ENXIO;
+
+ cmd.client_reg.opcode = PDS_AQ_CMD_CLIENT_REG;
+ strscpy(cmd.client_reg.devname, devname,
+ sizeof(cmd.client_reg.devname));
+
+ err = pdsc_adminq_post(pf, &cmd, &comp, false);
+ if (err) {
+ dev_info(pf->dev, "register dev_name %s with DSC failed, status %d: %pe\n",
+ devname, comp.status, ERR_PTR(err));
+ return err;
+ }
+
+ ci = le16_to_cpu(comp.client_reg.client_id);
+ if (!ci) {
+ dev_err(pf->dev, "%s: device returned null client_id\n",
+ __func__);
+ return -EIO;
+ }
+
+ dev_dbg(pf->dev, "%s: device returned client_id %d for %s\n",
+ __func__, ci, devname);
+
+ return ci;
+}
+EXPORT_SYMBOL_GPL(pds_client_register);
+
+/**
+ * pds_client_unregister - Unlink the client from the firmware
+ * @pf_pdev: ptr to the PF driver struct
+ * @client_id: id returned from pds_client_register()
+ *
+ * Return: 0 on success, or
+ * negative for error
+ */
+int pds_client_unregister(struct pci_dev *pf_pdev, u16 client_id)
+{
+ union pds_core_adminq_comp comp = {};
+ union pds_core_adminq_cmd cmd = {};
+ struct pdsc *pf;
+ int err;
+
+ pf = pci_get_drvdata(pf_pdev);
+ if (pf->state)
+ return -ENXIO;
+
+ cmd.client_unreg.opcode = PDS_AQ_CMD_CLIENT_UNREG;
+ cmd.client_unreg.client_id = cpu_to_le16(client_id);
+
+ err = pdsc_adminq_post(pf, &cmd, &comp, false);
+ if (err)
+ dev_info(pf->dev, "unregister client_id %d failed, status %d: %pe\n",
+ client_id, comp.status, ERR_PTR(err));
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(pds_client_unregister);
+
+/**
+ * pds_client_adminq_cmd - Process an adminq request for the client
+ * @padev: ptr to the client device
+ * @req: ptr to buffer with request
+ * @req_len: length of actual struct used for request
+ * @resp: ptr to buffer where answer is to be copied
+ * @flags: optional flags from pds_core_adminq_flags
+ *
+ * Return: 0 on success, or
+ * negative for error
+ *
+ * Client sends pointers to request and response buffers
+ * Core copies request data into pds_core_client_request_cmd
+ * Core sets other fields as needed
+ * Core posts to AdminQ
+ * Core copies completion data into response buffer
+ */
+int pds_client_adminq_cmd(struct pds_auxiliary_dev *padev,
+ union pds_core_adminq_cmd *req,
+ size_t req_len,
+ union pds_core_adminq_comp *resp,
+ u64 flags)
+{
+ union pds_core_adminq_cmd cmd = {};
+ struct pci_dev *pf_pdev;
+ struct pdsc *pf;
+ size_t cp_len;
+ int err;
+
+ pf_pdev = pci_physfn(padev->vf_pdev);
+ pf = pci_get_drvdata(pf_pdev);
+
+ dev_dbg(pf->dev, "%s: %s opcode %d\n",
+ __func__, dev_name(&padev->aux_dev.dev), req->opcode);
+
+ if (pf->state)
+ return -ENXIO;
+
+ /* Wrap the client's request */
+ cmd.client_request.opcode = PDS_AQ_CMD_CLIENT_CMD;
+ cmd.client_request.client_id = cpu_to_le16(padev->client_id);
+ cp_len = min_t(size_t, req_len, sizeof(cmd.client_request.client_cmd));
+ memcpy(cmd.client_request.client_cmd, req, cp_len);
+
+ err = pdsc_adminq_post(pf, &cmd, resp,
+ !!(flags & PDS_AQ_FLAG_FASTPOLL));
+ if (err && err != -EAGAIN)
+ dev_info(pf->dev, "client admin cmd failed: %pe\n",
+ ERR_PTR(err));
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(pds_client_adminq_cmd);
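+
+/* Illustrative usage sketch, not part of this patch: with the wrapping
+ * done above, a client issues one of its own commands roughly as follows
+ * (MY_CLIENT_CMD_OPCODE is a hypothetical client-defined opcode):
+ *
+ *	union pds_core_adminq_cmd cmd = {};
+ *	union pds_core_adminq_comp comp = {};
+ *	int err;
+ *
+ *	cmd.opcode = MY_CLIENT_CMD_OPCODE;
+ *	err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd), &comp,
+ *				    PDS_AQ_FLAG_FASTPOLL);
+ */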
+
+static void pdsc_auxbus_dev_release(struct device *dev)
+{
+ struct pds_auxiliary_dev *padev =
+ container_of(dev, struct pds_auxiliary_dev, aux_dev.dev);
+
+ kfree(padev);
+}
+
+static struct pds_auxiliary_dev *pdsc_auxbus_dev_register(struct pdsc *cf,
+ struct pdsc *pf,
+ u16 client_id,
+ char *name)
+{
+ struct auxiliary_device *aux_dev;
+ struct pds_auxiliary_dev *padev;
+ int err;
+
+ padev = kzalloc(sizeof(*padev), GFP_KERNEL);
+ if (!padev)
+ return ERR_PTR(-ENOMEM);
+
+ padev->vf_pdev = cf->pdev;
+ padev->client_id = client_id;
+
+ aux_dev = &padev->aux_dev;
+ aux_dev->name = name;
+ aux_dev->id = cf->uid;
+ aux_dev->dev.parent = cf->dev;
+ aux_dev->dev.release = pdsc_auxbus_dev_release;
+
+ err = auxiliary_device_init(aux_dev);
+ if (err < 0) {
+ dev_warn(cf->dev, "auxiliary_device_init of %s failed: %pe\n",
+ name, ERR_PTR(err));
+ goto err_out;
+ }
+
+ err = auxiliary_device_add(aux_dev);
+ if (err) {
+ dev_warn(cf->dev, "auxiliary_device_add of %s failed: %pe\n",
+ name, ERR_PTR(err));
+ goto err_out_uninit;
+ }
+
+ return padev;
+
+err_out_uninit:
+ auxiliary_device_uninit(aux_dev);
+err_out:
+ kfree(padev);
+ return ERR_PTR(err);
+}
+
+int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf)
+{
+ struct pds_auxiliary_dev *padev;
+ int err = 0;
+
+ mutex_lock(&pf->config_lock);
+
+ padev = pf->vfs[cf->vf_id].padev;
+ if (padev) {
+ pds_client_unregister(pf->pdev, padev->client_id);
+ auxiliary_device_delete(&padev->aux_dev);
+ auxiliary_device_uninit(&padev->aux_dev);
+ padev->client_id = 0;
+ }
+ pf->vfs[cf->vf_id].padev = NULL;
+
+ mutex_unlock(&pf->config_lock);
+ return err;
+}
+
+int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
+{
+ struct pds_auxiliary_dev *padev;
+ enum pds_core_vif_types vt;
+ char devname[PDS_DEVNAME_LEN];
+ u16 vt_support;
+ int client_id;
+ int err = 0;
+
+ mutex_lock(&pf->config_lock);
+
+ /* We only support vDPA so far, so it is the only type we
+ * verify is available in the Core device and enabled in the
+ * devlink param. In the future this might become a loop over
+ * several VIF types.
+ */
+
+ /* Verify that the type is supported and enabled. It is not
+ * an error if there is no auxbus device support for this
+ * VF, it just means something else needs to happen with it.
+ */
+ vt = PDS_DEV_TYPE_VDPA;
+ vt_support = !!le16_to_cpu(pf->dev_ident.vif_types[vt]);
+ if (!(vt_support &&
+ pf->viftype_status[vt].supported &&
+ pf->viftype_status[vt].enabled))
+ goto out_unlock;
+
+ /* Need to register with FW and get the client_id before
+ * creating the aux device so that the aux client can run
+ * adminq commands as part of its probe
+ */
+ snprintf(devname, sizeof(devname), "%s.%s.%d",
+ PDS_CORE_DRV_NAME, pf->viftype_status[vt].name, cf->uid);
+ client_id = pds_client_register(pf->pdev, devname);
+ if (client_id < 0) {
+ err = client_id;
+ goto out_unlock;
+ }
+
+ padev = pdsc_auxbus_dev_register(cf, pf, client_id,
+ pf->viftype_status[vt].name);
+ if (IS_ERR(padev)) {
+ pds_client_unregister(pf->pdev, client_id);
+ err = PTR_ERR(padev);
+ goto out_unlock;
+ }
+ pf->vfs[cf->vf_id].padev = padev;
+
+out_unlock:
+ mutex_unlock(&pf->config_lock);
+ return err;
+}
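+
+/* Illustrative sketch, not part of this patch: a client driver binds to
+ * the auxiliary device created above by matching "<modname>.<devname>",
+ * e.g. "pds_core.vDPA" (the probe/remove callbacks are hypothetical):
+ *
+ *	static const struct auxiliary_device_id pds_vdpa_id_table[] = {
+ *		{ .name = PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR },
+ *		{},
+ *	};
+ *
+ *	static struct auxiliary_driver pds_vdpa_driver = {
+ *		.probe = pds_vdpa_probe,
+ *		.remove = pds_vdpa_remove,
+ *		.name = PDS_DEV_TYPE_VDPA_STR,
+ *		.id_table = pds_vdpa_id_table,
+ *	};
+ */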
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
new file mode 100644
index 000000000000..483a070d96fa
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -0,0 +1,597 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include "core.h"
+
+static BLOCKING_NOTIFIER_HEAD(pds_notify_chain);
+
+int pdsc_register_notify(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&pds_notify_chain, nb);
+}
+EXPORT_SYMBOL_GPL(pdsc_register_notify);
+
+void pdsc_unregister_notify(struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&pds_notify_chain, nb);
+}
+EXPORT_SYMBOL_GPL(pdsc_unregister_notify);
+
+void pdsc_notify(unsigned long event, void *data)
+{
+ blocking_notifier_call_chain(&pds_notify_chain, event, data);
+}
+
+void pdsc_intr_free(struct pdsc *pdsc, int index)
+{
+ struct pdsc_intr_info *intr_info;
+
+ if (index >= pdsc->nintrs || index < 0) {
+ WARN(true, "bad intr index %d\n", index);
+ return;
+ }
+
+ intr_info = &pdsc->intr_info[index];
+ if (!intr_info->vector)
+ return;
+ dev_dbg(pdsc->dev, "%s: idx %d vec %d name %s\n",
+ __func__, index, intr_info->vector, intr_info->name);
+
+ pds_core_intr_mask(&pdsc->intr_ctrl[index], PDS_CORE_INTR_MASK_SET);
+ pds_core_intr_clean(&pdsc->intr_ctrl[index]);
+
+ free_irq(intr_info->vector, intr_info->data);
+
+ memset(intr_info, 0, sizeof(*intr_info));
+}
+
+int pdsc_intr_alloc(struct pdsc *pdsc, char *name,
+ irq_handler_t handler, void *data)
+{
+ struct pdsc_intr_info *intr_info;
+ unsigned int index;
+ int err;
+
+ /* Find the first available interrupt */
+ for (index = 0; index < pdsc->nintrs; index++)
+ if (!pdsc->intr_info[index].vector)
+ break;
+ if (index >= pdsc->nintrs) {
+ dev_warn(pdsc->dev, "%s: no intr, index=%d nintrs=%d\n",
+ __func__, index, pdsc->nintrs);
+ return -ENOSPC;
+ }
+
+ pds_core_intr_clean_flags(&pdsc->intr_ctrl[index],
+ PDS_CORE_INTR_CRED_RESET_COALESCE);
+
+ intr_info = &pdsc->intr_info[index];
+
+ intr_info->index = index;
+ intr_info->data = data;
+ strscpy(intr_info->name, name, sizeof(intr_info->name));
+
+ /* Get the OS vector number for the interrupt */
+ err = pci_irq_vector(pdsc->pdev, index);
+ if (err < 0) {
+ dev_err(pdsc->dev, "failed to get intr vector index %d: %pe\n",
+ index, ERR_PTR(err));
+ goto err_out_free_intr;
+ }
+ intr_info->vector = err;
+
+ /* Init the device's intr mask */
+ pds_core_intr_clean(&pdsc->intr_ctrl[index]);
+ pds_core_intr_mask_assert(&pdsc->intr_ctrl[index], 1);
+ pds_core_intr_mask(&pdsc->intr_ctrl[index], PDS_CORE_INTR_MASK_SET);
+
+ /* Register the isr with a name */
+ err = request_irq(intr_info->vector, handler, 0, intr_info->name, data);
+ if (err) {
+ dev_err(pdsc->dev, "failed to get intr irq vector %d: %pe\n",
+ intr_info->vector, ERR_PTR(err));
+ goto err_out_free_intr;
+ }
+
+ return index;
+
+err_out_free_intr:
+ pdsc_intr_free(pdsc, index);
+ return err;
+}
+
+static void pdsc_qcq_intr_free(struct pdsc *pdsc, struct pdsc_qcq *qcq)
+{
+ if (!(qcq->flags & PDS_CORE_QCQ_F_INTR) ||
+ qcq->intx == PDS_CORE_INTR_INDEX_NOT_ASSIGNED)
+ return;
+
+ pdsc_intr_free(pdsc, qcq->intx);
+ qcq->intx = PDS_CORE_INTR_INDEX_NOT_ASSIGNED;
+}
+
+static int pdsc_qcq_intr_alloc(struct pdsc *pdsc, struct pdsc_qcq *qcq)
+{
+ char name[PDSC_INTR_NAME_MAX_SZ];
+ int index;
+
+ if (!(qcq->flags & PDS_CORE_QCQ_F_INTR)) {
+ qcq->intx = PDS_CORE_INTR_INDEX_NOT_ASSIGNED;
+ return 0;
+ }
+
+ snprintf(name, sizeof(name), "%s-%d-%s",
+ PDS_CORE_DRV_NAME, pdsc->pdev->bus->number, qcq->q.name);
+ index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, qcq);
+ if (index < 0)
+ return index;
+ qcq->intx = index;
+
+ return 0;
+}
+
+void pdsc_qcq_free(struct pdsc *pdsc, struct pdsc_qcq *qcq)
+{
+ struct device *dev = pdsc->dev;
+
+ if (!(qcq && qcq->pdsc))
+ return;
+
+ pdsc_debugfs_del_qcq(qcq);
+
+ pdsc_qcq_intr_free(pdsc, qcq);
+
+ if (qcq->q_base)
+ dma_free_coherent(dev, qcq->q_size,
+ qcq->q_base, qcq->q_base_pa);
+
+ if (qcq->cq_base)
+ dma_free_coherent(dev, qcq->cq_size,
+ qcq->cq_base, qcq->cq_base_pa);
+
+ /* vfree() is a no-op on NULL, so no need to check first */
+ vfree(qcq->cq.info);
+ vfree(qcq->q.info);
+
+ memset(qcq, 0, sizeof(*qcq));
+}
+
+static void pdsc_q_map(struct pdsc_queue *q, void *base, dma_addr_t base_pa)
+{
+ struct pdsc_q_info *cur;
+ unsigned int i;
+
+ q->base = base;
+ q->base_pa = base_pa;
+
+ for (i = 0, cur = q->info; i < q->num_descs; i++, cur++)
+ cur->desc = base + (i * q->desc_size);
+}
+
+static void pdsc_cq_map(struct pdsc_cq *cq, void *base, dma_addr_t base_pa)
+{
+ struct pdsc_cq_info *cur;
+ unsigned int i;
+
+ cq->base = base;
+ cq->base_pa = base_pa;
+
+ for (i = 0, cur = cq->info; i < cq->num_descs; i++, cur++)
+ cur->comp = base + (i * cq->desc_size);
+}
+
+int pdsc_qcq_alloc(struct pdsc *pdsc, unsigned int type, unsigned int index,
+ const char *name, unsigned int flags, unsigned int num_descs,
+ unsigned int desc_size, unsigned int cq_desc_size,
+ unsigned int pid, struct pdsc_qcq *qcq)
+{
+ struct device *dev = pdsc->dev;
+ void *q_base, *cq_base;
+ dma_addr_t cq_base_pa;
+ dma_addr_t q_base_pa;
+ int err;
+
+ qcq->q.info = vzalloc(num_descs * sizeof(*qcq->q.info));
+ if (!qcq->q.info) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ qcq->pdsc = pdsc;
+ qcq->flags = flags;
+ INIT_WORK(&qcq->work, pdsc_work_thread);
+
+ qcq->q.type = type;
+ qcq->q.index = index;
+ qcq->q.num_descs = num_descs;
+ qcq->q.desc_size = desc_size;
+ qcq->q.tail_idx = 0;
+ qcq->q.head_idx = 0;
+ qcq->q.pid = pid;
+ snprintf(qcq->q.name, sizeof(qcq->q.name), "%s%u", name, index);
+
+ err = pdsc_qcq_intr_alloc(pdsc, qcq);
+ if (err)
+ goto err_out_free_q_info;
+
+ qcq->cq.info = vzalloc(num_descs * sizeof(*qcq->cq.info));
+ if (!qcq->cq.info) {
+ err = -ENOMEM;
+ goto err_out_free_irq;
+ }
+
+ qcq->cq.bound_intr = &pdsc->intr_info[qcq->intx];
+ qcq->cq.num_descs = num_descs;
+ qcq->cq.desc_size = cq_desc_size;
+ qcq->cq.tail_idx = 0;
+ qcq->cq.done_color = 1;
+
+ if (flags & PDS_CORE_QCQ_F_NOTIFYQ) {
+ /* q & cq need to be contiguous in case of notifyq */
+ qcq->q_size = PDS_PAGE_SIZE +
+ ALIGN(num_descs * desc_size, PDS_PAGE_SIZE) +
+ ALIGN(num_descs * cq_desc_size, PDS_PAGE_SIZE);
+ qcq->q_base = dma_alloc_coherent(dev,
+ qcq->q_size + qcq->cq_size,
+ &qcq->q_base_pa,
+ GFP_KERNEL);
+ if (!qcq->q_base) {
+ err = -ENOMEM;
+ goto err_out_free_cq_info;
+ }
+ q_base = PTR_ALIGN(qcq->q_base, PDS_PAGE_SIZE);
+ q_base_pa = ALIGN(qcq->q_base_pa, PDS_PAGE_SIZE);
+ pdsc_q_map(&qcq->q, q_base, q_base_pa);
+
+ cq_base = PTR_ALIGN(q_base +
+ ALIGN(num_descs * desc_size, PDS_PAGE_SIZE),
+ PDS_PAGE_SIZE);
+ cq_base_pa = ALIGN(qcq->q_base_pa +
+ ALIGN(num_descs * desc_size, PDS_PAGE_SIZE),
+ PDS_PAGE_SIZE);
+
+ } else {
+ /* q DMA descriptors */
+ qcq->q_size = PDS_PAGE_SIZE + (num_descs * desc_size);
+ qcq->q_base = dma_alloc_coherent(dev, qcq->q_size,
+ &qcq->q_base_pa,
+ GFP_KERNEL);
+ if (!qcq->q_base) {
+ err = -ENOMEM;
+ goto err_out_free_cq_info;
+ }
+ q_base = PTR_ALIGN(qcq->q_base, PDS_PAGE_SIZE);
+ q_base_pa = ALIGN(qcq->q_base_pa, PDS_PAGE_SIZE);
+ pdsc_q_map(&qcq->q, q_base, q_base_pa);
+
+ /* cq DMA descriptors */
+ qcq->cq_size = PDS_PAGE_SIZE + (num_descs * cq_desc_size);
+ qcq->cq_base = dma_alloc_coherent(dev, qcq->cq_size,
+ &qcq->cq_base_pa,
+ GFP_KERNEL);
+ if (!qcq->cq_base) {
+ err = -ENOMEM;
+ goto err_out_free_q;
+ }
+ cq_base = PTR_ALIGN(qcq->cq_base, PDS_PAGE_SIZE);
+ cq_base_pa = ALIGN(qcq->cq_base_pa, PDS_PAGE_SIZE);
+ }
+
+ pdsc_cq_map(&qcq->cq, cq_base, cq_base_pa);
+ qcq->cq.bound_q = &qcq->q;
+
+ pdsc_debugfs_add_qcq(pdsc, qcq);
+
+ return 0;
+
+err_out_free_q:
+ dma_free_coherent(dev, qcq->q_size, qcq->q_base, qcq->q_base_pa);
+err_out_free_cq_info:
+ vfree(qcq->cq.info);
+err_out_free_irq:
+ pdsc_qcq_intr_free(pdsc, qcq);
+err_out_free_q_info:
+ vfree(qcq->q.info);
+ memset(qcq, 0, sizeof(*qcq));
+err_out:
+ dev_err(dev, "qcq alloc of %s%d failed %d\n", name, index, err);
+ return err;
+}
+
+static int pdsc_core_init(struct pdsc *pdsc)
+{
+ union pds_core_dev_comp comp = {};
+ union pds_core_dev_cmd cmd = {
+ .init.opcode = PDS_CORE_CMD_INIT,
+ };
+ struct pds_core_dev_init_data_out cido;
+ struct pds_core_dev_init_data_in cidi;
+ u32 dbid_count;
+ u32 dbpage_num;
+ size_t sz;
+ int err;
+
+ cidi.adminq_q_base = cpu_to_le64(pdsc->adminqcq.q_base_pa);
+ cidi.adminq_cq_base = cpu_to_le64(pdsc->adminqcq.cq_base_pa);
+ cidi.notifyq_cq_base = cpu_to_le64(pdsc->notifyqcq.cq.base_pa);
+ cidi.flags = cpu_to_le32(PDS_CORE_QINIT_F_IRQ | PDS_CORE_QINIT_F_ENA);
+ cidi.intr_index = cpu_to_le16(pdsc->adminqcq.intx);
+ cidi.adminq_ring_size = ilog2(pdsc->adminqcq.q.num_descs);
+ cidi.notifyq_ring_size = ilog2(pdsc->notifyqcq.q.num_descs);
+
+ mutex_lock(&pdsc->devcmd_lock);
+
+ sz = min_t(size_t, sizeof(cidi), sizeof(pdsc->cmd_regs->data));
+ memcpy_toio(&pdsc->cmd_regs->data, &cidi, sz);
+
+ err = pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+ if (!err) {
+ sz = min_t(size_t, sizeof(cido), sizeof(pdsc->cmd_regs->data));
+ memcpy_fromio(&cido, &pdsc->cmd_regs->data, sz);
+ }
+
+ mutex_unlock(&pdsc->devcmd_lock);
+ if (err) {
+ dev_err(pdsc->dev, "Device init command failed: %pe\n",
+ ERR_PTR(err));
+ return err;
+ }
+
+ pdsc->hw_index = le32_to_cpu(cido.core_hw_index);
+
+ dbid_count = le32_to_cpu(pdsc->dev_ident.ndbpgs_per_lif);
+ dbpage_num = pdsc->hw_index * dbid_count;
+ pdsc->kern_dbpage = pdsc_map_dbpage(pdsc, dbpage_num);
+ if (!pdsc->kern_dbpage) {
+ dev_err(pdsc->dev, "Cannot map dbpage, aborting\n");
+ return -ENOMEM;
+ }
+
+ pdsc->adminqcq.q.hw_type = cido.adminq_hw_type;
+ pdsc->adminqcq.q.hw_index = le32_to_cpu(cido.adminq_hw_index);
+ pdsc->adminqcq.q.dbval = PDS_CORE_DBELL_QID(pdsc->adminqcq.q.hw_index);
+
+ pdsc->notifyqcq.q.hw_type = cido.notifyq_hw_type;
+ pdsc->notifyqcq.q.hw_index = le32_to_cpu(cido.notifyq_hw_index);
+ pdsc->notifyqcq.q.dbval = PDS_CORE_DBELL_QID(pdsc->notifyqcq.q.hw_index);
+
+ pdsc->last_eid = 0;
+
+ return err;
+}
+
+static struct pdsc_viftype pdsc_viftype_defaults[] = {
+ [PDS_DEV_TYPE_VDPA] = { .name = PDS_DEV_TYPE_VDPA_STR,
+ .vif_id = PDS_DEV_TYPE_VDPA,
+ .dl_id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET },
+ [PDS_DEV_TYPE_MAX] = {}
+};
+
+static int pdsc_viftypes_init(struct pdsc *pdsc)
+{
+ enum pds_core_vif_types vt;
+
+ pdsc->viftype_status = kzalloc(sizeof(pdsc_viftype_defaults),
+ GFP_KERNEL);
+ if (!pdsc->viftype_status)
+ return -ENOMEM;
+
+ for (vt = 0; vt < PDS_DEV_TYPE_MAX; vt++) {
+ bool vt_support;
+
+ if (!pdsc_viftype_defaults[vt].name)
+ continue;
+
+ /* Grab the defaults */
+ pdsc->viftype_status[vt] = pdsc_viftype_defaults[vt];
+
+ /* See what the Core device has for support */
+ vt_support = !!le16_to_cpu(pdsc->dev_ident.vif_types[vt]);
+ dev_dbg(pdsc->dev, "VIF %s is %ssupported\n",
+ pdsc->viftype_status[vt].name,
+ vt_support ? "" : "not ");
+
+ pdsc->viftype_status[vt].supported = vt_support;
+ }
+
+ return 0;
+}
+
+int pdsc_setup(struct pdsc *pdsc, bool init)
+{
+ int numdescs;
+ int err;
+
+ if (init)
+ err = pdsc_dev_init(pdsc);
+ else
+ err = pdsc_dev_reinit(pdsc);
+ if (err)
+ return err;
+
+ /* Scale the descriptor ring length based on number of CPUs and VFs */
+ numdescs = max_t(int, PDSC_ADMINQ_MIN_LENGTH, num_online_cpus());
+ numdescs += 2 * pci_sriov_get_totalvfs(pdsc->pdev);
+ numdescs = roundup_pow_of_two(numdescs);
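+ /* A worked example of the sizing above (illustrative): with 16
+ * online CPUs and 8 total VFs this is max(16, 16) + 2 * 8 = 32,
+ * already a power of two, so the adminq gets 32 descriptors.
+ */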
+ err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_ADMINQ, 0, "adminq",
+ PDS_CORE_QCQ_F_CORE | PDS_CORE_QCQ_F_INTR,
+ numdescs,
+ sizeof(union pds_core_adminq_cmd),
+ sizeof(union pds_core_adminq_comp),
+ 0, &pdsc->adminqcq);
+ if (err)
+ goto err_out_teardown;
+
+ err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_NOTIFYQ, 0, "notifyq",
+ PDS_CORE_QCQ_F_NOTIFYQ,
+ PDSC_NOTIFYQ_LENGTH,
+ sizeof(struct pds_core_notifyq_cmd),
+ sizeof(union pds_core_notifyq_comp),
+ 0, &pdsc->notifyqcq);
+ if (err)
+ goto err_out_teardown;
+
+ /* NotifyQ rides on the AdminQ interrupt */
+ pdsc->notifyqcq.intx = pdsc->adminqcq.intx;
+
+ /* Set up the Core with the AdminQ and NotifyQ info */
+ err = pdsc_core_init(pdsc);
+ if (err)
+ goto err_out_teardown;
+
+ /* Set up the VIFs */
+ err = pdsc_viftypes_init(pdsc);
+ if (err)
+ goto err_out_teardown;
+
+ if (init)
+ pdsc_debugfs_add_viftype(pdsc);
+
+ clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
+ return 0;
+
+err_out_teardown:
+ pdsc_teardown(pdsc, init);
+ return err;
+}
+
+void pdsc_teardown(struct pdsc *pdsc, bool removing)
+{
+ int i;
+
+ pdsc_devcmd_reset(pdsc);
+ pdsc_qcq_free(pdsc, &pdsc->notifyqcq);
+ pdsc_qcq_free(pdsc, &pdsc->adminqcq);
+
+ kfree(pdsc->viftype_status);
+ pdsc->viftype_status = NULL;
+
+ if (pdsc->intr_info) {
+ for (i = 0; i < pdsc->nintrs; i++)
+ pdsc_intr_free(pdsc, i);
+
+ if (removing) {
+ kfree(pdsc->intr_info);
+ pdsc->intr_info = NULL;
+ }
+ }
+
+ if (pdsc->kern_dbpage) {
+ iounmap(pdsc->kern_dbpage);
+ pdsc->kern_dbpage = NULL;
+ }
+
+ set_bit(PDSC_S_FW_DEAD, &pdsc->state);
+}
+
+int pdsc_start(struct pdsc *pdsc)
+{
+ pds_core_intr_mask(&pdsc->intr_ctrl[pdsc->adminqcq.intx],
+ PDS_CORE_INTR_MASK_CLEAR);
+
+ return 0;
+}
+
+void pdsc_stop(struct pdsc *pdsc)
+{
+ int i;
+
+ if (!pdsc->intr_info)
+ return;
+
+ /* Mask interrupts that are in use */
+ for (i = 0; i < pdsc->nintrs; i++)
+ if (pdsc->intr_info[i].vector)
+ pds_core_intr_mask(&pdsc->intr_ctrl[i],
+ PDS_CORE_INTR_MASK_SET);
+}
+
+static void pdsc_fw_down(struct pdsc *pdsc)
+{
+ union pds_core_notifyq_comp reset_event = {
+ .reset.ecode = cpu_to_le16(PDS_EVENT_RESET),
+ .reset.state = 0,
+ };
+
+ if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+ dev_err(pdsc->dev, "%s: already happening\n", __func__);
+ return;
+ }
+
+ /* Notify clients of fw_down */
+ devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
+ pdsc_notify(PDS_EVENT_RESET, &reset_event);
+
+ pdsc_stop(pdsc);
+ pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
+}
+
+static void pdsc_fw_up(struct pdsc *pdsc)
+{
+ union pds_core_notifyq_comp reset_event = {
+ .reset.ecode = cpu_to_le16(PDS_EVENT_RESET),
+ .reset.state = 1,
+ };
+ int err;
+
+ if (!test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+ dev_err(pdsc->dev, "%s: fw not dead\n", __func__);
+ return;
+ }
+
+ err = pdsc_setup(pdsc, PDSC_SETUP_RECOVERY);
+ if (err)
+ goto err_out;
+
+ err = pdsc_start(pdsc);
+ if (err)
+ goto err_out;
+
+ /* Notify clients of fw_up */
+ pdsc->fw_recoveries++;
+ devlink_health_reporter_state_update(pdsc->fw_reporter,
+ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+ pdsc_notify(PDS_EVENT_RESET, &reset_event);
+
+ return;
+
+err_out:
+ pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
+}
+
+void pdsc_health_thread(struct work_struct *work)
+{
+ struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
+ unsigned long mask;
+ bool healthy;
+
+ mutex_lock(&pdsc->config_lock);
+
+ /* Don't do a check when in a transition state */
+ mask = BIT_ULL(PDSC_S_INITING_DRIVER) |
+ BIT_ULL(PDSC_S_STOPPING_DRIVER);
+ if (pdsc->state & mask)
+ goto out_unlock;
+
+ healthy = pdsc_is_fw_good(pdsc);
+ dev_dbg(pdsc->dev, "%s: health %d fw_status %#02x fw_heartbeat %d\n",
+ __func__, healthy, pdsc->fw_status, pdsc->last_hb);
+
+ if (test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+ if (healthy)
+ pdsc_fw_up(pdsc);
+ } else {
+ if (!healthy)
+ pdsc_fw_down(pdsc);
+ }
+
+ pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
+
+out_unlock:
+ mutex_unlock(&pdsc->config_lock);
+}
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
new file mode 100644
index 000000000000..e545fafc4819
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#ifndef _PDSC_H_
+#define _PDSC_H_
+
+#include <linux/debugfs.h>
+#include <net/devlink.h>
+
+#include <linux/pds/pds_common.h>
+#include <linux/pds/pds_core_if.h>
+#include <linux/pds/pds_adminq.h>
+#include <linux/pds/pds_intr.h>
+
+#define PDSC_DRV_DESCRIPTION "AMD/Pensando Core Driver"
+
+#define PDSC_WATCHDOG_SECS 5
+#define PDSC_QUEUE_NAME_MAX_SZ 32
+#define PDSC_ADMINQ_MIN_LENGTH 16 /* must be a power of two */
+#define PDSC_NOTIFYQ_LENGTH 64 /* must be a power of two */
+#define PDSC_TEARDOWN_RECOVERY false
+#define PDSC_TEARDOWN_REMOVING true
+#define PDSC_SETUP_RECOVERY false
+#define PDSC_SETUP_INIT true
+
+struct pdsc_dev_bar {
+ void __iomem *vaddr;
+ phys_addr_t bus_addr;
+ unsigned long len;
+ int res_index;
+};
+
+struct pdsc;
+
+struct pdsc_vf {
+ struct pds_auxiliary_dev *padev;
+ struct pdsc *vf;
+ u16 index;
+ __le16 vif_types[PDS_DEV_TYPE_MAX];
+};
+
+struct pdsc_devinfo {
+ u8 asic_type;
+ u8 asic_rev;
+ char fw_version[PDS_CORE_DEVINFO_FWVERS_BUFLEN + 1];
+ char serial_num[PDS_CORE_DEVINFO_SERIAL_BUFLEN + 1];
+};
+
+struct pdsc_queue {
+ struct pdsc_q_info *info;
+ u64 dbval;
+ u16 head_idx;
+ u16 tail_idx;
+ u8 hw_type;
+ unsigned int index;
+ unsigned int num_descs;
+ u64 dbell_count;
+ u64 features;
+ unsigned int type;
+ unsigned int hw_index;
+ union {
+ void *base;
+ struct pds_core_admin_cmd *adminq;
+ };
+ dma_addr_t base_pa; /* must be page aligned */
+ unsigned int desc_size;
+ unsigned int pid;
+ char name[PDSC_QUEUE_NAME_MAX_SZ];
+};
+
+#define PDSC_INTR_NAME_MAX_SZ 32
+
+struct pdsc_intr_info {
+ char name[PDSC_INTR_NAME_MAX_SZ];
+ unsigned int index;
+ unsigned int vector;
+ void *data;
+};
+
+struct pdsc_cq_info {
+ void *comp;
+};
+
+struct pdsc_buf_info {
+ struct page *page;
+ dma_addr_t dma_addr;
+ u32 page_offset;
+ u32 len;
+};
+
+struct pdsc_q_info {
+ union {
+ void *desc;
+ struct pdsc_admin_cmd *adminq_desc;
+ };
+ unsigned int bytes;
+ unsigned int nbufs;
+ struct pdsc_buf_info bufs[PDS_CORE_MAX_FRAGS];
+ struct pdsc_wait_context *wc;
+ void *dest;
+};
+
+struct pdsc_cq {
+ struct pdsc_cq_info *info;
+ struct pdsc_queue *bound_q;
+ struct pdsc_intr_info *bound_intr;
+ u16 tail_idx;
+ bool done_color;
+ unsigned int num_descs;
+ unsigned int desc_size;
+ void *base;
+ dma_addr_t base_pa; /* must be page aligned */
+} ____cacheline_aligned_in_smp;
+
+struct pdsc_qcq {
+ struct pdsc *pdsc;
+ void *q_base;
+ dma_addr_t q_base_pa; /* might not be page aligned */
+ void *cq_base;
+ dma_addr_t cq_base_pa; /* might not be page aligned */
+ u32 q_size;
+ u32 cq_size;
+ bool armed;
+ unsigned int flags;
+
+ struct work_struct work;
+ struct pdsc_queue q;
+ struct pdsc_cq cq;
+ int intx;
+
+ u32 accum_work;
+ struct dentry *dentry;
+};
+
+struct pdsc_viftype {
+ char *name;
+ bool supported;
+ bool enabled;
+ int dl_id;
+ int vif_id;
+ struct pds_auxiliary_dev *padev;
+};
+
+/* No state flags set means we are in a steady running state */
+enum pdsc_state_flags {
+ PDSC_S_FW_DEAD, /* stopped, wait on startup or recovery */
+ PDSC_S_INITING_DRIVER, /* initial startup from probe */
+ PDSC_S_STOPPING_DRIVER, /* driver remove */
+
+ /* leave this as last */
+ PDSC_S_STATE_SIZE
+};
+
+struct pdsc {
+ struct pci_dev *pdev;
+ struct dentry *dentry;
+ struct device *dev;
+ struct pdsc_dev_bar bars[PDS_CORE_BARS_MAX];
+ struct pdsc_vf *vfs;
+ int num_vfs;
+ int vf_id;
+ int hw_index;
+ int uid;
+
+ unsigned long state;
+ u8 fw_status;
+ u8 fw_generation;
+ unsigned long last_fw_time;
+ u32 last_hb;
+ struct timer_list wdtimer;
+ unsigned int wdtimer_period;
+ struct work_struct health_work;
+ struct devlink_health_reporter *fw_reporter;
+ u32 fw_recoveries;
+
+ struct pdsc_devinfo dev_info;
+ struct pds_core_dev_identity dev_ident;
+ unsigned int nintrs;
+ struct pdsc_intr_info *intr_info; /* array of nintrs elements */
+
+ struct workqueue_struct *wq;
+
+ unsigned int devcmd_timeout;
+ struct mutex devcmd_lock; /* lock for dev_cmd operations */
+ struct mutex config_lock; /* lock for configuration operations */
+ spinlock_t adminq_lock; /* lock for adminq operations */
+ struct pds_core_dev_info_regs __iomem *info_regs;
+ struct pds_core_dev_cmd_regs __iomem *cmd_regs;
+ struct pds_core_intr __iomem *intr_ctrl;
+ u64 __iomem *intr_status;
+ u64 __iomem *db_pages;
+ dma_addr_t phy_db_pages;
+ u64 __iomem *kern_dbpage;
+
+ struct pdsc_qcq adminqcq;
+ struct pdsc_qcq notifyqcq;
+ u64 last_eid;
+ struct pdsc_viftype *viftype_status;
+};
+
+/**
+ * enum pds_core_dbell_bits - bitwise composition of dbell values.
+ *
+ * @PDS_CORE_DBELL_QID_MASK: unshifted mask of valid queue id bits.
+ * @PDS_CORE_DBELL_QID_SHIFT: queue id shift amount in dbell value.
+ * @PDS_CORE_DBELL_QID: macro to build QID component of dbell value.
+ *
+ * @PDS_CORE_DBELL_RING_MASK: unshifted mask of valid ring bits.
+ * @PDS_CORE_DBELL_RING_SHIFT: ring shift amount in dbell value.
+ * @PDS_CORE_DBELL_RING: macro to build ring component of dbell value.
+ *
+ * @PDS_CORE_DBELL_RING_0: ring zero dbell component value.
+ * @PDS_CORE_DBELL_RING_1: ring one dbell component value.
+ * @PDS_CORE_DBELL_RING_2: ring two dbell component value.
+ * @PDS_CORE_DBELL_RING_3: ring three dbell component value.
+ *
+ * @PDS_CORE_DBELL_INDEX_MASK: bit mask of valid index bits, no shift needed.
+ */
+enum pds_core_dbell_bits {
+ PDS_CORE_DBELL_QID_MASK = 0xffffff,
+ PDS_CORE_DBELL_QID_SHIFT = 24,
+
+#define PDS_CORE_DBELL_QID(n) \
+ (((u64)(n) & PDS_CORE_DBELL_QID_MASK) << PDS_CORE_DBELL_QID_SHIFT)
+
+ PDS_CORE_DBELL_RING_MASK = 0x7,
+ PDS_CORE_DBELL_RING_SHIFT = 16,
+
+#define PDS_CORE_DBELL_RING(n) \
+ (((u64)(n) & PDS_CORE_DBELL_RING_MASK) << PDS_CORE_DBELL_RING_SHIFT)
+
+ PDS_CORE_DBELL_RING_0 = 0,
+ PDS_CORE_DBELL_RING_1 = PDS_CORE_DBELL_RING(1),
+ PDS_CORE_DBELL_RING_2 = PDS_CORE_DBELL_RING(2),
+ PDS_CORE_DBELL_RING_3 = PDS_CORE_DBELL_RING(3),
+
+ PDS_CORE_DBELL_INDEX_MASK = 0xffff,
+};
+
+static inline void pds_core_dbell_ring(u64 __iomem *db_page,
+ enum pds_core_logical_qtype qtype,
+ u64 val)
+{
+ writeq(val, &db_page[qtype]);
+}
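+
+/* Illustrative sketch, not part of this patch: a full doorbell value is
+ * the queue id component OR'd with the current producer index, e.g.
+ *
+ *	pds_core_dbell_ring(pdsc->kern_dbpage, q->hw_type,
+ *			    q->dbval | q->head_idx);
+ *
+ * where q->dbval was built with PDS_CORE_DBELL_QID(q->hw_index) during
+ * device init (see pdsc_core_init()).
+ */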
+
+int pdsc_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg,
+ struct netlink_ext_ack *extack);
+int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
+ struct netlink_ext_ack *extack);
+int pdsc_dl_flash_update(struct devlink *dl,
+ struct devlink_flash_update_params *params,
+ struct netlink_ext_ack *extack);
+int pdsc_dl_enable_get(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx);
+int pdsc_dl_enable_set(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx);
+int pdsc_dl_enable_validate(struct devlink *dl, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack);
+
+void __iomem *pdsc_map_dbpage(struct pdsc *pdsc, int page_num);
+
+void pdsc_debugfs_create(void);
+void pdsc_debugfs_destroy(void);
+void pdsc_debugfs_add_dev(struct pdsc *pdsc);
+void pdsc_debugfs_del_dev(struct pdsc *pdsc);
+void pdsc_debugfs_add_ident(struct pdsc *pdsc);
+void pdsc_debugfs_add_viftype(struct pdsc *pdsc);
+void pdsc_debugfs_add_irqs(struct pdsc *pdsc);
+void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq);
+void pdsc_debugfs_del_qcq(struct pdsc_qcq *qcq);
+
+int pdsc_err_to_errno(enum pds_core_status_code code);
+bool pdsc_is_fw_running(struct pdsc *pdsc);
+bool pdsc_is_fw_good(struct pdsc *pdsc);
+int pdsc_devcmd(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
+ union pds_core_dev_comp *comp, int max_seconds);
+int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
+ union pds_core_dev_comp *comp, int max_seconds);
+int pdsc_devcmd_init(struct pdsc *pdsc);
+int pdsc_devcmd_reset(struct pdsc *pdsc);
+int pdsc_dev_reinit(struct pdsc *pdsc);
+int pdsc_dev_init(struct pdsc *pdsc);
+
+int pdsc_intr_alloc(struct pdsc *pdsc, char *name,
+ irq_handler_t handler, void *data);
+void pdsc_intr_free(struct pdsc *pdsc, int index);
+void pdsc_qcq_free(struct pdsc *pdsc, struct pdsc_qcq *qcq);
+int pdsc_qcq_alloc(struct pdsc *pdsc, unsigned int type, unsigned int index,
+ const char *name, unsigned int flags, unsigned int num_descs,
+ unsigned int desc_size, unsigned int cq_desc_size,
+ unsigned int pid, struct pdsc_qcq *qcq);
+int pdsc_setup(struct pdsc *pdsc, bool init);
+void pdsc_teardown(struct pdsc *pdsc, bool removing);
+int pdsc_start(struct pdsc *pdsc);
+void pdsc_stop(struct pdsc *pdsc);
+void pdsc_health_thread(struct work_struct *work);
+
+int pdsc_register_notify(struct notifier_block *nb);
+void pdsc_unregister_notify(struct notifier_block *nb);
+void pdsc_notify(unsigned long event, void *data);
+int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf);
+int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf);
+
+void pdsc_process_adminq(struct pdsc_qcq *qcq);
+void pdsc_work_thread(struct work_struct *work);
+irqreturn_t pdsc_adminq_isr(int irq, void *data);
+
+int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
+ struct netlink_ext_ack *extack);
+#endif /* _PDSC_H_ */
diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c
new file mode 100644
index 000000000000..8ec392299b7d
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/debugfs.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include <linux/pci.h>
+
+#include "core.h"
+
+static struct dentry *pdsc_dir;
+
+void pdsc_debugfs_create(void)
+{
+ pdsc_dir = debugfs_create_dir(PDS_CORE_DRV_NAME, NULL);
+}
+
+void pdsc_debugfs_destroy(void)
+{
+ debugfs_remove_recursive(pdsc_dir);
+}
+
+void pdsc_debugfs_add_dev(struct pdsc *pdsc)
+{
+ pdsc->dentry = debugfs_create_dir(pci_name(pdsc->pdev), pdsc_dir);
+
+ debugfs_create_ulong("state", 0400, pdsc->dentry, &pdsc->state);
+}
+
+void pdsc_debugfs_del_dev(struct pdsc *pdsc)
+{
+ debugfs_remove_recursive(pdsc->dentry);
+ pdsc->dentry = NULL;
+}
+
+static int identity_show(struct seq_file *seq, void *v)
+{
+ struct pdsc *pdsc = seq->private;
+ struct pds_core_dev_identity *ident;
+ int vt;
+
+ ident = &pdsc->dev_ident;
+
+ seq_printf(seq, "fw_heartbeat: 0x%x\n",
+ ioread32(&pdsc->info_regs->fw_heartbeat));
+
+ seq_printf(seq, "nlifs: %d\n",
+ le32_to_cpu(ident->nlifs));
+ seq_printf(seq, "nintrs: %d\n",
+ le32_to_cpu(ident->nintrs));
+ seq_printf(seq, "ndbpgs_per_lif: %d\n",
+ le32_to_cpu(ident->ndbpgs_per_lif));
+ seq_printf(seq, "intr_coal_mult: %d\n",
+ le32_to_cpu(ident->intr_coal_mult));
+ seq_printf(seq, "intr_coal_div: %d\n",
+ le32_to_cpu(ident->intr_coal_div));
+
+ seq_puts(seq, "vif_types: ");
+ for (vt = 0; vt < PDS_DEV_TYPE_MAX; vt++)
+ seq_printf(seq, "%d ",
+ le16_to_cpu(pdsc->dev_ident.vif_types[vt]));
+ seq_puts(seq, "\n");
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(identity);
+
+void pdsc_debugfs_add_ident(struct pdsc *pdsc)
+{
+ debugfs_create_file("identity", 0400, pdsc->dentry,
+ pdsc, &identity_fops);
+}
+
+static int viftype_show(struct seq_file *seq, void *v)
+{
+ struct pdsc *pdsc = seq->private;
+ int vt;
+
+ for (vt = 0; vt < PDS_DEV_TYPE_MAX; vt++) {
+ if (!pdsc->viftype_status[vt].name)
+ continue;
+
+ seq_printf(seq, "%s\t%d supported %d enabled\n",
+ pdsc->viftype_status[vt].name,
+ pdsc->viftype_status[vt].supported,
+ pdsc->viftype_status[vt].enabled);
+ }
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(viftype);
+
+void pdsc_debugfs_add_viftype(struct pdsc *pdsc)
+{
+ debugfs_create_file("viftypes", 0400, pdsc->dentry,
+ pdsc, &viftype_fops);
+}
+
+static const struct debugfs_reg32 intr_ctrl_regs[] = {
+ { .name = "coal_init", .offset = 0, },
+ { .name = "mask", .offset = 4, },
+ { .name = "credits", .offset = 8, },
+ { .name = "mask_on_assert", .offset = 12, },
+ { .name = "coal_timer", .offset = 16, },
+};
+
+void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq)
+{
+ struct dentry *qcq_dentry, *q_dentry, *cq_dentry;
+ struct dentry *intr_dentry;
+ struct debugfs_regset32 *intr_ctrl_regset;
+ struct pdsc_intr_info *intr = &pdsc->intr_info[qcq->intx];
+ struct pdsc_queue *q = &qcq->q;
+ struct pdsc_cq *cq = &qcq->cq;
+
+ qcq_dentry = debugfs_create_dir(q->name, pdsc->dentry);
+ if (IS_ERR_OR_NULL(qcq_dentry))
+ return;
+ qcq->dentry = qcq_dentry;
+
+ debugfs_create_x64("q_base_pa", 0400, qcq_dentry, &qcq->q_base_pa);
+ debugfs_create_x32("q_size", 0400, qcq_dentry, &qcq->q_size);
+ debugfs_create_x64("cq_base_pa", 0400, qcq_dentry, &qcq->cq_base_pa);
+ debugfs_create_x32("cq_size", 0400, qcq_dentry, &qcq->cq_size);
+ debugfs_create_x32("accum_work", 0400, qcq_dentry, &qcq->accum_work);
+
+ q_dentry = debugfs_create_dir("q", qcq->dentry);
+ if (IS_ERR_OR_NULL(q_dentry))
+ return;
+
+ debugfs_create_u32("index", 0400, q_dentry, &q->index);
+ debugfs_create_u32("num_descs", 0400, q_dentry, &q->num_descs);
+ debugfs_create_u32("desc_size", 0400, q_dentry, &q->desc_size);
+ debugfs_create_u32("pid", 0400, q_dentry, &q->pid);
+
+ debugfs_create_u16("tail", 0400, q_dentry, &q->tail_idx);
+ debugfs_create_u16("head", 0400, q_dentry, &q->head_idx);
+
+ cq_dentry = debugfs_create_dir("cq", qcq->dentry);
+ if (IS_ERR_OR_NULL(cq_dentry))
+ return;
+
+ debugfs_create_x64("base_pa", 0400, cq_dentry, &cq->base_pa);
+ debugfs_create_u32("num_descs", 0400, cq_dentry, &cq->num_descs);
+ debugfs_create_u32("desc_size", 0400, cq_dentry, &cq->desc_size);
+ debugfs_create_bool("done_color", 0400, cq_dentry, &cq->done_color);
+ debugfs_create_u16("tail", 0400, cq_dentry, &cq->tail_idx);
+
+ if (qcq->flags & PDS_CORE_QCQ_F_INTR) {
+ intr_dentry = debugfs_create_dir("intr", qcq->dentry);
+ if (IS_ERR_OR_NULL(intr_dentry))
+ return;
+
+ debugfs_create_u32("index", 0400, intr_dentry, &intr->index);
+ debugfs_create_u32("vector", 0400, intr_dentry, &intr->vector);
+
+ intr_ctrl_regset = kzalloc(sizeof(*intr_ctrl_regset),
+ GFP_KERNEL);
+ if (!intr_ctrl_regset)
+ return;
+ intr_ctrl_regset->regs = intr_ctrl_regs;
+ intr_ctrl_regset->nregs = ARRAY_SIZE(intr_ctrl_regs);
+ intr_ctrl_regset->base = &pdsc->intr_ctrl[intr->index];
+
+ debugfs_create_regset32("intr_ctrl", 0400, intr_dentry,
+ intr_ctrl_regset);
+ }
+}
+
+void pdsc_debugfs_del_qcq(struct pdsc_qcq *qcq)
+{
+ debugfs_remove_recursive(qcq->dentry);
+ qcq->dentry = NULL;
+}
diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
new file mode 100644
index 000000000000..f7c597ea5daf
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/utsname.h>
+
+#include "core.h"
+
+int pdsc_err_to_errno(enum pds_core_status_code code)
+{
+ switch (code) {
+ case PDS_RC_SUCCESS:
+ return 0;
+ case PDS_RC_EVERSION:
+ case PDS_RC_EQTYPE:
+ case PDS_RC_EQID:
+ case PDS_RC_EINVAL:
+ case PDS_RC_ENOSUPP:
+ return -EINVAL;
+ case PDS_RC_EPERM:
+ return -EPERM;
+ case PDS_RC_ENOENT:
+ return -ENOENT;
+ case PDS_RC_EAGAIN:
+ return -EAGAIN;
+ case PDS_RC_ENOMEM:
+ return -ENOMEM;
+ case PDS_RC_EFAULT:
+ return -EFAULT;
+ case PDS_RC_EBUSY:
+ return -EBUSY;
+ case PDS_RC_EEXIST:
+ return -EEXIST;
+ case PDS_RC_EVFID:
+ return -ENODEV;
+ case PDS_RC_ECLIENT:
+ return -ECHILD;
+ case PDS_RC_ENOSPC:
+ return -ENOSPC;
+ case PDS_RC_ERANGE:
+ return -ERANGE;
+ case PDS_RC_BAD_ADDR:
+ return -EFAULT;
+ case PDS_RC_EOPCODE:
+ case PDS_RC_EINTR:
+ case PDS_RC_DEV_CMD:
+ case PDS_RC_ERROR:
+ case PDS_RC_ERDMA:
+ case PDS_RC_EIO:
+ default:
+ return -EIO;
+ }
+}
+
+bool pdsc_is_fw_running(struct pdsc *pdsc)
+{
+ pdsc->fw_status = ioread8(&pdsc->info_regs->fw_status);
+ pdsc->last_fw_time = jiffies;
+ pdsc->last_hb = ioread32(&pdsc->info_regs->fw_heartbeat);
+
+ /* Firmware is useful only if the running bit is set and
+ * fw_status != 0xff (bad PCI read)
+ */
+ return (pdsc->fw_status != 0xff) &&
+ (pdsc->fw_status & PDS_CORE_FW_STS_F_RUNNING);
+}
+
+bool pdsc_is_fw_good(struct pdsc *pdsc)
+{
+ u8 gen = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
+
+ return pdsc_is_fw_running(pdsc) && gen == pdsc->fw_generation;
+}
+
+static u8 pdsc_devcmd_status(struct pdsc *pdsc)
+{
+ return ioread8(&pdsc->cmd_regs->comp.status);
+}
+
+static bool pdsc_devcmd_done(struct pdsc *pdsc)
+{
+ return ioread32(&pdsc->cmd_regs->done) & PDS_CORE_DEV_CMD_DONE;
+}
+
+static void pdsc_devcmd_dbell(struct pdsc *pdsc)
+{
+ iowrite32(0, &pdsc->cmd_regs->done);
+ iowrite32(1, &pdsc->cmd_regs->doorbell);
+}
+
+static void pdsc_devcmd_clean(struct pdsc *pdsc)
+{
+ iowrite32(0, &pdsc->cmd_regs->doorbell);
+ memset_io(&pdsc->cmd_regs->cmd, 0, sizeof(pdsc->cmd_regs->cmd));
+}
+
+static const char *pdsc_devcmd_str(int opcode)
+{
+ switch (opcode) {
+ case PDS_CORE_CMD_NOP:
+ return "PDS_CORE_CMD_NOP";
+ case PDS_CORE_CMD_IDENTIFY:
+ return "PDS_CORE_CMD_IDENTIFY";
+ case PDS_CORE_CMD_RESET:
+ return "PDS_CORE_CMD_RESET";
+ case PDS_CORE_CMD_INIT:
+ return "PDS_CORE_CMD_INIT";
+ case PDS_CORE_CMD_FW_DOWNLOAD:
+ return "PDS_CORE_CMD_FW_DOWNLOAD";
+ case PDS_CORE_CMD_FW_CONTROL:
+ return "PDS_CORE_CMD_FW_CONTROL";
+ default:
+ return "PDS_CORE_CMD_UNKNOWN";
+ }
+}
+
+static int pdsc_devcmd_wait(struct pdsc *pdsc, int max_seconds)
+{
+ struct device *dev = pdsc->dev;
+ unsigned long start_time;
+ unsigned long max_wait;
+ unsigned long duration;
+ int timeout = 0;
+ int done = 0;
+ int err = 0;
+ int status;
+ int opcode;
+
+ opcode = ioread8(&pdsc->cmd_regs->cmd.opcode);
+
+ start_time = jiffies;
+ max_wait = start_time + (max_seconds * HZ);
+
+ while (!done && !timeout) {
+ done = pdsc_devcmd_done(pdsc);
+ if (done)
+ break;
+
+ timeout = time_after(jiffies, max_wait);
+ if (timeout)
+ break;
+
+ usleep_range(100, 200);
+ }
+ duration = jiffies - start_time;
+
+ if (done && duration > HZ)
+ dev_dbg(dev, "DEVCMD %d %s after %ld secs\n",
+ opcode, pdsc_devcmd_str(opcode), duration / HZ);
+
+ if (!done || timeout) {
+ dev_err(dev, "DEVCMD %d %s timeout, done %d timeout %d max_seconds=%d\n",
+ opcode, pdsc_devcmd_str(opcode), done, timeout,
+ max_seconds);
+ pdsc_devcmd_clean(pdsc);
+ /* Return the timeout directly; reading the status register
+ * after the clean would see stale/cleared state and could
+ * mask the timeout as success.
+ */
+ return -ETIMEDOUT;
+ }
+
+ status = pdsc_devcmd_status(pdsc);
+ err = pdsc_err_to_errno(status);
+ if (err && err != -EAGAIN)
+ dev_err(dev, "DEVCMD %d %s failed, status=%d err %d %pe\n",
+ opcode, pdsc_devcmd_str(opcode), status, err,
+ ERR_PTR(err));
+
+ return err;
+}
+
+int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
+ union pds_core_dev_comp *comp, int max_seconds)
+{
+ int err;
+
+ memcpy_toio(&pdsc->cmd_regs->cmd, cmd, sizeof(*cmd));
+ pdsc_devcmd_dbell(pdsc);
+ err = pdsc_devcmd_wait(pdsc, max_seconds);
+ memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
+
+ if (err == -ENXIO || err == -ETIMEDOUT)
+ queue_work(pdsc->wq, &pdsc->health_work);
+
+ return err;
+}
+
+int pdsc_devcmd(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
+ union pds_core_dev_comp *comp, int max_seconds)
+{
+ int err;
+
+ mutex_lock(&pdsc->devcmd_lock);
+ err = pdsc_devcmd_locked(pdsc, cmd, comp, max_seconds);
+ mutex_unlock(&pdsc->devcmd_lock);
+
+ return err;
+}
+
+int pdsc_devcmd_init(struct pdsc *pdsc)
+{
+ union pds_core_dev_comp comp = {};
+ union pds_core_dev_cmd cmd = {
+ .opcode = PDS_CORE_CMD_INIT,
+ };
+
+ return pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+}
+
+int pdsc_devcmd_reset(struct pdsc *pdsc)
+{
+ union pds_core_dev_comp comp = {};
+ union pds_core_dev_cmd cmd = {
+ .reset.opcode = PDS_CORE_CMD_RESET,
+ };
+
+ return pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+}
+
+static int pdsc_devcmd_identify_locked(struct pdsc *pdsc)
+{
+ union pds_core_dev_comp comp = {};
+ union pds_core_dev_cmd cmd = {
+ .identify.opcode = PDS_CORE_CMD_IDENTIFY,
+ .identify.ver = PDS_CORE_IDENTITY_VERSION_1,
+ };
+
+ return pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+}
+
+static void pdsc_init_devinfo(struct pdsc *pdsc)
+{
+ pdsc->dev_info.asic_type = ioread8(&pdsc->info_regs->asic_type);
+ pdsc->dev_info.asic_rev = ioread8(&pdsc->info_regs->asic_rev);
+ pdsc->fw_generation = PDS_CORE_FW_STS_F_GENERATION &
+ ioread8(&pdsc->info_regs->fw_status);
+
+ memcpy_fromio(pdsc->dev_info.fw_version,
+ pdsc->info_regs->fw_version,
+ PDS_CORE_DEVINFO_FWVERS_BUFLEN);
+ pdsc->dev_info.fw_version[PDS_CORE_DEVINFO_FWVERS_BUFLEN] = 0;
+
+ memcpy_fromio(pdsc->dev_info.serial_num,
+ pdsc->info_regs->serial_num,
+ PDS_CORE_DEVINFO_SERIAL_BUFLEN);
+ pdsc->dev_info.serial_num[PDS_CORE_DEVINFO_SERIAL_BUFLEN] = 0;
+
+ dev_dbg(pdsc->dev, "fw_version %s\n", pdsc->dev_info.fw_version);
+}
+
+static int pdsc_identify(struct pdsc *pdsc)
+{
+ struct pds_core_drv_identity drv = {};
+ size_t sz;
+ int err;
+
+ drv.drv_type = cpu_to_le32(PDS_DRIVER_LINUX);
+ snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str),
+ "%s %s", PDS_CORE_DRV_NAME, utsname()->release);
+
+ /* Next let's get some info about the device.
+ * We use the devcmd_lock at this level in order to
+ * get safe access to the cmd_regs->data before anyone
+ * else can mess it up.
+ */
+ mutex_lock(&pdsc->devcmd_lock);
+
+ sz = min_t(size_t, sizeof(drv), sizeof(pdsc->cmd_regs->data));
+ memcpy_toio(&pdsc->cmd_regs->data, &drv, sz);
+
+ err = pdsc_devcmd_identify_locked(pdsc);
+ if (!err) {
+ sz = min_t(size_t, sizeof(pdsc->dev_ident),
+ sizeof(pdsc->cmd_regs->data));
+ memcpy_fromio(&pdsc->dev_ident, &pdsc->cmd_regs->data, sz);
+ }
+ mutex_unlock(&pdsc->devcmd_lock);
+
+ if (err) {
+ dev_err(pdsc->dev, "Cannot identify device: %pe\n",
+ ERR_PTR(err));
+ return err;
+ }
+
+ if (isprint(pdsc->dev_info.fw_version[0]) &&
+ isascii(pdsc->dev_info.fw_version[0]))
+ dev_info(pdsc->dev, "FW: %.*s\n",
+ (int)(sizeof(pdsc->dev_info.fw_version) - 1),
+ pdsc->dev_info.fw_version);
+ else
+ dev_info(pdsc->dev, "FW: (invalid string) 0x%02x 0x%02x 0x%02x 0x%02x ...\n",
+ (u8)pdsc->dev_info.fw_version[0],
+ (u8)pdsc->dev_info.fw_version[1],
+ (u8)pdsc->dev_info.fw_version[2],
+ (u8)pdsc->dev_info.fw_version[3]);
+
+ return 0;
+}
+
+int pdsc_dev_reinit(struct pdsc *pdsc)
+{
+ pdsc_init_devinfo(pdsc);
+
+ return pdsc_identify(pdsc);
+}
+
+int pdsc_dev_init(struct pdsc *pdsc)
+{
+ unsigned int nintrs;
+ int err;
+
+ /* Initial init and reset of device */
+ pdsc_init_devinfo(pdsc);
+ pdsc->devcmd_timeout = PDS_CORE_DEVCMD_TIMEOUT;
+
+ err = pdsc_devcmd_reset(pdsc);
+ if (err)
+ return err;
+
+ err = pdsc_identify(pdsc);
+ if (err)
+ return err;
+
+ pdsc_debugfs_add_ident(pdsc);
+
+ /* Now we can reserve interrupts */
+ nintrs = le32_to_cpu(pdsc->dev_ident.nintrs);
+ nintrs = min_t(unsigned int, num_online_cpus(), nintrs);
+
+ /* Get intr_info struct array for tracking */
+ pdsc->intr_info = kcalloc(nintrs, sizeof(*pdsc->intr_info), GFP_KERNEL);
+ if (!pdsc->intr_info) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ err = pci_alloc_irq_vectors(pdsc->pdev, nintrs, nintrs, PCI_IRQ_MSIX);
+ if (err != nintrs) {
+ dev_err(pdsc->dev, "Can't get %d intrs from OS: %pe\n",
+ nintrs, ERR_PTR(err));
+ err = -ENOSPC;
+ goto err_out;
+ }
+ pdsc->nintrs = nintrs;
+
+ return 0;
+
+err_out:
+ kfree(pdsc->intr_info);
+ pdsc->intr_info = NULL;
+
+ return err;
+}
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
new file mode 100644
index 000000000000..9c6b3653c1c7
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include "core.h"
+#include <linux/pds/pds_auxbus.h>
+
+static struct
+pdsc_viftype *pdsc_dl_find_viftype_by_id(struct pdsc *pdsc,
+ enum devlink_param_type dl_id)
+{
+ int vt;
+
+ for (vt = 0; vt < PDS_DEV_TYPE_MAX; vt++) {
+ if (pdsc->viftype_status[vt].dl_id == dl_id)
+ return &pdsc->viftype_status[vt];
+ }
+
+ return NULL;
+}
+
+int pdsc_dl_enable_get(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct pdsc *pdsc = devlink_priv(dl);
+ struct pdsc_viftype *vt_entry;
+
+ vt_entry = pdsc_dl_find_viftype_by_id(pdsc, id);
+ if (!vt_entry)
+ return -ENOENT;
+
+ ctx->val.vbool = vt_entry->enabled;
+
+ return 0;
+}
+
+int pdsc_dl_enable_set(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct pdsc *pdsc = devlink_priv(dl);
+ struct pdsc_viftype *vt_entry;
+ int err = 0;
+ int vf_id;
+
+ vt_entry = pdsc_dl_find_viftype_by_id(pdsc, id);
+ if (!vt_entry || !vt_entry->supported)
+ return -EOPNOTSUPP;
+
+ if (vt_entry->enabled == ctx->val.vbool)
+ return 0;
+
+ vt_entry->enabled = ctx->val.vbool;
+ for (vf_id = 0; vf_id < pdsc->num_vfs; vf_id++) {
+ struct pdsc *vf = pdsc->vfs[vf_id].vf;
+
+ err = ctx->val.vbool ? pdsc_auxbus_dev_add(vf, pdsc) :
+ pdsc_auxbus_dev_del(vf, pdsc);
+ }
+
+ return err;
+}
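+
+/* For reference (illustrative, PCI address made up): an administrator
+ * toggles this parameter at runtime with the devlink tool, e.g.
+ *
+ *	# devlink dev param set pci/0000:2b:00.0 \
+ *		name enable_vnet value true cmode runtime
+ */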
+
+int pdsc_dl_enable_validate(struct devlink *dl, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct pdsc *pdsc = devlink_priv(dl);
+ struct pdsc_viftype *vt_entry;
+
+ vt_entry = pdsc_dl_find_viftype_by_id(pdsc, id);
+ if (!vt_entry || !vt_entry->supported)
+ return -EOPNOTSUPP;
+
+ if (!pdsc->viftype_status[vt_entry->vif_id].supported)
+ return -ENODEV;
+
+ return 0;
+}
+
+int pdsc_dl_flash_update(struct devlink *dl,
+ struct devlink_flash_update_params *params,
+ struct netlink_ext_ack *extack)
+{
+ struct pdsc *pdsc = devlink_priv(dl);
+
+ return pdsc_firmware_update(pdsc, params->fw, extack);
+}
+
+static char *fw_slotnames[] = {
+ "fw.goldfw",
+ "fw.mainfwa",
+ "fw.mainfwb",
+};
+
+int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ union pds_core_dev_cmd cmd = {
+ .fw_control.opcode = PDS_CORE_CMD_FW_CONTROL,
+ .fw_control.oper = PDS_CORE_FW_GET_LIST,
+ };
+ struct pds_core_fw_list_info fw_list;
+ struct pdsc *pdsc = devlink_priv(dl);
+ union pds_core_dev_comp comp;
+ char buf[16];
+ int listlen;
+ int err;
+ int i;
+
+ mutex_lock(&pdsc->devcmd_lock);
+ err = pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout * 2);
+ memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list));
+ mutex_unlock(&pdsc->devcmd_lock);
+ if (err && err != -EIO)
+ return err;
+
+ listlen = fw_list.num_fw_slots;
+ for (i = 0; i < listlen; i++) {
+ if (i < ARRAY_SIZE(fw_slotnames))
+ strscpy(buf, fw_slotnames[i], sizeof(buf));
+ else
+ snprintf(buf, sizeof(buf), "fw.slot_%d", i);
+ err = devlink_info_version_stored_put(req, buf,
+ fw_list.fw_names[i].fw_version);
+ }
+
+ err = devlink_info_version_running_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW,
+ pdsc->dev_info.fw_version);
+ if (err)
+ return err;
+
+ snprintf(buf, sizeof(buf), "0x%x", pdsc->dev_info.asic_type);
+ err = devlink_info_version_fixed_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
+ buf);
+ if (err)
+ return err;
+
+ snprintf(buf, sizeof(buf), "0x%x", pdsc->dev_info.asic_rev);
+ err = devlink_info_version_fixed_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_ASIC_REV,
+ buf);
+ if (err)
+ return err;
+
+ return devlink_info_serial_number_put(req, pdsc->dev_info.serial_num);
+}
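+
+/* For reference (illustrative, PCI address made up): the versions filled
+ * in above are shown by "devlink dev info pci/0000:2b:00.0" as the
+ * stored fw.goldfw/fw.mainfwa/fw.mainfwb entries, the running "fw"
+ * version, the fixed asic.id and asic.rev values, and the serial number.
+ */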
+
+int pdsc_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg,
+ struct netlink_ext_ack *extack)
+{
+ struct pdsc *pdsc = devlink_health_reporter_priv(reporter);
+ int err;
+
+ mutex_lock(&pdsc->config_lock);
+
+ if (test_bit(PDSC_S_FW_DEAD, &pdsc->state))
+ err = devlink_fmsg_string_pair_put(fmsg, "Status", "dead");
+ else if (!pdsc_is_fw_good(pdsc))
+ err = devlink_fmsg_string_pair_put(fmsg, "Status", "unhealthy");
+ else
+ err = devlink_fmsg_string_pair_put(fmsg, "Status", "healthy");
+
+ mutex_unlock(&pdsc->config_lock);
+
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "State",
+ pdsc->fw_status &
+ ~PDS_CORE_FW_STS_F_GENERATION);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "Generation",
+ pdsc->fw_generation >> 4);
+ if (err)
+ return err;
+
+ return devlink_fmsg_u32_pair_put(fmsg, "Recoveries",
+ pdsc->fw_recoveries);
+}
diff --git a/drivers/net/ethernet/amd/pds_core/fw.c b/drivers/net/ethernet/amd/pds_core/fw.c
new file mode 100644
index 000000000000..90a811f3878a
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/fw.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include "core.h"
+
+/* The worst case wait for the install activity is about 25 minutes when
+ * installing a new CPLD, which happens very seldom. A normal install
+ * takes about 30-35 seconds. Since the driver can't tell whether a CPLD
+ * update will happen, we set the timeout for the worst case.
+ */
+#define PDSC_FW_INSTALL_TIMEOUT (25 * 60)
+#define PDSC_FW_SELECT_TIMEOUT 30
+
+/* Number of periodic log updates during fw file download */
+#define PDSC_FW_INTERVAL_FRACTION 32
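+
+/* Worked example (illustrative): for a 16 MB firmware file, download
+ * progress is reported roughly every 16 MB / 32 = 512 KB.
+ */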
+
+static int pdsc_devcmd_fw_download_locked(struct pdsc *pdsc, u64 addr,
+ u32 offset, u32 length)
+{
+ union pds_core_dev_cmd cmd = {
+ .fw_download.opcode = PDS_CORE_CMD_FW_DOWNLOAD,
+ .fw_download.offset = cpu_to_le32(offset),
+ .fw_download.addr = cpu_to_le64(addr),
+ .fw_download.length = cpu_to_le32(length),
+ };
+ union pds_core_dev_comp comp;
+
+ return pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+}
+
+static int pdsc_devcmd_fw_install(struct pdsc *pdsc)
+{
+ union pds_core_dev_cmd cmd = {
+ .fw_control.opcode = PDS_CORE_CMD_FW_CONTROL,
+ .fw_control.oper = PDS_CORE_FW_INSTALL_ASYNC
+ };
+ union pds_core_dev_comp comp;
+ int err;
+
+ err = pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+ if (err < 0)
+ return err;
+
+ return comp.fw_control.slot;
+}
+
+static int pdsc_devcmd_fw_activate(struct pdsc *pdsc,
+ enum pds_core_fw_slot slot)
+{
+ union pds_core_dev_cmd cmd = {
+ .fw_control.opcode = PDS_CORE_CMD_FW_CONTROL,
+ .fw_control.oper = PDS_CORE_FW_ACTIVATE_ASYNC,
+ .fw_control.slot = slot
+ };
+ union pds_core_dev_comp comp;
+
+ return pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+}
+
+static int pdsc_fw_status_long_wait(struct pdsc *pdsc,
+ const char *label,
+ unsigned long timeout,
+ u8 fw_cmd,
+ struct netlink_ext_ack *extack)
+{
+ union pds_core_dev_cmd cmd = {
+ .fw_control.opcode = PDS_CORE_CMD_FW_CONTROL,
+ .fw_control.oper = fw_cmd,
+ };
+ union pds_core_dev_comp comp;
+ unsigned long start_time;
+ unsigned long end_time;
+ int err;
+
+ /* Poll for the status of the long-running async install
+ * command. We get -EAGAIN while the command is still
+ * running, else we get the final command status.
+ */
+ start_time = jiffies;
+ end_time = start_time + (timeout * HZ);
+ do {
+ err = pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+ msleep(20);
+ } while (time_before(jiffies, end_time) &&
+ (err == -EAGAIN || err == -ETIMEDOUT));
+
+ if (err == -EAGAIN || err == -ETIMEDOUT) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware wait timed out");
+ dev_err(pdsc->dev, "DEV_CMD firmware wait %s timed out\n",
+ label);
+ } else if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware wait failed");
+ }
+
+ return err;
+}
+
+int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
+ struct netlink_ext_ack *extack)
+{
+ u32 buf_sz, copy_sz, offset;
+ struct devlink *dl;
+ int next_interval;
+ u64 data_addr;
+ int err = 0;
+ int fw_slot;
+
+ dev_info(pdsc->dev, "Installing firmware\n");
+
+ dl = priv_to_devlink(pdsc);
+ devlink_flash_update_status_notify(dl, "Preparing to flash",
+ NULL, 0, 0);
+
+ buf_sz = sizeof(pdsc->cmd_regs->data);
+
+ dev_dbg(pdsc->dev,
+ "downloading firmware - size %d part_sz %d nparts %lu\n",
+ (int)fw->size, buf_sz, DIV_ROUND_UP(fw->size, buf_sz));
+
+ offset = 0;
+ next_interval = 0;
+ data_addr = offsetof(struct pds_core_dev_cmd_regs, data);
+ while (offset < fw->size) {
+ if (offset >= next_interval) {
+ devlink_flash_update_status_notify(dl, "Downloading",
+ NULL, offset,
+ fw->size);
+ next_interval = offset +
+ (fw->size / PDSC_FW_INTERVAL_FRACTION);
+ }
+
+ copy_sz = min_t(unsigned int, buf_sz, fw->size - offset);
+ mutex_lock(&pdsc->devcmd_lock);
+ memcpy_toio(&pdsc->cmd_regs->data, fw->data + offset, copy_sz);
+ err = pdsc_devcmd_fw_download_locked(pdsc, data_addr,
+ offset, copy_sz);
+ mutex_unlock(&pdsc->devcmd_lock);
+ if (err) {
+ dev_err(pdsc->dev,
+ "download failed offset 0x%x addr 0x%llx len 0x%x: %pe\n",
+ offset, data_addr, copy_sz, ERR_PTR(err));
+ NL_SET_ERR_MSG_MOD(extack, "Segment download failed");
+ goto err_out;
+ }
+ offset += copy_sz;
+ }
+ devlink_flash_update_status_notify(dl, "Downloading", NULL,
+ fw->size, fw->size);
+
+ devlink_flash_update_timeout_notify(dl, "Installing", NULL,
+ PDSC_FW_INSTALL_TIMEOUT);
+
+ fw_slot = pdsc_devcmd_fw_install(pdsc);
+ if (fw_slot < 0) {
+ err = fw_slot;
+ dev_err(pdsc->dev, "install failed: %pe\n", ERR_PTR(err));
+ NL_SET_ERR_MSG_MOD(extack, "Failed to start firmware install");
+ goto err_out;
+ }
+
+ err = pdsc_fw_status_long_wait(pdsc, "Installing",
+ PDSC_FW_INSTALL_TIMEOUT,
+ PDS_CORE_FW_INSTALL_STATUS,
+ extack);
+ if (err)
+ goto err_out;
+
+ devlink_flash_update_timeout_notify(dl, "Selecting", NULL,
+ PDSC_FW_SELECT_TIMEOUT);
+
+ err = pdsc_devcmd_fw_activate(pdsc, fw_slot);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to start firmware select");
+ goto err_out;
+ }
+
+ err = pdsc_fw_status_long_wait(pdsc, "Selecting",
+ PDSC_FW_SELECT_TIMEOUT,
+ PDS_CORE_FW_ACTIVATE_STATUS,
+ extack);
+ if (err)
+ goto err_out;
+
+ dev_info(pdsc->dev, "Firmware update completed, slot %d\n", fw_slot);
+
+err_out:
+ if (err)
+ devlink_flash_update_status_notify(dl, "Flash failed",
+ NULL, 0, 0);
+ else
+ devlink_flash_update_status_notify(dl, "Flash done",
+ NULL, 0, 0);
+ return err;
+}
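+
+/* For reference (illustrative, names made up): this update path is
+ * driven from userspace with something like
+ *
+ *	# devlink dev flash pci/0000:2b:00.0 file pds_core.bin
+ *
+ * where the file is found under /lib/firmware.
+ */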
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
new file mode 100644
index 000000000000..e2d14b1ca471
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/pci.h>
+
+#include <linux/pds/pds_common.h>
+
+#include "core.h"
+
+MODULE_DESCRIPTION(PDSC_DRV_DESCRIPTION);
+MODULE_AUTHOR("Advanced Micro Devices, Inc");
+MODULE_LICENSE("GPL");
+
+/* Supported devices */
+static const struct pci_device_id pdsc_id_table[] = {
+ { PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_CORE_PF) },
+ { PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_VDPA_VF) },
+ { 0, } /* end of table */
+};
+MODULE_DEVICE_TABLE(pci, pdsc_id_table);
+
+static void pdsc_wdtimer_cb(struct timer_list *t)
+{
+ struct pdsc *pdsc = from_timer(pdsc, t, wdtimer);
+
+ dev_dbg(pdsc->dev, "%s: jiffies %ld\n", __func__, jiffies);
+ mod_timer(&pdsc->wdtimer,
+ round_jiffies(jiffies + pdsc->wdtimer_period));
+
+ queue_work(pdsc->wq, &pdsc->health_work);
+}
+
+static void pdsc_unmap_bars(struct pdsc *pdsc)
+{
+ struct pdsc_dev_bar *bars = pdsc->bars;
+ unsigned int i;
+
+ for (i = 0; i < PDS_CORE_BARS_MAX; i++) {
+ if (bars[i].vaddr)
+ pci_iounmap(pdsc->pdev, bars[i].vaddr);
+ }
+}
+
+static int pdsc_map_bars(struct pdsc *pdsc)
+{
+ struct pdsc_dev_bar *bar = pdsc->bars;
+ struct pci_dev *pdev = pdsc->pdev;
+ struct device *dev = pdsc->dev;
+ struct pdsc_dev_bar *bars;
+ unsigned int i, j;
+ int num_bars = 0;
+ int err;
+ u32 sig;
+
+ bars = pdsc->bars;
+
+ /* Since the PCI interface in the hardware is configurable,
+ * we need to poke into all the bars to find the set we're
+ * expecting.
+ */
+ for (i = 0, j = 0; i < PDS_CORE_BARS_MAX; i++) {
+ if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+ continue;
+
+ bars[j].len = pci_resource_len(pdev, i);
+ bars[j].bus_addr = pci_resource_start(pdev, i);
+ bars[j].res_index = i;
+
+ /* only map the whole bar 0 */
+ if (j > 0) {
+ bars[j].vaddr = NULL;
+ } else {
+ bars[j].vaddr = pci_iomap(pdev, i, bars[j].len);
+ if (!bars[j].vaddr) {
+ dev_err(dev, "Cannot map BAR %d, aborting\n", i);
+ return -ENODEV;
+ }
+ }
+
+ j++;
+ }
+ num_bars = j;
+
+ /* BAR0: dev_cmd and interrupts */
+ if (num_bars < 1) {
+ dev_err(dev, "No bars found\n");
+ err = -EFAULT;
+ goto err_out;
+ }
+
+ if (bar->len < PDS_CORE_BAR0_SIZE) {
+ dev_err(dev, "Resource bar size %lu too small\n", bar->len);
+ err = -EFAULT;
+ goto err_out;
+ }
+
+ pdsc->info_regs = bar->vaddr + PDS_CORE_BAR0_DEV_INFO_REGS_OFFSET;
+ pdsc->cmd_regs = bar->vaddr + PDS_CORE_BAR0_DEV_CMD_REGS_OFFSET;
+ pdsc->intr_status = bar->vaddr + PDS_CORE_BAR0_INTR_STATUS_OFFSET;
+ pdsc->intr_ctrl = bar->vaddr + PDS_CORE_BAR0_INTR_CTRL_OFFSET;
+
+ sig = ioread32(&pdsc->info_regs->signature);
+ if (sig != PDS_CORE_DEV_INFO_SIGNATURE) {
+ dev_err(dev, "Incompatible firmware signature %x", sig);
+ err = -EFAULT;
+ goto err_out;
+ }
+
+ /* BAR1: doorbells */
+ bar++;
+ if (num_bars < 2) {
+ dev_err(dev, "Doorbell bar missing\n");
+ err = -EFAULT;
+ goto err_out;
+ }
+
+ pdsc->db_pages = bar->vaddr;
+ pdsc->phy_db_pages = bar->bus_addr;
+
+ return 0;
+
+err_out:
+ pdsc_unmap_bars(pdsc);
+ return err;
+}
+
+void __iomem *pdsc_map_dbpage(struct pdsc *pdsc, int page_num)
+{
+ return pci_iomap_range(pdsc->pdev,
+ pdsc->bars[PDS_CORE_PCI_BAR_DBELL].res_index,
+ (u64)page_num << PAGE_SHIFT, PAGE_SIZE);
+}
+
+static int pdsc_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+ struct pdsc *pdsc = pci_get_drvdata(pdev);
+ struct device *dev = pdsc->dev;
+ int ret = 0;
+
+ if (num_vfs > 0) {
+ pdsc->vfs = kcalloc(num_vfs, sizeof(struct pdsc_vf),
+ GFP_KERNEL);
+ if (!pdsc->vfs)
+ return -ENOMEM;
+ pdsc->num_vfs = num_vfs;
+
+ ret = pci_enable_sriov(pdev, num_vfs);
+ if (ret) {
+ dev_err(dev, "Cannot enable SRIOV: %pe\n",
+ ERR_PTR(ret));
+ goto no_vfs;
+ }
+
+ return num_vfs;
+ }
+
+no_vfs:
+ pci_disable_sriov(pdev);
+
+ kfree(pdsc->vfs);
+ pdsc->vfs = NULL;
+ pdsc->num_vfs = 0;
+
+ return ret;
+}
+
+static int pdsc_init_vf(struct pdsc *vf)
+{
+ struct devlink *dl;
+ struct pdsc *pf;
+ int err;
+
+ pf = pdsc_get_pf_struct(vf->pdev);
+ if (IS_ERR_OR_NULL(pf))
+ return PTR_ERR(pf) ?: -1;
+
+ vf->vf_id = pci_iov_vf_id(vf->pdev);
+
+ dl = priv_to_devlink(vf);
+ devl_lock(dl);
+ devl_register(dl);
+ devl_unlock(dl);
+
+ pf->vfs[vf->vf_id].vf = vf;
+ err = pdsc_auxbus_dev_add(vf, pf);
+ if (err) {
+ devl_lock(dl);
+ devl_unregister(dl);
+ devl_unlock(dl);
+ }
+
+ return err;
+}
+
+static const struct devlink_health_reporter_ops pdsc_fw_reporter_ops = {
+ .name = "fw",
+ .diagnose = pdsc_fw_reporter_diagnose,
+};
+
+static const struct devlink_param pdsc_dl_params[] = {
+ DEVLINK_PARAM_GENERIC(ENABLE_VNET,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ pdsc_dl_enable_get,
+ pdsc_dl_enable_set,
+ pdsc_dl_enable_validate),
+};
+
+#define PDSC_WQ_NAME_LEN 24
+
+static int pdsc_init_pf(struct pdsc *pdsc)
+{
+ struct devlink_health_reporter *hr;
+ char wq_name[PDSC_WQ_NAME_LEN];
+ struct devlink *dl;
+ int err;
+
+ pcie_print_link_status(pdsc->pdev);
+
+ err = pci_request_regions(pdsc->pdev, PDS_CORE_DRV_NAME);
+ if (err) {
+ dev_err(pdsc->dev, "Cannot request PCI regions: %pe\n",
+ ERR_PTR(err));
+ return err;
+ }
+
+ err = pdsc_map_bars(pdsc);
+ if (err)
+ goto err_out_release_regions;
+
+ /* General workqueue and timer, but don't start timer yet */
+ snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->uid);
+ pdsc->wq = create_singlethread_workqueue(wq_name);
+ INIT_WORK(&pdsc->health_work, pdsc_health_thread);
+ timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
+ pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
+
+ mutex_init(&pdsc->devcmd_lock);
+ mutex_init(&pdsc->config_lock);
+ spin_lock_init(&pdsc->adminq_lock);
+
+ mutex_lock(&pdsc->config_lock);
+ set_bit(PDSC_S_FW_DEAD, &pdsc->state);
+
+ err = pdsc_setup(pdsc, PDSC_SETUP_INIT);
+ if (err)
+ goto err_out_unmap_bars;
+ err = pdsc_start(pdsc);
+ if (err)
+ goto err_out_teardown;
+
+ mutex_unlock(&pdsc->config_lock);
+
+ dl = priv_to_devlink(pdsc);
+ devl_lock(dl);
+ err = devl_params_register(dl, pdsc_dl_params,
+ ARRAY_SIZE(pdsc_dl_params));
+ if (err) {
+ dev_warn(pdsc->dev, "Failed to register devlink params: %pe\n",
+ ERR_PTR(err));
+ goto err_out_unlock_dl;
+ }
+
+ hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc);
+ if (IS_ERR(hr)) {
+ dev_warn(pdsc->dev, "Failed to create fw reporter: %pe\n", hr);
+ err = PTR_ERR(hr);
+ goto err_out_unreg_params;
+ }
+ pdsc->fw_reporter = hr;
+
+ devl_register(dl);
+ devl_unlock(dl);
+
+ /* Lastly, start the health check timer */
+ mod_timer(&pdsc->wdtimer, round_jiffies(jiffies + pdsc->wdtimer_period));
+
+ return 0;
+
+err_out_unreg_params:
+ devl_params_unregister(dl, pdsc_dl_params,
+ ARRAY_SIZE(pdsc_dl_params));
+err_out_unlock_dl:
+ devl_unlock(dl);
+ pdsc_stop(pdsc);
+err_out_teardown:
+ pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
+err_out_unmap_bars:
+ mutex_unlock(&pdsc->config_lock);
+ del_timer_sync(&pdsc->wdtimer);
+ if (pdsc->wq)
+ destroy_workqueue(pdsc->wq);
+ mutex_destroy(&pdsc->config_lock);
+ mutex_destroy(&pdsc->devcmd_lock);
+ pci_free_irq_vectors(pdsc->pdev);
+ pdsc_unmap_bars(pdsc);
+err_out_release_regions:
+ pci_release_regions(pdsc->pdev);
+
+ return err;
+}
+
+static const struct devlink_ops pdsc_dl_ops = {
+ .info_get = pdsc_dl_info_get,
+ .flash_update = pdsc_dl_flash_update,
+};
+
+static const struct devlink_ops pdsc_dl_vf_ops = {
+};
+
+static DEFINE_IDA(pdsc_ida);
+
+static int pdsc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct device *dev = &pdev->dev;
+ const struct devlink_ops *ops;
+ struct devlink *dl;
+ struct pdsc *pdsc;
+ bool is_pf;
+ int err;
+
+ is_pf = !pdev->is_virtfn;
+ ops = is_pf ? &pdsc_dl_ops : &pdsc_dl_vf_ops;
+ dl = devlink_alloc(ops, sizeof(struct pdsc), dev);
+ if (!dl)
+ return -ENOMEM;
+ pdsc = devlink_priv(dl);
+
+ pdsc->pdev = pdev;
+ pdsc->dev = &pdev->dev;
+ set_bit(PDSC_S_INITING_DRIVER, &pdsc->state);
+ pci_set_drvdata(pdev, pdsc);
+ pdsc_debugfs_add_dev(pdsc);
+
+ err = ida_alloc(&pdsc_ida, GFP_KERNEL);
+ if (err < 0) {
+ dev_err(pdsc->dev, "%s: id alloc failed: %pe\n",
+ __func__, ERR_PTR(err));
+ goto err_out_free_devlink;
+ }
+ pdsc->uid = err;
+
+ /* Query system for DMA addressing limitation for the device. */
+ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(PDS_CORE_ADDR_LEN));
+ if (err) {
+ dev_err(dev, "Unable to obtain 64-bit DMA for consistent allocations, aborting: %pe\n",
+ ERR_PTR(err));
+ goto err_out_free_ida;
+ }
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(dev, "Cannot enable PCI device: %pe\n", ERR_PTR(err));
+ goto err_out_free_ida;
+ }
+ pci_set_master(pdev);
+
+ if (is_pf)
+ err = pdsc_init_pf(pdsc);
+ else
+ err = pdsc_init_vf(pdsc);
+ if (err) {
+ dev_err(dev, "Cannot init device: %pe\n", ERR_PTR(err));
+ goto err_out_clear_master;
+ }
+
+ clear_bit(PDSC_S_INITING_DRIVER, &pdsc->state);
+ return 0;
+
+err_out_clear_master:
+ pci_clear_master(pdev);
+ pci_disable_device(pdev);
+err_out_free_ida:
+ ida_free(&pdsc_ida, pdsc->uid);
+err_out_free_devlink:
+ pdsc_debugfs_del_dev(pdsc);
+ devlink_free(dl);
+
+ return err;
+}
+
+static void pdsc_remove(struct pci_dev *pdev)
+{
+ struct pdsc *pdsc = pci_get_drvdata(pdev);
+ struct devlink *dl;
+
+ /* Unhook the registrations first to be sure there
+ * are no requests while we're stopping.
+ */
+ dl = priv_to_devlink(pdsc);
+ devl_lock(dl);
+ devl_unregister(dl);
+ if (!pdev->is_virtfn) {
+ if (pdsc->fw_reporter) {
+ devl_health_reporter_destroy(pdsc->fw_reporter);
+ pdsc->fw_reporter = NULL;
+ }
+ devl_params_unregister(dl, pdsc_dl_params,
+ ARRAY_SIZE(pdsc_dl_params));
+ }
+ devl_unlock(dl);
+
+ if (pdev->is_virtfn) {
+ struct pdsc *pf;
+
+ pf = pdsc_get_pf_struct(pdsc->pdev);
+ if (!IS_ERR(pf)) {
+ pdsc_auxbus_dev_del(pdsc, pf);
+ pf->vfs[pdsc->vf_id].vf = NULL;
+ }
+ } else {
+ /* Remove the VFs and their aux_bus connections before other
+ * cleanup so that the clients can use the AdminQ to cleanly
+ * shut themselves down.
+ */
+ pdsc_sriov_configure(pdev, 0);
+
+ del_timer_sync(&pdsc->wdtimer);
+ if (pdsc->wq)
+ destroy_workqueue(pdsc->wq);
+
+ mutex_lock(&pdsc->config_lock);
+ set_bit(PDSC_S_STOPPING_DRIVER, &pdsc->state);
+
+ pdsc_stop(pdsc);
+ pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
+ mutex_unlock(&pdsc->config_lock);
+ mutex_destroy(&pdsc->config_lock);
+ mutex_destroy(&pdsc->devcmd_lock);
+
+ pci_free_irq_vectors(pdev);
+ pdsc_unmap_bars(pdsc);
+ pci_release_regions(pdev);
+ }
+
+ pci_clear_master(pdev);
+ pci_disable_device(pdev);
+
+ ida_free(&pdsc_ida, pdsc->uid);
+ pdsc_debugfs_del_dev(pdsc);
+ devlink_free(dl);
+}
+
+static struct pci_driver pdsc_driver = {
+ .name = PDS_CORE_DRV_NAME,
+ .id_table = pdsc_id_table,
+ .probe = pdsc_probe,
+ .remove = pdsc_remove,
+ .sriov_configure = pdsc_sriov_configure,
+};
+
+void *pdsc_get_pf_struct(struct pci_dev *vf_pdev)
+{
+ return pci_iov_get_pf_drvdata(vf_pdev, &pdsc_driver);
+}
+EXPORT_SYMBOL_GPL(pdsc_get_pf_struct);
+
+static int __init pdsc_init_module(void)
+{
+ if (strcmp(KBUILD_MODNAME, PDS_CORE_DRV_NAME))
+ return -EINVAL;
+
+ pdsc_debugfs_create();
+ return pci_register_driver(&pdsc_driver);
+}
+
+static void __exit pdsc_cleanup_module(void)
+{
+ pci_unregister_driver(&pdsc_driver);
+ pdsc_debugfs_destroy();
+}
+
+module_init(pdsc_init_module);
+module_exit(pdsc_cleanup_module);
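
The watchdog above is the stock timer_setup()/from_timer() idiom: the callback recovers its containing structure from the timer_list pointer, re-arms itself, and defers the real work to a workqueue so nothing heavy runs in timer (softirq) context. A condensed sketch, with example_dev standing in for the driver's private struct:

	#include <linux/timer.h>
	#include <linux/workqueue.h>

	struct example_dev {
		struct timer_list wdtimer;
		unsigned long wdtimer_period;		/* jiffies */
		struct work_struct health_work;
		struct workqueue_struct *wq;
	};

	static void example_wdtimer_cb(struct timer_list *t)
	{
		/* from_timer() is container_of() specialized for timers */
		struct example_dev *ed = from_timer(ed, t, wdtimer);

		/* re-arm first, then punt the work to process context */
		mod_timer(&ed->wdtimer,
			  round_jiffies(jiffies + ed->wdtimer_period));
		queue_work(ed->wq, &ed->health_work);
	}

	/* at init time: timer_setup(&ed->wdtimer, example_wdtimer_cb, 0); */
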
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c
index d3526cd38f3d..414b2e448d59 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c
@@ -124,7 +124,7 @@ static const struct hwmon_channel_info aq_hwmon_temp = {
.config = aq_hwmon_temp_config,
};
-static const struct hwmon_channel_info *aq_hwmon_info[] = {
+static const struct hwmon_channel_info * const aq_hwmon_info[] = {
&aq_hwmon_temp,
NULL,
};
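
This one-character aq_drvinfo.c change is part of a tree-wide constification: the hwmon core now takes the channel-info array as const struct hwmon_channel_info * const *, so drivers can place the array itself in rodata. A sketch of a complete registration under that convention (names and the placeholder reading are illustrative):

	#include <linux/hwmon.h>

	static int example_read(struct device *dev, enum hwmon_sensor_types type,
				u32 attr, int channel, long *val)
	{
		*val = 42000;	/* placeholder reading, millidegrees C */
		return 0;
	}

	static umode_t example_is_visible(const void *data,
					  enum hwmon_sensor_types type,
					  u32 attr, int channel)
	{
		return 0444;
	}

	static const struct hwmon_ops example_hwmon_ops = {
		.is_visible = example_is_visible,
		.read = example_read,
	};

	static const struct hwmon_channel_info * const example_hwmon_info[] = {
		HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
		NULL,
	};

	static const struct hwmon_chip_info example_chip_info = {
		.ops = &example_hwmon_ops,
		.info = example_hwmon_info,
	};

	/* in probe:
	 * hdev = devm_hwmon_device_register_with_info(dev, "example", drvdata,
	 *					       &example_chip_info, NULL);
	 */
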
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 8ff5a4f98d6f..dcd9367f05af 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -56,6 +56,7 @@
#include <linux/hwmon-sysfs.h>
#include <net/page_pool.h>
#include <linux/align.h>
+#include <net/netdev_queues.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
@@ -331,26 +332,6 @@ static void bnxt_txr_db_kick(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
txr->kick_pending = 0;
}
-static bool bnxt_txr_netif_try_stop_queue(struct bnxt *bp,
- struct bnxt_tx_ring_info *txr,
- struct netdev_queue *txq)
-{
- netif_tx_stop_queue(txq);
-
- /* netif_tx_stop_queue() must be done before checking
- * tx index in bnxt_tx_avail() below, because in
- * bnxt_tx_int(), we update tx index before checking for
- * netif_tx_queue_stopped().
- */
- smp_mb();
- if (bnxt_tx_avail(bp, txr) >= bp->tx_wake_thresh) {
- netif_tx_wake_queue(txq);
- return false;
- }
-
- return true;
-}
-
static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bnxt *bp = netdev_priv(dev);
@@ -384,7 +365,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (net_ratelimit() && txr->kick_pending)
netif_warn(bp, tx_err, dev,
"bnxt: ring busy w/ flush pending!\n");
- if (bnxt_txr_netif_try_stop_queue(bp, txr, txq))
+ if (!netif_txq_try_stop(txq, bnxt_tx_avail(bp, txr),
+ bp->tx_wake_thresh))
return NETDEV_TX_BUSY;
}
@@ -490,7 +472,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
prod = NEXT_TX(prod);
tx_push->doorbell =
cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod);
- txr->tx_prod = prod;
+ WRITE_ONCE(txr->tx_prod, prod);
tx_buf->is_push = 1;
netdev_tx_sent_queue(txq, skb->len);
@@ -601,7 +583,7 @@ normal_tx:
wmb();
prod = NEXT_TX(prod);
- txr->tx_prod = prod;
+ WRITE_ONCE(txr->tx_prod, prod);
if (!netdev_xmit_more() || netif_xmit_stopped(txq))
bnxt_txr_db_kick(bp, txr, prod);
@@ -614,7 +596,8 @@ tx_done:
if (netdev_xmit_more() && !tx_buf->is_push)
bnxt_txr_db_kick(bp, txr, prod);
- bnxt_txr_netif_try_stop_queue(bp, txr, txq);
+ netif_txq_try_stop(txq, bnxt_tx_avail(bp, txr),
+ bp->tx_wake_thresh);
}
return NETDEV_TX_OK;
@@ -705,20 +688,11 @@ next_tx_int:
dev_kfree_skb_any(skb);
}
- netdev_tx_completed_queue(txq, nr_pkts, tx_bytes);
- txr->tx_cons = cons;
-
- /* Need to make the tx_cons update visible to bnxt_start_xmit()
- * before checking for netif_tx_queue_stopped(). Without the
- * memory barrier, there is a small possibility that bnxt_start_xmit()
- * will miss it and cause the queue to be stopped forever.
- */
- smp_mb();
+ WRITE_ONCE(txr->tx_cons, cons);
- if (unlikely(netif_tx_queue_stopped(txq)) &&
- bnxt_tx_avail(bp, txr) >= bp->tx_wake_thresh &&
- READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING)
- netif_tx_wake_queue(txq);
+ __netif_txq_completed_wake(txq, nr_pkts, tx_bytes,
+ bnxt_tx_avail(bp, txr), bp->tx_wake_thresh,
+				   READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING);
}
static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
@@ -2387,7 +2361,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
case ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE: {
switch (BNXT_EVENT_PHC_EVENT_TYPE(data1)) {
case ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE:
- if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+ if (BNXT_PTP_USE_RTC(bp)) {
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
u64 ns;
@@ -3237,6 +3211,7 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
pp.pool_size = bp->rx_ring_size;
pp.nid = dev_to_node(&bp->pdev->dev);
+ pp.napi = &rxr->bnapi->napi;
pp.dev = &bp->pdev->dev;
pp.dma_dir = DMA_BIDIRECTIONAL;
@@ -7626,7 +7601,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
u8 flags;
int rc;
- if (bp->hwrm_spec_code < 0x10801) {
+ if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5_THOR(bp)) {
rc = -ENODEV;
goto no_ptp;
}
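
The bnxt conversion above (and the ixgbe one further down) replaces hand-rolled queue stop/wake logic, including its smp_mb() commentary, with the lockless helpers from <net/netdev_queues.h>. The intended pairing, sketched for a hypothetical ring and mirroring the call sites in this diff: the xmit side stops the queue and re-checks ring space behind the macro's barrier; the completion side reports to BQL and wakes the queue only if enough descriptors are free and the "down condition" (last argument) evaluates false:

	#include <linux/netdevice.h>
	#include <net/netdev_queues.h>

	struct example_ring {
		u32 prod, cons, size, mask;
		struct net_device *dev;
		u16 qidx;
	};

	static u32 example_tx_avail(const struct example_ring *r)
	{
		u32 used = READ_ONCE(r->prod) - READ_ONCE(r->cons);

		return r->size - (used & r->mask);
	}

	/* xmit side: descriptors were just consumed, ring may be full */
	static netdev_tx_t example_xmit_tail(struct example_ring *r,
					     u32 wake_thresh)
	{
		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(r->dev, r->qidx);
		if (example_tx_avail(r) < wake_thresh &&
		    !netif_txq_try_stop(txq, example_tx_avail(r), wake_thresh))
			return NETDEV_TX_BUSY;
		return NETDEV_TX_OK;
	}

	/* completion side: pkts/bytes were just reclaimed */
	static void example_tx_done(struct example_ring *r, unsigned int pkts,
				    unsigned int bytes, u32 wake_thresh)
	{
		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(r->dev, r->qidx);
		__netif_txq_completed_wake(txq, pkts, bytes,
					   example_tx_avail(r), wake_thresh,
					   !netif_carrier_ok(r->dev));
	}
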
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 18cac98ba58e..080e73496066 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2231,13 +2231,12 @@ struct bnxt {
#define SFF_MODULE_ID_QSFP28 0x11
#define BNXT_MAX_PHY_I2C_RESP_SIZE 64
-static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
+static inline u32 bnxt_tx_avail(struct bnxt *bp,
+ const struct bnxt_tx_ring_info *txr)
{
- /* Tell compiler to fetch tx indices from memory. */
- barrier();
+ u32 used = READ_ONCE(txr->tx_prod) - READ_ONCE(txr->tx_cons);
- return bp->tx_ring_size -
- ((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask);
+ return bp->tx_ring_size - (used & bp->tx_ring_mask);
}
static inline void bnxt_writeq(struct bnxt *bp, u64 val,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 3ed3a2b3b3a9..dde327f2c57e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -825,8 +825,24 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs)
if (rc)
goto err_out2;
+ if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+ return 0;
+
+ /* Create representors for VFs in switchdev mode */
+ devl_lock(bp->dl);
+ rc = bnxt_vf_reps_create(bp);
+ devl_unlock(bp->dl);
+ if (rc) {
+		netdev_info(bp->dev, "Cannot enable VFs as representors cannot be created\n");
+ goto err_out3;
+ }
+
return 0;
+err_out3:
+ /* Disable SR-IOV */
+ pci_disable_sriov(bp->pdev);
+
err_out2:
/* Free the resources reserved for various VF's */
bnxt_hwrm_func_vf_resource_free(bp, *num_vfs);
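
The added err_out3 label keeps bnxt_sriov_enable()'s unwind in strict reverse order of setup: each failure jumps to the label that undoes the most recent successful step and then falls through the earlier ones. The general shape of the idiom (all helpers here are hypothetical placeholders):

	static int example_setup(struct example_dev *ed)
	{
		int err;

		err = example_alloc_resources(ed);
		if (err)
			return err;

		err = example_enable_hw(ed);
		if (err)
			goto err_free;

		err = example_register(ed);
		if (err)
			goto err_disable;

		return 0;

	err_disable:
		example_disable_hw(ed);		/* undo step 2 */
	err_free:
		example_free_resources(ed);	/* undo step 1 */
		return err;
	}
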
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index e7b5e28ee29f..852eb449ccae 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -304,7 +304,7 @@ void bnxt_rdma_aux_device_uninit(struct bnxt *bp)
struct auxiliary_device *adev;
/* Skip if no auxiliary device init was done. */
- if (!(bp->flags & BNXT_FLAG_ROCE_CAP))
+ if (!bp->aux_priv)
return;
aux_priv = bp->aux_priv;
@@ -324,6 +324,7 @@ static void bnxt_aux_dev_release(struct device *dev)
bp->edev = NULL;
kfree(aux_priv->edev);
kfree(aux_priv);
+ bp->aux_priv = NULL;
}
static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp)
@@ -359,19 +360,18 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp)
if (!(bp->flags & BNXT_FLAG_ROCE_CAP))
return;
- bp->aux_priv = kzalloc(sizeof(*bp->aux_priv), GFP_KERNEL);
- if (!bp->aux_priv)
+ aux_priv = kzalloc(sizeof(*bp->aux_priv), GFP_KERNEL);
+ if (!aux_priv)
goto exit;
- bp->aux_priv->id = ida_alloc(&bnxt_aux_dev_ids, GFP_KERNEL);
- if (bp->aux_priv->id < 0) {
+ aux_priv->id = ida_alloc(&bnxt_aux_dev_ids, GFP_KERNEL);
+ if (aux_priv->id < 0) {
netdev_warn(bp->dev,
"ida alloc failed for ROCE auxiliary device\n");
- kfree(bp->aux_priv);
+ kfree(aux_priv);
goto exit;
}
- aux_priv = bp->aux_priv;
aux_dev = &aux_priv->aux_dev;
aux_dev->id = aux_priv->id;
aux_dev->name = "rdma";
@@ -380,10 +380,11 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp)
rc = auxiliary_device_init(aux_dev);
if (rc) {
- ida_free(&bnxt_aux_dev_ids, bp->aux_priv->id);
- kfree(bp->aux_priv);
+ ida_free(&bnxt_aux_dev_ids, aux_priv->id);
+ kfree(aux_priv);
goto exit;
}
+ bp->aux_priv = aux_priv;
/* From this point, all cleanup will happen via the .release callback &
* any error unwinding will need to include a call to
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index fcc65890820a..2f1a1f2d2157 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -356,10 +356,15 @@ void bnxt_vf_reps_destroy(struct bnxt *bp)
/* un-publish cfa_code_map so that RX path can't see it anymore */
kfree(bp->cfa_code_map);
bp->cfa_code_map = NULL;
- bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
- if (closed)
+ if (closed) {
+ /* Temporarily set legacy mode to avoid re-opening
+ * representors and restore switchdev mode after that.
+ */
+ bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
bnxt_open_nic(bp, false, false);
+ bp->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+ }
rtnl_unlock();
	/* Need to call vf_reps_destroy() outside of rtnl_lock
@@ -482,7 +487,7 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
dev->min_mtu = ETH_ZLEN;
}
-static int bnxt_vf_reps_create(struct bnxt *bp)
+int bnxt_vf_reps_create(struct bnxt *bp)
{
u16 *cfa_code_map = NULL, num_vfs = pci_num_vf(bp->pdev);
struct bnxt_vf_rep *vf_rep;
@@ -535,7 +540,6 @@ static int bnxt_vf_reps_create(struct bnxt *bp)
/* publish cfa_code_map only after all VF-reps have been initialized */
bp->cfa_code_map = cfa_code_map;
- bp->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
netif_keep_dst(bp->dev);
return 0;
@@ -559,6 +563,7 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack)
{
struct bnxt *bp = bnxt_get_bp_from_dl(devlink);
+ int ret = 0;
if (bp->eswitch_mode == mode) {
netdev_info(bp->dev, "already in %s eswitch mode\n",
@@ -570,7 +575,7 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode,
switch (mode) {
case DEVLINK_ESWITCH_MODE_LEGACY:
bnxt_vf_reps_destroy(bp);
- return 0;
+ break;
case DEVLINK_ESWITCH_MODE_SWITCHDEV:
if (bp->hwrm_spec_code < 0x10803) {
@@ -578,15 +583,19 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode,
return -ENOTSUPP;
}
- if (pci_num_vf(bp->pdev) == 0) {
- netdev_info(bp->dev, "Enable VFs before setting switchdev mode\n");
- return -EPERM;
- }
- return bnxt_vf_reps_create(bp);
+ /* Create representors for existing VFs */
+ if (pci_num_vf(bp->pdev) > 0)
+ ret = bnxt_vf_reps_create(bp);
+ break;
default:
return -EINVAL;
}
+
+ if (!ret)
+ bp->eswitch_mode = mode;
+
+ return ret;
}
#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
index 5637a84884d7..33a965631d0b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
@@ -14,6 +14,7 @@
#define MAX_CFA_CODE 65536
+int bnxt_vf_reps_create(struct bnxt *bp);
void bnxt_vf_reps_destroy(struct bnxt *bp);
void bnxt_vf_reps_close(struct bnxt *bp);
void bnxt_vf_reps_open(struct bnxt *bp);
@@ -37,6 +38,11 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode,
#else
+static inline int bnxt_vf_reps_create(struct bnxt *bp)
+{
+ return 0;
+}
+
static inline void bnxt_vf_reps_close(struct bnxt *bp)
{
}
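
Exporting bnxt_vf_reps_create() alongside a static inline no-op for the CONFIG-off build is the standard header pattern for optional features: callers compile unchanged either way, and the stub folds away entirely. The shape of the idiom, for a hypothetical CONFIG_FOO:

	#ifdef CONFIG_FOO
	int foo_feature_enable(struct foo_dev *fd);	/* real code in foo.c */
	#else
	static inline int foo_feature_enable(struct foo_dev *fd)
	{
		return 0;	/* feature compiled out: succeed as a no-op */
	}
	#endif
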
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 5843c93b1711..4efa5fe6972b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -64,7 +64,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
int frag_len;
prod = NEXT_TX(prod);
- txr->tx_prod = prod;
+ WRITE_ONCE(txr->tx_prod, prod);
/* first fill up the first buffer */
frag_tx_buf = &txr->tx_buf_ring[prod];
@@ -94,7 +94,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
/* Sync TX BD */
wmb();
prod = NEXT_TX(prod);
- txr->tx_prod = prod;
+ WRITE_ONCE(txr->tx_prod, prod);
return tx_buf;
}
@@ -161,7 +161,7 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
}
tx_cons = NEXT_TX(tx_cons);
}
- txr->tx_cons = tx_cons;
+ WRITE_ONCE(txr->tx_cons, tx_cons);
if (rx_doorbell_needed) {
tx_buf = &txr->tx_buf_ring[last_tx_cons];
bnxt_db_write(bp, &rxr->rx_db, tx_buf->rx_prod);
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index c1fc91c97cee..cfbdd0022764 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -95,6 +95,8 @@
#define GEM_SA4B 0x00A0 /* Specific4 Bottom */
#define GEM_SA4T 0x00A4 /* Specific4 Top */
#define GEM_WOL 0x00b8 /* Wake on LAN */
+#define GEM_RXPTPUNI 0x00D4 /* PTP RX Unicast address */
+#define GEM_TXPTPUNI 0x00D8 /* PTP TX Unicast address */
#define GEM_EFTSH 0x00e8 /* PTP Event Frame Transmitted Seconds Register 47:32 */
#define GEM_EFRSH 0x00ec /* PTP Event Frame Received Seconds Register 47:32 */
#define GEM_PEFTSH 0x00f0 /* PTP Peer Event Frame Transmitted Seconds Register 47:32 */
@@ -245,6 +247,8 @@
#define MACB_TZQ_OFFSET 12 /* Transmit zero quantum pause frame */
#define MACB_TZQ_SIZE 1
#define MACB_SRTSM_OFFSET 15 /* Store Receive Timestamp to Memory */
+#define MACB_PTPUNI_OFFSET 20 /* PTP Unicast packet enable */
+#define MACB_PTPUNI_SIZE 1
#define MACB_OSSMODE_OFFSET 24 /* Enable One Step Synchro Mode */
#define MACB_OSSMODE_SIZE 1
#define MACB_MIIONRGMII_OFFSET 28 /* MII Usage on RGMII Interface */
@@ -1363,7 +1367,7 @@ static inline bool macb_is_gem(struct macb *bp)
static inline bool gem_has_ptp(struct macb *bp)
{
- return !!(bp->caps & MACB_CAPS_GEM_HAS_PTP);
+ return IS_ENABLED(CONFIG_MACB_USE_HWSTAMP) && (bp->caps & MACB_CAPS_GEM_HAS_PTP);
}
/**
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index f77bd1223c8f..29a1199dad14 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -287,6 +287,11 @@ static void macb_set_hwaddr(struct macb *bp)
top = cpu_to_le16(*((u16 *)(bp->dev->dev_addr + 4)));
macb_or_gem_writel(bp, SA1T, top);
+ if (gem_has_ptp(bp)) {
+ gem_writel(bp, RXPTPUNI, bottom);
+ gem_writel(bp, TXPTPUNI, bottom);
+ }
+
/* Clear unused address register sets */
macb_or_gem_writel(bp, SA2B, 0);
macb_or_gem_writel(bp, SA2T, 0);
@@ -773,8 +778,12 @@ static void macb_mac_link_up(struct phylink_config *config,
spin_unlock_irqrestore(&bp->lock, flags);
- /* Enable Rx and Tx */
- macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(RE) | MACB_BIT(TE));
+ /* Enable Rx and Tx; Enable PTP unicast */
+ ctrl = macb_readl(bp, NCR);
+ if (gem_has_ptp(bp))
+ ctrl |= MACB_BIT(PTPUNI);
+
+ macb_writel(bp, NCR, ctrl | MACB_BIT(RE) | MACB_BIT(TE));
netif_tx_wake_all_queues(ndev);
}
@@ -1063,6 +1072,10 @@ static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc)
}
#endif
addr |= MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
+#ifdef CONFIG_MACB_USE_HWSTAMP
+ if (bp->hw_dma_cap & HW_DMA_CAP_PTP)
+ addr &= ~GEM_BIT(DMA_RXVALID);
+#endif
return addr;
}
@@ -3889,17 +3902,17 @@ static void macb_configure_caps(struct macb *bp,
dcfg = gem_readl(bp, DCFG2);
if ((dcfg & (GEM_BIT(RX_PKT_BUFF) | GEM_BIT(TX_PKT_BUFF))) == 0)
bp->caps |= MACB_CAPS_FIFO_MODE;
-#ifdef CONFIG_MACB_USE_HWSTAMP
if (gem_has_ptp(bp)) {
if (!GEM_BFEXT(TSU, gem_readl(bp, DCFG5)))
dev_err(&bp->pdev->dev,
"GEM doesn't support hardware ptp.\n");
else {
+#ifdef CONFIG_MACB_USE_HWSTAMP
bp->hw_dma_cap |= HW_DMA_CAP_PTP;
bp->ptp_info = &gem_ptp_info;
+#endif
}
}
-#endif
}
dev_dbg(&bp->pdev->dev, "Cadence caps 0x%08x\n", bp->caps);
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
index f962a95068a0..51d26fa190d7 100644
--- a/drivers/net/ethernet/cadence/macb_ptp.c
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -258,6 +258,8 @@ static int gem_hw_timestamp(struct macb *bp, u32 dma_desc_ts_1,
*/
gem_tsu_get_time(&bp->ptp_clock_info, &tsu, NULL);
+ ts->tv_sec |= ((~GEM_DMA_SEC_MASK) & tsu.tv_sec);
+
/* If the top bit is set in the timestamp,
* but not in 1588 timer, it has rolled over,
* so subtract max size
@@ -266,8 +268,6 @@ static int gem_hw_timestamp(struct macb *bp, u32 dma_desc_ts_1,
!(tsu.tv_sec & (GEM_DMA_SEC_TOP >> 1)))
ts->tv_sec -= GEM_DMA_SEC_TOP;
- ts->tv_sec += ((~GEM_DMA_SEC_MASK) & tsu.tv_sec);
-
return 0;
}
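
The macb_ptp.c reorder matters because the TSU's upper seconds bits must be merged into the descriptor's truncated seconds field with OR (the bit ranges are disjoint) before the rollover test subtracts GEM_DMA_SEC_TOP; adding them after the subtraction, as the old code did, could re-introduce the wrapped-away bits. A standalone sketch of the reconstruction, with a 6-bit field width as an illustrative assumption:

	#include <linux/bits.h>
	#include <linux/types.h>

	#define DESC_SEC_WIDTH	6	/* assumed: seconds bits kept in the descriptor */
	#define DESC_SEC_MASK	GENMASK_ULL(DESC_SEC_WIDTH - 1, 0)
	#define DESC_SEC_TOP	BIT_ULL(DESC_SEC_WIDTH)

	static u64 example_recover_sec(u64 desc_sec, u64 tsu_sec)
	{
		/* merge the TSU's upper bits into the truncated value */
		u64 sec = desc_sec | (tsu_sec & ~DESC_SEC_MASK);

		/* descriptor MSB set but TSU MSB clear: the truncated
		 * counter wrapped between capture and readout, wind back
		 */
		if ((desc_sec & (DESC_SEC_TOP >> 1)) &&
		    !(tsu_sec & (DESC_SEC_TOP >> 1)))
			sec -= DESC_SEC_TOP;

		return sec;
	}
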
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index dd9be229819a..d3541159487d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -1135,7 +1135,7 @@ void cxgb4_cleanup_tc_flower(struct adapter *adap)
return;
if (adap->flower_stats_timer.function)
- del_timer_sync(&adap->flower_stats_timer);
+ timer_shutdown_sync(&adap->flower_stats_timer);
cancel_work_sync(&adap->flower_stats_work);
rhashtable_destroy(&adap->flower_tbl);
adap->tc_flower_initialized = false;
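
timer_shutdown_sync(), added in v6.2, goes one step beyond del_timer_sync(): it waits for a running callback to finish and additionally marks the timer shut down, so any concurrent or later re-arm becomes a no-op. That is exactly what a teardown path like the one above wants when a work item might otherwise re-schedule the timer. A sketch of the ordering:

	#include <linux/timer.h>
	#include <linux/workqueue.h>

	struct example_adapter {
		struct timer_list stats_timer;
		struct work_struct stats_work;
	};

	static void example_teardown(struct example_adapter *adap)
	{
		/* stop the timer and forbid re-arming, so the work item
		 * cannot schedule it again behind our back ...
		 */
		timer_shutdown_sync(&adap->stats_timer);

		/* ... then flush the work the timer may have queued */
		cancel_work_sync(&adap->stats_work);
	}
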
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index f1e80d6996ef..1c78f66a89da 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -71,6 +71,7 @@ config FSL_XGMAC_MDIO
tristate "Freescale XGMAC MDIO"
select PHYLIB
depends on OF
+ select MDIO_DEVRES
select OF_MDIO
help
This driver supports the MDIO bus on the Fman 10G Ethernet MACs, and
diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index 9bc099cf3cb1..4d75e6807e92 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -10,6 +10,7 @@ config FSL_ENETC_CORE
config FSL_ENETC
tristate "ENETC PF driver"
depends on PCI_MSI
+ select MDIO_DEVRES
select FSL_ENETC_CORE
select FSL_ENETC_IERB
select FSL_ENETC_MDIO
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 2fc712b24d12..3c4fa26f0f9b 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -25,6 +25,13 @@ void enetc_port_mac_wr(struct enetc_si *si, u32 reg, u32 val)
}
EXPORT_SYMBOL_GPL(enetc_port_mac_wr);
+static void enetc_change_preemptible_tcs(struct enetc_ndev_priv *priv,
+ u8 preemptible_tcs)
+{
+ priv->preemptible_tcs = preemptible_tcs;
+ enetc_mm_commit_preemptible_tcs(priv);
+}
+
static int enetc_num_stack_tx_queues(struct enetc_ndev_priv *priv)
{
int num_tx_rings = priv->num_tx_rings;
@@ -2640,16 +2647,19 @@ static void enetc_reset_tc_mqprio(struct net_device *ndev)
}
enetc_debug_tx_ring_prios(priv);
+
+ enetc_change_preemptible_tcs(priv, 0);
}
int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
{
+ struct tc_mqprio_qopt_offload *mqprio = type_data;
struct enetc_ndev_priv *priv = netdev_priv(ndev);
- struct tc_mqprio_qopt *mqprio = type_data;
+ struct tc_mqprio_qopt *qopt = &mqprio->qopt;
struct enetc_hw *hw = &priv->si->hw;
int num_stack_tx_queues = 0;
- u8 num_tc = mqprio->num_tc;
struct enetc_bdr *tx_ring;
+ u8 num_tc = qopt->num_tc;
int offset, count;
int err, tc, q;
@@ -2663,8 +2673,8 @@ int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
return err;
for (tc = 0; tc < num_tc; tc++) {
- offset = mqprio->offset[tc];
- count = mqprio->count[tc];
+ offset = qopt->offset[tc];
+ count = qopt->count[tc];
num_stack_tx_queues += count;
err = netdev_set_tc_queue(ndev, tc, count, offset);
@@ -2693,6 +2703,8 @@ int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
enetc_debug_tx_ring_prios(priv);
+ enetc_change_preemptible_tcs(priv, mqprio->preemptible_tcs);
+
return 0;
err_reset_tc:
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 8010f31cd10d..c97a8e3d7a7f 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -355,6 +355,9 @@ struct enetc_ndev_priv {
u16 rx_bd_count, tx_bd_count;
u16 msg_enable;
+
+ u8 preemptible_tcs;
+
enum enetc_active_offloads active_offloads;
u32 speed; /* store speed for compare update pspeed */
@@ -433,6 +436,7 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
/* ethtool */
void enetc_set_ethtool_ops(struct net_device *ndev);
void enetc_mm_link_state_update(struct enetc_ndev_priv *priv, bool link);
+void enetc_mm_commit_preemptible_tcs(struct enetc_ndev_priv *priv);
/* control buffer descriptor ring (CBDR) */
int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw, int bd_count,
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index da9d4b310fcd..e993ed04ab57 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -32,6 +32,12 @@ static const u32 enetc_port_regs[] = {
ENETC_PM0_CMD_CFG, ENETC_PM0_MAXFRM, ENETC_PM0_IF_MODE
};
+static const u32 enetc_port_mm_regs[] = {
+ ENETC_MMCSR, ENETC_PFPMR, ENETC_PTCFPR(0), ENETC_PTCFPR(1),
+ ENETC_PTCFPR(2), ENETC_PTCFPR(3), ENETC_PTCFPR(4), ENETC_PTCFPR(5),
+ ENETC_PTCFPR(6), ENETC_PTCFPR(7),
+};
+
static int enetc_get_reglen(struct net_device *ndev)
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
@@ -45,6 +51,9 @@ static int enetc_get_reglen(struct net_device *ndev)
if (hw->port)
len += ARRAY_SIZE(enetc_port_regs);
+ if (hw->port && !!(priv->si->hw_features & ENETC_SI_F_QBU))
+ len += ARRAY_SIZE(enetc_port_mm_regs);
+
len *= sizeof(u32) * 2; /* store 2 entries per reg: addr and value */
return len;
@@ -90,6 +99,14 @@ static void enetc_get_regs(struct net_device *ndev, struct ethtool_regs *regs,
*buf++ = addr;
*buf++ = enetc_rd(hw, addr);
}
+
+ if (priv->si->hw_features & ENETC_SI_F_QBU) {
+ for (i = 0; i < ARRAY_SIZE(enetc_port_mm_regs); i++) {
+ addr = ENETC_PORT_BASE + enetc_port_mm_regs[i];
+ *buf++ = addr;
+ *buf++ = enetc_rd(hw, addr);
+ }
+ }
}
static const struct {
@@ -976,7 +993,9 @@ static int enetc_get_mm(struct net_device *ndev, struct ethtool_mm_state *state)
lafs = ENETC_MMCSR_GET_LAFS(val);
state->rx_min_frag_size = ethtool_mm_frag_size_add_to_min(lafs);
state->tx_enabled = !!(val & ENETC_MMCSR_LPE); /* mirror of MMCSR_ME */
- state->tx_active = !!(val & ENETC_MMCSR_LPA);
+ state->tx_active = state->tx_enabled &&
+ (state->verify_status == ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED ||
+ state->verify_status == ETHTOOL_MM_VERIFY_STATUS_DISABLED);
state->verify_enabled = !(val & ENETC_MMCSR_VDIS);
state->verify_time = ENETC_MMCSR_GET_VT(val);
/* A verifyTime of 128 ms would exceed the 7 bit width
@@ -989,6 +1008,78 @@ static int enetc_get_mm(struct net_device *ndev, struct ethtool_mm_state *state)
return 0;
}
+static int enetc_mm_wait_tx_active(struct enetc_hw *hw, int verify_time)
+{
+ int timeout = verify_time * USEC_PER_MSEC * ENETC_MM_VERIFY_RETRIES;
+ u32 val;
+
+ /* This will time out after the standard value of 3 verification
+ * attempts. To not sleep forever, it relies on a non-zero verify_time,
+	 * a guarantee provided by the ethtool nlattr policy.
+ */
+ return read_poll_timeout(enetc_port_rd, val,
+ ENETC_MMCSR_GET_VSTS(val) == 3,
+ ENETC_MM_VERIFY_SLEEP_US, timeout,
+ true, hw, ENETC_MMCSR);
+}
+
+static void enetc_set_ptcfpr(struct enetc_hw *hw, u8 preemptible_tcs)
+{
+ u32 val;
+ int tc;
+
+ for (tc = 0; tc < 8; tc++) {
+ val = enetc_port_rd(hw, ENETC_PTCFPR(tc));
+
+ if (preemptible_tcs & BIT(tc))
+ val |= ENETC_PTCFPR_FPE;
+ else
+ val &= ~ENETC_PTCFPR_FPE;
+
+ enetc_port_wr(hw, ENETC_PTCFPR(tc), val);
+ }
+}
+
+/* ENETC does not have an IRQ to notify changes to the MAC Merge TX status
+ * (active/inactive), but the preemptible traffic classes should only be
+ * committed to hardware once TX is active. Resort to polling.
+ */
+void enetc_mm_commit_preemptible_tcs(struct enetc_ndev_priv *priv)
+{
+ struct enetc_hw *hw = &priv->si->hw;
+ u8 preemptible_tcs = 0;
+ u32 val;
+ int err;
+
+ val = enetc_port_rd(hw, ENETC_MMCSR);
+ if (!(val & ENETC_MMCSR_ME))
+ goto out;
+
+ if (!(val & ENETC_MMCSR_VDIS)) {
+ err = enetc_mm_wait_tx_active(hw, ENETC_MMCSR_GET_VT(val));
+ if (err)
+ goto out;
+ }
+
+ preemptible_tcs = priv->preemptible_tcs;
+out:
+ enetc_set_ptcfpr(hw, preemptible_tcs);
+}
+
+/* FIXME: Workaround for the link partner's verification failing if ENETC
+ * previously received too much express traffic. The documentation doesn't
+ * suggest this is needed.
+ */
+static void enetc_restart_emac_rx(struct enetc_si *si)
+{
+ u32 val = enetc_port_rd(&si->hw, ENETC_PM0_CMD_CFG);
+
+ enetc_port_wr(&si->hw, ENETC_PM0_CMD_CFG, val & ~ENETC_PM0_RX_EN);
+
+ if (val & ENETC_PM0_RX_EN)
+ enetc_port_wr(&si->hw, ENETC_PM0_CMD_CFG, val);
+}
+
static int enetc_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg,
struct netlink_ext_ack *extack)
{
@@ -1027,10 +1118,13 @@ static int enetc_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg,
else
priv->active_offloads &= ~ENETC_F_QBU;
- /* If link is up, enable MAC Merge right away */
- if (!!(priv->active_offloads & ENETC_F_QBU) &&
- !(val & ENETC_MMCSR_LINK_FAIL))
- val |= ENETC_MMCSR_ME;
+ /* If link is up, enable/disable MAC Merge right away */
+ if (!(val & ENETC_MMCSR_LINK_FAIL)) {
+ if (!!(priv->active_offloads & ENETC_F_QBU))
+ val |= ENETC_MMCSR_ME;
+ else
+ val &= ~ENETC_MMCSR_ME;
+ }
val &= ~ENETC_MMCSR_VT_MASK;
val |= ENETC_MMCSR_VT(cfg->verify_time);
@@ -1040,6 +1134,10 @@ static int enetc_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg,
enetc_port_wr(hw, ENETC_MMCSR, val);
+ enetc_restart_emac_rx(priv->si);
+
+ enetc_mm_commit_preemptible_tcs(priv);
+
mutex_unlock(&priv->mm_lock);
return 0;
@@ -1073,6 +1171,8 @@ void enetc_mm_link_state_update(struct enetc_ndev_priv *priv, bool link)
enetc_port_wr(hw, ENETC_MMCSR, val);
+ enetc_mm_commit_preemptible_tcs(priv);
+
mutex_unlock(&priv->mm_lock);
}
EXPORT_SYMBOL_GPL(enetc_mm_link_state_update);
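
enetc_mm_wait_tx_active() above is a typical read_poll_timeout() user: the macro from <linux/iopoll.h> repeatedly calls the read op with the trailing arguments, sleeping between attempts, until the condition on val holds or the timeout in microseconds expires, and returns 0 or -ETIMEDOUT. A generic sketch against a hypothetical ready bit:

	#include <linux/bits.h>
	#include <linux/io.h>
	#include <linux/iopoll.h>

	static u32 example_reg_rd(void __iomem *base, u32 off)
	{
		return readl(base + off);
	}

	static int example_wait_ready(void __iomem *base)
	{
		u32 val;

		/* poll every 1 ms, give up after 100 ms; 'false' means
		 * read once before the first sleep
		 */
		return read_poll_timeout(example_reg_rd, val, val & BIT(0),
					 1000, 100000, false, base, 0x10);
	}
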
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index de2e0ee8cdcb..1619943fb263 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -3,6 +3,9 @@
#include <linux/bitops.h>
+#define ENETC_MM_VERIFY_SLEEP_US USEC_PER_MSEC
+#define ENETC_MM_VERIFY_RETRIES 3
+
/* ENETC device IDs */
#define ENETC_DEV_ID_PF 0xe100
#define ENETC_DEV_ID_VF 0xef00
@@ -965,6 +968,10 @@ static inline u32 enetc_usecs_to_cycles(u32 usecs)
return (u32)div_u64(usecs * ENETC_CLK, 1000000ULL);
}
+/* Port traffic class frame preemption register */
+#define ENETC_PTCFPR(n) (0x1910 + (n) * 4) /* n = [0 ..7] */
+#define ENETC_PTCFPR_FPE BIT(31)
+
/* port time gating control register */
#define ENETC_PTGCR 0x11a00
#define ENETC_PTGCR_TGE BIT(31)
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 5ba1e0d71c68..9939ccafb556 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -507,6 +507,11 @@ struct bufdesc_ex {
/* i.MX6Q adds pm_qos support */
#define FEC_QUIRK_HAS_PMQOS BIT(23)
+/* Not all FEC hardware block MDIOs support accesses in C45 mode.
+ * Older blocks in the ColdFire parts do not support it.
+ */
+#define FEC_QUIRK_HAS_MDIO_C45 BIT(24)
+
struct bufdesc_prop {
int qid;
/* Address of Rx and Tx buffers */
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index f3b16a6673e2..160c1b3525f5 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -100,18 +100,19 @@ struct fec_devinfo {
static const struct fec_devinfo fec_imx25_info = {
.quirks = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR |
- FEC_QUIRK_HAS_FRREG,
+ FEC_QUIRK_HAS_FRREG | FEC_QUIRK_HAS_MDIO_C45,
};
static const struct fec_devinfo fec_imx27_info = {
- .quirks = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG,
+ .quirks = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG |
+ FEC_QUIRK_HAS_MDIO_C45,
};
static const struct fec_devinfo fec_imx28_info = {
.quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME |
FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC |
FEC_QUIRK_HAS_FRREG | FEC_QUIRK_CLEAR_SETUP_MII |
- FEC_QUIRK_NO_HARD_RESET,
+ FEC_QUIRK_NO_HARD_RESET | FEC_QUIRK_HAS_MDIO_C45,
};
static const struct fec_devinfo fec_imx6q_info = {
@@ -119,11 +120,12 @@ static const struct fec_devinfo fec_imx6q_info = {
FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358 |
FEC_QUIRK_HAS_RACC | FEC_QUIRK_CLEAR_SETUP_MII |
- FEC_QUIRK_HAS_PMQOS,
+ FEC_QUIRK_HAS_PMQOS | FEC_QUIRK_HAS_MDIO_C45,
};
static const struct fec_devinfo fec_mvf600_info = {
- .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC,
+ .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC |
+ FEC_QUIRK_HAS_MDIO_C45,
};
static const struct fec_devinfo fec_imx6x_info = {
@@ -132,7 +134,8 @@ static const struct fec_devinfo fec_imx6x_info = {
FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB |
FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE |
- FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES,
+ FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES |
+ FEC_QUIRK_HAS_MDIO_C45,
};
static const struct fec_devinfo fec_imx6ul_info = {
@@ -140,7 +143,8 @@ static const struct fec_devinfo fec_imx6ul_info = {
FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR007885 |
FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC |
- FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII,
+ FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII |
+ FEC_QUIRK_HAS_MDIO_C45,
};
static const struct fec_devinfo fec_imx8mq_info = {
@@ -150,7 +154,8 @@ static const struct fec_devinfo fec_imx8mq_info = {
FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE |
FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES |
- FEC_QUIRK_HAS_EEE | FEC_QUIRK_WAKEUP_FROM_INT2,
+ FEC_QUIRK_HAS_EEE | FEC_QUIRK_WAKEUP_FROM_INT2 |
+ FEC_QUIRK_HAS_MDIO_C45,
};
static const struct fec_devinfo fec_imx8qm_info = {
@@ -160,14 +165,15 @@ static const struct fec_devinfo fec_imx8qm_info = {
FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE |
FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES |
- FEC_QUIRK_DELAYED_CLKS_SUPPORT,
+ FEC_QUIRK_DELAYED_CLKS_SUPPORT | FEC_QUIRK_HAS_MDIO_C45,
};
static const struct fec_devinfo fec_s32v234_info = {
.quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB |
- FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE,
+ FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
+ FEC_QUIRK_HAS_MDIO_C45,
};
static struct platform_device_id fec_devtype[] = {
@@ -2434,8 +2440,10 @@ static int fec_enet_mii_init(struct platform_device *pdev)
fep->mii_bus->name = "fec_enet_mii_bus";
fep->mii_bus->read = fec_enet_mdio_read_c22;
fep->mii_bus->write = fec_enet_mdio_write_c22;
- fep->mii_bus->read_c45 = fec_enet_mdio_read_c45;
- fep->mii_bus->write_c45 = fec_enet_mdio_write_c45;
+ if (fep->quirks & FEC_QUIRK_HAS_MDIO_C45) {
+ fep->mii_bus->read_c45 = fec_enet_mdio_read_c45;
+ fep->mii_bus->write_c45 = fec_enet_mdio_write_c45;
+ }
snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
pdev->name, fep->dev_id + 1);
fep->mii_bus->priv = fep;
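
Since phylib split clause-22 and clause-45 accesses into separate callbacks, an MDIO bus driver advertises C45 support simply by populating read_c45/write_c45; leaving them NULL, as FEC now does on blocks without the capability, makes the core fail C45 transactions cleanly instead of issuing bogus bus cycles. A minimal sketch with hypothetical accessors:

	#include <linux/phy.h>

	static int example_mdio_read_c22(struct mii_bus *bus, int addr, int regnum)
	{
		return 0xffff;	/* placeholder for a clause-22 bus cycle */
	}

	static int example_mdio_write_c22(struct mii_bus *bus, int addr,
					  int regnum, u16 val)
	{
		return 0;
	}

	static int example_mdio_read_c45(struct mii_bus *bus, int addr,
					 int devad, int regnum)
	{
		return 0xffff;	/* placeholder for a clause-45 bus cycle */
	}

	static int example_mdio_write_c45(struct mii_bus *bus, int addr,
					  int devad, int regnum, u16 val)
	{
		return 0;
	}

	static void example_mdio_setup(struct mii_bus *bus, bool has_c45)
	{
		bus->read = example_mdio_read_c22;
		bus->write = example_mdio_write_c22;
		if (has_c45) {		/* quirk/capability gated, as in FEC */
			bus->read_c45 = example_mdio_read_c45;
			bus->write_c45 = example_mdio_write_c45;
		}
	}
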
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index e214b51d3c8b..98eb78d98e9f 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -49,7 +49,7 @@
#define GVE_XDP_ACTIONS 5
-#define GVE_TX_MAX_HEADER_SIZE 182
+#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
struct gve_rx_desc_queue {
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index e50510b8e784..813da572abca 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -351,8 +351,8 @@ static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
int bytes;
int hlen;
- hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) +
- tcp_hdrlen(skb) : skb_headlen(skb);
+ hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) + tcp_hdrlen(skb) :
+ min_t(int, GVE_GQ_TX_MIN_PKT_DESC_BYTES, skb->len);
pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo,
hlen);
@@ -522,13 +522,11 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st
pkt_desc = &tx->desc[idx];
l4_hdr_offset = skb_checksum_start_offset(skb);
- /* If the skb is gso, then we want the tcp header in the first segment
- * otherwise we want the linear portion of the skb (which will contain
- * the checksum because skb->csum_start and skb->csum_offset are given
- * relative to skb->head) in the first segment.
+	/* If the skb is gso, then we want the tcp header alone in the first segment,
+ * otherwise we want the minimum required by the gVNIC spec.
*/
hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
- skb_headlen(skb);
+ min_t(int, GVE_GQ_TX_MIN_PKT_DESC_BYTES, skb->len);
info->skb = skb;
/* We don't want to split the header, so if necessary, pad to the end
@@ -732,7 +730,7 @@ static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx,
u32 reqi = tx->req;
pad = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, len);
- if (pad >= GVE_TX_MAX_HEADER_SIZE)
+ if (pad >= GVE_GQ_TX_MIN_PKT_DESC_BYTES)
pad = 0;
info = &tx->info[reqi & tx->mask];
info->xdp_frame = frame_p;
@@ -812,7 +810,7 @@ int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
{
int nsegs;
- if (!gve_can_tx(tx, len + GVE_TX_MAX_HEADER_SIZE - 1))
+ if (!gve_can_tx(tx, len + GVE_GQ_TX_MIN_PKT_DESC_BYTES - 1))
return -EBUSY;
nsegs = gve_tx_fill_xdp(priv, tx, data, len, frame_p, false);
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 6f5c16aebcbf..bd7ef59b1f2e 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5287,31 +5287,6 @@ static void e1000_watchdog_task(struct work_struct *work)
ew32(TARC(0), tarc0);
}
- /* disable TSO for pcie and 10/100 speeds, to avoid
- * some hardware issues
- */
- if (!(adapter->flags & FLAG_TSO_FORCE)) {
- switch (adapter->link_speed) {
- case SPEED_10:
- case SPEED_100:
- e_info("10/100 speed: disabling TSO\n");
- netdev->features &= ~NETIF_F_TSO;
- netdev->features &= ~NETIF_F_TSO6;
- break;
- case SPEED_1000:
- netdev->features |= NETIF_F_TSO;
- netdev->features |= NETIF_F_TSO6;
- break;
- default:
- /* oops */
- break;
- }
- if (hw->mac.type == e1000_pch_spt) {
- netdev->features &= ~NETIF_F_TSO;
- netdev->features &= ~NETIF_F_TSO6;
- }
- }
-
/* enable transmits in the hardware, need to do this
* after setting TARC(0)
*/
@@ -7525,6 +7500,32 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
NETIF_F_RXCSUM |
NETIF_F_HW_CSUM);
+ /* disable TSO for pcie and 10/100 speeds to avoid
+ * some hardware issues and for i219 to fix transfer
+ * speed being capped at 60%
+ */
+ if (!(adapter->flags & FLAG_TSO_FORCE)) {
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ case SPEED_100:
+ e_info("10/100 speed: disabling TSO\n");
+ netdev->features &= ~NETIF_F_TSO;
+ netdev->features &= ~NETIF_F_TSO6;
+ break;
+ case SPEED_1000:
+ netdev->features |= NETIF_F_TSO;
+ netdev->features |= NETIF_F_TSO6;
+ break;
+ default:
+ /* oops */
+ break;
+ }
+ if (hw->mac.type == e1000_pch_spt) {
+ netdev->features &= ~NETIF_F_TSO;
+ netdev->features &= ~NETIF_F_TSO6;
+ }
+ }
+
/* Set user-changeable features (subset of all device features) */
netdev->hw_features = netdev->features;
netdev->hw_features |= NETIF_F_RXFCS;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index c8ff5675b29d..b847bd105b16 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11072,8 +11072,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
pf->hw.aq.asq_last_status));
}
/* reinit the misc interrupt */
- if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
ret = i40e_setup_misc_vector(pf);
+ if (ret)
+ goto end_unlock;
+ }
/* Add a filter to drop all Flow control frames from any VSI from being
* transmitted. By doing so we stop a malicious VF from sending out
@@ -14147,15 +14150,15 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
vsi->id = ctxt.vsi_number;
}
- vsi->active_filters = 0;
- clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
spin_lock_bh(&vsi->mac_filter_hash_lock);
+ vsi->active_filters = 0;
/* If macvlan filters already exist, force them to get loaded */
hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
f->state = I40E_FILTER_NEW;
f_count++;
}
spin_unlock_bh(&vsi->mac_filter_hash_lock);
+ clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
if (f_count) {
vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 2cdce251472c..9abaff1f2aff 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -58,8 +58,6 @@ enum iavf_vsi_state_t {
struct iavf_vsi {
struct iavf_adapter *back;
struct net_device *netdev;
- unsigned long active_cvlans[BITS_TO_LONGS(VLAN_N_VID)];
- unsigned long active_svlans[BITS_TO_LONGS(VLAN_N_VID)];
u16 seid;
u16 id;
DECLARE_BITMAP(state, __IAVF_VSI_STATE_SIZE__);
@@ -157,15 +155,20 @@ struct iavf_vlan {
u16 tpid;
};
+enum iavf_vlan_state_t {
+ IAVF_VLAN_INVALID,
+ IAVF_VLAN_ADD, /* filter needs to be added */
+ IAVF_VLAN_IS_NEW, /* filter is new, wait for PF answer */
+ IAVF_VLAN_ACTIVE, /* filter is accepted by PF */
+ IAVF_VLAN_DISABLE, /* filter needs to be deleted by PF, then marked INACTIVE */
+ IAVF_VLAN_INACTIVE, /* filter is inactive, we are in IFF_DOWN */
+ IAVF_VLAN_REMOVE, /* filter needs to be removed from list */
+};
+
struct iavf_vlan_filter {
struct list_head list;
struct iavf_vlan vlan;
- struct {
- u8 is_new_vlan:1; /* filter is new, wait for PF answer */
- u8 remove:1; /* filter needs to be removed */
- u8 add:1; /* filter needs to be added */
- u8 padding:5;
- };
+ enum iavf_vlan_state_t state;
};
#define IAVF_MAX_TRAFFIC_CLASS 4
@@ -257,6 +260,7 @@ struct iavf_adapter {
wait_queue_head_t vc_waitqueue;
struct iavf_q_vector *q_vectors;
struct list_head vlan_filter_list;
+ int num_vlan_filters;
struct list_head mac_filter_list;
struct mutex crit_lock;
struct mutex client_lock;
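
The new enum replaces three overlapping bits with one explicit lifecycle per VLAN filter. Reconstructed from the handlers that follow: ADD becomes IS_NEW once sent to the PF, IS_NEW becomes ACTIVE on success (or is freed on reject); ifdown marks filters DISABLE, the delete path downgrades that to INACTIVE so restore can flip them back to ADD; REMOVE frees the entry. A condensed, hypothetical view of those transitions, not the driver's actual code:

	enum vlan_state { VLAN_ADD, VLAN_IS_NEW, VLAN_ACTIVE,
			  VLAN_DISABLE, VLAN_INACTIVE, VLAN_REMOVE };

	/* PF answered our add request */
	static enum vlan_state vlan_on_pf_reply(enum vlan_state s, int ok)
	{
		if (s != VLAN_IS_NEW)
			return s;
		return ok ? VLAN_ACTIVE : VLAN_REMOVE;	/* reject: drop it */
	}

	/* interface going down: keep entries, but disable them on the PF */
	static enum vlan_state vlan_on_ifdown(enum vlan_state s)
	{
		return VLAN_DISABLE;
	}

	/* interface coming back up: re-add everything we parked */
	static enum vlan_state vlan_on_restore(enum vlan_state s)
	{
		return s == VLAN_INACTIVE ? VLAN_ADD : s;
	}
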
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 095201e83c9d..2de4baff4c20 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -791,7 +791,8 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter,
f->vlan = vlan;
list_add_tail(&f->list, &adapter->vlan_filter_list);
- f->add = true;
+ f->state = IAVF_VLAN_ADD;
+ adapter->num_vlan_filters++;
adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
}
@@ -813,7 +814,7 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan)
f = iavf_find_vlan(adapter, vlan);
if (f) {
- f->remove = true;
+ f->state = IAVF_VLAN_REMOVE;
adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER;
}
@@ -828,14 +829,18 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan)
**/
static void iavf_restore_filters(struct iavf_adapter *adapter)
{
- u16 vid;
+ struct iavf_vlan_filter *f;
/* re-add all VLAN filters */
- for_each_set_bit(vid, adapter->vsi.active_cvlans, VLAN_N_VID)
- iavf_add_vlan(adapter, IAVF_VLAN(vid, ETH_P_8021Q));
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
- for_each_set_bit(vid, adapter->vsi.active_svlans, VLAN_N_VID)
- iavf_add_vlan(adapter, IAVF_VLAN(vid, ETH_P_8021AD));
+ list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+ if (f->state == IAVF_VLAN_INACTIVE)
+ f->state = IAVF_VLAN_ADD;
+ }
+
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
}
/**
@@ -844,8 +849,7 @@ static void iavf_restore_filters(struct iavf_adapter *adapter)
*/
u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter)
{
- return bitmap_weight(adapter->vsi.active_cvlans, VLAN_N_VID) +
- bitmap_weight(adapter->vsi.active_svlans, VLAN_N_VID);
+ return adapter->num_vlan_filters;
}
/**
@@ -928,11 +932,6 @@ static int iavf_vlan_rx_kill_vid(struct net_device *netdev,
return 0;
iavf_del_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto)));
- if (proto == cpu_to_be16(ETH_P_8021Q))
- clear_bit(vid, adapter->vsi.active_cvlans);
- else
- clear_bit(vid, adapter->vsi.active_svlans);
-
return 0;
}
@@ -1293,16 +1292,11 @@ static void iavf_clear_mac_vlan_filters(struct iavf_adapter *adapter)
}
}
- /* remove all VLAN filters */
+ /* disable all VLAN filters */
list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
- list) {
- if (vlf->add) {
- list_del(&vlf->list);
- kfree(vlf);
- } else {
- vlf->remove = true;
- }
- }
+ list)
+ vlf->state = IAVF_VLAN_DISABLE;
+
spin_unlock_bh(&adapter->mac_vlan_list_lock);
}
@@ -2914,6 +2908,7 @@ static void iavf_disable_vf(struct iavf_adapter *adapter)
list_del(&fv->list);
kfree(fv);
}
+ adapter->num_vlan_filters = 0;
spin_unlock_bh(&adapter->mac_vlan_list_lock);
@@ -3131,9 +3126,6 @@ continue_reset:
adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
iavf_misc_irq_enable(adapter);
- bitmap_clear(adapter->vsi.active_cvlans, 0, VLAN_N_VID);
- bitmap_clear(adapter->vsi.active_svlans, 0, VLAN_N_VID);
-
mod_delayed_work(adapter->wq, &adapter->watchdog_task, 2);
/* We were running when the reset started, so we need to restore some
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 4e17d006c52d..9afbbdac3590 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -642,16 +642,10 @@ static void iavf_vlan_add_reject(struct iavf_adapter *adapter)
spin_lock_bh(&adapter->mac_vlan_list_lock);
list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
- if (f->is_new_vlan) {
- if (f->vlan.tpid == ETH_P_8021Q)
- clear_bit(f->vlan.vid,
- adapter->vsi.active_cvlans);
- else
- clear_bit(f->vlan.vid,
- adapter->vsi.active_svlans);
-
+ if (f->state == IAVF_VLAN_IS_NEW) {
list_del(&f->list);
kfree(f);
+ adapter->num_vlan_filters--;
}
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
@@ -679,7 +673,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
spin_lock_bh(&adapter->mac_vlan_list_lock);
list_for_each_entry(f, &adapter->vlan_filter_list, list) {
- if (f->add)
+ if (f->state == IAVF_VLAN_ADD)
count++;
}
if (!count || !VLAN_FILTERING_ALLOWED(adapter)) {
@@ -710,11 +704,10 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
vvfl->vsi_id = adapter->vsi_res->vsi_id;
vvfl->num_elements = count;
list_for_each_entry(f, &adapter->vlan_filter_list, list) {
- if (f->add) {
+ if (f->state == IAVF_VLAN_ADD) {
vvfl->vlan_id[i] = f->vlan.vid;
i++;
- f->add = false;
- f->is_new_vlan = true;
+ f->state = IAVF_VLAN_IS_NEW;
if (i == count)
break;
}
@@ -760,7 +753,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
vvfl_v2->vport_id = adapter->vsi_res->vsi_id;
vvfl_v2->num_elements = count;
list_for_each_entry(f, &adapter->vlan_filter_list, list) {
- if (f->add) {
+ if (f->state == IAVF_VLAN_ADD) {
struct virtchnl_vlan_supported_caps *filtering_support =
&adapter->vlan_v2_caps.filtering.filtering_support;
struct virtchnl_vlan *vlan;
@@ -778,8 +771,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
vlan->tpid = f->vlan.tpid;
i++;
- f->add = false;
- f->is_new_vlan = true;
+ f->state = IAVF_VLAN_IS_NEW;
}
}
@@ -822,10 +814,16 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
* filters marked for removal to enable bailing out before
* sending a virtchnl message
*/
- if (f->remove && !VLAN_FILTERING_ALLOWED(adapter)) {
+ if (f->state == IAVF_VLAN_REMOVE &&
+ !VLAN_FILTERING_ALLOWED(adapter)) {
list_del(&f->list);
kfree(f);
- } else if (f->remove) {
+ adapter->num_vlan_filters--;
+ } else if (f->state == IAVF_VLAN_DISABLE &&
+ !VLAN_FILTERING_ALLOWED(adapter)) {
+ f->state = IAVF_VLAN_INACTIVE;
+ } else if (f->state == IAVF_VLAN_REMOVE ||
+ f->state == IAVF_VLAN_DISABLE) {
count++;
}
}
@@ -857,11 +855,18 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
vvfl->vsi_id = adapter->vsi_res->vsi_id;
vvfl->num_elements = count;
list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
- if (f->remove) {
+ if (f->state == IAVF_VLAN_DISABLE) {
vvfl->vlan_id[i] = f->vlan.vid;
+ f->state = IAVF_VLAN_INACTIVE;
i++;
+ if (i == count)
+ break;
+ } else if (f->state == IAVF_VLAN_REMOVE) {
+ vvfl->vlan_id[i] = f->vlan.vid;
list_del(&f->list);
kfree(f);
+ adapter->num_vlan_filters--;
+ i++;
if (i == count)
break;
}
@@ -901,7 +906,8 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
vvfl_v2->vport_id = adapter->vsi_res->vsi_id;
vvfl_v2->num_elements = count;
list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
- if (f->remove) {
+ if (f->state == IAVF_VLAN_DISABLE ||
+ f->state == IAVF_VLAN_REMOVE) {
struct virtchnl_vlan_supported_caps *filtering_support =
&adapter->vlan_v2_caps.filtering.filtering_support;
struct virtchnl_vlan *vlan;
@@ -915,8 +921,13 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
vlan->tci = f->vlan.vid;
vlan->tpid = f->vlan.tpid;
- list_del(&f->list);
- kfree(f);
+ if (f->state == IAVF_VLAN_DISABLE) {
+ f->state = IAVF_VLAN_INACTIVE;
+ } else {
+ list_del(&f->list);
+ kfree(f);
+ adapter->num_vlan_filters--;
+ }
i++;
if (i == count)
break;
@@ -2192,7 +2203,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
list_for_each_entry(vlf,
&adapter->vlan_filter_list,
list)
- vlf->add = true;
+ vlf->state = IAVF_VLAN_ADD;
adapter->aq_required |=
IAVF_FLAG_AQ_ADD_VLAN_FILTER;
@@ -2260,7 +2271,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
list_for_each_entry(vlf,
&adapter->vlan_filter_list,
list)
- vlf->add = true;
+ vlf->state = IAVF_VLAN_ADD;
aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
}
@@ -2444,15 +2455,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
spin_lock_bh(&adapter->mac_vlan_list_lock);
list_for_each_entry(f, &adapter->vlan_filter_list, list) {
- if (f->is_new_vlan) {
- f->is_new_vlan = false;
- if (f->vlan.tpid == ETH_P_8021Q)
- set_bit(f->vlan.vid,
- adapter->vsi.active_cvlans);
- else
- set_bit(f->vlan.vid,
- adapter->vsi.active_svlans);
- }
+ if (f->state == IAVF_VLAN_IS_NEW)
+ f->state = IAVF_VLAN_ACTIVE;
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
index 5fd75e75772e..daa6a1e894cf 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -542,6 +542,21 @@ static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf)
}
/**
+ * ice_vc_fdir_reset_cnt_all - reset all FDIR counters for this VF FDIR
+ * @fdir: pointer to the VF FDIR structure
+ */
+static void ice_vc_fdir_reset_cnt_all(struct ice_vf_fdir *fdir)
+{
+ enum ice_fltr_ptype flow;
+
+ for (flow = ICE_FLTR_PTYPE_NONF_NONE;
+ flow < ICE_FLTR_PTYPE_MAX; flow++) {
+ fdir->fdir_fltr_cnt[flow][0] = 0;
+ fdir->fdir_fltr_cnt[flow][1] = 0;
+ }
+}
+
+/**
* ice_vc_fdir_has_prof_conflict
* @vf: pointer to the VF structure
* @conf: FDIR configuration for each filter
@@ -1871,7 +1886,7 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
v_ret = VIRTCHNL_STATUS_SUCCESS;
stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id);
- goto err_free_conf;
+ goto err_rem_entry;
}
ret = ice_vc_fdir_write_fltr(vf, conf, true, is_tun);
@@ -1880,15 +1895,16 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n",
vf->vf_id, ret);
- goto err_rem_entry;
+ goto err_clr_irq;
}
exit:
kfree(stat);
return ret;
-err_rem_entry:
+err_clr_irq:
ice_vc_fdir_clear_irq_ctx(vf);
+err_rem_entry:
ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
err_free_conf:
devm_kfree(dev, conf);
@@ -1997,6 +2013,7 @@ void ice_vf_fdir_init(struct ice_vf *vf)
spin_lock_init(&fdir->ctx_lock);
fdir->ctx_irq.flags = 0;
fdir->ctx_done.flags = 0;
+ ice_vc_fdir_reset_cnt_all(fdir);
}
/**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 773c35fecace..f2604fc05991 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -36,6 +36,7 @@
#include <net/tc_act/tc_mirred.h>
#include <net/vxlan.h>
#include <net/mpls.h>
+#include <net/netdev_queues.h>
#include <net/xdp_sock_drv.h>
#include <net/xfrm.h>
@@ -1119,6 +1120,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0;
unsigned int budget = q_vector->tx.work_limit;
unsigned int i = tx_ring->next_to_clean;
+ struct netdev_queue *txq;
if (test_bit(__IXGBE_DOWN, &adapter->state))
return true;
@@ -1249,24 +1251,14 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
if (ring_is_xdp(tx_ring))
return !!budget;
- netdev_tx_completed_queue(txring_txq(tx_ring),
- total_packets, total_bytes);
-
#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
- if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
- (ixgbe_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
- /* Make sure that anybody stopping the queue after this
- * sees the new next_to_clean.
- */
- smp_mb();
- if (__netif_subqueue_stopped(tx_ring->netdev,
- tx_ring->queue_index)
- && !test_bit(__IXGBE_DOWN, &adapter->state)) {
- netif_wake_subqueue(tx_ring->netdev,
- tx_ring->queue_index);
- ++tx_ring->tx_stats.restart_queue;
- }
- }
+ txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
+ if (!__netif_txq_completed_wake(txq, total_packets, total_bytes,
+ ixgbe_desc_unused(tx_ring),
+ TX_WAKE_THRESHOLD,
+ !netif_carrier_ok(tx_ring->netdev) ||
+ test_bit(__IXGBE_DOWN, &adapter->state)))
+ ++tx_ring->tx_stats.restart_queue;
return !!budget;
}
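
The hunk above replaces ixgbe's open-coded BQL accounting and wake check with __netif_txq_completed_wake() from the new <net/netdev_queues.h>. Roughly, the macro performs the steps the deleted lines spelled out (a sketch of the semantics, not the macro's exact body; get_desc, start_thrs and down_cond stand for the last three arguments):

	/* report completions to BQL; this also supplies the barrier that
	 * pairs with the one in the stop path
	 */
	netdev_tx_completed_queue(txq, total_packets, total_bytes);

	/* wake only if work was done, enough descriptors are free, and the
	 * caller-supplied "down" condition is false
	 */
	if (total_packets && get_desc > start_thrs &&
	    netif_tx_queue_stopped(txq) && !down_cond)
		netif_tx_wake_queue(txq);	/* macro then evaluates to 0 */

The call site counts a queue restart whenever the macro returns 0 ("woken"), hence the negation. Note the down condition must be true when the queue should stay stopped, i.e. carrier lost or __IXGBE_DOWN set.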
@@ -8270,22 +8262,10 @@ static void ixgbe_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc,
static int __ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
{
- netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
-
- /* Herbert's original patch had:
- * smp_mb__after_netif_stop_queue();
- * but since that doesn't exist yet, just open code it.
- */
- smp_mb();
-
- /* We need to check again in a case another CPU has just
- * made room available.
- */
- if (likely(ixgbe_desc_unused(tx_ring) < size))
+ if (!netif_subqueue_try_stop(tx_ring->netdev, tx_ring->queue_index,
+ ixgbe_desc_unused(tx_ring), size))
return -EBUSY;
- /* A reprieve! - use start_queue because it doesn't call schedule */
- netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
++tx_ring->tx_stats.restart_queue;
return 0;
}
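
Likewise, netif_subqueue_try_stop() packages the stop/recheck dance the removed comment attributed to Herbert's original patch: stop the queue, issue the barrier, then re-check in case another CPU has just freed descriptors. A sketch of the equivalent logic, mirroring the deleted lines (not the helper's exact body):

	netif_stop_subqueue(dev, queue_index);
	smp_mb();	/* pairs with the barrier on the completion side */
	if (likely(get_desc < stop_thrs))
		return 0;	/* stays stopped; caller returns -EBUSY */
	/* a reprieve: room appeared, so quietly un-stop the queue */
	netif_start_subqueue(dev, queue_index);
	return -1;	/* caller counts a restart and keeps transmitting */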
diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index f58a1c0144ba..884d64114bff 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -34,6 +34,7 @@ config MV643XX_ETH
config MVMDIO
tristate "Marvell MDIO interface support"
depends on HAS_IOMEM
+ select MDIO_DEVRES
select PHYLIB
help
This driver supports the MDIO interface found in the network
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 9fe9ac3b23c3..9e948d091a69 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -729,6 +729,7 @@ static void mtk_mac_link_up(struct phylink_config *config,
MAC_MCR_FORCE_RX_FC);
/* Configure speed */
+ mac->speed = speed;
switch (speed) {
case SPEED_2500:
case SPEED_1000:
@@ -3232,6 +3233,9 @@ found:
if (dp->index >= MTK_QDMA_NUM_QUEUES)
return NOTIFY_DONE;
+ if (mac->speed > 0 && mac->speed <= s.base.speed)
+ s.base.speed = 0;
+
mtk_set_queue_speed(eth, dp->index + 3, s.base.speed);
return NOTIFY_DONE;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 23c7abeb5c14..cdcf8534283e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -1276,6 +1276,9 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
int mtk_eth_offload_init(struct mtk_eth *eth);
int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data);
+int mtk_flow_offload_cmd(struct mtk_eth *eth, struct flow_cls_offload *cls,
+ int ppe_index);
+void mtk_flow_offload_cleanup(struct mtk_eth *eth, struct list_head *list);
void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
index f9c9f2ea4206..dd9581334b05 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -635,10 +635,20 @@ void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
static int
mtk_foe_entry_commit_l2(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
{
+ struct mtk_flow_entry *prev;
+
entry->type = MTK_FLOW_TYPE_L2;
- return rhashtable_insert_fast(&ppe->l2_flows, &entry->l2_node,
- mtk_flow_l2_ht_params);
+ prev = rhashtable_lookup_get_insert_fast(&ppe->l2_flows, &entry->l2_node,
+ mtk_flow_l2_ht_params);
+ if (likely(!prev))
+ return 0;
+
+ if (IS_ERR(prev))
+ return PTR_ERR(prev);
+
+ return rhashtable_replace_fast(&ppe->l2_flows, &prev->l2_node,
+ &entry->l2_node, mtk_flow_l2_ht_params);
}
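
rhashtable_lookup_get_insert_fast() returns NULL when the new node was inserted, the existing node when the key is already present, or an ERR_PTR() on failure; the follow-up replace call then swaps a duplicate L2 flow in place instead of failing with -EEXIST. The generic shape of the pattern (ht, obj and params are placeholders):

	prev = rhashtable_lookup_get_insert_fast(&ht, &obj->node, params);
	if (!prev)
		return 0;		/* inserted as a brand-new entry */
	if (IS_ERR(prev))
		return PTR_ERR(prev);	/* allocation or rehash failure */
	/* key exists: atomically substitute the new node for the old one */
	return rhashtable_replace_fast(&ht, &prev->node, &obj->node, params);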
int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
index 53cf87e9acbb..316fe2e70fea 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
@@ -47,7 +47,7 @@ static const char *mtk_foe_pkt_type_str(int type)
static void
mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
{
- u32 n_addr[4];
+ __be32 n_addr[4];
int i;
if (!ipv6) {
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index 46634dc29d2f..02eebff02d45 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -235,7 +235,8 @@ out:
}
static int
-mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f,
+ int ppe_index)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_action_entry *act;
@@ -452,6 +453,7 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
entry->cookie = f->cookie;
memcpy(&entry->data, &foe, sizeof(entry->data));
entry->wed_index = wed_index;
+ entry->ppe_index = ppe_index;
err = mtk_foe_entry_commit(eth->ppe[entry->ppe_index], entry);
if (err < 0)
@@ -520,25 +522,15 @@ mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
static DEFINE_MUTEX(mtk_flow_offload_mutex);
-static int
-mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+int mtk_flow_offload_cmd(struct mtk_eth *eth, struct flow_cls_offload *cls,
+ int ppe_index)
{
- struct flow_cls_offload *cls = type_data;
- struct net_device *dev = cb_priv;
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
int err;
- if (!tc_can_offload(dev))
- return -EOPNOTSUPP;
-
- if (type != TC_SETUP_CLSFLOWER)
- return -EOPNOTSUPP;
-
mutex_lock(&mtk_flow_offload_mutex);
switch (cls->command) {
case FLOW_CLS_REPLACE:
- err = mtk_flow_offload_replace(eth, cls);
+ err = mtk_flow_offload_replace(eth, cls, ppe_index);
break;
case FLOW_CLS_DESTROY:
err = mtk_flow_offload_destroy(eth, cls);
@@ -556,6 +548,26 @@ mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_pri
}
static int
+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{
+ struct flow_cls_offload *cls = type_data;
+ struct net_device *dev = cb_priv;
+ struct mtk_mac *mac;
+ struct mtk_eth *eth;
+
+ mac = netdev_priv(dev);
+ eth = mac->hw;
+
+ if (!tc_can_offload(dev))
+ return -EOPNOTSUPP;
+
+ if (type != TC_SETUP_CLSFLOWER)
+ return -EOPNOTSUPP;
+
+ return mtk_flow_offload_cmd(eth, cls, 0);
+}
+
+static int
mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
{
struct mtk_mac *mac = netdev_priv(dev);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index 95d890870984..4c205afbd230 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -13,6 +13,8 @@
#include <linux/mfd/syscon.h>
#include <linux/debugfs.h>
#include <linux/soc/mediatek/mtk_wed.h>
+#include <net/flow_offload.h>
+#include <net/pkt_cls.h>
#include "mtk_eth_soc.h"
#include "mtk_wed_regs.h"
#include "mtk_wed.h"
@@ -41,6 +43,11 @@
static struct mtk_wed_hw *hw_list[2];
static DEFINE_MUTEX(hw_lock);
+struct mtk_wed_flow_block_priv {
+ struct mtk_wed_hw *hw;
+ struct net_device *dev;
+};
+
static void
wed_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val)
{
@@ -1745,6 +1752,99 @@ out:
mutex_unlock(&hw_lock);
}
+static int
+mtk_wed_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{
+ struct mtk_wed_flow_block_priv *priv = cb_priv;
+ struct flow_cls_offload *cls = type_data;
+ struct mtk_wed_hw *hw = priv->hw;
+
+ if (!tc_can_offload(priv->dev))
+ return -EOPNOTSUPP;
+
+ if (type != TC_SETUP_CLSFLOWER)
+ return -EOPNOTSUPP;
+
+ return mtk_flow_offload_cmd(hw->eth, cls, hw->index);
+}
+
+static int
+mtk_wed_setup_tc_block(struct mtk_wed_hw *hw, struct net_device *dev,
+ struct flow_block_offload *f)
+{
+ struct mtk_wed_flow_block_priv *priv;
+ static LIST_HEAD(block_cb_list);
+ struct flow_block_cb *block_cb;
+ struct mtk_eth *eth = hw->eth;
+ flow_setup_cb_t *cb;
+
+ if (!eth->soc->offload_version)
+ return -EOPNOTSUPP;
+
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ cb = mtk_wed_setup_tc_block_cb;
+ f->driver_block_list = &block_cb_list;
+
+ switch (f->command) {
+ case FLOW_BLOCK_BIND:
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
+ if (block_cb) {
+ flow_block_cb_incref(block_cb);
+ return 0;
+ }
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->hw = hw;
+ priv->dev = dev;
+ block_cb = flow_block_cb_alloc(cb, dev, priv, NULL);
+ if (IS_ERR(block_cb)) {
+ kfree(priv);
+ return PTR_ERR(block_cb);
+ }
+
+ flow_block_cb_incref(block_cb);
+ flow_block_cb_add(block_cb, f);
+ list_add_tail(&block_cb->driver_list, &block_cb_list);
+ return 0;
+ case FLOW_BLOCK_UNBIND:
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
+ if (!block_cb)
+ return -ENOENT;
+
+ if (!flow_block_cb_decref(block_cb)) {
+ flow_block_cb_remove(block_cb, f);
+ list_del(&block_cb->driver_list);
+ kfree(block_cb->cb_priv);
+ }
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mtk_wed_setup_tc(struct mtk_wed_device *wed, struct net_device *dev,
+ enum tc_setup_type type, void *type_data)
+{
+ struct mtk_wed_hw *hw = wed->hw;
+
+ if (hw->version < 2)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ case TC_SETUP_FT:
+ return mtk_wed_setup_tc_block(hw, dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
void __iomem *wdma, phys_addr_t wdma_phy,
int index)
@@ -1764,6 +1864,7 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
.irq_set_mask = mtk_wed_irq_set_mask,
.detach = mtk_wed_detach,
.ppe_check = mtk_wed_ppe_check,
+ .setup_tc = mtk_wed_setup_tc,
};
struct device_node *eth_np = eth->dev->of_node;
struct platform_device *pdev;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 4b5e459b6d49..332472fe4990 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -681,14 +681,32 @@ int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
return 0;
}
-int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
+int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type)
{
struct mlx4_en_xdp_buff *_ctx = (void *)ctx;
+ struct mlx4_cqe *cqe = _ctx->cqe;
+ enum xdp_rss_hash_type xht = 0;
+ __be16 status;
if (unlikely(!(_ctx->dev->features & NETIF_F_RXHASH)))
return -ENODATA;
- *hash = be32_to_cpu(_ctx->cqe->immed_rss_invalid);
+ *hash = be32_to_cpu(cqe->immed_rss_invalid);
+ status = cqe->status;
+ if (status & cpu_to_be16(MLX4_CQE_STATUS_TCP))
+ xht = XDP_RSS_L4_TCP;
+ if (status & cpu_to_be16(MLX4_CQE_STATUS_UDP))
+ xht = XDP_RSS_L4_UDP;
+ if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV4F))
+ xht |= XDP_RSS_L3_IPV4;
+ if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) {
+ xht |= XDP_RSS_L3_IPV6;
+ if (cqe->ipv6_ext_mask)
+ xht |= XDP_RSS_L3_DYNHDR;
+ }
+ *rss_type = xht;
+
return 0;
}
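
This driver callback backs the XDP RX-hash metadata kfunc, so a program bound to the device (metadata kfuncs require a dev-bound program) can read both the hash and its type. A sketch of the consumer side, assuming vmlinux.h-style type availability; the example program is hypothetical:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>

	extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
					    enum xdp_rss_hash_type *rss_type) __ksym;

	SEC("xdp")
	int read_rx_hash(struct xdp_md *ctx)
	{
		enum xdp_rss_hash_type type = 0;
		__u32 hash = 0;

		/* returns -ENODATA when NETIF_F_RXHASH is off, as above */
		if (bpf_xdp_metadata_rx_hash(ctx, &hash, &type))
			return XDP_PASS;

		if (type & XDP_RSS_L4_TCP)
			bpf_printk("TCP flow, hash 0x%x", hash);
		return XDP_PASS;
	}

	char LICENSE[] SEC("license") = "GPL";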
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 2f79378fbf6e..65cb63f6c465 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -228,7 +228,9 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring)
{
- return ring->prod - ring->cons > ring->full_size;
+ u32 used = READ_ONCE(ring->prod) - READ_ONCE(ring->cons);
+
+ return used > ring->full_size;
}
static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
@@ -1083,7 +1085,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);
}
- ring->prod += nr_txbb;
+ WRITE_ONCE(ring->prod, ring->prod + nr_txbb);
/* If we used a bounce buffer then copy descriptor back into place */
if (unlikely(bounce))
@@ -1214,7 +1216,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
rx_ring->xdp_tx++;
- ring->prod += MLX4_EN_XDP_TX_NRTXBB;
+ WRITE_ONCE(ring->prod, ring->prod + MLX4_EN_XDP_TX_NRTXBB);
/* Ensure new descriptor hits memory
* before setting ownership of this descriptor to HW
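
ring->prod and ring->cons are updated and read on different CPUs without a lock, so the READ_ONCE()/WRITE_ONCE() annotations above keep the compiler from tearing or re-fetching the indices. The fullness test stays correct across index wraparound because it is done in unsigned arithmetic: for example, prod = 5 after wrapping and cons = 0xfffffffe give used = 5 - 0xfffffffe = 7 (mod 2^32), which is still the true number of in-flight descriptors.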
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 034733b13b1a..321f801c1d7c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -798,7 +798,8 @@ int mlx4_en_netdev_event(struct notifier_block *this,
struct xdp_md;
int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp);
-int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash);
+int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type);
/*
* Functions for time stamping
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 6c2f1d4a58ab..ca3c66cd47ec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -75,7 +75,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
-mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o
+mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o en/rep/bridge.o
mlx5_core-$(CONFIG_THERMAL) += thermal.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
@@ -112,8 +112,8 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o
steering/dr_ste_v2.o \
steering/dr_cmd.o steering/dr_fw.o \
steering/dr_action.o steering/fs_dr.o \
- steering/dr_definer.o \
- steering/dr_dbg.o lib/smfs.o
+ steering/dr_definer.o steering/dr_ptrn.o \
+ steering/dr_arg.o steering/dr_dbg.o lib/smfs.o
#
# SF device
#
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index b00e33ed05e9..d53de39539a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1802,7 +1802,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
if (in_size <= 16)
goto cache_miss;
- for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+ for (i = 0; i < dev->profile.num_cmd_caches; i++) {
ch = &cmd->cache[i];
if (in_size > ch->max_inbox_size)
continue;
@@ -2097,7 +2097,7 @@ static void destroy_msg_cache(struct mlx5_core_dev *dev)
struct mlx5_cmd_msg *n;
int i;
- for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+ for (i = 0; i < dev->profile.num_cmd_caches; i++) {
ch = &dev->cmd.cache[i];
list_for_each_entry_safe(msg, n, &ch->head, list) {
list_del(&msg->list);
@@ -2127,7 +2127,7 @@ static void create_msg_cache(struct mlx5_core_dev *dev)
int k;
/* Initialize and fill the caches with initial entries */
- for (k = 0; k < MLX5_NUM_COMMAND_CACHES; k++) {
+ for (k = 0; k < dev->profile.num_cmd_caches; k++) {
ch = &cmd->cache[k];
spin_lock_init(&ch->lock);
INIT_LIST_HEAD(&ch->head);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index e7739acc926e..1b33533b15de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -60,9 +60,6 @@ bool mlx5_eth_supported(struct mlx5_core_dev *dev)
if (!IS_ENABLED(CONFIG_MLX5_CORE_EN))
return false;
- if (mlx5_core_is_management_pf(dev))
- return false;
-
if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
return false;
@@ -191,9 +188,6 @@ bool mlx5_rdma_supported(struct mlx5_core_dev *dev)
if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
return false;
- if (mlx5_core_is_management_pf(dev))
- return false;
-
if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV)
return false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
index 7c9c4e40c019..d000236ddbac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -75,10 +75,6 @@ int mlx5_ec_init(struct mlx5_core_dev *dev)
if (!mlx5_core_is_ecpf(dev))
return 0;
- /* Management PF don't have a peer PF */
- if (mlx5_core_is_management_pf(dev))
- return 0;
-
return mlx5_host_pf_init(dev);
}
@@ -89,10 +85,6 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
if (!mlx5_core_is_ecpf(dev))
return;
- /* Management PF don't have a peer PF */
- if (mlx5_core_is_management_pf(dev))
- return;
-
mlx5_host_pf_cleanup(dev);
err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_HOST_PF]);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ba615b74bb8e..b8987a404d75 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -475,59 +475,18 @@ struct mlx5e_txqsq {
cqe_ts_to_ns ptp_cyc2time;
} ____cacheline_aligned_in_smp;
-/* XDP packets can be transmitted in different ways. On completion, we need to
- * distinguish between them to clean up things in a proper way.
- */
-enum mlx5e_xdp_xmit_mode {
- /* An xdp_frame was transmitted due to either XDP_REDIRECT from another
- * device or XDP_TX from an XSK RQ. The frame has to be unmapped and
- * returned.
- */
- MLX5E_XDP_XMIT_MODE_FRAME,
-
- /* The xdp_frame was created in place as a result of XDP_TX from a
- * regular RQ. No DMA remapping happened, and the page belongs to us.
- */
- MLX5E_XDP_XMIT_MODE_PAGE,
-
- /* No xdp_frame was created at all, the transmit happened from a UMEM
- * page. The UMEM Completion Ring producer pointer has to be increased.
- */
- MLX5E_XDP_XMIT_MODE_XSK,
-};
-
-struct mlx5e_xdp_info {
- enum mlx5e_xdp_xmit_mode mode;
- union {
- struct {
- struct xdp_frame *xdpf;
- dma_addr_t dma_addr;
- } frame;
- struct {
- struct mlx5e_rq *rq;
- struct page *page;
- } page;
- };
-};
-
-struct mlx5e_xmit_data {
- dma_addr_t dma_addr;
- void *data;
- u32 len;
-};
-
struct mlx5e_xdp_info_fifo {
- struct mlx5e_xdp_info *xi;
+ union mlx5e_xdp_info *xi;
u32 *cc;
u32 *pc;
u32 mask;
};
struct mlx5e_xdpsq;
+struct mlx5e_xmit_data;
typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
struct mlx5e_xmit_data *,
- struct skb_shared_info *,
int);
struct mlx5e_xdpsq {
@@ -628,6 +587,7 @@ union mlx5e_alloc_units {
struct mlx5e_mpw_info {
u16 consumed_strides;
DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
+ struct mlx5e_frag_page linear_page;
union mlx5e_alloc_units alloc_units;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 31f3c6e51d9e..ef546ed8b4d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -253,17 +253,20 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
struct mlx5e_xsk_param *xsk,
bool mpwqe)
{
+ u32 sz;
+
/* XSK frames are mapped as individual pages, because frames may come in
* an arbitrary order from random locations in the UMEM.
*/
if (xsk)
return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;
- /* XDP in mlx5e doesn't support multiple packets per page. */
- if (params->xdp_prog)
- return PAGE_SIZE;
+ sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
- return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
+ /* XDP in mlx5e doesn't support multiple packets per page.
+ * Do not assume sz <= PAGE_SIZE if params->xdp_prog is set.
+ */
+ return params->xdp_prog && sz < PAGE_SIZE ? PAGE_SIZE : sz;
}
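
The reordering matters: the stride size is computed first and only then clamped for XDP. If the rounded linear size is below PAGE_SIZE and an XDP program is attached, the stride is bumped to a full page so each packet owns one page (e.g. a size that rounds to 2048 becomes 4096 on 4K-page systems), while a size that already rounds to PAGE_SIZE or above is returned unchanged rather than assumed to fit in a single page.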
static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
@@ -320,6 +323,20 @@ static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
}
+bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+ u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+ return mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
+ log_wqe_num_of_strides,
+ page_shift, umr_mode);
+}
+
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
@@ -402,6 +419,10 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
+ /* XDP in mlx5e doesn't support multiple packets per page. */
+ if (params->xdp_prog)
+ return PAGE_SHIFT;
+
return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
@@ -572,9 +593,6 @@ int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params
if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
return -EOPNOTSUPP;
- if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
- return -EINVAL;
-
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index c9be6eb88012..a5d20f6d6d9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -153,6 +153,9 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
static inline void mlx5e_params_print_info(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index eb5aeba3addf..eb5abd0e55d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -81,23 +81,23 @@ void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask)
-static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
+static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_ci, u16 skb_id)
{
- return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
+ return (ptpsq->ts_cqe_ctr_mask && (skb_ci != skb_id));
}
static bool mlx5e_ptp_ts_cqe_ooo(struct mlx5e_ptpsq *ptpsq, u16 skb_id)
{
- u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
- u16 skb_pc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc);
+ u16 skb_ci = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ u16 skb_pi = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc);
- if (PTP_WQE_CTR2IDX(skb_id - skb_cc) >= PTP_WQE_CTR2IDX(skb_pc - skb_cc))
+ if (PTP_WQE_CTR2IDX(skb_id - skb_ci) >= PTP_WQE_CTR2IDX(skb_pi - skb_ci))
return true;
return false;
}
-static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc,
+static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_ci,
u16 skb_id, int budget)
{
struct skb_shared_hwtstamps hwts = {};
@@ -105,13 +105,13 @@ static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_
ptpsq->cq_stats->resync_event++;
- while (skb_cc != skb_id) {
+ while (skb_ci != skb_id) {
skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
skb_tstamp_tx(skb, &hwts);
ptpsq->cq_stats->resync_cqe++;
napi_consume_skb(skb, budget);
- skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ skb_ci = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
}
}
@@ -120,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
int budget)
{
u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
- u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ u16 skb_ci = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
struct mlx5e_txqsq *sq = &ptpsq->txqsq;
struct sk_buff *skb;
ktime_t hwtstamp;
@@ -131,13 +131,13 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
goto out;
}
- if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id)) {
+ if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_ci, skb_id)) {
if (mlx5e_ptp_ts_cqe_ooo(ptpsq, skb_id)) {
/* already handled by a previous resync */
ptpsq->cq_stats->ooo_cqe_drop++;
return;
}
- mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id, budget);
+ mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_ci, skb_id, budget);
}
skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index ce85b48d327d..fd191925ab4b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -220,6 +220,7 @@ mlx5_esw_bridge_port_obj_add(struct net_device *dev,
struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
const struct switchdev_obj *obj = port_obj_info->obj;
const struct switchdev_obj_port_vlan *vlan;
+ const struct switchdev_obj_port_mdb *mdb;
u16 vport_num, esw_owner_vhca_id;
int err;
@@ -235,6 +236,11 @@ mlx5_esw_bridge_port_obj_add(struct net_device *dev,
err = mlx5_esw_bridge_port_vlan_add(vport_num, esw_owner_vhca_id, vlan->vid,
vlan->flags, br_offloads, extack);
break;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ mdb = SWITCHDEV_OBJ_PORT_MDB(obj);
+ err = mlx5_esw_bridge_port_mdb_add(dev, vport_num, esw_owner_vhca_id, mdb->addr,
+ mdb->vid, br_offloads, extack);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -248,6 +254,7 @@ mlx5_esw_bridge_port_obj_del(struct net_device *dev,
{
const struct switchdev_obj *obj = port_obj_info->obj;
const struct switchdev_obj_port_vlan *vlan;
+ const struct switchdev_obj_port_mdb *mdb;
u16 vport_num, esw_owner_vhca_id;
if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
@@ -261,6 +268,11 @@ mlx5_esw_bridge_port_obj_del(struct net_device *dev,
vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
mlx5_esw_bridge_port_vlan_del(vport_num, esw_owner_vhca_id, vlan->vid, br_offloads);
break;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ mdb = SWITCHDEV_OBJ_PORT_MDB(obj);
+ mlx5_esw_bridge_port_mdb_del(dev, vport_num, esw_owner_vhca_id, mdb->addr, mdb->vid,
+ br_offloads);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -306,6 +318,10 @@ mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
attr->u.vlan_protocol,
br_offloads);
break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED:
+ err = mlx5_esw_bridge_mcast_set(vport_num, esw_owner_vhca_id,
+ !attr->u.mc_disabled, br_offloads);
+ break;
default:
err = -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index a047a2a4ddac..e8eea9ffd5eb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -259,10 +259,6 @@ static int mlx5e_health_rq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_
BUILD_BUG_ON_MSG(ARRAY_SIZE(rq_sw_state_type_name) != MLX5E_NUM_RQ_STATES,
"rq_sw_state_type_name string array must be consistent with MLX5E_RQ_STATE_* enum in en.h");
- err = devlink_fmsg_obj_nest_start(fmsg);
- if (err)
- return err;
-
err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State");
if (err)
return err;
@@ -274,11 +270,7 @@ static int mlx5e_health_rq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_
return err;
}
- err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
- if (err)
- return err;
-
- return devlink_fmsg_obj_nest_end(fmsg);
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
static int
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 44c1926843a1..b35ff289af49 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -57,10 +57,6 @@ static int mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_
BUILD_BUG_ON_MSG(ARRAY_SIZE(sq_sw_state_type_name) != MLX5E_NUM_SQ_STATES,
"sq_sw_state_type_name string array must be consistent with MLX5E_SQ_STATE_* enum in en.h");
- err = devlink_fmsg_obj_nest_start(fmsg);
- if (err)
- return err;
-
err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State");
if (err)
return err;
@@ -72,11 +68,7 @@ static int mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_
return err;
}
- err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
- if (err)
- return err;
-
- return devlink_fmsg_obj_nest_end(fmsg);
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
index a278f52d52b0..9db1b5307a8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
@@ -4,15 +4,6 @@
#include "act.h"
#include "en/tc_priv.h"
-static bool
-tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -26,7 +17,6 @@ tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_accept = {
- .can_offload = tc_act_can_offload_accept,
.parse_action = tc_act_parse_accept,
.is_terminating_action = true,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
index eba0c8698926..fc923a99b6a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
@@ -82,26 +82,6 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
parse_state->flow_action = flow_action;
}
-void
-mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
- struct mlx5e_tc_flow_action *flow_action_reorder)
-{
- struct flow_action_entry *act;
- int i, j = 0;
-
- flow_action_for_each(i, act, flow_action) {
- /* Add CT action to be first. */
- if (act->id == FLOW_ACTION_CT)
- flow_action_reorder->entries[j++] = act;
- }
-
- flow_action_for_each(i, act, flow_action) {
- if (act->id == FLOW_ACTION_CT)
- continue;
- flow_action_reorder->entries[j++] = act;
- }
-}
-
int
mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
struct flow_action *flow_action,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
index 8346557eeaf6..0e6e1872ac62 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -17,8 +17,6 @@ struct mlx5e_tc_act_parse_state {
struct mlx5e_tc_flow *flow;
struct netlink_ext_ack *extack;
u32 actions;
- bool ct;
- bool ct_clear;
bool encap;
bool decap;
bool mpls_push;
@@ -56,6 +54,8 @@ struct mlx5e_tc_act {
const struct flow_action_entry *act,
struct mlx5_flow_attr *attr);
+ bool (*is_missable)(const struct flow_action_entry *act);
+
int (*offload_action)(struct mlx5e_priv *priv,
struct flow_offload_action *fl_act,
struct flow_action_entry *act);
@@ -110,10 +110,6 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
struct flow_action *flow_action,
struct netlink_ext_ack *extack);
-void
-mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
- struct mlx5e_tc_flow_action *flow_action_reorder);
-
int
mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
struct flow_action *flow_action,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
index a829c94289c1..92d3952dfa8b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
@@ -5,53 +5,22 @@
#include "en/tc_priv.h"
#include "en/tc_ct.h"
-static bool
-tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
- struct netlink_ext_ack *extack = parse_state->extack;
-
- if (parse_state->ct && !clear_action) {
- NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supported");
- return false;
- }
-
- return true;
-}
-
static int
tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
struct mlx5e_priv *priv,
struct mlx5_flow_attr *attr)
{
- bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
int err;
- /* It's redundant to do ct clear more than once. */
- if (clear_action && parse_state->ct_clear)
- return 0;
-
- err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr,
- &attr->parse_attr->mod_hdr_acts,
- act, parse_state->extack);
+ err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr, act, parse_state->extack);
if (err)
return err;
-
if (mlx5e_is_eswitch_flow(parse_state->flow))
attr->esw_attr->split_count = attr->esw_attr->out_count;
- if (clear_action) {
- parse_state->ct_clear = true;
- } else {
- attr->flags |= MLX5_ATTR_FLAG_CT;
- flow_flag_set(parse_state->flow, CT);
- parse_state->ct = true;
- }
+ attr->flags |= MLX5_ATTR_FLAG_CT;
return 0;
}
@@ -61,27 +30,10 @@ tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5e_priv *priv,
struct mlx5_flow_attr *attr)
{
- struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts;
- int err;
-
- /* If ct action exist, we can ignore previous ct_clear actions */
- if (parse_state->ct)
+ if (!(attr->flags & MLX5_ATTR_FLAG_CT))
return 0;
- if (parse_state->ct_clear) {
- err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts);
- if (err) {
- NL_SET_ERR_MSG_MOD(parse_state->extack,
- "Failed to set registers for ct clear");
- return err;
- }
- attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-
- /* Prevent handling of additional, redundant clear actions */
- parse_state->ct_clear = false;
- }
-
- return 0;
+ return mlx5_tc_ct_flow_offload(parse_state->ct_priv, attr);
}
static bool
@@ -95,10 +47,16 @@ tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv,
return true;
}
+static bool
+tc_act_is_missable_ct(const struct flow_action_entry *act)
+{
+ return !(act->ct.action & TCA_CT_ACT_CLEAR);
+}
+
struct mlx5e_tc_act mlx5e_tc_act_ct = {
- .can_offload = tc_act_can_offload_ct,
.parse_action = tc_act_parse_ct,
- .is_multi_table_act = tc_act_is_multi_table_act_ct,
.post_parse = tc_act_post_parse_ct,
+ .is_multi_table_act = tc_act_is_multi_table_act_ct,
+ .is_missable = tc_act_is_missable_ct,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
index 7d16aeabb119..5dc81715d625 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
@@ -4,15 +4,6 @@
#include "act.h"
#include "en/tc_priv.h"
-static bool
-tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -25,7 +16,6 @@ tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_drop = {
- .can_offload = tc_act_can_offload_drop,
.parse_action = tc_act_parse_drop,
.is_terminating_action = true,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
index 47597c524e59..3b272bbf4c53 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
@@ -78,15 +78,6 @@ out_err:
return err;
}
-static bool
-tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -114,6 +105,5 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_pedit = {
- .can_offload = tc_act_can_offload_pedit,
.parse_action = tc_act_parse_pedit,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
index 6454b031ff7a..80b4bc64380a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
@@ -4,15 +4,6 @@
#include "act.h"
#include "en/tc_priv.h"
-static bool
-tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -31,6 +22,5 @@ tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_ptype = {
- .can_offload = tc_act_can_offload_ptype,
.parse_action = tc_act_parse_ptype,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
index 2c0196431302..2df02f99cecf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
@@ -6,25 +6,6 @@
#include "en/tc_priv.h"
#include "en/tc/act/sample.h"
-static bool
-tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- struct netlink_ext_ack *extack = parse_state->extack;
- bool ct_nat;
-
- ct_nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
-
- if (flow_flag_test(parse_state->flow, CT) && ct_nat) {
- NL_SET_ERR_MSG_MOD(extack, "Sample action with CT NAT is not supported");
- return false;
- }
-
- return true;
-}
-
static int
tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -65,7 +46,6 @@ tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv,
}
struct mlx5e_tc_act mlx5e_tc_act_sample = {
- .can_offload = tc_act_can_offload_sample,
.parse_action = tc_act_parse_sample,
.is_multi_table_act = tc_act_is_multi_table_act_sample,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
index 915ce201aeb2..1b78bd9c106a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
@@ -5,15 +5,6 @@
#include "en/tc_priv.h"
#include "eswitch.h"
-static bool
-tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -27,6 +18,5 @@ tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_trap = {
- .can_offload = tc_act_can_offload_trap,
.parse_action = tc_act_parse_trap,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
index b4fa2de9711d..f1cae21c2c37 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
@@ -32,15 +32,6 @@ tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
return 0;
}
-static bool
-tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -58,6 +49,5 @@ struct mlx5e_tc_act mlx5e_tc_act_tun_encap = {
};
struct mlx5e_tc_act mlx5e_tc_act_tun_decap = {
- .can_offload = tc_act_can_offload_tun_decap,
.parse_action = tc_act_parse_tun_decap,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
index 2e0d88b513aa..c8a3eaf189f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
@@ -141,15 +141,6 @@ mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
return err;
}
-static bool
-tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -205,7 +196,6 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_vlan = {
- .can_offload = tc_act_can_offload_vlan,
.parse_action = tc_act_parse_vlan,
.post_parse = tc_act_post_parse_vlan,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
index 9a8a1a6bd99e..310b99230760 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
@@ -50,15 +50,6 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
return err;
}
-static bool
-tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
- const struct flow_action_entry *act,
- int act_index,
- struct mlx5_flow_attr *attr)
-{
- return true;
-}
-
static int
tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -81,6 +72,5 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
}
struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle = {
- .can_offload = tc_act_can_offload_vlan_mangle,
.parse_action = tc_act_parse_vlan_mangle,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 314983bc6f08..24badbaad935 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -83,12 +83,6 @@ struct mlx5_tc_ct_priv {
struct mlx5_tc_ct_debugfs debugfs;
};
-struct mlx5_ct_flow {
- struct mlx5_flow_attr *pre_ct_attr;
- struct mlx5_flow_handle *pre_ct_rule;
- struct mlx5_ct_ft *ft;
-};
-
struct mlx5_ct_zone_rule {
struct mlx5_ct_fs_rule *rule;
struct mlx5e_mod_hdr_handle *mh;
@@ -598,12 +592,6 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
return 0;
}
-int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_mod_hdr_acts *mod_acts)
-{
- return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
-}
-
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
char *modact)
@@ -1545,7 +1533,6 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_acts,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack)
{
@@ -1555,8 +1542,8 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
return -EOPNOTSUPP;
}
+ attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */
attr->ct_attr.zone = act->ct.zone;
- attr->ct_attr.ct_action = act->ct.action;
attr->ct_attr.nf_ft = act->ct.flow_table;
attr->ct_attr.act_miss_cookie = act->miss_cookie;
@@ -1892,14 +1879,14 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
/* We translate the tc filter with CT action to the following HW model:
*
- * +---------------------+
- * + ft prio (tc chain) +
- * + original match +
- * +---------------------+
+ * +-----------------------+
+ * + rule (either original +
+ * + or post_act rule) +
+ * +-----------------------+
* | set act_miss_cookie mapping
* | set fte_id
* | set tunnel_id
- * | do decap
+ * | rest of actions before the CT action (for this orig/post_act rule)
* |
* +-------------+
* | Chain 0 |
@@ -1924,32 +1911,21 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* | do nat (if needed)
* v
* +--------------+
- * + post_act + original filter actions
+ * + post_act + rest of parsed filter's actions
* + fte_id match +------------------------>
* +--------------+
*
*/
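
For reference, a filter of the shape this model covers might look like the following (illustrative tc command only, not taken from this series):

	tc filter add dev eth0 ingress chain 0 proto ip flower \
		ct_state -trk \
		action ct zone 1 pipe \
		action goto chain 1

The actions parsed before the ct action stay on the original/post_act rule; the rest of the filter's actions execute from the post_act table once the CT tables have marked the packet.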
-static struct mlx5_flow_handle *
+static int
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5_flow_spec *orig_spec,
struct mlx5_flow_attr *attr)
{
bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
- struct mlx5e_tc_mod_hdr_acts *pre_mod_acts;
- u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
- struct mlx5_flow_attr *pre_ct_attr;
- struct mlx5_modify_hdr *mod_hdr;
- struct mlx5_ct_flow *ct_flow;
int act_miss_mapping = 0, err;
struct mlx5_ct_ft *ft;
u16 zone;
- ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
- if (!ct_flow) {
- return ERR_PTR(-ENOMEM);
- }
-
/* Register for CT established events */
ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
attr->ct_attr.nf_ft);
@@ -1958,23 +1934,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
ct_dbg("Failed to register to ft callback");
goto err_ft;
}
- ct_flow->ft = ft;
-
- /* Base flow attributes of both rules on original rule attribute */
- ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
- if (!ct_flow->pre_ct_attr) {
- err = -ENOMEM;
- goto err_alloc_pre;
- }
-
- pre_ct_attr = ct_flow->pre_ct_attr;
- memcpy(pre_ct_attr, attr, attr_sz);
- pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts;
-
- /* Modify the original rule's action to fwd and modify, leave decap */
- pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
- pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ attr->ct_attr.ft = ft;
err = mlx5e_tc_action_miss_mapping_get(ct_priv->priv, attr, attr->ct_attr.act_miss_cookie,
&act_miss_mapping);
@@ -1982,136 +1942,89 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
ct_dbg("Failed to get register mapping for act miss");
goto err_get_act_miss;
}
- attr->ct_attr.act_miss_mapping = act_miss_mapping;
- err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
- MAPPED_OBJ_TO_REG, act_miss_mapping);
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts,
+ ct_priv->ns_type, MAPPED_OBJ_TO_REG, act_miss_mapping);
if (err) {
ct_dbg("Failed to set act miss register mapping");
goto err_mapping;
}
- /* If original flow is decap, we do it before going into ct table
- * so add a rewrite for the tunnel match_id.
- */
- if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
- attr->chain == 0) {
- err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
- ct_priv->ns_type,
- TUNNEL_TO_REG,
- attr->tunnel_id);
- if (err) {
- ct_dbg("Failed to set tunnel register mapping");
- goto err_mapping;
- }
- }
-
- /* Change original rule point to ct table
- * Chain 0 sets the zone and jumps to ct table
+ /* Chain 0 sets the zone and jumps to ct table
* Other chains jump to pre_ct table to align with act_ct cached logic
*/
- pre_ct_attr->dest_chain = 0;
if (!attr->chain) {
zone = ft->zone & MLX5_CT_ZONE_MASK;
- err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
- ZONE_TO_REG, zone);
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts,
+ ct_priv->ns_type, ZONE_TO_REG, zone);
if (err) {
ct_dbg("Failed to set zone register mapping");
goto err_mapping;
}
- pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
} else {
- pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
- }
-
- mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
- pre_mod_acts->num_actions,
- pre_mod_acts->actions);
- if (IS_ERR(mod_hdr)) {
- err = PTR_ERR(mod_hdr);
- ct_dbg("Failed to create pre ct mod hdr");
- goto err_mapping;
- }
- pre_ct_attr->modify_hdr = mod_hdr;
- ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
- pre_ct_attr);
- if (IS_ERR(ct_flow->pre_ct_rule)) {
- err = PTR_ERR(ct_flow->pre_ct_rule);
- ct_dbg("Failed to add pre ct rule");
- goto err_insert_orig;
+ attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
}
- attr->ct_attr.ct_flow = ct_flow;
- mlx5e_mod_hdr_dealloc(pre_mod_acts);
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ attr->ct_attr.act_miss_mapping = act_miss_mapping;
- return ct_flow->pre_ct_rule;
+ return 0;
-err_insert_orig:
- mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
- mlx5e_mod_hdr_dealloc(pre_mod_acts);
mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, act_miss_mapping);
err_get_act_miss:
- kfree(ct_flow->pre_ct_attr);
-err_alloc_pre:
mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
- kfree(ct_flow);
netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
- return ERR_PTR(err);
+ return err;
}
-struct mlx5_flow_handle *
-mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+int
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr)
{
- struct mlx5_flow_handle *rule;
+ int err;
if (!priv)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
+
+ if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) {
+ err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts,
+ 0, 0, 0, 0);
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ }
+
+ if (!attr->ct_attr.nf_ft) /* only a ct clear action, no ct_clear,ct() combination */
+ return 0;
mutex_lock(&priv->control_lock);
- rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
+ err = __mlx5_tc_ct_flow_offload(priv, attr);
mutex_unlock(&priv->control_lock);
- return rule;
+ return err;
}
static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5_ct_flow *ct_flow,
struct mlx5_flow_attr *attr)
{
- struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
- struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
-
- mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
- mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
-
mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, attr->ct_attr.act_miss_mapping);
- mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
-
- kfree(ct_flow->pre_ct_attr);
- kfree(ct_flow);
+ mlx5_tc_ct_del_ft_cb(ct_priv, attr->ct_attr.ft);
}
void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr)
{
- struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
-
- /* We are called on error to clean up stuff from parsing
- * but we don't have anything for now
- */
- if (!ct_flow)
+ if (!attr->ct_attr.nf_ft) /* only a ct clear action, no ct_clear,ct() combination */
return;
mutex_lock(&priv->control_lock);
- __mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
+ __mlx5_tc_ct_delete_flow(priv, attr);
mutex_unlock(&priv->control_lock);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 5c5ddaa83055..8e9316fa46d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -25,11 +25,11 @@ struct nf_flowtable;
struct mlx5_ct_attr {
u16 zone;
u16 ct_action;
- struct mlx5_ct_flow *ct_flow;
struct nf_flowtable *nf_ft;
u32 ct_labels_id;
u32 act_miss_mapping;
u64 act_miss_cookie;
+ struct mlx5_ct_ft *ft;
};
#define zone_to_reg_ct {\
@@ -113,15 +113,12 @@ int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec);
int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_acts,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack);
-struct mlx5_flow_handle *
-mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+int
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr);
+
void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr);
@@ -130,10 +127,6 @@ bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
struct sk_buff *skb, u8 zone_restore_id);
-int
-mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_mod_hdr_acts *mod_acts);
-
#else /* CONFIG_MLX5_TC_CT */
static inline struct mlx5_tc_ct_priv *
@@ -176,16 +169,8 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
}
static inline int
-mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_mod_hdr_acts *mod_acts)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_acts,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack)
{
@@ -193,13 +178,11 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
return -EOPNOTSUPP;
}
-static inline struct mlx5_flow_handle *
+static inline int
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+ struct mlx5_flow_attr *attr)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
static inline void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
index 451fd4342a5a..ba2b1f24ff14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -25,12 +25,11 @@ enum {
MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
- MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
- MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
- MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9,
- MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10,
- MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 11,
- MLX5E_TC_FLOW_FLAG_USE_ACT_STATS = MLX5E_TC_FLOW_BASE + 12,
+ MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 7,
+ MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 8,
+ MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 9,
+ MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 10,
+ MLX5E_TC_FLOW_FLAG_USE_ACT_STATS = MLX5E_TC_FLOW_BASE + 11,
};
struct mlx5e_tc_flow_parse_attr {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 651be7aaf7d5..47381e949f1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -77,6 +77,19 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
}
/* TX */
+struct mlx5e_xmit_data {
+ dma_addr_t dma_addr;
+ void *data;
+ u32 len : 31;
+ u32 has_frags : 1;
+};
+
+struct mlx5e_xmit_data_frags {
+ struct mlx5e_xmit_data xd;
+ struct skb_shared_info *sinfo;
+ dma_addr_t *dma_arr;
+};
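
The embedding above lets the linear and multi-fragment transmit paths share one descriptor type: a caller that knows has_frags is set can recover the outer struct with container_of(), as the xmit paths later in this patch do. A minimal hedged sketch of that downcast (the helper name is hypothetical, not part of the patch):

static inline struct mlx5e_xmit_data_frags *
ex_to_frags(struct mlx5e_xmit_data *xd)
{
	/* Valid only when xd is known to be embedded in a
	 * struct mlx5e_xmit_data_frags, i.e. the caller allocated the
	 * frags variant and set xd->has_frags accordingly.
	 */
	return container_of(xd, struct mlx5e_xmit_data_frags, xd);
}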
+
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 15d15d0e5ef9..f0e6095809fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -34,6 +34,7 @@
#include <net/xdp_sock_drv.h>
#include "en/xdp.h"
#include "en/params.h"
+#include <linux/bitfield.h>
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
@@ -60,9 +61,8 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
struct xdp_buff *xdp)
{
struct page *page = virt_to_page(xdp->data);
- struct skb_shared_info *sinfo = NULL;
- struct mlx5e_xmit_data xdptxd;
- struct mlx5e_xdp_info xdpi;
+ struct mlx5e_xmit_data_frags xdptxdf = {};
+ struct mlx5e_xmit_data *xdptxd;
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
int i;
@@ -71,8 +71,10 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
if (unlikely(!xdpf))
return false;
- xdptxd.data = xdpf->data;
- xdptxd.len = xdpf->len;
+ xdptxd = &xdptxdf.xd;
+ xdptxd->data = xdpf->data;
+ xdptxd->len = xdpf->len;
+ xdptxd->has_frags = xdp_frame_has_frags(xdpf);
if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
/* The xdp_buff was in the UMEM and was copied into a newly
@@ -87,24 +89,29 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
*/
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
- xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+ if (unlikely(xdptxd->has_frags))
+ return false;
- dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
+ dma_addr = dma_map_single(sq->pdev, xdptxd->data, xdptxd->len,
DMA_TO_DEVICE);
if (dma_mapping_error(sq->pdev, dma_addr)) {
xdp_return_frame(xdpf);
return false;
}
- xdptxd.dma_addr = dma_addr;
- xdpi.frame.xdpf = xdpf;
- xdpi.frame.dma_addr = dma_addr;
+ xdptxd->dma_addr = dma_addr;
if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0)))
+ mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
return false;
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME });
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .frame.xdpf = xdpf });
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .frame.dma_addr = dma_addr });
return true;
}
@@ -114,17 +121,15 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
* mode.
*/
- xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
- xdpi.page.rq = rq;
-
dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
- dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL);
+ dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd->len, DMA_BIDIRECTIONAL);
- if (unlikely(xdp_frame_has_frags(xdpf))) {
- sinfo = xdp_get_shared_info_from_frame(xdpf);
+ if (xdptxd->has_frags) {
+ xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);
+ xdptxdf.dma_arr = NULL;
- for (i = 0; i < sinfo->nr_frags; i++) {
- skb_frag_t *frag = &sinfo->frags[i];
+ for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &xdptxdf.sinfo->frags[i];
dma_addr_t addr;
u32 len;
@@ -136,22 +141,34 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
}
}
- xdptxd.dma_addr = dma_addr;
+ xdptxd->dma_addr = dma_addr;
if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0)))
+ mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
return false;
- xdpi.page.page = page;
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
-
- if (unlikely(xdp_frame_has_frags(xdpf))) {
- for (i = 0; i < sinfo->nr_frags; i++) {
- skb_frag_t *frag = &sinfo->frags[i];
-
- xdpi.page.page = skb_frag_page(frag);
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ /* xmit_mode == MLX5E_XDP_XMIT_MODE_PAGE */
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_PAGE });
+
+ if (xdptxd->has_frags) {
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info)
+ { .page.num = 1 + xdptxdf.sinfo->nr_frags });
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .page.page = page });
+ for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &xdptxdf.sinfo->frags[i];
+
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info)
+ { .page.page = skb_frag_page(frag) });
}
+ } else {
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .page.num = 1 });
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .page.page = page });
}
return true;
@@ -169,14 +186,72 @@ static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
return 0;
}
-static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
+/* Mapping HW RSS Type bits CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 into 4 bits */
+#define RSS_TYPE_MAX_TABLE 16 /* 4 bits, max 16 entries */
+#define RSS_L4 GENMASK(1, 0)
+#define RSS_L3 GENMASK(3, 2) /* Same as CQE_RSS_HTYPE_IP */
+
+/* Valid combinations of CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 sorted numerically */
+enum mlx5_rss_hash_type {
+ RSS_TYPE_NO_HASH = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IP_NONE) |
+ FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
+ RSS_TYPE_L3_IPV4 = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
+ FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
+ RSS_TYPE_L4_IPV4_TCP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
+ FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
+ RSS_TYPE_L4_IPV4_UDP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
+ FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
+ RSS_TYPE_L4_IPV4_IPSEC = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
+ FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
+ RSS_TYPE_L3_IPV6 = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
+ FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
+ RSS_TYPE_L4_IPV6_TCP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
+ FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
+ RSS_TYPE_L4_IPV6_UDP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
+ FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
+ RSS_TYPE_L4_IPV6_IPSEC = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
+ FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
+};
+
+/* Invalid combinations simply return zero, which allows lookups without bounds checks */
+static const enum xdp_rss_hash_type mlx5_xdp_rss_type[RSS_TYPE_MAX_TABLE] = {
+ [RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_NONE,
+ [1] = XDP_RSS_TYPE_NONE, /* Implicit zero */
+ [2] = XDP_RSS_TYPE_NONE, /* Implicit zero */
+ [3] = XDP_RSS_TYPE_NONE, /* Implicit zero */
+ [RSS_TYPE_L3_IPV4] = XDP_RSS_TYPE_L3_IPV4,
+ [RSS_TYPE_L4_IPV4_TCP] = XDP_RSS_TYPE_L4_IPV4_TCP,
+ [RSS_TYPE_L4_IPV4_UDP] = XDP_RSS_TYPE_L4_IPV4_UDP,
+ [RSS_TYPE_L4_IPV4_IPSEC] = XDP_RSS_TYPE_L4_IPV4_IPSEC,
+ [RSS_TYPE_L3_IPV6] = XDP_RSS_TYPE_L3_IPV6,
+ [RSS_TYPE_L4_IPV6_TCP] = XDP_RSS_TYPE_L4_IPV6_TCP,
+ [RSS_TYPE_L4_IPV6_UDP] = XDP_RSS_TYPE_L4_IPV6_UDP,
+ [RSS_TYPE_L4_IPV6_IPSEC] = XDP_RSS_TYPE_L4_IPV6_IPSEC,
+ [12] = XDP_RSS_TYPE_NONE, /* Implicit zero */
+ [13] = XDP_RSS_TYPE_NONE, /* Implicit zero */
+ [14] = XDP_RSS_TYPE_NONE, /* Implicit zero */
+ [15] = XDP_RSS_TYPE_NONE, /* Implicit zero */
+};
+
+static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type)
{
const struct mlx5e_xdp_buff *_ctx = (void *)ctx;
+ const struct mlx5_cqe64 *cqe = _ctx->cqe;
+ u32 hash_type, l4_type, ip_type, lookup;
if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)))
return -ENODATA;
- *hash = be32_to_cpu(_ctx->cqe->rss_hash_result);
+ *hash = be32_to_cpu(cqe->rss_hash_result);
+
+ hash_type = cqe->rss_hash_type;
+ BUILD_BUG_ON(CQE_RSS_HTYPE_IP != RSS_L3); /* same mask */
+ ip_type = hash_type & CQE_RSS_HTYPE_IP;
+ l4_type = FIELD_GET(CQE_RSS_HTYPE_L4, hash_type);
+ lookup = ip_type | l4_type;
+ *rss_type = mlx5_xdp_rss_type[lookup];
+
return 0;
}
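
To make the table composition concrete, here is a small self-contained sketch (not driver code) of how a 2-bit IP type and a 2-bit L4 type fold into the 4-bit index. Only RSS_L3 occupying bits 3:2 is guaranteed by the BUILD_BUG_ON above; the raw L4 bit position used here is an assumption for illustration.

#include <stdio.h>

#define EX_L3_MASK 0x0c	/* bits 3:2, matches RSS_L3 above */
#define EX_L4_MASK 0xc0	/* hypothetical raw L4 position (bits 7:6) */

static unsigned int ex_rss_index(unsigned int cqe_rss_hash_type)
{
	unsigned int ip_type = cqe_rss_hash_type & EX_L3_MASK;
	unsigned int l4_type = (cqe_rss_hash_type & EX_L4_MASK) >> 6;

	return ip_type | l4_type;	/* always 0..15, no bounds check needed */
}

int main(void)
{
	/* IPv4 (code 1) + TCP (code 1) -> index 0b0101 == RSS_TYPE_L4_IPV4_TCP */
	printf("%u\n", ex_rss_index((1 << 2) | (1 << 6)));	/* prints 5 */
	return 0;
}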
@@ -322,26 +397,43 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
- struct skb_shared_info *sinfo, int check_result);
+ int check_result);
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
- struct skb_shared_info *sinfo, int check_result)
+ int check_result)
{
struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
+ struct mlx5e_xmit_data *p = xdptxd;
+ struct mlx5e_xmit_data tmp;
- if (unlikely(sinfo)) {
- /* MPWQE is enabled, but a multi-buffer packet is queued for
- * transmission. MPWQE can't send fragmented packets, so close
- * the current session and fall back to a regular WQE.
- */
- if (unlikely(sq->mpwqe.wqe))
- mlx5e_xdp_mpwqe_complete(sq);
- return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0);
+ if (xdptxd->has_frags) {
+ struct mlx5e_xmit_data_frags *xdptxdf =
+ container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);
+
+ if (!!xdptxd->len + xdptxdf->sinfo->nr_frags > 1) {
+ /* MPWQE is enabled, but a multi-buffer packet is queued for
+ * transmission. MPWQE can't send fragmented packets, so close
+ * the current session and fall back to a regular WQE.
+ */
+ if (unlikely(sq->mpwqe.wqe))
+ mlx5e_xdp_mpwqe_complete(sq);
+ return mlx5e_xmit_xdp_frame(sq, xdptxd, 0);
+ }
+ if (!xdptxd->len) {
+ skb_frag_t *frag = &xdptxdf->sinfo->frags[0];
+
+ tmp.data = skb_frag_address(frag);
+ tmp.len = skb_frag_size(frag);
+ tmp.dma_addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[0] :
+ page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
+ p = &tmp;
+ }
}
- if (unlikely(xdptxd->len > sq->hw_mtu)) {
+ if (unlikely(p->len > sq->hw_mtu)) {
stats->err++;
return false;
}
@@ -359,7 +451,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
mlx5e_xdp_mpwqe_session_start(sq);
}
- mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
+ mlx5e_xdp_mpwqe_add_dseg(sq, p, stats);
if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
mlx5e_xdp_mpwqe_complete(sq);
@@ -387,8 +479,10 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
- struct skb_shared_info *sinfo, int check_result)
+ int check_result)
{
+ struct mlx5e_xmit_data_frags *xdptxdf =
+ container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5_wqe_data_seg *dseg;
@@ -400,26 +494,34 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
u16 ds_cnt, inline_hdr_sz;
u8 num_wqebbs = 1;
int num_frags = 0;
+ bool inline_ok;
+ bool linear;
u16 pi;
struct mlx5e_xdpsq_stats *stats = sq->stats;
- if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
+ inline_ok = sq->min_inline_mode == MLX5_INLINE_MODE_NONE ||
+ dma_len >= MLX5E_XDP_MIN_INLINE;
+
+ if (unlikely(!inline_ok || sq->hw_mtu < dma_len)) {
stats->err++;
return false;
}
- ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
+ inline_hdr_sz = 0;
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
- ds_cnt++;
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+
+ linear = !!(dma_len - inline_hdr_sz);
+ ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + linear + !!inline_hdr_sz;
/* check_result must be 0 if sinfo is passed. */
if (!check_result) {
int stop_room = 1;
- if (unlikely(sinfo)) {
- ds_cnt += sinfo->nr_frags;
- num_frags = sinfo->nr_frags;
+ if (xdptxd->has_frags) {
+ ds_cnt += xdptxdf->sinfo->nr_frags;
+ num_frags = xdptxdf->sinfo->nr_frags;
num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
/* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
* enough to hold all fragments.
@@ -440,53 +542,53 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
eseg = &wqe->eth;
dseg = wqe->data;
- inline_hdr_sz = 0;
-
/* copy the inline part if required */
- if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+ if (inline_hdr_sz) {
memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
- MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
- dma_len -= MLX5E_XDP_MIN_INLINE;
- dma_addr += MLX5E_XDP_MIN_INLINE;
- inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+ inline_hdr_sz - sizeof(eseg->inline_hdr.start));
+ dma_len -= inline_hdr_sz;
+ dma_addr += inline_hdr_sz;
dseg++;
}
/* write the dma part */
- dseg->addr = cpu_to_be64(dma_addr);
- dseg->byte_count = cpu_to_be32(dma_len);
+ if (linear) {
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+ dseg->lkey = sq->mkey_be;
+ dseg++;
+ }
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
- if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) {
- u8 num_pkts = 1 + num_frags;
+ if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
int i;
memset(&cseg->trailer, 0, sizeof(cseg->trailer));
memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));
eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
- dseg->lkey = sq->mkey_be;
for (i = 0; i < num_frags; i++) {
- skb_frag_t *frag = &sinfo->frags[i];
+ skb_frag_t *frag = &xdptxdf->sinfo->frags[i];
dma_addr_t addr;
- addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+ addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] :
+ page_pool_get_dma_addr(skb_frag_page(frag)) +
skb_frag_off(frag);
- dseg++;
dseg->addr = cpu_to_be64(addr);
dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
dseg->lkey = sq->mkey_be;
+ dseg++;
}
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
.num_wqebbs = num_wqebbs,
- .num_pkts = num_pkts,
+ .num_pkts = 1,
};
sq->pc += num_wqebbs;
@@ -511,20 +613,61 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
u16 i;
for (i = 0; i < wi->num_pkts; i++) {
- struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+ union mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
switch (xdpi.mode) {
- case MLX5E_XDP_XMIT_MODE_FRAME:
+ case MLX5E_XDP_XMIT_MODE_FRAME: {
/* XDP_TX from the XSK RQ and XDP_REDIRECT */
- dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
- xdpi.frame.xdpf->len, DMA_TO_DEVICE);
- xdp_return_frame_bulk(xdpi.frame.xdpf, bq);
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
+
+ xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+ xdpf = xdpi.frame.xdpf;
+ xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+ dma_addr = xdpi.frame.dma_addr;
+
+ dma_unmap_single(sq->pdev, dma_addr,
+ xdpf->len, DMA_TO_DEVICE);
+ if (xdp_frame_has_frags(xdpf)) {
+ struct skb_shared_info *sinfo;
+ int j;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (j = 0; j < sinfo->nr_frags; j++) {
+ skb_frag_t *frag = &sinfo->frags[j];
+
+ xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+ dma_addr = xdpi.frame.dma_addr;
+
+ dma_unmap_single(sq->pdev, dma_addr,
+ skb_frag_size(frag), DMA_TO_DEVICE);
+ }
+ }
+ xdp_return_frame_bulk(xdpf, bq);
break;
- case MLX5E_XDP_XMIT_MODE_PAGE:
+ }
+ case MLX5E_XDP_XMIT_MODE_PAGE: {
/* XDP_TX from the regular RQ */
- page_pool_put_defragged_page(xdpi.page.rq->page_pool,
- xdpi.page.page, -1, true);
+ u8 num, n = 0;
+
+ xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+ num = xdpi.page.num;
+
+ do {
+ struct page *page;
+
+ xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+ page = xdpi.page.page;
+
+ /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
+ * as we know this is a page_pool page.
+ */
+ page_pool_put_defragged_page(page->pp,
+ page, -1, true);
+ } while (++n < num);
+
break;
+ }
case MLX5E_XDP_XMIT_MODE_XSK:
/* AF_XDP send */
(*xsk_frames)++;
@@ -658,34 +801,79 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
sq = &priv->channels.c[sq_num]->xdpsq;
for (i = 0; i < n; i++) {
+ struct mlx5e_xmit_data_frags xdptxdf = {};
struct xdp_frame *xdpf = frames[i];
- struct mlx5e_xmit_data xdptxd;
- struct mlx5e_xdp_info xdpi;
+ dma_addr_t dma_arr[MAX_SKB_FRAGS];
+ struct mlx5e_xmit_data *xdptxd;
bool ret;
- xdptxd.data = xdpf->data;
- xdptxd.len = xdpf->len;
- xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
- xdptxd.len, DMA_TO_DEVICE);
+ xdptxd = &xdptxdf.xd;
+ xdptxd->data = xdpf->data;
+ xdptxd->len = xdpf->len;
+ xdptxd->has_frags = xdp_frame_has_frags(xdpf);
+ xdptxd->dma_addr = dma_map_single(sq->pdev, xdptxd->data,
+ xdptxd->len, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr)))
+ if (unlikely(dma_mapping_error(sq->pdev, xdptxd->dma_addr)))
break;
- xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
- xdpi.frame.xdpf = xdpf;
- xdpi.frame.dma_addr = xdptxd.dma_addr;
+ if (xdptxd->has_frags) {
+ int j;
+
+ xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);
+ xdptxdf.dma_arr = dma_arr;
+ for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) {
+ skb_frag_t *frag = &xdptxdf.sinfo->frags[j];
+
+ dma_arr[j] = dma_map_single(sq->pdev, skb_frag_address(frag),
+ skb_frag_size(frag), DMA_TO_DEVICE);
+
+ if (!dma_mapping_error(sq->pdev, dma_arr[j]))
+ continue;
+ /* mapping error */
+ while (--j >= 0)
+ dma_unmap_single(sq->pdev, dma_arr[j],
+ skb_frag_size(&xdptxdf.sinfo->frags[j]),
+ DMA_TO_DEVICE);
+ goto out;
+ }
+ }
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0);
+ mlx5e_xmit_xdp_frame, sq, xdptxd, 0);
if (unlikely(!ret)) {
- dma_unmap_single(sq->pdev, xdptxd.dma_addr,
- xdptxd.len, DMA_TO_DEVICE);
+ int j;
+
+ dma_unmap_single(sq->pdev, xdptxd->dma_addr,
+ xdptxd->len, DMA_TO_DEVICE);
+ if (!xdptxd->has_frags)
+ break;
+ for (j = 0; j < xdptxdf.sinfo->nr_frags; j++)
+ dma_unmap_single(sq->pdev, dma_arr[j],
+ skb_frag_size(&xdptxdf.sinfo->frags[j]),
+ DMA_TO_DEVICE);
break;
}
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+
+ /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME });
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .frame.xdpf = xdpf });
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info) { .frame.dma_addr = xdptxd->dma_addr });
+ if (xdptxd->has_frags) {
+ int j;
+
+ for (j = 0; j < xdptxdf.sinfo->nr_frags; j++)
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
+ (union mlx5e_xdp_info)
+ { .frame.dma_addr = dma_arr[j] });
+ }
nxmit++;
}
+out:
if (flags & XDP_XMIT_FLUSH) {
if (sq->mpwqe.wqe)
mlx5e_xdp_mpwqe_complete(sq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 10bcfa6f88c1..9e8e6184f9e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -50,6 +50,53 @@ struct mlx5e_xdp_buff {
struct mlx5e_rq *rq;
};
+/* XDP packets can be transmitted in different ways. On completion, we need to
+ * distinguish between them to clean things up properly.
+ */
+enum mlx5e_xdp_xmit_mode {
+ /* An xdp_frame was transmitted due to either XDP_REDIRECT from another
+ * device or XDP_TX from an XSK RQ. The frame has to be unmapped and
+ * returned.
+ */
+ MLX5E_XDP_XMIT_MODE_FRAME,
+
+ /* The xdp_frame was created in place as a result of XDP_TX from a
+ * regular RQ. No DMA remapping happened, and the page belongs to us.
+ */
+ MLX5E_XDP_XMIT_MODE_PAGE,
+
+ /* No xdp_frame was created at all; the transmit happened from a UMEM
+ * page. The UMEM Completion Ring producer pointer has to be increased.
+ */
+ MLX5E_XDP_XMIT_MODE_XSK,
+};
+
+/* xmit_mode entry is pushed to the fifo per packet, followed by multiple
+ * entries, as follows:
+ *
+ * MLX5E_XDP_XMIT_MODE_FRAME:
+ * xdpf, dma_addr_1, dma_addr_2, ... , dma_addr_num.
+ * 'num' is derived from xdpf.
+ *
+ * MLX5E_XDP_XMIT_MODE_PAGE:
+ * num, page_1, page_2, ... , page_num.
+ *
+ * MLX5E_XDP_XMIT_MODE_XSK:
+ * none.
+ */
+union mlx5e_xdp_info {
+ enum mlx5e_xdp_xmit_mode mode;
+ union {
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
+ } frame;
+ union {
+ struct mlx5e_rq *rq;
+ u8 num;
+ struct page *page;
+ } page;
+};
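
As a hedged usage sketch of the layout comment above (not driver code), the per-packet sequence for MLX5E_XDP_XMIT_MODE_PAGE pushes the mode, the page count, and then each page; fifo_push() is a hypothetical stand-in for mlx5e_xdpi_fifo_push() with the sq argument dropped:

static void ex_push_page_entries(struct page *head, struct page **frags, u8 nr)
{
	u8 i;

	fifo_push((union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_PAGE });
	fifo_push((union mlx5e_xdp_info) { .page.num = 1 + nr });
	fifo_push((union mlx5e_xdp_info) { .page.page = head });
	for (i = 0; i < nr; i++)
		fifo_push((union mlx5e_xdp_info) { .page.page = frags[i] });
	/* The completion path pops .mode, then .page.num, then 'num' pages,
	 * mirroring mlx5e_free_xdpsq_desc() earlier in this patch.
	 */
}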
+
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
@@ -66,11 +113,9 @@ extern const struct xdp_metadata_ops mlx5e_xdp_metadata_ops;
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
struct mlx5e_xmit_data *xdptxd,
- struct skb_shared_info *sinfo,
int check_result));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
struct mlx5e_xmit_data *xdptxd,
- struct skb_shared_info *sinfo,
int check_result));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));
@@ -179,14 +224,14 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
static inline void
mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
- struct mlx5e_xdp_info *xi)
+ union mlx5e_xdp_info xi)
{
u32 i = (*fifo->pc)++ & fifo->mask;
- fifo->xi[i] = *xi;
+ fifo->xi[i] = xi;
}
-static inline struct mlx5e_xdp_info
+static inline union mlx5e_xdp_info
mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo)
{
return fifo->xi[(*fifo->cc)++ & fifo->mask];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 367a9505ca4f..597f319d4770 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -44,7 +44,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
* same.
*/
static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_info *xdpi)
+ union mlx5e_xdp_info *xdpi)
{
u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
@@ -54,15 +54,14 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
wi->num_pkts = 1;
nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, *xdpi);
sq->doorbell_cseg = &nopwqe->ctrl;
}
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
{
struct xsk_buff_pool *pool = sq->xsk_pool;
- struct mlx5e_xmit_data xdptxd;
- struct mlx5e_xdp_info xdpi;
+ union mlx5e_xdp_info xdpi;
bool work_done = true;
bool flush = false;
@@ -73,6 +72,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
mlx5e_xmit_xdp_frame_check_mpwqe,
mlx5e_xmit_xdp_frame_check,
sq);
+ struct mlx5e_xmit_data xdptxd = {};
struct xdp_desc desc;
bool ret;
@@ -97,7 +97,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd,
check_result);
if (unlikely(!ret)) {
if (sq->mpwqe.wqe)
@@ -105,7 +105,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
mlx5e_xsk_tx_post_err(sq, &xdpi);
} else {
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
}
flush = true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 91fa0a366316..55b38544422f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -35,11 +35,15 @@
#include <crypto/aead.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
+#include <net/netevent.h>
#include "en.h"
#include "ipsec.h"
#include "ipsec_rxtx.h"
+#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
+#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
+
static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
{
return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
@@ -50,27 +54,71 @@ static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x)
return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle;
}
+static void mlx5e_ipsec_handle_tx_limit(struct work_struct *_work)
+{
+ struct mlx5e_ipsec_dwork *dwork =
+ container_of(_work, struct mlx5e_ipsec_dwork, dwork.work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry;
+ struct xfrm_state *x = sa_entry->x;
+
+ spin_lock(&x->lock);
+ xfrm_state_check_expire(x);
+ if (x->km.state == XFRM_STATE_EXPIRED) {
+ sa_entry->attrs.drop = true;
+ mlx5e_accel_ipsec_fs_modify(sa_entry);
+ }
+ spin_unlock(&x->lock);
+
+ if (sa_entry->attrs.drop)
+ return;
+
+ queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork,
+ MLX5_IPSEC_RESCHED);
+}
+
static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
{
- struct xfrm_replay_state_esn *replay_esn;
+ struct xfrm_state *x = sa_entry->x;
u32 seq_bottom = 0;
+ u32 esn, esn_msb;
u8 overlap;
- if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) {
- sa_entry->esn_state.trigger = 0;
+ switch (x->xso.type) {
+ case XFRM_DEV_OFFLOAD_PACKET:
+ switch (x->xso.dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ esn = x->replay_esn->seq;
+ esn_msb = x->replay_esn->seq_hi;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ esn = x->replay_esn->oseq;
+ esn_msb = x->replay_esn->oseq_hi;
+ break;
+ default:
+ WARN_ON(true);
+ return false;
+ }
+ break;
+ case XFRM_DEV_OFFLOAD_CRYPTO:
+ /* Already parsed by XFRM core */
+ esn = x->replay_esn->seq;
+ break;
+ default:
+ WARN_ON(true);
return false;
}
- replay_esn = sa_entry->x->replay_esn;
- if (replay_esn->seq >= replay_esn->replay_window)
- seq_bottom = replay_esn->seq - replay_esn->replay_window + 1;
-
overlap = sa_entry->esn_state.overlap;
- sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x,
- htonl(seq_bottom));
+ if (esn >= x->replay_esn->replay_window)
+ seq_bottom = esn - x->replay_esn->replay_window + 1;
+
+ if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
+ esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
+
+ sa_entry->esn_state.esn = esn;
+ sa_entry->esn_state.esn_msb = esn_msb;
- sa_entry->esn_state.trigger = 1;
if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
sa_entry->esn_state.overlap = 0;
return true;
@@ -87,25 +135,161 @@ static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_accel_esp_xfrm_attrs *attrs)
{
struct xfrm_state *x = sa_entry->x;
+ s64 start_value, n;
- attrs->hard_packet_limit = x->lft.hard_packet_limit;
+ attrs->lft.hard_packet_limit = x->lft.hard_packet_limit;
+ attrs->lft.soft_packet_limit = x->lft.soft_packet_limit;
if (x->lft.soft_packet_limit == XFRM_INF)
return;
- /* Hardware decrements hard_packet_limit counter through
- * the operation. While fires an event when soft_packet_limit
- * is reached. It emans that we need substitute the numbers
- * in order to properly count soft limit.
+ /* Compute hard limit initial value and number of rounds.
+ *
+ * The counting pattern of the hardware counter goes:
+ * value -> 2^31-1
+ * 2^31 | (2^31-1) -> 2^31-1
+ * 2^31 | (2^31-1) -> 2^31-1
+ * [..]
+ * 2^31 | (2^31-1) -> 0
+ *
+ * The pattern is created by using an ASO operation to atomically set
+ * bit 31 after the down counter clears bit 31. This is effectively an
+ * atomic addition of 2^31 to the counter.
+ *
+ * We wish to configure the counter, within the above pattern, so that
+ * when it reaches 0, it has hit the hard limit. This is defined by this
+ * system of equations:
*
- * As an example:
- * XFRM user sets soft limit is 2 and hard limit is 9 and
- * expects to see soft event after 2 packets and hard event
- * after 9 packets. In our case, the hard limit will be set
- * to 9 and soft limit is comparator to 7 so user gets the
- * soft event after 2 packeta
+ * hard_limit == start_value + n * 2^31
+ * n >= 0
+ * start_value < 2^32, start_value >= 0
+ *
+ * These equations are not single-solution; there are often two choices:
+ * hard_limit == start_value + n * 2^31
+ * hard_limit == (start_value+2^31) + (n-1) * 2^31
+ *
+ * The algorithm selects the solution that keeps the counter value
+ * above 2^31 until the final iteration.
+ */
+
+ /* Start by estimating n and compute start_value */
+ n = attrs->lft.hard_packet_limit / BIT_ULL(31);
+ start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
+
+ /* Choose the best of the two solutions: */
+ if (n >= 1)
+ n -= 1;
+
+ /* Computed values solve the system of equations: */
+ start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
+
+ /* The best solution means: when there are multiple iterations we must
+ * start above 2^31 and count down to 2^31 to get the interrupt.
+ */
+ attrs->lft.hard_packet_limit = lower_32_bits(start_value);
+ attrs->lft.numb_rounds_hard = (u64)n;
+
+ /* Compute soft limit initial value and number of rounds.
+ *
+ * The soft_limit is achieved by adjusting the counter's
+ * interrupt_value. This is embedded in the counting pattern created by
+ * hard packet calculations above.
+ *
+ * We wish to compute the interrupt_value for the soft_limit. This is
+ * defined by this system of equations:
+ *
+ * soft_limit == start_value - soft_value + n * 2^31
+ * n >= 0
+ * soft_value < 2^32, soft_value >= 0
+ * for n == 0 start_value > soft_value
+ *
+ * As with the hard limit computation above, the equations are not
+ * single-solution.
+ * The algorithm selects the solution that has:
+ * 2^30 <= soft_limit < 2^31 + 2^30
+ * for the interior iterations, which guarantees a large guard band
+ * around the counter hard limit and next interrupt.
+ */
+
+ /* Start by estimating n and compute soft_value */
+ n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31);
+ start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) -
+ x->lft.soft_packet_limit;
+
+ /* Compare against constraints and adjust n */
+ if (n < 0)
+ n = 0;
+ else if (start_value >= BIT_ULL(32))
+ n -= 1;
+ else if (start_value < 0)
+ n += 1;
+
+ /* Choose the best of the two solutions: */
+ start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
+ if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30))
+ n += 1;
+
+ /* Note that the upper limit of soft_value happens naturally because we
+ * always select the lowest soft_value.
*/
- attrs->soft_packet_limit =
- x->lft.hard_packet_limit - x->lft.soft_packet_limit;
+
+ /* Computed values solve the system of equations: */
+ start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
+
+ /* The best solution means: when there are multiple iterations we must
+ * not fall below 2^30, as that would get too close to the false
+ * hard_limit; and when we reach an interior iteration for soft_limit, it
+ * has to be far away from 2^32 - 1, which is the counter reset point
+ * after the +2^31, to accommodate latency.
+ */
+ attrs->lft.soft_packet_limit = lower_32_bits(start_value);
+ attrs->lft.numb_rounds_soft = (u64)n;
+}
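
A worked example may make the rounds arithmetic clearer (the numbers are illustrative, not from the source). For a hard limit of 3 * 2^31 + 5 packets, the selection above gives n = 2 rounds and a start value of 2^31 + 5, keeping the counter above 2^31 until the final round:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t hard = 3 * (1ULL << 31) + 5;	/* illustrative hard limit */
	int64_t n = hard / (1ULL << 31);	/* first estimate: 3 */

	if (n >= 1)
		n -= 1;		/* prefer the larger start value: n = 2 */

	/* start value satisfying hard == start + n * 2^31 */
	printf("rounds=%lld start=%llu\n", (long long)n,
	       (unsigned long long)(hard - n * (1ULL << 31)));
	/* prints: rounds=2 start=2147483653 (= 2^31 + 5) */
	return 0;
}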
+
+static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
+ struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ struct xfrm_state *x = sa_entry->x;
+ struct net_device *netdev;
+ struct neighbour *n;
+ u8 addr[ETH_ALEN];
+ const void *pkey;
+ u8 *dst, *src;
+
+ if (attrs->mode != XFRM_MODE_TUNNEL ||
+ attrs->type != XFRM_DEV_OFFLOAD_PACKET)
+ return;
+
+ netdev = x->xso.real_dev;
+
+ mlx5_query_mac_address(mdev, addr);
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ src = attrs->dmac;
+ dst = attrs->smac;
+ pkey = &attrs->saddr.a4;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ src = attrs->smac;
+ dst = attrs->dmac;
+ pkey = &attrs->daddr.a4;
+ break;
+ default:
+ return;
+ }
+
+ ether_addr_copy(src, addr);
+ n = neigh_lookup(&arp_tbl, pkey, netdev);
+ if (!n) {
+ n = neigh_create(&arp_tbl, pkey, netdev);
+ if (IS_ERR(n))
+ return;
+ neigh_event_send(n, NULL);
+ attrs->drop = true;
+ } else {
+ neigh_ha_snapshot(addr, n, netdev);
+ ether_addr_copy(dst, addr);
+ }
+ neigh_release(n);
}
void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
@@ -141,11 +325,11 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
aes_gcm->icv_len = x->aead->alg_icv_len;
/* esn */
- if (sa_entry->esn_state.trigger) {
- attrs->esn_trigger = true;
- attrs->esn = sa_entry->esn_state.esn;
- attrs->esn_overlap = sa_entry->esn_state.overlap;
- attrs->replay_window = x->replay_esn->replay_window;
+ if (x->props.flags & XFRM_STATE_ESN) {
+ attrs->replay_esn.trigger = true;
+ attrs->replay_esn.esn = sa_entry->esn_state.esn;
+ attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
+ attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
}
attrs->dir = x->xso.dir;
@@ -163,8 +347,10 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
attrs->upspec.sport = ntohs(x->sel.sport);
attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
attrs->upspec.proto = x->sel.proto;
+ attrs->mode = x->props.mode;
mlx5e_ipsec_init_limits(sa_entry, attrs);
+ mlx5e_ipsec_init_macs(sa_entry, attrs);
}
static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
@@ -233,6 +419,11 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
+ if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
+ NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
+ return -EINVAL;
+ }
+
switch (x->xso.type) {
case XFRM_DEV_OFFLOAD_CRYPTO:
if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
@@ -240,11 +431,6 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
- if (x->props.mode != XFRM_MODE_TRANSPORT &&
- x->props.mode != XFRM_MODE_TUNNEL) {
- NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
- return -EINVAL;
- }
break;
case XFRM_DEV_OFFLOAD_PACKET:
if (!(mlx5_ipsec_device_caps(mdev) &
@@ -253,8 +439,9 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
- if (x->props.mode != XFRM_MODE_TRANSPORT) {
- NL_SET_ERR_MSG_MOD(extack, "Only transport xfrm states may be offloaded in packet mode");
+ if (x->props.mode == XFRM_MODE_TUNNEL &&
+ !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
+ NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
return -EINVAL;
}
@@ -283,6 +470,11 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one");
return -EINVAL;
}
+
+ if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) {
+ NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0");
+ return -EINVAL;
+ }
break;
default:
NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
@@ -291,14 +483,134 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return 0;
}
-static void _update_xfrm_state(struct work_struct *work)
+static void mlx5e_ipsec_modify_state(struct work_struct *_work)
{
- struct mlx5e_ipsec_modify_state_work *modify_work =
- container_of(work, struct mlx5e_ipsec_modify_state_work, work);
- struct mlx5e_ipsec_sa_entry *sa_entry = container_of(
- modify_work, struct mlx5e_ipsec_sa_entry, modify_work);
+ struct mlx5e_ipsec_work *work =
+ container_of(_work, struct mlx5e_ipsec_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
+ struct mlx5_accel_esp_xfrm_attrs *attrs;
+
+ attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs;
- mlx5_accel_esp_modify_xfrm(sa_entry, &modify_work->attrs);
+ mlx5_accel_esp_modify_xfrm(sa_entry, attrs);
+}
+
+static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct xfrm_state *x = sa_entry->x;
+
+ if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO ||
+ x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
+ return;
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn;
+ return;
+ }
+
+ sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
+}
+
+static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
+{
+ struct mlx5e_ipsec_work *work =
+ container_of(_work, struct mlx5e_ipsec_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
+ struct mlx5e_ipsec_netevent_data *data = work->data;
+ struct mlx5_accel_esp_xfrm_attrs *attrs;
+
+ attrs = &sa_entry->attrs;
+
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ ether_addr_copy(attrs->smac, data->addr);
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ ether_addr_copy(attrs->dmac, data->addr);
+ break;
+ default:
+ WARN_ON_ONCE(true);
+ }
+ attrs->drop = false;
+ mlx5e_accel_ipsec_fs_modify(sa_entry);
+}
+
+static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct xfrm_state *x = sa_entry->x;
+ struct mlx5e_ipsec_work *work;
+ void *data = NULL;
+
+ switch (x->xso.type) {
+ case XFRM_DEV_OFFLOAD_CRYPTO:
+ if (!(x->props.flags & XFRM_STATE_ESN))
+ return 0;
+ break;
+ case XFRM_DEV_OFFLOAD_PACKET:
+ if (x->props.mode != XFRM_MODE_TUNNEL)
+ return 0;
+ break;
+ default:
+ break;
+ }
+
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ switch (x->xso.type) {
+ case XFRM_DEV_OFFLOAD_CRYPTO:
+ data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+ if (!data)
+ goto free_work;
+
+ INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
+ break;
+ case XFRM_DEV_OFFLOAD_PACKET:
+ data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data),
+ GFP_KERNEL);
+ if (!data)
+ goto free_work;
+
+ INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
+ break;
+ default:
+ break;
+ }
+
+ work->data = data;
+ work->sa_entry = sa_entry;
+ sa_entry->work = work;
+ return 0;
+
+free_work:
+ kfree(work);
+ return -ENOMEM;
+}
+
+static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct xfrm_state *x = sa_entry->x;
+ struct mlx5e_ipsec_dwork *dwork;
+
+ if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ return 0;
+
+ if (x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
+ return 0;
+
+ if (x->lft.soft_packet_limit == XFRM_INF &&
+ x->lft.hard_packet_limit == XFRM_INF)
+ return 0;
+
+ dwork = kzalloc(sizeof(*dwork), GFP_KERNEL);
+ if (!dwork)
+ return -ENOMEM;
+
+ dwork->sa_entry = sa_entry;
+ INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_tx_limit);
+ sa_entry->dwork = dwork;
+ return 0;
}
static int mlx5e_xfrm_add_state(struct xfrm_state *x,
@@ -332,18 +644,36 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
goto err_xfrm;
/* check esn */
- mlx5e_ipsec_update_esn_state(sa_entry);
+ if (x->props.flags & XFRM_STATE_ESN)
+ mlx5e_ipsec_update_esn_state(sa_entry);
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
+
+ err = mlx5_ipsec_create_work(sa_entry);
+ if (err)
+ goto err_xfrm;
+
+ err = mlx5e_ipsec_create_dwork(sa_entry);
+ if (err)
+ goto release_work;
+
/* create hw context */
err = mlx5_ipsec_create_sa_ctx(sa_entry);
if (err)
- goto err_xfrm;
+ goto release_dwork;
err = mlx5e_accel_ipsec_fs_add_rule(sa_entry);
if (err)
goto err_hw_ctx;
+ if (x->props.mode == XFRM_MODE_TUNNEL &&
+ x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
+ !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) {
+ NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings");
+ err = -EINVAL;
+ goto err_add_rule;
+ }
+
/* We use *_bh() variant because xfrm_timer_handler(), which runs
* in softirq context, can reach our state delete logic and we need
* xa_erase_bh() there.
@@ -353,11 +683,17 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
if (err)
goto err_add_rule;
- if (x->xso.dir == XFRM_DEV_OFFLOAD_OUT)
- sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
- mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
+ mlx5e_ipsec_set_esn_ops(sa_entry);
+
+ if (sa_entry->dwork)
+ queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
+ MLX5_IPSEC_RESCHED);
+
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
+ x->props.mode == XFRM_MODE_TUNNEL)
+ xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
+ MLX5E_IPSEC_TUNNEL_SA);
- INIT_WORK(&sa_entry->modify_work.work, _update_xfrm_state);
out:
x->xso.offload_handle = (unsigned long)sa_entry;
return 0;
@@ -366,15 +702,22 @@ err_add_rule:
mlx5e_accel_ipsec_fs_del_rule(sa_entry);
err_hw_ctx:
mlx5_ipsec_free_sa_ctx(sa_entry);
+release_dwork:
+ kfree(sa_entry->dwork);
+release_work:
+ if (sa_entry->work)
+ kfree(sa_entry->work->data);
+ kfree(sa_entry->work);
err_xfrm:
kfree(sa_entry);
- NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy");
+ NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state");
return err;
}
static void mlx5e_xfrm_del_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+ struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
struct mlx5e_ipsec_sa_entry *old;
@@ -383,6 +726,12 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
WARN_ON(old != sa_entry);
+
+ if (attrs->mode == XFRM_MODE_TUNNEL &&
+ attrs->type == XFRM_DEV_OFFLOAD_PACKET)
+ /* Make sure that no ARP requests are running in parallel */
+ flush_workqueue(ipsec->wq);
+
}
static void mlx5e_xfrm_free_state(struct xfrm_state *x)
@@ -392,13 +741,62 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x)
if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
goto sa_entry_free;
- cancel_work_sync(&sa_entry->modify_work.work);
+ if (sa_entry->work)
+ cancel_work_sync(&sa_entry->work->work);
+
+ if (sa_entry->dwork)
+ cancel_delayed_work_sync(&sa_entry->dwork->dwork);
+
mlx5e_accel_ipsec_fs_del_rule(sa_entry);
mlx5_ipsec_free_sa_ctx(sa_entry);
+ kfree(sa_entry->dwork);
+ if (sa_entry->work)
+ kfree(sa_entry->work->data);
+ kfree(sa_entry->work);
sa_entry_free:
kfree(sa_entry);
}
+static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5_accel_esp_xfrm_attrs *attrs;
+ struct mlx5e_ipsec_netevent_data *data;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct mlx5e_ipsec *ipsec;
+ struct neighbour *n = ptr;
+ struct net_device *netdev;
+ struct xfrm_state *x;
+ unsigned long idx;
+
+ if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
+ return NOTIFY_DONE;
+
+ ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
+ xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
+ attrs = &sa_entry->attrs;
+
+ if (attrs->family == AF_INET) {
+ if (!neigh_key_eq32(n, &attrs->saddr.a4) &&
+ !neigh_key_eq32(n, &attrs->daddr.a4))
+ continue;
+ } else {
+ if (!neigh_key_eq128(n, &attrs->saddr.a4) &&
+ !neigh_key_eq128(n, &attrs->daddr.a4))
+ continue;
+ }
+
+ x = sa_entry->x;
+ netdev = x->xso.real_dev;
+ data = sa_entry->work->data;
+
+ neigh_ha_snapshot(data->addr, n, netdev);
+ queue_work(ipsec->wq, &sa_entry->work->work);
+ }
+
+ return NOTIFY_DONE;
+}
+
void mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
struct mlx5e_ipsec *ipsec;
@@ -415,8 +813,8 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC);
ipsec->mdev = priv->mdev;
- ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
- priv->netdev->name);
+ ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0,
+ priv->netdev->name);
if (!ipsec->wq)
goto err_wq;
@@ -427,6 +825,13 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
goto err_aso;
}
+ if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
+ ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
+ ret = register_netevent_notifier(&ipsec->netevent_nb);
+ if (ret)
+ goto clear_aso;
+ }
+
ret = mlx5e_accel_ipsec_fs_init(ipsec);
if (ret)
goto err_fs_init;
@@ -437,6 +842,9 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
return;
err_fs_init:
+ if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
+ unregister_netevent_notifier(&ipsec->netevent_nb);
+clear_aso:
if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
mlx5e_ipsec_aso_cleanup(ipsec);
err_aso:
@@ -455,6 +863,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
return;
mlx5e_accel_ipsec_fs_cleanup(ipsec);
+ if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
+ unregister_netevent_notifier(&ipsec->netevent_nb);
if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
mlx5e_ipsec_aso_cleanup(ipsec);
destroy_workqueue(ipsec->wq);
@@ -480,16 +890,18 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- struct mlx5e_ipsec_modify_state_work *modify_work =
- &sa_entry->modify_work;
+ struct mlx5e_ipsec_work *work = sa_entry->work;
+ struct mlx5e_ipsec_sa_entry *sa_entry_shadow;
bool need_update;
need_update = mlx5e_ipsec_update_esn_state(sa_entry);
if (!need_update)
return;
- mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs);
- queue_work(sa_entry->ipsec->wq, &modify_work->work);
+ sa_entry_shadow = work->data;
+ memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow));
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs);
+ queue_work(sa_entry->ipsec->wq, &work->work);
}
static void mlx5e_xfrm_update_curlft(struct xfrm_state *x)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 68ae5230eb75..4e9887171508 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -60,10 +60,24 @@ struct upspec {
u8 proto;
};
+struct mlx5_ipsec_lft {
+ u64 hard_packet_limit;
+ u64 soft_packet_limit;
+ u64 numb_rounds_hard;
+ u64 numb_rounds_soft;
+};
+
+struct mlx5_replay_esn {
+ u32 replay_window;
+ u32 esn;
+ u32 esn_msb;
+ u8 overlap : 1;
+ u8 trigger : 1;
+};
+
struct mlx5_accel_esp_xfrm_attrs {
- u32 esn;
u32 spi;
- u32 flags;
+ u32 mode;
struct aes_gcm_keymat aes_gcm;
union {
@@ -78,15 +92,15 @@ struct mlx5_accel_esp_xfrm_attrs {
struct upspec upspec;
u8 dir : 2;
- u8 esn_overlap : 1;
- u8 esn_trigger : 1;
u8 type : 2;
+ u8 drop : 1;
u8 family;
- u32 replay_window;
+ struct mlx5_replay_esn replay_esn;
u32 authsize;
u32 reqid;
- u64 hard_packet_limit;
- u64 soft_packet_limit;
+ struct mlx5_ipsec_lft lft;
+ u8 smac[ETH_ALEN];
+ u8 dmac[ETH_ALEN];
};
enum mlx5_ipsec_cap {
@@ -95,6 +109,7 @@ enum mlx5_ipsec_cap {
MLX5_IPSEC_CAP_PACKET_OFFLOAD = 1 << 2,
MLX5_IPSEC_CAP_ROCE = 1 << 3,
MLX5_IPSEC_CAP_PRIO = 1 << 4,
+ MLX5_IPSEC_CAP_TUNNEL = 1 << 5,
};
struct mlx5e_priv;
@@ -125,8 +140,17 @@ struct mlx5e_ipsec_tx;
struct mlx5e_ipsec_work {
struct work_struct work;
- struct mlx5e_ipsec *ipsec;
- u32 id;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ void *data;
+};
+
+struct mlx5e_ipsec_netevent_data {
+ u8 addr[ETH_ALEN];
+};
+
+struct mlx5e_ipsec_dwork {
+ struct delayed_work dwork;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
};
struct mlx5e_ipsec_aso {
@@ -149,12 +173,13 @@ struct mlx5e_ipsec {
struct mlx5e_ipsec_tx *tx;
struct mlx5e_ipsec_aso *aso;
struct notifier_block nb;
+ struct notifier_block netevent_nb;
struct mlx5_ipsec_fs *roce;
};
struct mlx5e_ipsec_esn_state {
u32 esn;
- u8 trigger: 1;
+ u32 esn_msb;
u8 overlap: 1;
};
@@ -165,9 +190,10 @@ struct mlx5e_ipsec_rule {
struct mlx5_fc *fc;
};
-struct mlx5e_ipsec_modify_state_work {
- struct work_struct work;
- struct mlx5_accel_esp_xfrm_attrs attrs;
+struct mlx5e_ipsec_limits {
+ u64 round;
+ u8 soft_limit_hit : 1;
+ u8 fix_limit : 1;
};
struct mlx5e_ipsec_sa_entry {
@@ -180,7 +206,9 @@ struct mlx5e_ipsec_sa_entry {
u32 ipsec_obj_id;
u32 enc_key_id;
struct mlx5e_ipsec_rule ipsec_rule;
- struct mlx5e_ipsec_modify_state_work modify_work;
+ struct mlx5e_ipsec_work *work;
+ struct mlx5e_ipsec_dwork *dwork;
+ struct mlx5e_ipsec_limits limits;
};
struct mlx5_accel_pol_xfrm_attrs {
@@ -222,6 +250,8 @@ int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry);
void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry);
int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
+void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry);
+bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry);
int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
@@ -257,7 +287,7 @@ static inline bool addr6_all_zero(__be32 *addr6)
{
static const __be32 zaddr6[4] = {};
- return !memcmp(addr6, zaddr6, sizeof(*zaddr6));
+ return !memcmp(addr6, zaddr6, sizeof(zaddr6));
}
#else
static inline void mlx5e_ipsec_init(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 0539640a4d88..dbe87bf89c0d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -4,12 +4,15 @@
#include <linux/netdevice.h>
#include "en.h"
#include "en/fs.h"
+#include "eswitch.h"
#include "ipsec.h"
#include "fs_core.h"
#include "lib/ipsec_fs_roce.h"
#include "lib/fs_chains.h"
#define NUM_IPSEC_FTE BIT(15)
+#define MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE 16
+#define IPSEC_TUNNEL_DEFAULT_TTL 0x40
struct mlx5e_ipsec_fc {
struct mlx5_fc *cnt;
@@ -36,6 +39,7 @@ struct mlx5e_ipsec_rx {
struct mlx5e_ipsec_rule status;
struct mlx5e_ipsec_fc *fc;
struct mlx5_fs_chains *chains;
+ u8 allow_tunnel_mode : 1;
};
struct mlx5e_ipsec_tx {
@@ -45,6 +49,7 @@ struct mlx5e_ipsec_tx {
struct mlx5_flow_namespace *ns;
struct mlx5e_ipsec_fc *fc;
struct mlx5_fs_chains *chains;
+ u8 allow_tunnel_mode : 1;
};
/* IPsec RX flow steering */
@@ -118,7 +123,7 @@ static void ipsec_chains_put_table(struct mlx5_fs_chains *chains, u32 prio)
static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns,
int level, int prio,
- int max_num_groups)
+ int max_num_groups, u32 flags)
{
struct mlx5_flow_table_attr ft_attr = {};
@@ -127,6 +132,7 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns,
ft_attr.max_fte = NUM_IPSEC_FTE;
ft_attr.level = level;
ft_attr.prio = prio;
+ ft_attr.flags = flags;
return mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
}
@@ -251,7 +257,8 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
mlx5_del_flow_rules(rx->sa.rule);
mlx5_destroy_flow_group(rx->sa.group);
mlx5_destroy_flow_table(rx->ft.sa);
-
+ if (rx->allow_tunnel_mode)
+ mlx5_eswitch_unblock_encap(mdev);
mlx5_del_flow_rules(rx->status.rule);
mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
mlx5_destroy_flow_table(rx->ft.status);
@@ -267,6 +274,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
struct mlx5_flow_destination default_dest;
struct mlx5_flow_destination dest[2];
struct mlx5_flow_table *ft;
+ u32 flags = 0;
int err;
default_dest = mlx5_ttc_get_default_dest(ttc, family2tt(family));
@@ -277,7 +285,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
return err;
ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL,
- MLX5E_NIC_PRIO, 1);
+ MLX5E_NIC_PRIO, 1, 0);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_fs_ft_status;
@@ -300,8 +308,12 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
goto err_add;
/* Create FT */
- ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO,
- 2);
+ if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
+ rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
+ if (rx->allow_tunnel_mode)
+ flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO, 2,
+ flags);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_fs_ft;
@@ -327,7 +339,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
}
ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_POL_FT_LEVEL, MLX5E_NIC_PRIO,
- 2);
+ 2, 0);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_pol_ft;
@@ -356,6 +368,8 @@ err_pol_ft:
err_fs:
mlx5_destroy_flow_table(rx->ft.sa);
err_fs_ft:
+ if (rx->allow_tunnel_mode)
+ mlx5_eswitch_unblock_encap(mdev);
mlx5_del_flow_rules(rx->status.rule);
mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
err_add:
@@ -490,7 +504,8 @@ err_rule:
}
/* IPsec TX flow steering */
-static void tx_destroy(struct mlx5e_ipsec_tx *tx, struct mlx5_ipsec_fs *roce)
+static void tx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
+ struct mlx5_ipsec_fs *roce)
{
mlx5_ipsec_fs_roce_tx_destroy(roce);
if (tx->chains) {
@@ -502,6 +517,8 @@ static void tx_destroy(struct mlx5e_ipsec_tx *tx, struct mlx5_ipsec_fs *roce)
}
mlx5_destroy_flow_table(tx->ft.sa);
+ if (tx->allow_tunnel_mode)
+ mlx5_eswitch_unblock_encap(mdev);
mlx5_del_flow_rules(tx->status.rule);
mlx5_destroy_flow_table(tx->ft.status);
}
@@ -511,9 +528,10 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
{
struct mlx5_flow_destination dest = {};
struct mlx5_flow_table *ft;
+ u32 flags = 0;
int err;
- ft = ipsec_ft_create(tx->ns, 2, 0, 1);
+ ft = ipsec_ft_create(tx->ns, 2, 0, 1, 0);
if (IS_ERR(ft))
return PTR_ERR(ft);
tx->ft.status = ft;
@@ -522,7 +540,11 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
if (err)
goto err_status_rule;
- ft = ipsec_ft_create(tx->ns, 1, 0, 4);
+ if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
+ tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
+ if (tx->allow_tunnel_mode)
+ flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft = ipsec_ft_create(tx->ns, 1, 0, 4, flags);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_sa_ft;
@@ -541,7 +563,7 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
goto connect_roce;
}
- ft = ipsec_ft_create(tx->ns, 0, 0, 2);
+ ft = ipsec_ft_create(tx->ns, 0, 0, 2, 0);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_pol_ft;
@@ -572,6 +594,8 @@ err_roce:
err_pol_ft:
mlx5_destroy_flow_table(tx->ft.sa);
err_sa_ft:
+ if (tx->allow_tunnel_mode)
+ mlx5_eswitch_unblock_encap(mdev);
mlx5_del_flow_rules(tx->status.rule);
err_status_rule:
mlx5_destroy_flow_table(tx->ft.status);
@@ -600,7 +624,7 @@ static void tx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx)
if (--tx->ft.refcnt)
return;
- tx_destroy(tx, ipsec->roce);
+ tx_destroy(ipsec->mdev, tx, ipsec->roce);
}
static struct mlx5_flow_table *tx_ft_get_policy(struct mlx5_core_dev *mdev,
@@ -829,40 +853,181 @@ static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir,
return 0;
}
+static int
+setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm_attrs *attrs,
+ struct mlx5_pkt_reformat_params *reformat_params)
+{
+ struct ip_esp_hdr *esp_hdr;
+ struct ipv6hdr *ipv6hdr;
+ struct ethhdr *eth_hdr;
+ struct iphdr *iphdr;
+ char *reformatbf;
+ size_t bfflen;
+ void *hdr;
+
+ bfflen = sizeof(*eth_hdr);
+
+ if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) {
+ bfflen += sizeof(*esp_hdr) + 8;
+
+ switch (attrs->family) {
+ case AF_INET:
+ bfflen += sizeof(*iphdr);
+ break;
+ case AF_INET6:
+ bfflen += sizeof(*ipv6hdr);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ reformatbf = kzalloc(bfflen, GFP_KERNEL);
+ if (!reformatbf)
+ return -ENOMEM;
+
+ eth_hdr = (struct ethhdr *)reformatbf;
+ switch (attrs->family) {
+ case AF_INET:
+ eth_hdr->h_proto = htons(ETH_P_IP);
+ break;
+ case AF_INET6:
+ eth_hdr->h_proto = htons(ETH_P_IPV6);
+ break;
+ default:
+ goto free_reformatbf;
+ }
+
+ ether_addr_copy(eth_hdr->h_dest, attrs->dmac);
+ ether_addr_copy(eth_hdr->h_source, attrs->smac);
+
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ reformat_params->type = MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ reformat_params->type = MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL;
+ reformat_params->param_0 = attrs->authsize;
+
+ hdr = reformatbf + sizeof(*eth_hdr);
+ switch (attrs->family) {
+ case AF_INET:
+ iphdr = (struct iphdr *)hdr;
+ memcpy(&iphdr->saddr, &attrs->saddr.a4, 4);
+ memcpy(&iphdr->daddr, &attrs->daddr.a4, 4);
+ iphdr->version = 4;
+ iphdr->ihl = 5;
+ iphdr->ttl = IPSEC_TUNNEL_DEFAULT_TTL;
+ iphdr->protocol = IPPROTO_ESP;
+ hdr += sizeof(*iphdr);
+ break;
+ case AF_INET6:
+ ipv6hdr = (struct ipv6hdr *)hdr;
+ memcpy(&ipv6hdr->saddr, &attrs->saddr.a6, 16);
+ memcpy(&ipv6hdr->daddr, &attrs->daddr.a6, 16);
+ ipv6hdr->nexthdr = IPPROTO_ESP;
+ ipv6hdr->version = 6;
+ ipv6hdr->hop_limit = IPSEC_TUNNEL_DEFAULT_TTL;
+ hdr += sizeof(*ipv6hdr);
+ break;
+ default:
+ goto free_reformatbf;
+ }
+
+ esp_hdr = (struct ip_esp_hdr *)hdr;
+ esp_hdr->spi = htonl(attrs->spi);
+ break;
+ default:
+ goto free_reformatbf;
+ }
+
+ reformat_params->size = bfflen;
+ reformat_params->data = reformatbf;
+ return 0;
+
+free_reformatbf:
+ kfree(reformatbf);
+ return -EINVAL;
+}
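
Illustrative aside: the egress AF_INET buffer that setup_pkt_tunnel_reformat() assembles above stacks Ethernet, outer IPv4 and ESP headers back to back. The standalone sketch below lays out the same offsets; the "+ 8" after the ESP header is assumed here to reserve room for the 8-byte IV, and the sizes are the usual on-wire header sizes, not kernel struct layouts.

#include <stdio.h>
#include <stddef.h>

int main(void)
{
	size_t eth = 14, ip4 = 20, esp = 8, iv = 8;
	size_t off = 0;

	printf("ethhdr  at bytes %zu..%zu\n", off, off + eth - 1); off += eth;
	printf("iphdr   at bytes %zu..%zu\n", off, off + ip4 - 1); off += ip4;
	printf("esp_hdr at bytes %zu..%zu\n", off, off + esp - 1); off += esp;
	printf("iv room at bytes %zu..%zu\n", off, off + iv - 1);  off += iv;
	printf("total bfflen = %zu\n", off); /* 50 bytes for IPv4 */
	return 0;
}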
+
+static int
+setup_pkt_transport_reformat(struct mlx5_accel_esp_xfrm_attrs *attrs,
+ struct mlx5_pkt_reformat_params *reformat_params)
+{
+ u8 *reformatbf;
+ __be32 spi;
+
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ reformat_params->type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ if (attrs->family == AF_INET)
+ reformat_params->type =
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
+ else
+ reformat_params->type =
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;
+
+ reformatbf = kzalloc(MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE,
+ GFP_KERNEL);
+ if (!reformatbf)
+ return -ENOMEM;
+
+ /* convert to network format */
+ spi = htonl(attrs->spi);
+ memcpy(reformatbf, &spi, sizeof(spi));
+
+ reformat_params->param_0 = attrs->authsize;
+ reformat_params->size =
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE;
+ reformat_params->data = reformatbf;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int setup_pkt_reformat(struct mlx5_core_dev *mdev,
struct mlx5_accel_esp_xfrm_attrs *attrs,
struct mlx5_flow_act *flow_act)
{
- enum mlx5_flow_namespace_type ns_type = MLX5_FLOW_NAMESPACE_EGRESS;
struct mlx5_pkt_reformat_params reformat_params = {};
struct mlx5_pkt_reformat *pkt_reformat;
- u8 reformatbf[16] = {};
- __be32 spi;
+ enum mlx5_flow_namespace_type ns_type;
+ int ret;
- if (attrs->dir == XFRM_DEV_OFFLOAD_IN) {
- reformat_params.type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
- goto cmd;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ ns_type = MLX5_FLOW_NAMESPACE_EGRESS;
+ break;
+ default:
+ return -EINVAL;
}
- if (attrs->family == AF_INET)
- reformat_params.type =
- MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
- else
- reformat_params.type =
- MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;
-
- /* convert to network format */
- spi = htonl(attrs->spi);
- memcpy(reformatbf, &spi, 4);
+ switch (attrs->mode) {
+ case XFRM_MODE_TRANSPORT:
+ ret = setup_pkt_transport_reformat(attrs, &reformat_params);
+ break;
+ case XFRM_MODE_TUNNEL:
+ ret = setup_pkt_tunnel_reformat(mdev, attrs, &reformat_params);
+ break;
+ default:
+ ret = -EINVAL;
+ }
- reformat_params.param_0 = attrs->authsize;
- reformat_params.size = sizeof(reformatbf);
- reformat_params.data = &reformatbf;
+ if (ret)
+ return ret;
-cmd:
pkt_reformat =
mlx5_packet_reformat_alloc(mdev, &reformat_params, ns_type);
+ kfree(reformat_params.data);
if (IS_ERR(pkt_reformat))
return PTR_ERR(pkt_reformat);
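
Aside on ownership: reformat_params.data is freed immediately after mlx5_packet_reformat_alloc() returns above, which implies the allocator consumes a copy of the caller's buffer. A minimal standalone sketch of that build/hand-off/free pattern, with a hypothetical reformat_alloc() standing in for the firmware command:

#include <stdlib.h>
#include <string.h>

struct params { void *data; size_t size; };

/* hypothetical stand-in for the firmware command: consumes a copy */
static void *reformat_alloc(const struct params *p)
{
	void *copy = malloc(p->size);

	if (copy)
		memcpy(copy, p->data, p->size);
	return copy;
}

int main(void)
{
	struct params p = { .size = 16 };
	void *obj;
	int ok;

	p.data = calloc(1, p.size);	/* build the headers */
	if (!p.data)
		return 1;
	obj = reformat_alloc(&p);	/* hand the buffer off */
	free(p.data);			/* caller frees its copy unconditionally */
	ok = obj != NULL;
	free(obj);
	return ok ? 0 : 1;
}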
@@ -926,9 +1091,12 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC;
flow_act.crypto.obj_id = sa_entry->ipsec_obj_id;
flow_act.flags |= FLOW_ACT_NO_APPEND;
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ if (attrs->drop)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ else
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest[0].ft = rx->ft.status;
dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
@@ -1018,9 +1186,13 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC;
flow_act.crypto.obj_id = sa_entry->ipsec_obj_id;
flow_act.flags |= FLOW_ACT_NO_APPEND;
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT |
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ if (attrs->drop)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ else
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
dest[0].ft = tx->ft.status;
dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
@@ -1080,16 +1252,16 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
setup_fte_no_frags(spec);
setup_fte_upper_proto_match(spec, &attrs->upspec);
- if (attrs->reqid) {
+ switch (attrs->action) {
+ case XFRM_POLICY_ALLOW:
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ if (!attrs->reqid)
+ break;
+
err = setup_modify_header(mdev, attrs->reqid,
XFRM_DEV_OFFLOAD_OUT, &flow_act);
if (err)
goto err_mod_header;
- }
-
- switch (attrs->action) {
- case XFRM_POLICY_ALLOW:
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
break;
case XFRM_POLICY_BLOCK:
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
@@ -1101,7 +1273,7 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
default:
WARN_ON(true);
err = -EINVAL;
- goto err_action;
+ goto err_mod_header;
}
flow_act.flags |= FLOW_ACT_NO_APPEND;
@@ -1121,7 +1293,7 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
return 0;
err_action:
- if (attrs->reqid)
+ if (flow_act.modify_hdr)
mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr);
err_mod_header:
kvfree(spec);
@@ -1430,3 +1602,31 @@ err_rx_ipv4:
kfree(ipsec->tx);
return err;
}
+
+void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec_sa_entry sa_entry_shadow = {};
+ int err;
+
+ memcpy(&sa_entry_shadow, sa_entry, sizeof(*sa_entry));
+ memset(&sa_entry_shadow.ipsec_rule, 0x00, sizeof(sa_entry->ipsec_rule));
+
+ err = mlx5e_accel_ipsec_fs_add_rule(&sa_entry_shadow);
+ if (err)
+ return;
+
+ mlx5e_accel_ipsec_fs_del_rule(sa_entry);
+ memcpy(sa_entry, &sa_entry_shadow, sizeof(*sa_entry));
+}
+
+bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec_rx *rx =
+ ipsec_rx(sa_entry->ipsec, sa_entry->attrs.family);
+ struct mlx5e_ipsec_tx *tx = sa_entry->ipsec->tx;
+
+ if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT)
+ return tx->allow_tunnel_mode;
+
+ return rx->allow_tunnel_mode;
+}
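
mlx5e_accel_ipsec_fs_modify() above updates an SA make-before-break: it stages the new rule in a shadow copy, installs it, and only tears down and commits over the old state on success. A standalone sketch of that pattern, with add_rule()/del_rule() as stand-ins:

#include <stdio.h>
#include <string.h>

struct entry { int rule; };

static int add_rule(struct entry *e) { e->rule = 42; return 0; }
static void del_rule(struct entry *e) { e->rule = 0; }

static void modify(struct entry *live)
{
	struct entry shadow;

	memcpy(&shadow, live, sizeof(*live));
	shadow.rule = 0;		/* shadow starts with fresh rule state */
	if (add_rule(&shadow))
		return;			/* on failure the old rule stays live */
	del_rule(live);
	memcpy(live, &shadow, sizeof(*live));
}

int main(void)
{
	struct entry e = { .rule = 7 };

	modify(&e);
	printf("rule=%d\n", e.rule);	/* 42 */
	return 0;
}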
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
index 5342b0b07681..df90e19066bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -8,6 +8,7 @@
enum {
MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET,
+ MLX5_IPSEC_ASO_REMOVE_FLOW_SOFT_LFT_OFFSET,
};
u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
@@ -47,6 +48,12 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) &&
MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level))
caps |= MLX5_IPSEC_CAP_PRIO;
+
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev,
+ reformat_l2_to_l3_esp_tunnel) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ reformat_l3_esp_tunnel_to_l2))
+ caps |= MLX5_IPSEC_CAP_TUNNEL;
}
if (mlx5_get_roce_state(mdev) &&
@@ -75,15 +82,17 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn,
void *aso_ctx;
aso_ctx = MLX5_ADDR_OF(ipsec_obj, obj, ipsec_aso);
- if (attrs->esn_trigger) {
+ if (attrs->replay_esn.trigger) {
MLX5_SET(ipsec_aso, aso_ctx, esn_event_arm, 1);
if (attrs->dir == XFRM_DEV_OFFLOAD_IN) {
MLX5_SET(ipsec_aso, aso_ctx, window_sz,
- attrs->replay_window / 64);
+ attrs->replay_esn.replay_window / 64);
MLX5_SET(ipsec_aso, aso_ctx, mode,
MLX5_IPSEC_ASO_REPLAY_PROTECTION);
- }
+ }
+ MLX5_SET(ipsec_aso, aso_ctx, mode_parameter,
+ attrs->replay_esn.esn);
}
/* ASO context */
@@ -100,15 +109,15 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn,
if (attrs->dir == XFRM_DEV_OFFLOAD_OUT)
MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN);
- if (attrs->hard_packet_limit != XFRM_INF) {
+ if (attrs->lft.hard_packet_limit != XFRM_INF) {
MLX5_SET(ipsec_aso, aso_ctx, remove_flow_pkt_cnt,
- lower_32_bits(attrs->hard_packet_limit));
+ attrs->lft.hard_packet_limit);
MLX5_SET(ipsec_aso, aso_ctx, hard_lft_arm, 1);
}
- if (attrs->soft_packet_limit != XFRM_INF) {
+ if (attrs->lft.soft_packet_limit != XFRM_INF) {
MLX5_SET(ipsec_aso, aso_ctx, remove_flow_soft_lft,
- lower_32_bits(attrs->soft_packet_limit));
+ attrs->lft.soft_packet_limit);
MLX5_SET(ipsec_aso, aso_ctx, soft_lft_arm, 1);
}
@@ -135,10 +144,10 @@ static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
salt_iv_p = MLX5_ADDR_OF(ipsec_obj, obj, implicit_iv);
memcpy(salt_iv_p, &aes_gcm->seq_iv, sizeof(aes_gcm->seq_iv));
/* esn */
- if (attrs->esn_trigger) {
+ if (attrs->replay_esn.trigger) {
MLX5_SET(ipsec_obj, obj, esn_en, 1);
- MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn);
- MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->esn_overlap);
+ MLX5_SET(ipsec_obj, obj, esn_msb, attrs->replay_esn.esn_msb);
+ MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->replay_esn.overlap);
}
MLX5_SET(ipsec_obj, obj, dekn, sa_entry->enc_key_id);
@@ -224,9 +233,6 @@ static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry,
void *obj;
int err;
- if (!attrs->esn_trigger)
- return 0;
-
general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC))
return -EINVAL;
@@ -254,8 +260,8 @@ static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry,
MLX5_SET64(ipsec_obj, obj, modify_field_select,
MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP |
MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB);
- MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn);
- MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->esn_overlap);
+ MLX5_SET(ipsec_obj, obj, esn_msb, attrs->replay_esn.esn_msb);
+ MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->replay_esn.overlap);
/* general object fields set */
MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
@@ -275,29 +281,24 @@ void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry,
memcpy(&sa_entry->attrs, attrs, sizeof(sa_entry->attrs));
}
-static void
-mlx5e_ipsec_aso_update_esn(struct mlx5e_ipsec_sa_entry *sa_entry,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
+static void mlx5e_ipsec_aso_update(struct mlx5e_ipsec_sa_entry *sa_entry,
+ struct mlx5_wqe_aso_ctrl_seg *data)
{
- struct mlx5_wqe_aso_ctrl_seg data = {};
-
- data.data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT << 6;
- data.condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE | MLX5_ASO_ALWAYS_TRUE
- << 4;
- data.data_offset_condition_operand = MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET;
- data.bitwise_data = cpu_to_be64(BIT_ULL(54));
- data.data_mask = data.bitwise_data;
+ data->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT << 6;
+ data->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE |
+ MLX5_ASO_ALWAYS_TRUE << 4;
- mlx5e_ipsec_aso_query(sa_entry, &data);
+ mlx5e_ipsec_aso_query(sa_entry, data);
}
static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry,
u32 mode_param)
{
struct mlx5_accel_esp_xfrm_attrs attrs = {};
+ struct mlx5_wqe_aso_ctrl_seg data = {};
if (mode_param < MLX5E_IPSEC_ESN_SCOPE_MID) {
- sa_entry->esn_state.esn++;
+ sa_entry->esn_state.esn_msb++;
sa_entry->esn_state.overlap = 0;
} else {
sa_entry->esn_state.overlap = 1;
@@ -305,25 +306,129 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry,
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
mlx5_accel_esp_modify_xfrm(sa_entry, &attrs);
- mlx5e_ipsec_aso_update_esn(sa_entry, &attrs);
+
+ data.data_offset_condition_operand =
+ MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET;
+ data.bitwise_data = cpu_to_be64(BIT_ULL(54));
+ data.data_mask = data.bitwise_data;
+
+ mlx5e_ipsec_aso_update(sa_entry, &data);
+}
+
+static void mlx5e_ipsec_aso_update_hard(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_wqe_aso_ctrl_seg data = {};
+
+ data.data_offset_condition_operand =
+ MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET;
+ data.bitwise_data = cpu_to_be64(BIT_ULL(57) + BIT_ULL(31));
+ data.data_mask = data.bitwise_data;
+ mlx5e_ipsec_aso_update(sa_entry, &data);
+}
+
+static void mlx5e_ipsec_aso_update_soft(struct mlx5e_ipsec_sa_entry *sa_entry,
+ u32 val)
+{
+ struct mlx5_wqe_aso_ctrl_seg data = {};
+
+ data.data_offset_condition_operand =
+ MLX5_IPSEC_ASO_REMOVE_FLOW_SOFT_LFT_OFFSET;
+ data.bitwise_data = cpu_to_be64(val);
+ data.data_mask = cpu_to_be64(U32_MAX);
+ mlx5e_ipsec_aso_update(sa_entry, &data);
+}
+
+static void mlx5e_ipsec_handle_limits(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
+ struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+ struct mlx5e_ipsec_aso *aso = ipsec->aso;
+ bool soft_arm, hard_arm;
+ u64 hard_cnt;
+
+ lockdep_assert_held(&sa_entry->x->lock);
+
+ soft_arm = !MLX5_GET(ipsec_aso, aso->ctx, soft_lft_arm);
+ hard_arm = !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm);
+ if (!soft_arm && !hard_arm)
+ /* Not a lifetime event */
+ return;
+
+ hard_cnt = MLX5_GET(ipsec_aso, aso->ctx, remove_flow_pkt_cnt);
+ if (!hard_cnt || hard_arm) {
+ /* It is possible to see the packet counter reach zero
+ * without the hard limit event armed, e.g. if the counter
+ * decreased while we were handling a soft limit event.
+ *
+ * However, it is a HW/FW bug if the hard limit event is
+ * raised while the packet counter is not zero.
+ */
+ WARN_ON_ONCE(hard_arm && hard_cnt);
+
+ /* Notify about hard limit */
+ xfrm_state_check_expire(sa_entry->x);
+ return;
+ }
+
+ /* We are handling a soft limit event. */
+ if (!sa_entry->limits.soft_limit_hit &&
+ sa_entry->limits.round == attrs->lft.numb_rounds_soft) {
+ sa_entry->limits.soft_limit_hit = true;
+ /* Notify about soft limit */
+ xfrm_state_check_expire(sa_entry->x);
+
+ if (sa_entry->limits.round == attrs->lft.numb_rounds_hard)
+ goto hard;
+
+ if (attrs->lft.soft_packet_limit > BIT_ULL(31)) {
+ /* We cannot avoid a soft_value that has the high bit
+ * set. For instance, soft_value=2^31+1 cannot be
+ * adjusted to its high-bit-clear equivalent soft_value=1,
+ * because that is too close to 0.
+ *
+ * Thus we have a corner case where we can hit the
+ * soft_limit with the high bit set but cannot adjust
+ * the counter. Instead we set a temporary interrupt_value
+ * at least 2^30 away from here and do the adjustment
+ * then.
+ */
+ mlx5e_ipsec_aso_update_soft(sa_entry,
+ BIT_ULL(31) - BIT_ULL(30));
+ sa_entry->limits.fix_limit = true;
+ return;
+ }
+
+ sa_entry->limits.fix_limit = true;
+ }
+
+hard:
+ if (sa_entry->limits.round == attrs->lft.numb_rounds_hard) {
+ mlx5e_ipsec_aso_update_soft(sa_entry, 0);
+ attrs->lft.soft_packet_limit = XFRM_INF;
+ return;
+ }
+
+ mlx5e_ipsec_aso_update_hard(sa_entry);
+ sa_entry->limits.round++;
+ if (sa_entry->limits.round == attrs->lft.numb_rounds_soft)
+ mlx5e_ipsec_aso_update_soft(sa_entry,
+ attrs->lft.soft_packet_limit);
+ if (sa_entry->limits.fix_limit) {
+ sa_entry->limits.fix_limit = false;
+ mlx5e_ipsec_aso_update_soft(sa_entry, BIT_ULL(31) - 1);
+ }
}
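
Hedged aside on the rounds bookkeeping in mlx5e_ipsec_handle_limits(): the ASO packet counter is assumed here to be effectively 32-bit (hence the BIT_ULL(31) adjustments), so a 64-bit packet lifetime has to be consumed in 2^31-packet laps. numb_rounds() below is a hypothetical helper for illustration, not the driver's exact derivation of numb_rounds_soft/hard:

#include <stdio.h>
#include <stdint.h>

/* hypothetical helper: how many full 2^31-packet laps a limit spans */
static uint64_t numb_rounds(uint64_t limit)
{
	return limit / (1ULL << 31);
}

int main(void)
{
	uint64_t hard = 10ULL << 31;	/* ~21.5e9 packets */
	uint64_t soft = 9ULL << 31;

	printf("hard laps=%llu soft laps=%llu\n",
	       (unsigned long long)numb_rounds(hard),
	       (unsigned long long)numb_rounds(soft));
	return 0;
}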
static void mlx5e_ipsec_handle_event(struct work_struct *_work)
{
struct mlx5e_ipsec_work *work =
container_of(_work, struct mlx5e_ipsec_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = work->data;
struct mlx5_accel_esp_xfrm_attrs *attrs;
- struct mlx5e_ipsec_sa_entry *sa_entry;
struct mlx5e_ipsec_aso *aso;
- struct mlx5e_ipsec *ipsec;
int ret;
- sa_entry = xa_load(&work->ipsec->sadb, work->id);
- if (!sa_entry)
- goto out;
-
- ipsec = sa_entry->ipsec;
- aso = ipsec->aso;
+ aso = sa_entry->ipsec->aso;
attrs = &sa_entry->attrs;
spin_lock(&sa_entry->x->lock);
@@ -331,21 +436,18 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work)
if (ret)
goto unlock;
- if (attrs->esn_trigger &&
+ if (attrs->replay_esn.trigger &&
!MLX5_GET(ipsec_aso, aso->ctx, esn_event_arm)) {
u32 mode_param = MLX5_GET(ipsec_aso, aso->ctx, mode_parameter);
mlx5e_ipsec_update_esn_state(sa_entry, mode_param);
}
- if (attrs->soft_packet_limit != XFRM_INF)
- if (!MLX5_GET(ipsec_aso, aso->ctx, soft_lft_arm) ||
- !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm))
- xfrm_state_check_expire(sa_entry->x);
+ if (attrs->lft.soft_packet_limit != XFRM_INF)
+ mlx5e_ipsec_handle_limits(sa_entry);
unlock:
spin_unlock(&sa_entry->x->lock);
-out:
kfree(work);
}
@@ -353,6 +455,7 @@ static int mlx5e_ipsec_event(struct notifier_block *nb, unsigned long event,
void *data)
{
struct mlx5e_ipsec *ipsec = container_of(nb, struct mlx5e_ipsec, nb);
+ struct mlx5e_ipsec_sa_entry *sa_entry;
struct mlx5_eqe_obj_change *object;
struct mlx5e_ipsec_work *work;
struct mlx5_eqe *eqe = data;
@@ -367,13 +470,16 @@ static int mlx5e_ipsec_event(struct notifier_block *nb, unsigned long event,
if (type != MLX5_GENERAL_OBJECT_TYPES_IPSEC)
return NOTIFY_DONE;
+ sa_entry = xa_load(&ipsec->sadb, be32_to_cpu(object->obj_id));
+ if (!sa_entry)
+ return NOTIFY_DONE;
+
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return NOTIFY_DONE;
INIT_WORK(&work->work, mlx5e_ipsec_handle_event);
- work->ipsec = ipsec;
- work->id = be32_to_cpu(object->obj_id);
+ work->data = sa_entry;
queue_work(ipsec->wq, &work->work);
return NOTIFY_OK;
@@ -464,6 +570,7 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_wqe_aso_ctrl_seg *ctrl;
struct mlx5e_hw_objs *res;
struct mlx5_aso_wqe *wqe;
+ unsigned long expires;
u8 ds_cnt;
int ret;
@@ -485,7 +592,12 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry,
mlx5e_ipsec_aso_copy(ctrl, data);
mlx5_aso_post_wqe(aso->aso, false, &wqe->ctrl);
- ret = mlx5_aso_poll_cq(aso->aso, false);
+ expires = jiffies + msecs_to_jiffies(10);
+ do {
+ ret = mlx5_aso_poll_cq(aso->aso, false);
+ if (ret)
+ usleep_range(2, 10);
+ } while (ret && time_is_after_jiffies(expires));
spin_unlock_bh(&aso->lock);
return ret;
}
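
The hunk above replaces a single CQ poll with a bounded retry loop. A standalone sketch of the same pattern: retry a non-blocking poll with short sleeps until a ~10ms deadline expires (poll_cq() is a stand-in that succeeds on the third attempt):

#include <stdio.h>
#include <time.h>

static int poll_cq(void)
{
	static int tries;

	return ++tries < 3 ? -1 : 0;	/* stand-in: succeeds on poll #3 */
}

int main(void)
{
	struct timespec delay = { .tv_nsec = 5000 };	/* cf. usleep_range(2, 10) */
	struct timespec start, now;
	int ret;

	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		ret = poll_cq();
		if (ret)
			nanosleep(&delay, NULL);
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while (ret && (now.tv_sec - start.tv_sec) * 1000000000L +
			(now.tv_nsec - start.tv_nsec) < 10000000L);	/* ~10ms budget */

	printf("ret=%d\n", ret);
	return ret;
}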
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
index 33b3620ea45c..f3428dbeb298 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
@@ -4,6 +4,7 @@
#include <linux/mlx5/device.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/xarray.h>
+#include <linux/if_vlan.h>
#include "en.h"
#include "lib/aso.h"
@@ -348,12 +349,21 @@ static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec,
sa->macsec_rule = NULL;
}
+static struct mlx5e_priv *macsec_netdev_priv(const struct net_device *dev)
+{
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ if (is_vlan_dev(dev))
+ return netdev_priv(vlan_dev_priv(dev)->real_dev);
+#endif
+ return netdev_priv(dev);
+}
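
macsec_netdev_priv() above resolves a VLAN upper device to its real lower device before fetching the driver private data. A minimal sketch of that indirection with a toy dev structure, where a non-NULL real_dev stands in for the is_vlan_dev()/vlan_dev_priv() pair:

#include <stdio.h>

struct dev { struct dev *real_dev; int priv; };

static int *dev_priv(struct dev *d)
{
	if (d->real_dev)		/* cf. is_vlan_dev() */
		d = d->real_dev;	/* cf. vlan_dev_priv()->real_dev */
	return &d->priv;
}

int main(void)
{
	struct dev real = { .priv = 123 };
	struct dev vlan = { .real_dev = &real };

	printf("%d %d\n", *dev_priv(&real), *dev_priv(&vlan));	/* 123 123 */
	return 0;
}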
+
static int mlx5e_macsec_init_sa(struct macsec_context *ctx,
struct mlx5e_macsec_sa *sa,
bool encrypt,
bool is_tx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec *macsec = priv->macsec;
struct mlx5_macsec_rule_attrs rule_attrs;
struct mlx5_core_dev *mdev = priv->mdev;
@@ -427,7 +437,7 @@ static int macsec_rx_sa_active_update(struct macsec_context *ctx,
struct mlx5e_macsec_sa *rx_sa,
bool active)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec *macsec = priv->macsec;
int err = 0;
@@ -508,9 +518,9 @@ static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_ke
static int mlx5e_macsec_add_txsa(struct macsec_context *ctx)
{
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa;
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
const struct macsec_secy *secy = ctx->secy;
struct mlx5e_macsec_device *macsec_device;
struct mlx5_core_dev *mdev = priv->mdev;
@@ -583,9 +593,9 @@ out:
static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx)
{
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa;
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
u8 assoc_num = ctx->sa.assoc_num;
struct mlx5e_macsec_sa *tx_sa;
@@ -645,7 +655,7 @@ out:
static int mlx5e_macsec_del_txsa(struct macsec_context *ctx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
u8 assoc_num = ctx->sa.assoc_num;
struct mlx5e_macsec_sa *tx_sa;
@@ -696,7 +706,7 @@ static u32 mlx5e_macsec_get_sa_from_hashtable(struct rhashtable *sci_hash, sci_t
static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx)
{
struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc;
struct mlx5e_macsec_device *macsec_device;
struct mlx5e_macsec_rx_sc *rx_sc;
@@ -776,7 +786,7 @@ out:
static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc;
struct mlx5e_macsec_device *macsec_device;
struct mlx5e_macsec_rx_sc *rx_sc;
@@ -854,7 +864,7 @@ static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec
static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
struct mlx5e_macsec_rx_sc *rx_sc;
struct mlx5e_macsec *macsec;
@@ -890,8 +900,8 @@ out:
static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx)
{
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa;
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
struct mlx5_core_dev *mdev = priv->mdev;
u8 assoc_num = ctx->sa.assoc_num;
@@ -976,8 +986,8 @@ out:
static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx)
{
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa;
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
u8 assoc_num = ctx->sa.assoc_num;
struct mlx5e_macsec_rx_sc *rx_sc;
@@ -1033,7 +1043,7 @@ out:
static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
sci_t sci = ctx->sa.rx_sa->sc->sci;
struct mlx5e_macsec_rx_sc *rx_sc;
@@ -1085,7 +1095,7 @@ out:
static int mlx5e_macsec_add_secy(struct macsec_context *ctx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct net_device *dev = ctx->secy->netdev;
const struct net_device *netdev = ctx->netdev;
struct mlx5e_macsec_device *macsec_device;
@@ -1137,7 +1147,7 @@ out:
static int macsec_upd_secy_hw_address(struct macsec_context *ctx,
struct mlx5e_macsec_device *macsec_device)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct net_device *dev = ctx->secy->netdev;
struct mlx5e_macsec *macsec = priv->macsec;
struct mlx5e_macsec_rx_sc *rx_sc, *tmp;
@@ -1184,8 +1194,8 @@ out:
*/
static int mlx5e_macsec_upd_secy(struct macsec_context *ctx)
{
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
const struct net_device *dev = ctx->secy->netdev;
struct mlx5e_macsec_device *macsec_device;
struct mlx5e_macsec_sa *tx_sa;
@@ -1240,7 +1250,7 @@ out:
static int mlx5e_macsec_del_secy(struct macsec_context *ctx)
{
- struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
struct mlx5e_macsec_rx_sc *rx_sc, *tmp;
struct mlx5e_macsec_sa *tx_sa;
@@ -1741,7 +1751,7 @@ void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev,
{
struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
u32 macsec_meta_data = be32_to_cpu(cqe->ft_metadata);
- struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_priv *priv = macsec_netdev_priv(netdev);
struct mlx5e_macsec_rx_sc *rx_sc;
struct mlx5e_macsec *macsec;
u32 fs_id;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
index 5b658a5588c6..7fc901a6ec5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
@@ -4,6 +4,7 @@
#include <net/macsec.h>
#include <linux/netdevice.h>
#include <linux/mlx5/qp.h>
+#include <linux/if_vlan.h>
#include "fs_core.h"
#include "en/fs.h"
#include "en_accel/macsec_fs.h"
@@ -292,8 +293,6 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs)
}
/* Tx crypto table MKE rule - MKE packets shouldn't be offloaded */
- memset(&flow_act, 0, sizeof(flow_act));
- memset(spec, 0, sizeof(*spec));
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
@@ -510,6 +509,8 @@ static void macsec_fs_tx_del_rule(struct mlx5e_macsec_fs *macsec_fs,
macsec_fs_tx_ft_put(macsec_fs);
}
+#define MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES 1
+
static union mlx5e_macsec_rule *
macsec_fs_tx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
const struct macsec_context *macsec_ctx,
@@ -555,6 +556,10 @@ macsec_fs_tx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
reformat_params.type = MLX5_REFORMAT_TYPE_ADD_MACSEC;
reformat_params.size = reformat_size;
reformat_params.data = reformatbf;
+
+ if (is_vlan_dev(macsec_ctx->netdev))
+ reformat_params.param_0 = MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES;
+
flow_act.pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev,
&reformat_params,
MLX5_FLOW_NAMESPACE_EGRESS_MACSEC);
@@ -1109,7 +1114,6 @@ static void macsec_fs_rx_setup_fte(struct mlx5_flow_spec *spec,
static union mlx5e_macsec_rule *
macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
- const struct macsec_context *macsec_ctx,
struct mlx5_macsec_rule_attrs *attrs,
u32 fs_id)
{
@@ -1334,7 +1338,7 @@ mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs,
{
return (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ?
macsec_fs_tx_add_rule(macsec_fs, macsec_ctx, attrs, sa_fs_id) :
- macsec_fs_rx_add_rule(macsec_fs, macsec_ctx, attrs, *sa_fs_id);
+ macsec_fs_rx_add_rule(macsec_fs, attrs, *sa_fs_id);
}
void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ec72743b64e2..2fda7385de71 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -803,6 +803,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
pool_size = rq->mpwqe.pages_per_wqe <<
mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk);
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk) && params->xdp_prog)
+ pool_size *= 2; /* additional page per packet for the linear part */
+
rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
rq->mpwqe.num_strides =
BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
@@ -1300,17 +1303,19 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
{
struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
- int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+ int entries = wq_sz * MLX5_SEND_WQEBB_NUM_DS * 2; /* upper bound on the maximum
+ * number of entries across all xmit modes.
+ */
size_t size;
- size = array_size(sizeof(*xdpi_fifo->xi), dsegs_per_wq);
+ size = array_size(sizeof(*xdpi_fifo->xi), entries);
xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa);
if (!xdpi_fifo->xi)
return -ENOMEM;
xdpi_fifo->pc = &sq->xdpi_fifo_pc;
xdpi_fifo->cc = &sq->xdpi_fifo_cc;
- xdpi_fifo->mask = dsegs_per_wq - 1;
+ xdpi_fifo->mask = entries - 1;
return 0;
}
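
The FIFO sizing above doubles the per-WQE descriptor count and derives mask = entries - 1, which only wraps correctly when entries is a power of two. A standalone sketch of that masked free-running-counter indexing:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t entries = 8;		/* must be a power of two */
	uint32_t mask = entries - 1;
	uint32_t pc = 0, cc = 0;	/* free-running producer/consumer */
	int v;

	for (v = 0; v < 10; v++)
		printf("push %d -> slot %u\n", v, pc++ & mask);
	printf("pop      -> slot %u\n", cc++ & mask);
	return 0;
}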
@@ -1860,11 +1865,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
csp.min_inline_mode = sq->min_inline_mode;
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- /* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet
- * supported by upstream, and there is no defined trigger to allow
- * transmitting redirected multi-buffer frames.
- */
- if (param->is_xdp_mb && !is_redirect)
+ if (param->is_xdp_mb)
set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state);
err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
@@ -1888,7 +1889,6 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
- struct mlx5_wqe_data_seg *dseg;
sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) {
.num_wqebbs = 1,
@@ -1897,9 +1897,6 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
-
- dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
- dseg->lkey = sq->mkey_be;
}
}
@@ -4066,9 +4063,9 @@ void mlx5e_set_xdp_feature(struct net_device *netdev)
val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_XSK_ZEROCOPY |
- NETDEV_XDP_ACT_NDO_XMIT;
- if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
- val |= NETDEV_XDP_ACT_RX_SG;
+ NETDEV_XDP_ACT_RX_SG |
+ NETDEV_XDP_ACT_NDO_XMIT |
+ NETDEV_XDP_ACT_NDO_XMIT_SG;
xdp_set_features_flag(netdev, val);
}
@@ -4262,19 +4259,24 @@ static bool mlx5e_params_validate_xdp(struct net_device *netdev,
/* No XSK params: AF_XDP can't be enabled yet at the point of setting
* the XDP program.
*/
- is_linear = mlx5e_rx_is_linear_skb(mdev, params, NULL);
-
- if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
- netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
- params->sw_mtu,
- mlx5e_xdp_max_mtu(params, NULL));
- return false;
- }
- if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) {
- netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
- params->sw_mtu,
- mlx5e_xdp_max_mtu(params, NULL));
- return false;
+ is_linear = params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC ?
+ mlx5e_rx_is_linear_skb(mdev, params, NULL) :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL);
+
+ if (!is_linear) {
+ if (!params->xdp_prog->aux->xdp_has_frags) {
+ netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
+ params->sw_mtu,
+ mlx5e_xdp_max_mtu(params, NULL));
+ return false;
+ }
+ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+ !mlx5e_verify_params_rx_mpwqe_strides(mdev, params, NULL)) {
+ netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
+ params->sw_mtu,
+ mlx5e_xdp_max_mtu(params, NULL));
+ return false;
+ }
}
return true;
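
A hedged sketch of the validation logic above: a non-linear RX buffer is only acceptable if the XDP program declared multi-buffer support, and striding RQ additionally needs stride parameters that can cover the MTU. The predicates below are stand-ins for the mlx5e helpers, not the driver's real signatures:

#include <stdbool.h>
#include <stdio.h>

static bool validate_xdp(bool is_linear, bool prog_has_frags,
			 bool striding_rq, bool strides_ok)
{
	if (is_linear)
		return true;
	if (!prog_has_frags)
		return false;	/* MTU too big for a single-buffer program */
	if (striding_rq && !strides_ok)
		return false;	/* stride parameters cannot cover the MTU */
	return true;
}

int main(void)
{
	printf("%d\n", validate_xdp(false, true, true, true));		/* 1 */
	printf("%d\n", validate_xdp(false, false, false, true));	/* 0 */
	return 0;
}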
@@ -4766,20 +4768,15 @@ static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
queue_work(priv->wq, &priv->tx_timeout_work);
}
-static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
+static int mlx5e_xdp_allowed(struct net_device *netdev, struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
{
- struct net_device *netdev = priv->netdev;
- struct mlx5e_params new_params;
-
- if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+ if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
return -EINVAL;
}
- new_params = priv->channels.params;
- new_params.xdp_prog = prog;
-
- if (!mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params))
+ if (!mlx5e_params_validate_xdp(netdev, mdev, params))
return -EINVAL;
return 0;
@@ -4806,8 +4803,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
mutex_lock(&priv->state_lock);
+ new_params = priv->channels.params;
+ new_params.xdp_prog = prog;
+
if (prog) {
- err = mlx5e_xdp_allowed(priv, prog);
+ err = mlx5e_xdp_allowed(netdev, priv->mdev, &new_params);
if (err)
goto unlock;
}
@@ -4815,22 +4815,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
/* no need for full reset when exchanging programs */
reset = (!priv->channels.params.xdp_prog || !prog);
- new_params = priv->channels.params;
- new_params.xdp_prog = prog;
-
- /* XDP affects striding RQ parameters. Block XDP if striding RQ won't be
- * supported with the new parameters: if PAGE_SIZE is bigger than
- * MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though
- * the MTU is small enough for the linear mode, because XDP uses strides
- * of PAGE_SIZE on regular RQs.
- */
- if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
- /* Checking for regular RQs here; XSK RQs were checked on XSK bind. */
- err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params);
- if (err)
- goto unlock;
- }
-
old_prog = priv->channels.params.xdp_prog;
err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
@@ -5125,6 +5109,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->vlan_features |= NETIF_F_SG;
netdev->vlan_features |= NETIF_F_HW_CSUM;
+ netdev->vlan_features |= NETIF_F_HW_MACSEC;
netdev->vlan_features |= NETIF_F_GRO;
netdev->vlan_features |= NETIF_F_TSO;
netdev->vlan_features |= NETIF_F_TSO6;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 1049805571c6..a8c2ae389d6c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -471,6 +471,35 @@ static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
return i;
}
+static void
+mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinfo,
+ struct xdp_buff *xdp, struct mlx5e_frag_page *frag_page,
+ u32 frag_offset, u32 len)
+{
+ skb_frag_t *frag;
+
+ dma_addr_t addr = page_pool_get_dma_addr(frag_page->page);
+
+ dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir);
+ if (!xdp_buff_has_frags(xdp)) {
+ /* Init on the first fragment to avoid cold cache access
+ * when possible.
+ */
+ sinfo->nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ xdp_buff_set_frags_flag(xdp);
+ }
+
+ frag = &sinfo->frags[sinfo->nr_frags++];
+ __skb_frag_set_page(frag, frag_page->page);
+ skb_frag_off_set(frag, frag_offset);
+ skb_frag_size_set(frag, len);
+
+ if (page_is_pfmemalloc(frag_page->page))
+ xdp_buff_set_frag_pfmemalloc(xdp);
+ sinfo->xdp_frags_size += len;
+}
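
mlx5e_add_skb_shared_info_frag() initializes the shared-info bookkeeping lazily, on the first fragment only, so single-buffer packets never touch those cold cache lines. A standalone sketch of that pattern:

#include <stdio.h>
#include <stdbool.h>

struct sinfo { unsigned int nr_frags; unsigned int frags_size; };

static void add_frag(struct sinfo *s, bool *has_frags, unsigned int len)
{
	if (!*has_frags) {		/* first fragment: initialize lazily */
		s->nr_frags = 0;
		s->frags_size = 0;
		*has_frags = true;
	}
	s->nr_frags++;
	s->frags_size += len;
}

int main(void)
{
	struct sinfo s;			/* untouched until the first fragment */
	bool has_frags = false;

	add_frag(&s, &has_frags, 1500);
	add_frag(&s, &has_frags, 800);
	printf("%u frags, %u bytes\n", s.nr_frags, s.frags_size);
	return 0;
}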
+
static inline void
mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
struct page *page, u32 frag_offset, u32 len,
@@ -1601,10 +1630,10 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
}
static void mlx5e_fill_mxbuf(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
- void *va, u16 headroom, u32 len,
+ void *va, u16 headroom, u32 frame_sz, u32 len,
struct mlx5e_xdp_buff *mxbuf)
{
- xdp_init_buff(&mxbuf->xdp, rq->buff.frame0_sz, &rq->xdp_rxq);
+ xdp_init_buff(&mxbuf->xdp, frame_sz, &rq->xdp_rxq);
xdp_prepare_buff(&mxbuf->xdp, va, headroom, len, true);
mxbuf->cqe = cqe;
mxbuf->rq = rq;
@@ -1637,7 +1666,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
struct mlx5e_xdp_buff mxbuf;
net_prefetchw(va); /* xdp_frame data area */
- mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf);
+ mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
+ cqe_bcnt, &mxbuf);
if (mlx5e_xdp_handle(rq, prog, &mxbuf))
return NULL; /* page/packet was consumed by XDP */
@@ -1685,7 +1715,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
net_prefetchw(va); /* xdp_frame data area */
net_prefetch(va + rx_headroom);
- mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, frag_consumed_bytes, &mxbuf);
+ mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
+ frag_consumed_bytes, &mxbuf);
sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
truesize = 0;
@@ -1694,35 +1725,12 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
wi++;
while (cqe_bcnt) {
- skb_frag_t *frag;
-
frag_page = wi->frag_page;
frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
- addr = page_pool_get_dma_addr(frag_page->page);
- dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
- frag_consumed_bytes, rq->buff.map_dir);
-
- if (!xdp_buff_has_frags(&mxbuf.xdp)) {
- /* Init on the first fragment to avoid cold cache access
- * when possible.
- */
- sinfo->nr_frags = 0;
- sinfo->xdp_frags_size = 0;
- xdp_buff_set_frags_flag(&mxbuf.xdp);
- }
-
- frag = &sinfo->frags[sinfo->nr_frags++];
-
- __skb_frag_set_page(frag, frag_page->page);
- skb_frag_off_set(frag, wi->offset);
- skb_frag_size_set(frag, frag_consumed_bytes);
-
- if (page_is_pfmemalloc(frag_page->page))
- xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp);
-
- sinfo->xdp_frags_size += frag_consumed_bytes;
+ mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page,
+ wi->offset, frag_consumed_bytes);
truesize += frag_info->frag_stride;
cqe_bcnt -= frag_consumed_bytes;
@@ -1969,35 +1977,139 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx];
u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
struct mlx5e_frag_page *head_page = frag_page;
- u32 frag_offset = head_offset + headlen;
- u32 byte_cnt = cqe_bcnt - headlen;
+ u32 frag_offset = head_offset;
+ u32 byte_cnt = cqe_bcnt;
+ struct skb_shared_info *sinfo;
+ struct mlx5e_xdp_buff mxbuf;
+ unsigned int truesize = 0;
+ struct bpf_prog *prog;
struct sk_buff *skb;
- dma_addr_t addr;
+ u32 linear_frame_sz;
+ u16 linear_data_len;
+ u16 linear_hr;
+ void *va;
- skb = napi_alloc_skb(rq->cq.napi,
- ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
- if (unlikely(!skb)) {
- rq->stats->buff_alloc_err++;
- return NULL;
+ prog = rcu_dereference(rq->xdp_prog);
+
+ if (prog) {
+ /* area for bpf_xdp_[store|load]_bytes */
+ net_prefetchw(page_address(frag_page->page) + frag_offset);
+ if (unlikely(mlx5e_page_alloc_fragmented(rq, &wi->linear_page))) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+ va = page_address(wi->linear_page.page);
+ net_prefetchw(va); /* xdp_frame data area */
+ linear_hr = XDP_PACKET_HEADROOM;
+ linear_data_len = 0;
+ linear_frame_sz = MLX5_SKB_FRAG_SZ(linear_hr + MLX5E_RX_MAX_HEAD);
+ } else {
+ skb = napi_alloc_skb(rq->cq.napi,
+ ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+ skb_mark_for_recycle(skb);
+ va = skb->head;
+ net_prefetchw(va); /* xdp_frame data area */
+ net_prefetchw(skb->data);
+
+ frag_offset += headlen;
+ byte_cnt -= headlen;
+ linear_hr = skb_headroom(skb);
+ linear_data_len = headlen;
+ linear_frame_sz = MLX5_SKB_FRAG_SZ(skb_end_offset(skb));
+ if (unlikely(frag_offset >= PAGE_SIZE)) {
+ frag_page++;
+ frag_offset -= PAGE_SIZE;
+ }
}
- net_prefetchw(skb->data);
+ mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, linear_data_len, &mxbuf);
+
+ sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
+
+ while (byte_cnt) {
+ /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
+ u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
- /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
- if (unlikely(frag_offset >= PAGE_SIZE)) {
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ truesize += pg_consumed_bytes;
+ else
+ truesize += ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
+
+ mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, frag_offset,
+ pg_consumed_bytes);
+ byte_cnt -= pg_consumed_bytes;
+ frag_offset = 0;
frag_page++;
- frag_offset -= PAGE_SIZE;
}
- skb_mark_for_recycle(skb);
- mlx5e_fill_skb_data(skb, rq, frag_page, byte_cnt, frag_offset);
- /* copy header */
- addr = page_pool_get_dma_addr(head_page->page);
- mlx5e_copy_skb_header(rq, skb, head_page->page, addr,
- head_offset, head_offset, headlen);
- /* skb linear part was allocated with headlen and aligned to long */
- skb->tail += headlen;
- skb->len += headlen;
+ if (prog) {
+ if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+ int i;
+
+ for (i = 0; i < sinfo->nr_frags; i++)
+ /* non-atomic */
+ __set_bit(page_idx + i, wi->skip_release_bitmap);
+ return NULL;
+ }
+ mlx5e_page_release_fragmented(rq, &wi->linear_page);
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start,
+ linear_frame_sz,
+ mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 0,
+ mxbuf.xdp.data - mxbuf.xdp.data_meta);
+ if (unlikely(!skb)) {
+ mlx5e_page_release_fragmented(rq, &wi->linear_page);
+ return NULL;
+ }
+
+ skb_mark_for_recycle(skb);
+ wi->linear_page.frags++;
+ mlx5e_page_release_fragmented(rq, &wi->linear_page);
+
+ if (xdp_buff_has_frags(&mxbuf.xdp)) {
+ struct mlx5e_frag_page *pagep;
+
+ /* sinfo->nr_frags is reset by build_skb, calculate again. */
+ xdp_update_skb_shared_info(skb, frag_page - head_page,
+ sinfo->xdp_frags_size, truesize,
+ xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
+
+ pagep = head_page;
+ do
+ pagep->frags++;
+ while (++pagep < frag_page);
+ }
+ __pskb_pull_tail(skb, headlen);
+ } else {
+ dma_addr_t addr;
+
+ if (xdp_buff_has_frags(&mxbuf.xdp)) {
+ struct mlx5e_frag_page *pagep;
+
+ xdp_update_skb_shared_info(skb, sinfo->nr_frags,
+ sinfo->xdp_frags_size, truesize,
+ xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
+
+ pagep = frag_page - sinfo->nr_frags;
+ do
+ pagep->frags++;
+ while (++pagep < frag_page);
+ }
+ /* copy header */
+ addr = page_pool_get_dma_addr(head_page->page);
+ mlx5e_copy_skb_header(rq, skb, head_page->page, addr,
+ head_offset, head_offset, headlen);
+ /* skb linear part was allocated with headlen and aligned to long */
+ skb->tail += headlen;
+ skb->len += headlen;
+ }
return skb;
}
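
After attaching the fragments to the skb, the non-linear MPWQE path above bumps a per-page frags count by walking from head_page up to (but excluding) the cursor one past the last used page. A standalone sketch of that walk:

#include <stdio.h>

struct frag_page { int frags; };

int main(void)
{
	struct frag_page pages[4] = { { 0 } };
	struct frag_page *head_page = &pages[0];
	struct frag_page *frag_page = &pages[3];	/* one past the last used page */
	struct frag_page *pagep = head_page;
	int i;

	do
		pagep->frags++;
	while (++pagep < frag_page);

	for (i = 0; i < 4; i++)
		printf("page %d frags=%d\n", i, pages[i].frags);	/* 1 1 1 0 */
	return 0;
}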
@@ -2036,7 +2148,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
struct mlx5e_xdp_buff mxbuf;
net_prefetchw(va); /* xdp_frame data area */
- mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf);
+ mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
+ cqe_bcnt, &mxbuf);
if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
__set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 083ce31f95b6..728b82ce4031 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -177,7 +177,8 @@ static struct lock_class_key tc_ht_wq_key;
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
-static void mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr);
+static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr);
void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
@@ -487,15 +488,6 @@ mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
int err;
- if (attr->flags & MLX5_ATTR_FLAG_CT) {
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts =
- &attr->parse_attr->mod_hdr_acts;
-
- return mlx5_tc_ct_flow_offload(get_ct_priv(priv),
- spec, attr,
- mod_hdr_acts);
- }
-
if (!is_mdev_switchdev_mode(priv->mdev))
return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
@@ -518,11 +510,6 @@ mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- if (attr->flags & MLX5_ATTR_FLAG_CT) {
- mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr);
- return;
- }
-
if (!is_mdev_switchdev_mode(priv->mdev)) {
mlx5e_del_offloaded_nic_rule(priv, rule, attr);
return;
@@ -1395,13 +1382,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
return err;
}
- if (attr->flags & MLX5_ATTR_FLAG_CT)
- flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec,
- attr, &parse_attr->mod_hdr_acts);
- else
- flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
- attr);
-
+ flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, attr);
return PTR_ERR_OR_ZERO(flow->rule[0]);
}
@@ -1432,9 +1413,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
flow_flag_clear(flow, OFFLOADED);
- if (attr->flags & MLX5_ATTR_FLAG_CT)
- mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
- else if (!IS_ERR_OR_NULL(flow->rule[0]))
+ if (!IS_ERR_OR_NULL(flow->rule[0]))
mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
/* Remove root table if no rules are left to avoid
@@ -1785,8 +1764,7 @@ out:
static void
clean_encap_dests(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
- struct mlx5_flow_attr *attr,
- bool *vf_tun)
+ struct mlx5_flow_attr *attr)
{
struct mlx5_esw_flow_attr *esw_attr;
int out_index;
@@ -1795,17 +1773,11 @@ clean_encap_dests(struct mlx5e_priv *priv,
return;
esw_attr = attr->esw_attr;
- *vf_tun = false;
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
continue;
- if (esw_attr->dests[out_index].flags &
- MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
- !esw_attr->dest_int_port)
- *vf_tun = true;
-
mlx5e_detach_encap(priv, flow, attr, out_index);
kfree(attr->parse_attr->tun_info[out_index]);
}
@@ -2028,7 +2000,7 @@ static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *
if (!attr)
return;
- mlx5_free_flow_attr(flow, attr);
+ mlx5_free_flow_attr_actions(flow, attr);
kvfree(attr->parse_attr);
kfree(attr);
}
@@ -2039,7 +2011,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
- bool vf_tun;
esw_attr = attr->esw_attr;
mlx5e_put_flow_tunnel_id(flow);
@@ -2061,18 +2032,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
if (flow->decap_route)
mlx5e_detach_decap_route(priv, flow);
- clean_encap_dests(priv, flow, attr, &vf_tun);
-
mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
- mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
- mlx5e_tc_detach_mod_hdr(priv, flow, attr);
- }
-
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
-
if (esw_attr->int_port)
mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
@@ -2085,8 +2046,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow);
free_flow_post_acts(flow);
- free_branch_attr(flow, attr->branch_true);
- free_branch_attr(flow, attr->branch_false);
+ mlx5_free_flow_attr_actions(flow, attr);
kvfree(attr->esw_attr->rx_tun_attr);
kvfree(attr->parse_attr);
@@ -3463,114 +3423,59 @@ struct ipv6_hoplimit_word {
};
static bool
-is_action_keys_supported(const struct flow_action_entry *act, bool ct_flow,
- bool *modify_ip_header, bool *modify_tuple,
- struct netlink_ext_ack *extack)
+is_flow_action_modify_ip_header(struct flow_action *flow_action)
{
+ const struct flow_action_entry *act;
u32 mask, offset;
u8 htype;
+ int i;
- htype = act->mangle.htype;
- offset = act->mangle.offset;
- mask = ~act->mangle.mask;
/* For the IPv4 & IPv6 headers, check the 4-byte word
* to determine whether fields other than
* ttl & hop_limit were modified.
*/
- if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
- struct ip_ttl_word *ttl_word =
- (struct ip_ttl_word *)&mask;
-
- if (offset != offsetof(struct iphdr, ttl) ||
- ttl_word->protocol ||
- ttl_word->check) {
- *modify_ip_header = true;
- }
-
- if (offset >= offsetof(struct iphdr, saddr))
- *modify_tuple = true;
-
- if (ct_flow && *modify_tuple) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't offload re-write of ipv4 address with action ct");
- return false;
- }
- } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
- struct ipv6_hoplimit_word *hoplimit_word =
- (struct ipv6_hoplimit_word *)&mask;
-
- if (offset != offsetof(struct ipv6hdr, payload_len) ||
- hoplimit_word->payload_len ||
- hoplimit_word->nexthdr) {
- *modify_ip_header = true;
- }
-
- if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
- *modify_tuple = true;
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id != FLOW_ACTION_MANGLE &&
+ act->id != FLOW_ACTION_ADD)
+ continue;
- if (ct_flow && *modify_tuple) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't offload re-write of ipv6 address with action ct");
- return false;
+ htype = act->mangle.htype;
+ offset = act->mangle.offset;
+ mask = ~act->mangle.mask;
+
+ if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
+ struct ip_ttl_word *ttl_word =
+ (struct ip_ttl_word *)&mask;
+
+ if (offset != offsetof(struct iphdr, ttl) ||
+ ttl_word->protocol ||
+ ttl_word->check)
+ return true;
+ } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
+ struct ipv6_hoplimit_word *hoplimit_word =
+ (struct ipv6_hoplimit_word *)&mask;
+
+ if (offset != offsetof(struct ipv6hdr, payload_len) ||
+ hoplimit_word->payload_len ||
+ hoplimit_word->nexthdr)
+ return true;
}
- } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
- htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
- *modify_tuple = true;
- if (ct_flow) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't offload re-write of transport header ports with action ct");
- return false;
- }
- }
-
- return true;
-}
-
-static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
- bool ct_flow, struct netlink_ext_ack *extack,
- struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec)
-{
- if (!modify_tuple || ct_clear)
- return true;
-
- if (ct_flow) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't offload tuple modification with non-clear ct()");
- netdev_info(priv->netdev,
- "can't offload tuple modification with non-clear ct()");
- return false;
- }
-
- /* Add ct_state=-trk match so it will be offloaded for non ct flows
- * (or after clear action), as otherwise, since the tuple is changed,
- * we can't restore ct state
- */
- if (mlx5_tc_ct_add_no_trk_match(spec)) {
- NL_SET_ERR_MSG_MOD(extack,
- "can't offload tuple modification with ct matches and no ct(clear) action");
- netdev_info(priv->netdev,
- "can't offload tuple modification with ct matches and no ct(clear) action");
- return false;
}
- return true;
+ return false;
}
static bool modify_header_match_supported(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct flow_action *flow_action,
- u32 actions, bool ct_flow,
- bool ct_clear,
+ u32 actions,
struct netlink_ext_ack *extack)
{
- const struct flow_action_entry *act;
- bool modify_ip_header, modify_tuple;
+ bool modify_ip_header;
void *headers_c;
void *headers_v;
u16 ethertype;
u8 ip_proto;
- int i;
headers_c = mlx5e_get_match_headers_criteria(actions, spec);
headers_v = mlx5e_get_match_headers_value(actions, spec);
@@ -3581,23 +3486,7 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv,
ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
goto out_ok;
- modify_ip_header = false;
- modify_tuple = false;
- flow_action_for_each(i, act, flow_action) {
- if (act->id != FLOW_ACTION_MANGLE &&
- act->id != FLOW_ACTION_ADD)
- continue;
-
- if (!is_action_keys_supported(act, ct_flow,
- &modify_ip_header,
- &modify_tuple, extack))
- return false;
- }
-
- if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
- priv, spec))
- return false;
-
+ modify_ip_header = is_flow_action_modify_ip_header(flow_action);
ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
if (modify_ip_header && ip_proto != IPPROTO_TCP &&
ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
@@ -3618,19 +3507,6 @@ actions_match_supported_fdb(struct mlx5e_priv *priv,
struct netlink_ext_ack *extack)
{
struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
- bool ct_flow, ct_clear;
-
- ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
- ct_flow = flow_flag_test(flow, CT) && !ct_clear;
-
- if (esw_attr->split_count && ct_flow &&
- !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) {
- /* All registers used by ct are cleared when using
- * split rules.
- */
- NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct");
- return false;
- }
if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
NL_SET_ERR_MSG_MOD(extack,
@@ -3651,14 +3527,9 @@ actions_match_supported(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
- bool ct_flow, ct_clear;
-
- ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
- ct_flow = flow_flag_test(flow, CT) && !ct_clear;
-
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
- !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
- actions, ct_flow, ct_clear, extack))
+ !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions,
+ extack))
return false;
if (mlx5e_is_eswitch_flow(flow) &&
@@ -3752,6 +3623,7 @@ mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
attr2->dest_chain = 0;
attr2->dest_ft = NULL;
attr2->act_id_restore_rule = NULL;
+ memset(&attr2->ct_attr, 0, sizeof(attr2->ct_attr));
if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
attr2->esw_attr->out_count = 0;
@@ -3805,9 +3677,7 @@ free_flow_post_acts(struct mlx5e_tc_flow *flow)
if (list_is_last(&attr->list, &flow->attrs))
break;
- mlx5_free_flow_attr(flow, attr);
- free_branch_attr(flow, attr->branch_true);
- free_branch_attr(flow, attr->branch_false);
+ mlx5_free_flow_attr_actions(flow, attr);
list_del(&attr->list);
kvfree(attr->parse_attr);
@@ -4065,76 +3935,79 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
struct flow_action *flow_action)
{
struct netlink_ext_ack *extack = parse_state->extack;
- struct mlx5e_tc_flow_action flow_action_reorder;
struct mlx5e_tc_flow *flow = parse_state->flow;
struct mlx5e_tc_jump_state jump_state = {};
struct mlx5_flow_attr *attr = flow->attr;
enum mlx5_flow_namespace_type ns_type;
struct mlx5e_priv *priv = flow->priv;
- struct flow_action_entry *act, **_act;
+ struct mlx5_flow_attr *prev_attr;
+ struct flow_action_entry *act;
struct mlx5e_tc_act *tc_act;
+ bool is_missable;
int err, i;
- flow_action_reorder.num_entries = flow_action->num_entries;
- flow_action_reorder.entries = kcalloc(flow_action->num_entries,
- sizeof(flow_action), GFP_KERNEL);
- if (!flow_action_reorder.entries)
- return -ENOMEM;
-
- mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder);
-
ns_type = mlx5e_get_flow_namespace(flow);
list_add(&attr->list, &flow->attrs);
- flow_action_for_each(i, _act, &flow_action_reorder) {
+ flow_action_for_each(i, act, flow_action) {
jump_state.jump_target = false;
- act = *_act;
+ is_missable = false;
+ prev_attr = attr;
+
tc_act = mlx5e_tc_act_get(act->id, ns_type);
if (!tc_act) {
NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
err = -EOPNOTSUPP;
- goto out_free;
+ goto out_free_post_acts;
}
- if (!tc_act->can_offload(parse_state, act, i, attr)) {
+ if (tc_act->can_offload && !tc_act->can_offload(parse_state, act, i, attr)) {
err = -EOPNOTSUPP;
- goto out_free;
+ goto out_free_post_acts;
}
err = tc_act->parse_action(parse_state, act, priv, attr);
if (err)
- goto out_free;
+ goto out_free_post_acts;
dec_jump_count(act, tc_act, attr, priv, &jump_state);
err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack);
if (err)
- goto out_free;
+ goto out_free_post_acts;
parse_state->actions |= attr->action;
- if (!tc_act->stats_action)
- attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie;
/* Split attr for multi table act if not the last act. */
if (jump_state.jump_target ||
(tc_act->is_multi_table_act &&
tc_act->is_multi_table_act(priv, act, attr) &&
- i < flow_action_reorder.num_entries - 1)) {
+ i < flow_action->num_entries - 1)) {
+ is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false;
+
err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
if (err)
- goto out_free;
+ goto out_free_post_acts;
attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
if (!attr) {
err = -ENOMEM;
- goto out_free;
+ goto out_free_post_acts;
}
list_add(&attr->list, &flow->attrs);
}
- }
- kfree(flow_action_reorder.entries);
+ if (is_missable) {
+ /* Add counter to prev, and assign act to new (next) attr */
+ prev_attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ flow_flag_set(flow, USE_ACT_STATS);
+
+ attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie;
+ } else if (!tc_act->stats_action) {
+ prev_attr->tc_act_cookies[prev_attr->tc_act_cookies_count++] = act->cookie;
+ }
+ }
err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
if (err)
@@ -4146,8 +4019,6 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
return 0;
-out_free:
- kfree(flow_action_reorder.entries);
out_free_post_acts:
free_flow_post_acts(flow);
@@ -4442,10 +4313,9 @@ mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
}
static void
-mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
+mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
{
struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
- bool vf_tun;
if (!attr)
return;
@@ -4453,7 +4323,7 @@ mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
if (attr->post_act_handle)
mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle);
- clean_encap_dests(flow->priv, flow, attr, &vf_tun);
+ clean_encap_dests(flow->priv, flow, attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
mlx5_fc_destroy(counter_dev, attr->counter);
@@ -4462,6 +4332,11 @@ mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
}
+
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
+
+ free_branch_attr(flow, attr->branch_true);
+ free_branch_attr(flow, attr->branch_false);
}
static int
@@ -4923,7 +4798,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
goto errout;
}
- if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
+ if (mlx5e_is_offloaded_flow(flow)) {
if (flow_flag_test(flow, USE_ACT_STATS)) {
f->use_act_stats = true;
} else {
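
The reworked parse_tc_actions() above changes how action cookies are tracked: a missable multi-table action puts a flow counter on the attribute created before the split and records its cookie on the new attribute, while an ordinary action keeps its cookie on prev_attr. Below is a minimal userspace C model of that bookkeeping; all names and the fixed-size array are invented stand-ins, not the driver's types.

#include <stdbool.h>
#include <stdio.h>

#define MAX_COOKIES 8

struct attr {
	unsigned long cookies[MAX_COOKIES];
	int ncookies;
	bool has_counter;
};

/* Mirrors the is_missable / stats_action branches in the loop above. */
static void record_act(struct attr *prev, struct attr *cur,
		       unsigned long cookie, bool missable, bool has_stats)
{
	if (missable) {
		prev->has_counter = true;		/* counter on prev attr */
		cur->cookies[cur->ncookies++] = cookie;	/* cookie on new attr */
	} else if (!has_stats) {
		prev->cookies[prev->ncookies++] = cookie;
	}
}

int main(void)
{
	struct attr a = {0}, b = {0};

	record_act(&a, &b, 0x1, true, false);	/* missable: split case */
	record_act(&a, &a, 0x2, false, false);	/* ordinary action */
	printf("a: counter=%d cookies=%d, b: cookies=%d\n",
	       a.has_counter, a.ncookies, b.ncookies);
	return 0;
}
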
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index eb41f0abf798..1c35d721a31d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -1070,10 +1070,11 @@ mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
list_for_each_entry(eq, &table->comp_eqs_list, list) {
if (i++ == vector)
- break;
+ return mlx5_irq_get_affinity_mask(eq->core.irq);
}
- return mlx5_irq_get_affinity_mask(eq->core.irq);
+ WARN_ON_ONCE(1);
+ return NULL;
}
EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
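
The eq.c hunk converts a lookup that silently returned the last list element for an out-of-range vector into one that returns from inside the loop and warns on a miss. A standalone sketch of the corrected pattern, with stand-in types and assert() modelling WARN_ON_ONCE():

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct eq { int irq; struct eq *next; };

static int *irq_mask_of(struct eq *head, int vector)
{
	int i = 0;

	for (struct eq *eq = head; eq; eq = eq->next)
		if (i++ == vector)
			return &eq->irq;	/* found: return immediately */
	assert(!"vector out of range");		/* models WARN_ON_ONCE(1) */
	return NULL;
}

int main(void)
{
	struct eq b = { .irq = 11, .next = NULL };
	struct eq a = { .irq = 10, .next = &b };

	printf("%d\n", *irq_mask_of(&a, 1));	/* prints 11 */
	return 0;
}
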
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index 3cdcb0e0b20f..1ba03e219111 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -13,66 +13,6 @@
#define CREATE_TRACE_POINTS
#include "diag/bridge_tracepoint.h"
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE 12000
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE 16000
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM + \
- MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM + \
- MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM + \
- MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM + \
- MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE \
- (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO + 1)
-static_assert(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE == 64000);
-
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE 16000
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE (32000 - 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM + \
- MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM + \
- MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE - 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1)
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO \
- MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE \
- (MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO + 1)
-static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 64000);
-
-#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0
-
-enum {
- MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
- MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
- MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
-};
-
static const struct rhashtable_params fdb_ht_params = {
.key_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, key),
.key_len = sizeof(struct mlx5_esw_bridge_fdb_key),
@@ -80,31 +20,6 @@ static const struct rhashtable_params fdb_ht_params = {
.automatic_shrinking = true,
};
-enum {
- MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0),
-};
-
-struct mlx5_esw_bridge {
- int ifindex;
- int refcnt;
- struct list_head list;
- struct mlx5_esw_bridge_offloads *br_offloads;
-
- struct list_head fdb_list;
- struct rhashtable fdb_ht;
-
- struct mlx5_flow_table *egress_ft;
- struct mlx5_flow_group *egress_vlan_fg;
- struct mlx5_flow_group *egress_qinq_fg;
- struct mlx5_flow_group *egress_mac_fg;
- struct mlx5_flow_group *egress_miss_fg;
- struct mlx5_pkt_reformat *egress_miss_pkt_reformat;
- struct mlx5_flow_handle *egress_miss_handle;
- unsigned long ageing_time;
- u32 flags;
- u16 vlan_proto;
-};
-
static void
mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *addr, u16 vid,
unsigned long val)
@@ -146,7 +61,7 @@ mlx5_esw_bridge_pkt_reformat_vlan_pop_create(struct mlx5_eswitch *esw)
return mlx5_packet_reformat_alloc(esw->dev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB);
}
-static struct mlx5_flow_table *
+struct mlx5_flow_table *
mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
{
struct mlx5_flow_table_attr ft_attr = {};
@@ -925,6 +840,10 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
if (err)
goto err_fdb_ht;
+ err = mlx5_esw_bridge_mdb_init(bridge);
+ if (err)
+ goto err_mdb_ht;
+
INIT_LIST_HEAD(&bridge->fdb_list);
bridge->ifindex = ifindex;
bridge->refcnt = 1;
@@ -934,6 +853,8 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
return bridge;
+err_mdb_ht:
+ rhashtable_destroy(&bridge->fdb_ht);
err_fdb_ht:
mlx5_esw_bridge_egress_table_cleanup(bridge);
err_egress_tbl:
@@ -953,7 +874,9 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads,
return;
mlx5_esw_bridge_egress_table_cleanup(bridge);
+ mlx5_esw_bridge_mcast_disable(bridge);
list_del(&bridge->list);
+ mlx5_esw_bridge_mdb_cleanup(bridge);
rhashtable_destroy(&bridge->fdb_ht);
kvfree(bridge);
@@ -993,7 +916,7 @@ static unsigned long mlx5_esw_bridge_port_key_from_data(u16 vport_num, u16 esw_o
return vport_num | (unsigned long)esw_owner_vhca_id << sizeof(vport_num) * BITS_PER_BYTE;
}
-static unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port)
+unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port)
{
return mlx5_esw_bridge_port_key_from_data(port->vport_num, port->esw_owner_vhca_id);
}
@@ -1018,6 +941,19 @@ static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port,
xa_erase(&br_offloads->ports, mlx5_esw_bridge_port_key(port));
}
+static struct mlx5_esw_bridge *
+mlx5_esw_bridge_from_port_lookup(u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return NULL;
+
+ return port->bridge;
+}
+
static void mlx5_esw_bridge_fdb_entry_refresh(struct mlx5_esw_bridge_fdb_entry *entry)
{
trace_mlx5_esw_bridge_fdb_entry_refresh(entry);
@@ -1166,8 +1102,21 @@ mlx5_esw_bridge_vlan_push_mark_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct
}
static int
-mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_bridge_vlan *vlan,
- struct mlx5_eswitch *esw)
+mlx5_esw_bridge_vlan_push_pop_fhs_create(u16 vlan_proto, struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan)
+{
+ return mlx5_esw_bridge_vlan_mcast_init(vlan_proto, port, vlan);
+}
+
+static void
+mlx5_esw_bridge_vlan_push_pop_fhs_cleanup(struct mlx5_esw_bridge_vlan *vlan)
+{
+ mlx5_esw_bridge_vlan_mcast_cleanup(vlan);
+}
+
+static int
+mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
{
int err;
@@ -1185,10 +1134,16 @@ mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_
err = mlx5_esw_bridge_vlan_pop_create(vlan, esw);
if (err)
goto err_vlan_pop;
+
+ err = mlx5_esw_bridge_vlan_push_pop_fhs_create(vlan_proto, port, vlan);
+ if (err)
+ goto err_vlan_pop_fhs;
}
return 0;
+err_vlan_pop_fhs:
+ mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
err_vlan_pop:
if (vlan->pkt_mod_hdr_push_mark)
mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw);
@@ -1213,7 +1168,7 @@ mlx5_esw_bridge_vlan_create(u16 vlan_proto, u16 vid, u16 flags, struct mlx5_esw_
vlan->flags = flags;
INIT_LIST_HEAD(&vlan->fdb_list);
- err = mlx5_esw_bridge_vlan_push_pop_create(vlan_proto, flags, vlan, esw);
+ err = mlx5_esw_bridge_vlan_push_pop_create(vlan_proto, flags, port, vlan, esw);
if (err)
goto err_vlan_push_pop;
@@ -1225,6 +1180,8 @@ mlx5_esw_bridge_vlan_create(u16 vlan_proto, u16 vid, u16 flags, struct mlx5_esw_
return vlan;
err_xa_insert:
+ if (vlan->mcast_handle)
+ mlx5_esw_bridge_vlan_push_pop_fhs_cleanup(vlan);
if (vlan->pkt_reformat_pop)
mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
if (vlan->pkt_mod_hdr_push_mark)
@@ -1242,7 +1199,8 @@ static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port,
xa_erase(&port->vlans, vlan->vid);
}
-static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan,
+static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan,
struct mlx5_esw_bridge *bridge)
{
struct mlx5_eswitch *esw = bridge->br_offloads->esw;
@@ -1250,7 +1208,10 @@ static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan,
list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list)
mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge);
+ mlx5_esw_bridge_port_mdb_vlan_flush(port, vlan);
+ if (vlan->mcast_handle)
+ mlx5_esw_bridge_vlan_push_pop_fhs_cleanup(vlan);
if (vlan->pkt_reformat_pop)
mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
if (vlan->pkt_mod_hdr_push_mark)
@@ -1264,7 +1225,7 @@ static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port,
struct mlx5_esw_bridge *bridge)
{
trace_mlx5_esw_bridge_vlan_cleanup(vlan);
- mlx5_esw_bridge_vlan_flush(vlan, bridge);
+ mlx5_esw_bridge_vlan_flush(port, vlan, bridge);
mlx5_esw_bridge_vlan_erase(port, vlan);
kvfree(vlan);
}
@@ -1288,9 +1249,9 @@ static int mlx5_esw_bridge_port_vlans_recreate(struct mlx5_esw_bridge_port *port
int err;
xa_for_each(&port->vlans, i, vlan) {
- mlx5_esw_bridge_vlan_flush(vlan, bridge);
- err = mlx5_esw_bridge_vlan_push_pop_create(bridge->vlan_proto, vlan->flags, vlan,
- br_offloads->esw);
+ mlx5_esw_bridge_vlan_flush(port, vlan, bridge);
+ err = mlx5_esw_bridge_vlan_push_pop_create(bridge->vlan_proto, vlan->flags, port,
+ vlan, br_offloads->esw);
if (err) {
esw_warn(br_offloads->esw->dev,
"Failed to create VLAN=%u(proto=%x) push/pop actions (vport=%u,err=%d)\n",
@@ -1473,33 +1434,32 @@ err_ingress_fc_create:
int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time,
struct mlx5_esw_bridge_offloads *br_offloads)
{
- struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
- port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
- if (!port)
+ bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!bridge)
return -EINVAL;
- port->bridge->ageing_time = clock_t_to_jiffies(ageing_time);
+ bridge->ageing_time = clock_t_to_jiffies(ageing_time);
return 0;
}
int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
struct mlx5_esw_bridge_offloads *br_offloads)
{
- struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge *bridge;
bool filtering;
- port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
- if (!port)
+ bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!bridge)
return -EINVAL;
- bridge = port->bridge;
filtering = bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
if (filtering == enable)
return 0;
mlx5_esw_bridge_fdb_flush(bridge);
+ mlx5_esw_bridge_mdb_flush(bridge);
if (enable)
bridge->flags |= MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
else
@@ -1511,15 +1471,13 @@ int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, boo
int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto,
struct mlx5_esw_bridge_offloads *br_offloads)
{
- struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge *bridge;
- port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id,
- br_offloads);
- if (!port)
+ bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id,
+ br_offloads);
+ if (!bridge)
return -EINVAL;
- bridge = port->bridge;
if (bridge->vlan_proto == proto)
return 0;
if (proto != ETH_P_8021Q && proto != ETH_P_8021AD) {
@@ -1528,12 +1486,43 @@ int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 pro
}
mlx5_esw_bridge_fdb_flush(bridge);
+ mlx5_esw_bridge_mdb_flush(bridge);
bridge->vlan_proto = proto;
mlx5_esw_bridge_vlans_recreate(bridge);
return 0;
}
+int mlx5_esw_bridge_mcast_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct mlx5_esw_bridge *bridge;
+ int err = 0;
+ bool mcast;
+
+ if (!(MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_multi_path_any_table) ||
+ MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_multi_path_any_table_limit_regc)) ||
+ !MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_uplink_hairpin) ||
+ !MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level))
+ return -EOPNOTSUPP;
+
+ bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!bridge)
+ return -EINVAL;
+
+ mcast = bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG;
+ if (mcast == enable)
+ return 0;
+
+ if (enable)
+ err = mlx5_esw_bridge_mcast_enable(bridge);
+ else
+ mlx5_esw_bridge_mcast_disable(bridge);
+
+ return err;
+}
+
static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags,
struct mlx5_esw_bridge_offloads *br_offloads,
struct mlx5_esw_bridge *bridge)
@@ -1551,6 +1540,15 @@ static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16
port->bridge = bridge;
port->flags |= flags;
xa_init(&port->vlans);
+
+ err = mlx5_esw_bridge_port_mcast_init(port);
+ if (err) {
+ esw_warn(esw->dev,
+ "Failed to initialize port multicast (vport=%u,esw_owner_vhca_id=%u,err=%d)\n",
+ port->vport_num, port->esw_owner_vhca_id, err);
+ goto err_port_mcast;
+ }
+
err = mlx5_esw_bridge_port_insert(port, br_offloads);
if (err) {
esw_warn(esw->dev,
@@ -1563,6 +1561,8 @@ static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16
return 0;
err_port_insert:
+ mlx5_esw_bridge_port_mcast_cleanup(port);
+err_port_mcast:
kvfree(port);
return err;
}
@@ -1580,6 +1580,7 @@ static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_off
trace_mlx5_esw_bridge_vport_cleanup(port);
mlx5_esw_bridge_port_vlans_flush(port, bridge);
+ mlx5_esw_bridge_port_mcast_cleanup(port);
mlx5_esw_bridge_port_erase(port, br_offloads);
kvfree(port);
mlx5_esw_bridge_put(br_offloads, bridge);
@@ -1711,14 +1712,12 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16
struct switchdev_notifier_fdb_info *fdb_info)
{
struct mlx5_esw_bridge_fdb_entry *entry;
- struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge *bridge;
- port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
- if (!port)
+ bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!bridge)
return;
- bridge = port->bridge;
entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid);
if (!entry) {
esw_debug(br_offloads->esw->dev,
@@ -1765,14 +1764,12 @@ void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_o
{
struct mlx5_eswitch *esw = br_offloads->esw;
struct mlx5_esw_bridge_fdb_entry *entry;
- struct mlx5_esw_bridge_port *port;
struct mlx5_esw_bridge *bridge;
- port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
- if (!port)
+ bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!bridge)
return;
- bridge = port->bridge;
entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid);
if (!entry) {
esw_debug(esw->dev,
@@ -1806,6 +1803,64 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads)
}
}
+int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ const unsigned char *addr, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_bridge_vlan *vlan;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+ int err;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port) {
+ esw_warn(br_offloads->esw->dev,
+ "Failed to lookup bridge port to add MDB (MAC=%pM,vport=%u)\n",
+ addr, vport_num);
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Failed to lookup bridge port to add MDB (MAC=%pM,vport=%u)\n",
+ addr, vport_num);
+ return -EINVAL;
+ }
+
+ bridge = port->bridge;
+ if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) {
+ vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
+ if (!vlan) {
+ esw_warn(br_offloads->esw->dev,
+ "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n",
+ addr, vid, vport_num);
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n",
+ addr, vid, vport_num);
+ return -EINVAL;
+ }
+ }
+
+ err = mlx5_esw_bridge_port_mdb_attach(dev, port, addr, vid);
+ if (err) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Failed to add MDB (MAC=%pM,vid=%u,vport=%u)\n",
+ addr, vid, vport_num);
+ return err;
+ }
+
+ return 0;
+}
+
+void mlx5_esw_bridge_port_mdb_del(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ const unsigned char *addr, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return;
+
+ mlx5_esw_bridge_port_mdb_detach(dev, port, addr, vid);
+}
+
static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads)
{
struct mlx5_esw_bridge_port *port;
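
The new mlx5_esw_bridge_from_port_lookup() helper above folds the repeated port-lookup-then-dereference sequence into a single call; the xarray key it relies on packs vport_num into the low 16 bits and esw_owner_vhca_id above them, exactly as mlx5_esw_bridge_port_key_from_data() computes it. A userspace check of that encoding (the values are made up):

#include <limits.h>
#include <stdio.h>

static unsigned long port_key(unsigned short vport_num,
			      unsigned short esw_owner_vhca_id)
{
	return vport_num |
	       (unsigned long)esw_owner_vhca_id << (sizeof(vport_num) * CHAR_BIT);
}

int main(void)
{
	/* vport 5 owned by vhca 2 -> 0x20005 */
	printf("0x%lx\n", port_key(5, 2));
	return 0;
}
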
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
index 10851a515bca..a9dd18c73d6a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -25,12 +25,19 @@ struct mlx5_esw_bridge_offloads {
struct delayed_work update_work;
struct mlx5_flow_table *ingress_ft;
+ struct mlx5_flow_group *ingress_igmp_fg;
+ struct mlx5_flow_group *ingress_mld_fg;
struct mlx5_flow_group *ingress_vlan_fg;
struct mlx5_flow_group *ingress_vlan_filter_fg;
struct mlx5_flow_group *ingress_qinq_fg;
struct mlx5_flow_group *ingress_qinq_filter_fg;
struct mlx5_flow_group *ingress_mac_fg;
+ struct mlx5_flow_handle *igmp_handle;
+ struct mlx5_flow_handle *mld_query_handle;
+ struct mlx5_flow_handle *mld_report_handle;
+ struct mlx5_flow_handle *mld_done_handle;
+
struct mlx5_flow_table *skip_ft;
};
@@ -64,10 +71,20 @@ int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, boo
struct mlx5_esw_bridge_offloads *br_offloads);
int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto,
struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_mcast_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
+ struct mlx5_esw_bridge_offloads *br_offloads);
int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags,
struct mlx5_esw_bridge_offloads *br_offloads,
struct netlink_ext_ack *extack);
void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid,
struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ const unsigned char *addr, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+void mlx5_esw_bridge_port_mdb_del(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ const unsigned char *addr, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads);
+
#endif /* __MLX5_ESW_BRIDGE_H__ */
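
mlx5_esw_bridge_mcast_set(), declared above, first gates on firmware capabilities and then treats a repeated setting as a no-op. A userspace model of that control flow follows; the capability bits are invented stand-ins for the MLX5_CAP_ESW_FLOWTABLE checks in the real function.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define CAP_MULTI_PATH_ANY	(1u << 0)
#define CAP_UPLINK_HAIRPIN	(1u << 1)
#define CAP_IGNORE_FLOW_LEVEL	(1u << 2)

static int mcast_set(unsigned int caps, bool enable, bool *state)
{
	if (!(caps & CAP_MULTI_PATH_ANY) ||
	    !(caps & CAP_UPLINK_HAIRPIN) ||
	    !(caps & CAP_IGNORE_FLOW_LEVEL))
		return -EOPNOTSUPP;	/* hardware can't do bridge mcast */
	if (*state == enable)
		return 0;		/* nothing to do */
	*state = enable;
	return 0;
}

int main(void)
{
	bool on = false;

	printf("%d\n", mcast_set(CAP_UPLINK_HAIRPIN, true, &on)); /* -95 on Linux */
	printf("%d\n", mcast_set(0x7, true, &on));		   /* 0 */
	return 0;
}
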
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
new file mode 100644
index 000000000000..2eae594a5e80
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
@@ -0,0 +1,1126 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "lib/devcom.h"
+#include "bridge.h"
+#include "eswitch.h"
+#include "bridge_priv.h"
+#include "diag/bridge_tracepoint.h"
+
+static const struct rhashtable_params mdb_ht_params = {
+ .key_offset = offsetof(struct mlx5_esw_bridge_mdb_entry, key),
+ .key_len = sizeof(struct mlx5_esw_bridge_mdb_key),
+ .head_offset = offsetof(struct mlx5_esw_bridge_mdb_entry, ht_node),
+ .automatic_shrinking = true,
+};
+
+int mlx5_esw_bridge_mdb_init(struct mlx5_esw_bridge *bridge)
+{
+ INIT_LIST_HEAD(&bridge->mdb_list);
+ return rhashtable_init(&bridge->mdb_ht, &mdb_ht_params);
+}
+
+void mlx5_esw_bridge_mdb_cleanup(struct mlx5_esw_bridge *bridge)
+{
+ rhashtable_destroy(&bridge->mdb_ht);
+}
+
+static struct mlx5_esw_bridge_port *
+mlx5_esw_bridge_mdb_port_lookup(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_mdb_entry *entry)
+{
+ return xa_load(&entry->ports, mlx5_esw_bridge_port_key(port));
+}
+
+static int mlx5_esw_bridge_mdb_port_insert(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_mdb_entry *entry)
+{
+ int err = xa_insert(&entry->ports, mlx5_esw_bridge_port_key(port), port, GFP_KERNEL);
+
+ if (!err)
+ entry->num_ports++;
+ return err;
+}
+
+static void mlx5_esw_bridge_mdb_port_remove(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_mdb_entry *entry)
+{
+ xa_erase(&entry->ports, mlx5_esw_bridge_port_key(port));
+ entry->num_ports--;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mdb_flow_create(u16 esw_owner_vhca_id, struct mlx5_esw_bridge_mdb_entry *entry,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND | FLOW_ACT_IGNORE_FLOW_LEVEL,
+ };
+ int num_dests = entry->num_ports, i = 0;
+ struct mlx5_flow_destination *dests;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+ u8 *dmac_v, *dmac_c;
+ unsigned long idx;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ dests = kvcalloc(num_dests, sizeof(*dests), GFP_KERNEL);
+ if (!dests) {
+ kvfree(rule_spec);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ xa_for_each(&entry->ports, idx, port) {
+ dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dests[i].ft = port->mcast.ft;
+ i++;
+ }
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, outer_headers.dmac_47_16);
+ ether_addr_copy(dmac_v, entry->key.addr);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, outer_headers.dmac_47_16);
+ eth_broadcast_addr(dmac_c);
+
+ if (entry->key.vid) {
+ if (bridge->vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (bridge->vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid,
+ entry->key.vid);
+ }
+
+ handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, dests, num_dests);
+
+ kvfree(dests);
+ kvfree(rule_spec);
+ return handle;
+}
+
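
/* Editor's sketch, not patch content: mlx5_esw_bridge_mdb_flow_create()
 * above sizes one destination per member port and fills the array by
 * walking the entry's port map. A simplified userspace model of that
 * gather step, with a plain pointer array standing in for the xarray: */

#include <stdio.h>
#include <stdlib.h>

struct port { int ft_id; };

static int build_dests(struct port **ports, int num_ports, int **dests_out)
{
	int *dests = calloc(num_ports, sizeof(*dests));

	if (!dests)
		return -1;
	for (int i = 0; i < num_ports; i++)
		dests[i] = ports[i]->ft_id;	/* one dest per member port */
	*dests_out = dests;
	return 0;
}

int main(void)
{
	struct port p0 = { .ft_id = 100 }, p1 = { .ft_id = 101 };
	struct port *ports[] = { &p0, &p1 };
	int *dests;

	if (build_dests(ports, 2, &dests) == 0) {
		printf("%d %d\n", dests[0], dests[1]);
		free(dests);
	}
	return 0;
}
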
+static int
+mlx5_esw_bridge_port_mdb_offload(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_mdb_entry *entry)
+{
+ struct mlx5_flow_handle *handle;
+
+ handle = mlx5_esw_bridge_mdb_flow_create(port->esw_owner_vhca_id, entry, port->bridge);
+ if (entry->egress_handle) {
+ mlx5_del_flow_rules(entry->egress_handle);
+ entry->egress_handle = NULL;
+ }
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ entry->egress_handle = handle;
+ return 0;
+}
+
+static struct mlx5_esw_bridge_mdb_entry *
+mlx5_esw_bridge_mdb_lookup(struct mlx5_esw_bridge *bridge,
+ const unsigned char *addr, u16 vid)
+{
+ struct mlx5_esw_bridge_mdb_key key = {};
+
+ ether_addr_copy(key.addr, addr);
+ key.vid = vid;
+ return rhashtable_lookup_fast(&bridge->mdb_ht, &key, mdb_ht_params);
+}
+
+static struct mlx5_esw_bridge_mdb_entry *
+mlx5_esw_bridge_port_mdb_entry_init(struct mlx5_esw_bridge_port *port,
+ const unsigned char *addr, u16 vid)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_esw_bridge_mdb_entry *entry;
+ int err;
+
+ entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ ether_addr_copy(entry->key.addr, addr);
+ entry->key.vid = vid;
+ xa_init(&entry->ports);
+ err = rhashtable_insert_fast(&bridge->mdb_ht, &entry->ht_node, mdb_ht_params);
+ if (err)
+ goto err_ht_insert;
+
+ list_add(&entry->list, &bridge->mdb_list);
+
+ return entry;
+
+err_ht_insert:
+ xa_destroy(&entry->ports);
+ kvfree(entry);
+ return ERR_PTR(err);
+}
+
+static void mlx5_esw_bridge_port_mdb_entry_cleanup(struct mlx5_esw_bridge *bridge,
+ struct mlx5_esw_bridge_mdb_entry *entry)
+{
+ if (entry->egress_handle)
+ mlx5_del_flow_rules(entry->egress_handle);
+ list_del(&entry->list);
+ rhashtable_remove_fast(&bridge->mdb_ht, &entry->ht_node, mdb_ht_params);
+ xa_destroy(&entry->ports);
+ kvfree(entry);
+}
+
+int mlx5_esw_bridge_port_mdb_attach(struct net_device *dev, struct mlx5_esw_bridge_port *port,
+ const unsigned char *addr, u16 vid)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_esw_bridge_mdb_entry *entry;
+ int err;
+
+ if (!(bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG))
+ return -EOPNOTSUPP;
+
+ entry = mlx5_esw_bridge_mdb_lookup(bridge, addr, vid);
+ if (entry) {
+ if (mlx5_esw_bridge_mdb_port_lookup(port, entry)) {
+ esw_warn(bridge->br_offloads->esw->dev, "MDB attach entry is already attached to port (MAC=%pM,vid=%u,vport=%u)\n",
+ addr, vid, port->vport_num);
+ return 0;
+ }
+ } else {
+ entry = mlx5_esw_bridge_port_mdb_entry_init(port, addr, vid);
+ if (IS_ERR(entry)) {
+ err = PTR_ERR(entry);
+ esw_warn(bridge->br_offloads->esw->dev, "MDB attach failed to init entry (MAC=%pM,vid=%u,vport=%u,err=%d)\n",
+ addr, vid, port->vport_num, err);
+ return err;
+ }
+ }
+
+ err = mlx5_esw_bridge_mdb_port_insert(port, entry);
+ if (err) {
+ if (!entry->num_ports)
+ mlx5_esw_bridge_port_mdb_entry_cleanup(bridge, entry); /* new mdb entry */
+ esw_warn(bridge->br_offloads->esw->dev,
+ "MDB attach failed to insert port (MAC=%pM,vid=%u,vport=%u,err=%d)\n",
+ addr, vid, port->vport_num, err);
+ return err;
+ }
+
+ err = mlx5_esw_bridge_port_mdb_offload(port, entry);
+ if (err)
+ /* A single MDB entry can be used by multiple ports, so just
+ * log the error and continue.
+ */
+ esw_warn(bridge->br_offloads->esw->dev, "MDB attach failed to offload (MAC=%pM,vid=%u,vport=%u,err=%d)\n",
+ addr, vid, port->vport_num, err);
+
+ trace_mlx5_esw_bridge_port_mdb_attach(dev, entry);
+ return 0;
+}
+
+static void mlx5_esw_bridge_port_mdb_entry_detach(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_mdb_entry *entry)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ int err;
+
+ mlx5_esw_bridge_mdb_port_remove(port, entry);
+ if (!entry->num_ports) {
+ mlx5_esw_bridge_port_mdb_entry_cleanup(bridge, entry);
+ return;
+ }
+
+ err = mlx5_esw_bridge_port_mdb_offload(port, entry);
+ if (err)
+ /* A single MDB entry can be used by multiple ports, so just
+ * log the error and continue.
+ */
+ esw_warn(bridge->br_offloads->esw->dev, "MDB detach failed to offload (MAC=%pM,vid=%u,vport=%u)\n",
+ entry->key.addr, entry->key.vid, port->vport_num);
+}
+
+void mlx5_esw_bridge_port_mdb_detach(struct net_device *dev, struct mlx5_esw_bridge_port *port,
+ const unsigned char *addr, u16 vid)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_esw_bridge_mdb_entry *entry;
+
+ entry = mlx5_esw_bridge_mdb_lookup(bridge, addr, vid);
+ if (!entry) {
+ esw_debug(bridge->br_offloads->esw->dev,
+ "MDB detach entry not found (MAC=%pM,vid=%u,vport=%u)\n",
+ addr, vid, port->vport_num);
+ return;
+ }
+
+ if (!mlx5_esw_bridge_mdb_port_lookup(port, entry)) {
+ esw_debug(bridge->br_offloads->esw->dev,
+ "MDB detach entry not attached to the port (MAC=%pM,vid=%u,vport=%u)\n",
+ addr, vid, port->vport_num);
+ return;
+ }
+
+ trace_mlx5_esw_bridge_port_mdb_detach(dev, entry);
+ mlx5_esw_bridge_port_mdb_entry_detach(port, entry);
+}
+
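
/* Editor's sketch, not patch content: attach and detach above implement a
 * membership-counted lifecycle: attach looks the entry up or creates it,
 * inserts the port, and re-offloads; detach removes the port and frees the
 * entry once num_ports hits zero. A hypothetical userspace reduction: */

#include <stdio.h>
#include <stdlib.h>

struct mdb {
	int num_ports;
};

static struct mdb *attach(struct mdb *entry)
{
	if (!entry) {
		entry = calloc(1, sizeof(*entry));	/* first member */
		if (!entry)
			return NULL;
	}
	entry->num_ports++;
	return entry;
}

static struct mdb *detach(struct mdb *entry)
{
	if (--entry->num_ports == 0) {
		free(entry);		/* last member detached */
		return NULL;
	}
	return entry;
}

int main(void)
{
	struct mdb *e = attach(NULL);

	e = attach(e);
	e = detach(e);
	printf("members left: %d\n", e ? e->num_ports : 0);
	e = detach(e);	/* entry freed */
	return 0;
}
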
+void mlx5_esw_bridge_port_mdb_vlan_flush(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_esw_bridge_mdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &bridge->mdb_list, list)
+ if (entry->key.vid == vlan->vid && mlx5_esw_bridge_mdb_port_lookup(port, entry))
+ mlx5_esw_bridge_port_mdb_entry_detach(port, entry);
+}
+
+static void mlx5_esw_bridge_port_mdb_flush(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_esw_bridge_mdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &bridge->mdb_list, list)
+ if (mlx5_esw_bridge_mdb_port_lookup(port, entry))
+ mlx5_esw_bridge_port_mdb_entry_detach(port, entry);
+}
+
+void mlx5_esw_bridge_mdb_flush(struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_mdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &bridge->mdb_list, list)
+ mlx5_esw_bridge_port_mdb_entry_cleanup(bridge, entry);
+}
+
+static int mlx5_esw_bridge_port_mcast_fts_init(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_eswitch *esw = bridge->br_offloads->esw;
+ struct mlx5_flow_table *mcast_ft;
+
+ mcast_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE,
+ MLX5_ESW_BRIDGE_LEVEL_MCAST_TABLE,
+ esw);
+ if (IS_ERR(mcast_ft))
+ return PTR_ERR(mcast_ft);
+
+ port->mcast.ft = mcast_ft;
+ return 0;
+}
+
+static void mlx5_esw_bridge_port_mcast_fts_cleanup(struct mlx5_esw_bridge_port *port)
+{
+ if (port->mcast.ft)
+ mlx5_destroy_flow_table(port->mcast.ft);
+ port->mcast.ft = NULL;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_mcast_filter_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *mcast_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(mcast_ft, in);
+ kvfree(in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create filter flow group for bridge mcast table (err=%pe)\n",
+ fg);
+
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_mcast_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *mcast_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ if (vlan_proto == ETH_P_8021Q)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ else if (vlan_proto == ETH_P_8021AD)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index, from);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, to);
+
+ fg = mlx5_create_flow_group(mcast_ft, in);
+ kvfree(in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create VLAN(proto=%x) flow group for bridge mcast table (err=%pe)\n",
+ vlan_proto, fg);
+
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_mcast_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *mcast_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_mcast_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, mcast_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_mcast_qinq_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *mcast_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_mcast_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, mcast_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_mcast_fwd_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *mcast_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(mcast_ft, in);
+ kvfree(in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create forward flow group for bridge mcast table (err=%pe)\n",
+ fg);
+
+ return fg;
+}
+
+static int mlx5_esw_bridge_port_mcast_fgs_init(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_flow_group *fwd_fg, *qinq_fg, *vlan_fg, *filter_fg;
+ struct mlx5_eswitch *esw = port->bridge->br_offloads->esw;
+ struct mlx5_flow_table *mcast_ft = port->mcast.ft;
+ int err;
+
+ filter_fg = mlx5_esw_bridge_mcast_filter_fg_create(esw, mcast_ft);
+ if (IS_ERR(filter_fg))
+ return PTR_ERR(filter_fg);
+
+ vlan_fg = mlx5_esw_bridge_mcast_vlan_fg_create(esw, mcast_ft);
+ if (IS_ERR(vlan_fg)) {
+ err = PTR_ERR(vlan_fg);
+ goto err_vlan_fg;
+ }
+
+ qinq_fg = mlx5_esw_bridge_mcast_qinq_fg_create(esw, mcast_ft);
+ if (IS_ERR(qinq_fg)) {
+ err = PTR_ERR(qinq_fg);
+ goto err_qinq_fg;
+ }
+
+ fwd_fg = mlx5_esw_bridge_mcast_fwd_fg_create(esw, mcast_ft);
+ if (IS_ERR(fwd_fg)) {
+ err = PTR_ERR(fwd_fg);
+ goto err_fwd_fg;
+ }
+
+ port->mcast.filter_fg = filter_fg;
+ port->mcast.vlan_fg = vlan_fg;
+ port->mcast.qinq_fg = qinq_fg;
+ port->mcast.fwd_fg = fwd_fg;
+
+ return 0;
+
+err_fwd_fg:
+ mlx5_destroy_flow_group(qinq_fg);
+err_qinq_fg:
+ mlx5_destroy_flow_group(vlan_fg);
+err_vlan_fg:
+ mlx5_destroy_flow_group(filter_fg);
+ return err;
+}
+
+static void mlx5_esw_bridge_port_mcast_fgs_cleanup(struct mlx5_esw_bridge_port *port)
+{
+ if (port->mcast.fwd_fg)
+ mlx5_destroy_flow_group(port->mcast.fwd_fg);
+ port->mcast.fwd_fg = NULL;
+ if (port->mcast.qinq_fg)
+ mlx5_destroy_flow_group(port->mcast.qinq_fg);
+ port->mcast.qinq_fg = NULL;
+ if (port->mcast.vlan_fg)
+ mlx5_destroy_flow_group(port->mcast.vlan_fg);
+ port->mcast.vlan_fg = NULL;
+ if (port->mcast.filter_fg)
+ mlx5_destroy_flow_group(port->mcast.filter_fg);
+ port->mcast.filter_fg = NULL;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_flow_with_esw_create(struct mlx5_esw_bridge_port *port,
+ struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_DROP,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+ MLX5_SET(fte_match_param, rule_spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, port->vport_num));
+
+ handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, NULL, 0);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_filter_flow_create(struct mlx5_esw_bridge_port *port)
+{
+ return mlx5_esw_bridge_mcast_flow_with_esw_create(port, port->bridge->br_offloads->esw);
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_filter_flow_peer_create(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_devcom *devcom = port->bridge->br_offloads->esw->dev->priv.devcom;
+ struct mlx5_flow_handle *handle;
+ struct mlx5_eswitch *peer_esw;
+
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return ERR_PTR(-ENODEV);
+
+ handle = mlx5_esw_bridge_mcast_flow_with_esw_create(port, peer_esw);
+
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ return handle;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_vlan_flow_create(u16 vlan_proto, struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan)
+{
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_VPORT,
+ .vport.num = port->vport_num,
+ };
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) &&
+ port->vport_num == MLX5_VPORT_UPLINK)
+ rule_spec->flow_context.flow_source =
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act.pkt_reformat = vlan->pkt_reformat_pop;
+
+ if (vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.first_vid);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, vlan->vid);
+
+ if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) {
+ dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ dest.vport.vhca_id = port->esw_owner_vhca_id;
+ }
+ handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+int mlx5_esw_bridge_vlan_mcast_init(u16 vlan_proto, struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan)
+{
+ struct mlx5_flow_handle *handle;
+
+ if (!(port->bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG))
+ return 0;
+
+ handle = mlx5_esw_bridge_mcast_vlan_flow_create(vlan_proto, port, vlan);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ vlan->mcast_handle = handle;
+ return 0;
+}
+
+void mlx5_esw_bridge_vlan_mcast_cleanup(struct mlx5_esw_bridge_vlan *vlan)
+{
+ if (vlan->mcast_handle)
+ mlx5_del_flow_rules(vlan->mcast_handle);
+ vlan->mcast_handle = NULL;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_fwd_flow_create(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_VPORT,
+ .vport.num = port->vport_num,
+ };
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) &&
+ port->vport_num == MLX5_VPORT_UPLINK)
+ rule_spec->flow_context.flow_source =
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+
+ if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) {
+ dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ dest.vport.vhca_id = port->esw_owner_vhca_id;
+ }
+ handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+static int mlx5_esw_bridge_port_mcast_fhs_init(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_flow_handle *filter_handle, *fwd_handle;
+ struct mlx5_esw_bridge_vlan *vlan, *failed;
+ unsigned long index;
+ int err;
+
+ filter_handle = (port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER) ?
+ mlx5_esw_bridge_mcast_filter_flow_peer_create(port) :
+ mlx5_esw_bridge_mcast_filter_flow_create(port);
+ if (IS_ERR(filter_handle))
+ return PTR_ERR(filter_handle);
+
+ fwd_handle = mlx5_esw_bridge_mcast_fwd_flow_create(port);
+ if (IS_ERR(fwd_handle)) {
+ err = PTR_ERR(fwd_handle);
+ goto err_fwd;
+ }
+
+ xa_for_each(&port->vlans, index, vlan) {
+ err = mlx5_esw_bridge_vlan_mcast_init(port->bridge->vlan_proto, port, vlan);
+ if (err) {
+ failed = vlan;
+ goto err_vlan;
+ }
+ }
+
+ port->mcast.filter_handle = filter_handle;
+ port->mcast.fwd_handle = fwd_handle;
+
+ return 0;
+
+err_vlan:
+ xa_for_each(&port->vlans, index, vlan) {
+ if (vlan == failed)
+ break;
+
+ mlx5_esw_bridge_vlan_mcast_cleanup(vlan);
+ }
+ mlx5_del_flow_rules(fwd_handle);
+err_fwd:
+ mlx5_del_flow_rules(filter_handle);
+ return err;
+}
+
+static void mlx5_esw_bridge_port_mcast_fhs_cleanup(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_esw_bridge_vlan *vlan;
+ unsigned long index;
+
+ xa_for_each(&port->vlans, index, vlan)
+ mlx5_esw_bridge_vlan_mcast_cleanup(vlan);
+
+ if (port->mcast.fwd_handle)
+ mlx5_del_flow_rules(port->mcast.fwd_handle);
+ port->mcast.fwd_handle = NULL;
+ if (port->mcast.filter_handle)
+ mlx5_del_flow_rules(port->mcast.filter_handle);
+ port->mcast.filter_handle = NULL;
+}
+
+int mlx5_esw_bridge_port_mcast_init(struct mlx5_esw_bridge_port *port)
+{
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ int err;
+
+ if (!(bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG))
+ return 0;
+
+ err = mlx5_esw_bridge_port_mcast_fts_init(port, bridge);
+ if (err)
+ return err;
+
+ err = mlx5_esw_bridge_port_mcast_fgs_init(port);
+ if (err)
+ goto err_fgs;
+
+ err = mlx5_esw_bridge_port_mcast_fhs_init(port);
+ if (err)
+ goto err_fhs;
+ return err;
+
+err_fhs:
+ mlx5_esw_bridge_port_mcast_fgs_cleanup(port);
+err_fgs:
+ mlx5_esw_bridge_port_mcast_fts_cleanup(port);
+ return err;
+}
+
+void mlx5_esw_bridge_port_mcast_cleanup(struct mlx5_esw_bridge_port *port)
+{
+ mlx5_esw_bridge_port_mdb_flush(port);
+ mlx5_esw_bridge_port_mcast_fhs_cleanup(port);
+ mlx5_esw_bridge_port_mcast_fgs_cleanup(port);
+ mlx5_esw_bridge_port_mcast_fts_cleanup(port);
+}
+
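
/* Editor's sketch, not patch content: port multicast setup above layers
 * three stages (flow tables, then flow groups, then flow handles), and the
 * error paths unwind them in exact reverse order, matching
 * mlx5_esw_bridge_port_mcast_cleanup(). A compilable userspace skeleton of
 * that goto-unwinding shape; stage bodies are stubs and fhs fails on
 * purpose to show the unwind: */

#include <stdio.h>

static int fts_init(void)  { puts("fts init");  return 0; }
static int fgs_init(void)  { puts("fgs init");  return 0; }
static int fhs_init(void)  { puts("fhs init");  return -1; } /* simulate failure */
static void fgs_cleanup(void) { puts("fgs cleanup"); }
static void fts_cleanup(void) { puts("fts cleanup"); }

static int mcast_init(void)
{
	int err = fts_init();

	if (err)
		return err;
	err = fgs_init();
	if (err)
		goto err_fgs;
	err = fhs_init();
	if (err)
		goto err_fhs;
	return 0;

err_fhs:
	fgs_cleanup();
err_fgs:
	fts_cleanup();
	return err;
}

int main(void)
{
	return mcast_init() ? 1 : 0;
}
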
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_igmp_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ip_version);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ip_protocol);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(ingress_ft, in);
+ kvfree(in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create IGMP flow group for bridge ingress table (err=%pe)\n",
+ fg);
+
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_mld_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ if (!(MLX5_CAP_GEN(esw->dev, flex_parser_protocols) & MLX5_FLEX_PROTO_ICMPV6)) {
+ esw_warn(esw->dev,
+ "Can't create MLD flow group due to missing hardware ICMPv6 parsing support\n");
+ return NULL;
+ }
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_3);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ip_version);
+ MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters_3.icmpv6_type);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(ingress_ft, in);
+ kvfree(in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create MLD flow group for bridge ingress table (err=%pe)\n",
+ fg);
+
+ return fg;
+}
+
+static int
+mlx5_esw_bridge_ingress_mcast_fgs_init(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_flow_table *ingress_ft = br_offloads->ingress_ft;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct mlx5_flow_group *igmp_fg, *mld_fg;
+
+ igmp_fg = mlx5_esw_bridge_ingress_igmp_fg_create(esw, ingress_ft);
+ if (IS_ERR(igmp_fg))
+ return PTR_ERR(igmp_fg);
+
+ mld_fg = mlx5_esw_bridge_ingress_mld_fg_create(esw, ingress_ft);
+ if (IS_ERR(mld_fg)) {
+ mlx5_destroy_flow_group(igmp_fg);
+ return PTR_ERR(mld_fg);
+ }
+
+ br_offloads->ingress_igmp_fg = igmp_fg;
+ br_offloads->ingress_mld_fg = mld_fg;
+ return 0;
+}
+
+static void
+mlx5_esw_bridge_ingress_mcast_fgs_cleanup(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ if (br_offloads->ingress_mld_fg)
+ mlx5_destroy_flow_group(br_offloads->ingress_mld_fg);
+ br_offloads->ingress_mld_fg = NULL;
+ if (br_offloads->ingress_igmp_fg)
+ mlx5_destroy_flow_group(br_offloads->ingress_igmp_fg);
+ br_offloads->ingress_igmp_fg = NULL;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_igmp_fh_create(struct mlx5_flow_table *ingress_ft,
+ struct mlx5_flow_table *skip_ft)
+{
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
+ .ft = skip_ft,
+ };
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version, 4);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_protocol, IPPROTO_IGMP);
+
+ handle = mlx5_add_flow_rules(ingress_ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_mld_fh_create(u8 type, struct mlx5_flow_table *ingress_ft,
+ struct mlx5_flow_table *skip_ft)
+{
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
+ .ft = skip_ft,
+ };
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_3;
+
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version, 6);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, misc_parameters_3.icmpv6_type);
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_3.icmpv6_type, type);
+
+ handle = mlx5_add_flow_rules(ingress_ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+static int
+mlx5_esw_bridge_ingress_mcast_fhs_create(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_flow_handle *igmp_handle, *mld_query_handle, *mld_report_handle,
+ *mld_done_handle;
+ struct mlx5_flow_table *ingress_ft = br_offloads->ingress_ft,
+ *skip_ft = br_offloads->skip_ft;
+ int err;
+
+ igmp_handle = mlx5_esw_bridge_ingress_igmp_fh_create(ingress_ft, skip_ft);
+ if (IS_ERR(igmp_handle))
+ return PTR_ERR(igmp_handle);
+
+ if (br_offloads->ingress_mld_fg) {
+ mld_query_handle = mlx5_esw_bridge_ingress_mld_fh_create(ICMPV6_MGM_QUERY,
+ ingress_ft,
+ skip_ft);
+ if (IS_ERR(mld_query_handle)) {
+ err = PTR_ERR(mld_query_handle);
+ goto err_mld_query;
+ }
+
+ mld_report_handle = mlx5_esw_bridge_ingress_mld_fh_create(ICMPV6_MGM_REPORT,
+ ingress_ft,
+ skip_ft);
+ if (IS_ERR(mld_report_handle)) {
+ err = PTR_ERR(mld_report_handle);
+ goto err_mld_report;
+ }
+
+ mld_done_handle = mlx5_esw_bridge_ingress_mld_fh_create(ICMPV6_MGM_REDUCTION,
+ ingress_ft,
+ skip_ft);
+ if (IS_ERR(mld_done_handle)) {
+ err = PTR_ERR(mld_done_handle);
+ goto err_mld_done;
+ }
+ } else {
+ mld_query_handle = NULL;
+ mld_report_handle = NULL;
+ mld_done_handle = NULL;
+ }
+
+ br_offloads->igmp_handle = igmp_handle;
+ br_offloads->mld_query_handle = mld_query_handle;
+ br_offloads->mld_report_handle = mld_report_handle;
+ br_offloads->mld_done_handle = mld_done_handle;
+
+ return 0;
+
+err_mld_done:
+ mlx5_del_flow_rules(mld_report_handle);
+err_mld_report:
+ mlx5_del_flow_rules(mld_query_handle);
+err_mld_query:
+ mlx5_del_flow_rules(igmp_handle);
+ return err;
+}
+
+static void
+mlx5_esw_bridge_ingress_mcast_fhs_cleanup(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ if (br_offloads->mld_done_handle)
+ mlx5_del_flow_rules(br_offloads->mld_done_handle);
+ br_offloads->mld_done_handle = NULL;
+ if (br_offloads->mld_report_handle)
+ mlx5_del_flow_rules(br_offloads->mld_report_handle);
+ br_offloads->mld_report_handle = NULL;
+ if (br_offloads->mld_query_handle)
+ mlx5_del_flow_rules(br_offloads->mld_query_handle);
+ br_offloads->mld_query_handle = NULL;
+ if (br_offloads->igmp_handle)
+ mlx5_del_flow_rules(br_offloads->igmp_handle);
+ br_offloads->igmp_handle = NULL;
+}
+
+static int mlx5_esw_brige_mcast_init(struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_esw_bridge_port *port, *failed;
+ unsigned long i;
+ int err;
+
+ xa_for_each(&br_offloads->ports, i, port) {
+ if (port->bridge != bridge)
+ continue;
+
+ err = mlx5_esw_bridge_port_mcast_init(port);
+ if (err) {
+ failed = port;
+ goto err_port;
+ }
+ }
+ return 0;
+
+err_port:
+ xa_for_each(&br_offloads->ports, i, port) {
+ if (port == failed)
+ break;
+ if (port->bridge != bridge)
+ continue;
+
+ mlx5_esw_bridge_port_mcast_cleanup(port);
+ }
+ return err;
+}
+
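
/* Editor's sketch, not patch content: mlx5_esw_brige_mcast_init() above
 * uses the common partial-failure idiom: remember which element failed,
 * then walk the collection again and clean up only the members that were
 * initialized before it. A minimal userspace rendering of the idiom: */

#include <stdio.h>

static int init_port(int i)     { return i == 2 ? -1 : 0; } /* port 2 fails */
static void cleanup_port(int i) { printf("cleanup %d\n", i); }

static int init_all(int n)
{
	int failed = -1, err = 0;

	for (int i = 0; i < n; i++) {
		err = init_port(i);
		if (err) {
			failed = i;
			break;
		}
	}
	if (!err)
		return 0;

	for (int i = 0; i < n; i++) {
		if (i == failed)
			break;		/* stop at the first failure */
		cleanup_port(i);
	}
	return err;
}

int main(void)
{
	return init_all(4) ? 1 : 0;	/* cleans up ports 0 and 1 */
}
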
+static void mlx5_esw_brige_mcast_cleanup(struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_esw_bridge_port *port;
+ unsigned long i;
+
+ xa_for_each(&br_offloads->ports, i, port) {
+ if (port->bridge != bridge)
+ continue;
+
+ mlx5_esw_bridge_port_mcast_cleanup(port);
+ }
+}
+
+static int mlx5_esw_brige_mcast_global_enable(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ int err;
+
+ if (br_offloads->ingress_igmp_fg)
+ return 0; /* already enabled by another bridge */
+
+ err = mlx5_esw_bridge_ingress_mcast_fgs_init(br_offloads);
+ if (err) {
+ esw_warn(br_offloads->esw->dev,
+ "Failed to create global multicast flow groups (err=%d)\n",
+ err);
+ return err;
+ }
+
+ err = mlx5_esw_bridge_ingress_mcast_fhs_create(br_offloads);
+ if (err) {
+ esw_warn(br_offloads->esw->dev,
+ "Failed to create global multicast flows (err=%d)\n",
+ err);
+ goto err_fhs;
+ }
+
+ return 0;
+
+err_fhs:
+ mlx5_esw_bridge_ingress_mcast_fgs_cleanup(br_offloads);
+ return err;
+}
+
+static void mlx5_esw_brige_mcast_global_disable(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge *br;
+
+ list_for_each_entry(br, &br_offloads->bridges, list) {
+ /* Ingress table is global, so only disable snooping when all
+ * bridges on esw have multicast disabled.
+ */
+ if (br->flags & MLX5_ESW_BRIDGE_MCAST_FLAG)
+ return;
+ }
+
+ mlx5_esw_bridge_ingress_mcast_fhs_cleanup(br_offloads);
+ mlx5_esw_bridge_ingress_mcast_fgs_cleanup(br_offloads);
+}
+
+int mlx5_esw_bridge_mcast_enable(struct mlx5_esw_bridge *bridge)
+{
+ int err;
+
+ err = mlx5_esw_brige_mcast_global_enable(bridge->br_offloads);
+ if (err)
+ return err;
+
+ bridge->flags |= MLX5_ESW_BRIDGE_MCAST_FLAG;
+
+ err = mlx5_esw_brige_mcast_init(bridge);
+ if (err) {
+ esw_warn(bridge->br_offloads->esw->dev, "Failed to enable multicast (err=%d)\n",
+ err);
+ bridge->flags &= ~MLX5_ESW_BRIDGE_MCAST_FLAG;
+ mlx5_esw_brige_mcast_global_disable(bridge->br_offloads);
+ }
+ return err;
+}
+
+void mlx5_esw_bridge_mcast_disable(struct mlx5_esw_bridge *bridge)
+{
+ mlx5_esw_brige_mcast_cleanup(bridge);
+ bridge->flags &= ~MLX5_ESW_BRIDGE_MCAST_FLAG;
+ mlx5_esw_brige_mcast_global_disable(bridge->br_offloads);
+}
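
The global enable/disable pair at the end of the new file treats the ingress IGMP/MLD groups as shared state: the first bridge to enable multicast creates them, and they are destroyed only once no bridge on the eswitch still has MLX5_ESW_BRIDGE_MCAST_FLAG set. A small userspace model of that shared-lifecycle rule:

#include <stdbool.h>
#include <stdio.h>

#define NBR 3

static bool bridge_mcast[NBR];
static bool global_on;

static void global_enable(void)
{
	if (global_on)
		return;		/* already enabled by another bridge */
	global_on = true;
	puts("global ingress mcast groups created");
}

static void global_disable(void)
{
	for (int i = 0; i < NBR; i++)
		if (bridge_mcast[i])
			return;	/* someone still needs the shared tables */
	global_on = false;
	puts("global ingress mcast groups destroyed");
}

int main(void)
{
	bridge_mcast[0] = true; global_enable();
	bridge_mcast[1] = true; global_enable();	/* no-op */
	bridge_mcast[0] = false; global_disable();	/* kept: bridge 1 on */
	bridge_mcast[1] = false; global_disable();	/* now torn down */
	return 0;
}
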
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
index 878311fe950a..c9595801bdb4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
@@ -12,11 +12,124 @@
#include <linux/xarray.h>
#include "fs_core.h"
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_SIZE 1
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_SIZE 3
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE 131072
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE \
+ (524288 - MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_SIZE - \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_SIZE)
+
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO + 1)
+static_assert(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE == 1048576);
+
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE 131072
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE (262144 - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO + 1)
+static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 524288);
+
+#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0
+
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_SIZE 1
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_SIZE 1
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_SIZE 4095
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_SIZE MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_SIZE
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_SIZE - 1)
+
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE \
+ (MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO + 1)
+static_assert(MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE == 8192);
+
+enum {
+ MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
+ MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
+ MLX5_ESW_BRIDGE_LEVEL_MCAST_TABLE,
+ MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
+};
+
+enum {
+ MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0),
+ MLX5_ESW_BRIDGE_MCAST_FLAG = BIT(1),
+};
+
struct mlx5_esw_bridge_fdb_key {
unsigned char addr[ETH_ALEN];
u16 vid;
};
+struct mlx5_esw_bridge_mdb_key {
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+};
+
enum {
MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0),
MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1),
@@ -43,6 +156,16 @@ struct mlx5_esw_bridge_fdb_entry {
struct mlx5_flow_handle *filter_handle;
};
+struct mlx5_esw_bridge_mdb_entry {
+ struct mlx5_esw_bridge_mdb_key key;
+ struct rhash_head ht_node;
+ struct list_head list;
+ struct xarray ports;
+ int num_ports;
+
+ struct mlx5_flow_handle *egress_handle;
+};
+
struct mlx5_esw_bridge_vlan {
u16 vid;
u16 flags;
@@ -50,6 +173,7 @@ struct mlx5_esw_bridge_vlan {
struct mlx5_pkt_reformat *pkt_reformat_push;
struct mlx5_pkt_reformat *pkt_reformat_pop;
struct mlx5_modify_hdr *pkt_mod_hdr_push_mark;
+ struct mlx5_flow_handle *mcast_handle;
};
struct mlx5_esw_bridge_port {
@@ -58,6 +182,63 @@ struct mlx5_esw_bridge_port {
u16 flags;
struct mlx5_esw_bridge *bridge;
struct xarray vlans;
+ struct {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *filter_fg;
+ struct mlx5_flow_group *vlan_fg;
+ struct mlx5_flow_group *qinq_fg;
+ struct mlx5_flow_group *fwd_fg;
+
+ struct mlx5_flow_handle *filter_handle;
+ struct mlx5_flow_handle *fwd_handle;
+ } mcast;
};
+struct mlx5_esw_bridge {
+ int ifindex;
+ int refcnt;
+ struct list_head list;
+ struct mlx5_esw_bridge_offloads *br_offloads;
+
+ struct list_head fdb_list;
+ struct rhashtable fdb_ht;
+
+ struct list_head mdb_list;
+ struct rhashtable mdb_ht;
+
+ struct mlx5_flow_table *egress_ft;
+ struct mlx5_flow_group *egress_vlan_fg;
+ struct mlx5_flow_group *egress_qinq_fg;
+ struct mlx5_flow_group *egress_mac_fg;
+ struct mlx5_flow_group *egress_miss_fg;
+ struct mlx5_pkt_reformat *egress_miss_pkt_reformat;
+ struct mlx5_flow_handle *egress_miss_handle;
+ unsigned long ageing_time;
+ u32 flags;
+ u16 vlan_proto;
+};
+
+struct mlx5_flow_table *mlx5_esw_bridge_table_create(int max_fte, u32 level,
+ struct mlx5_eswitch *esw);
+unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port);
+
+int mlx5_esw_bridge_port_mcast_init(struct mlx5_esw_bridge_port *port);
+void mlx5_esw_bridge_port_mcast_cleanup(struct mlx5_esw_bridge_port *port);
+int mlx5_esw_bridge_vlan_mcast_init(u16 vlan_proto, struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan);
+void mlx5_esw_bridge_vlan_mcast_cleanup(struct mlx5_esw_bridge_vlan *vlan);
+
+int mlx5_esw_bridge_mcast_enable(struct mlx5_esw_bridge *bridge);
+void mlx5_esw_bridge_mcast_disable(struct mlx5_esw_bridge *bridge);
+
+int mlx5_esw_bridge_mdb_init(struct mlx5_esw_bridge *bridge);
+void mlx5_esw_bridge_mdb_cleanup(struct mlx5_esw_bridge *bridge);
+int mlx5_esw_bridge_port_mdb_attach(struct net_device *dev, struct mlx5_esw_bridge_port *port,
+ const unsigned char *addr, u16 vid);
+void mlx5_esw_bridge_port_mdb_detach(struct net_device *dev, struct mlx5_esw_bridge_port *port,
+ const unsigned char *addr, u16 vid);
+void mlx5_esw_bridge_port_mdb_vlan_flush(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan);
+void mlx5_esw_bridge_mdb_flush(struct mlx5_esw_bridge *bridge);
+
#endif /* _MLX5_ESW_BRIDGE_PRIVATE_ */
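Checking the group layout sums by hand, which is exactly what the three static_asserts encode:

    ingress: 1 (IGMP) + 3 (MLD) + 4 * 131072 (VLAN, VLAN filter, QinQ, QinQ filter)
             + (524288 - 1 - 3) (untagged MAC) = 1048576 = 2^20
    egress:  131072 (VLAN) + 131072 (QinQ) + (262144 - 1) (MAC) + 1 (miss)
             = 524288 = 2^19
    mcast:   1 (filter) + 4095 (VLAN) + 4095 (QinQ) + 1 (fwd) = 8192 = 2^13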
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
index 51ac24e6ec3c..1808da214094 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
@@ -110,6 +110,41 @@ DEFINE_EVENT(mlx5_esw_bridge_port_template,
TP_ARGS(port)
);
+DECLARE_EVENT_CLASS(mlx5_esw_bridge_mdb_port_change_template,
+ TP_PROTO(const struct net_device *dev,
+ const struct mlx5_esw_bridge_mdb_entry *mdb),
+ TP_ARGS(dev, mdb),
+ TP_STRUCT__entry(
+ __array(char, dev_name, IFNAMSIZ)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __field(int, num_ports)
+ __field(bool, offloaded)),
+ TP_fast_assign(
+ strscpy(__entry->dev_name, netdev_name(dev), IFNAMSIZ);
+ memcpy(__entry->addr, mdb->key.addr, ETH_ALEN);
+ __entry->vid = mdb->key.vid;
+ __entry->num_ports = mdb->num_ports;
+ __entry->offloaded = mdb->egress_handle;),
+ TP_printk("net_device=%s addr=%pM vid=%u num_ports=%d offloaded=%d",
+ __entry->dev_name,
+ __entry->addr,
+ __entry->vid,
+ __entry->num_ports,
+ __entry->offloaded));
+
+DEFINE_EVENT(mlx5_esw_bridge_mdb_port_change_template,
+ mlx5_esw_bridge_port_mdb_attach,
+ TP_PROTO(const struct net_device *dev,
+ const struct mlx5_esw_bridge_mdb_entry *mdb),
+ TP_ARGS(dev, mdb));
+
+DEFINE_EVENT(mlx5_esw_bridge_mdb_port_change_template,
+ mlx5_esw_bridge_port_mdb_detach,
+ TP_PROTO(const struct net_device *dev,
+ const struct mlx5_esw_bridge_mdb_entry *mdb),
+ TP_ARGS(dev, mdb));
+
#endif
/* This part must be outside protection */
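Each DEFINE_EVENT above generates a trace_<name>() helper; an illustrative call site (the real hooks live in the bridge MDB attach/detach paths, not shown in this hunk, and the variable names here are placeholders):

    trace_mlx5_esw_bridge_port_mdb_attach(dev, mdb_entry);

The events can then be enabled at runtime under events/mlx5/ in tracefs.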
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 8bdf28762f41..19fed514fc17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1488,7 +1488,7 @@ int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *
void *hca_caps;
int err;
- if (!mlx5_core_is_ecpf(dev) || mlx5_core_is_management_pf(dev)) {
+ if (!mlx5_core_is_ecpf(dev)) {
*max_sfs = 0;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 19e9a77c4633..e9d68fdf68f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -263,6 +263,7 @@ struct mlx5_esw_offload {
const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
u8 inline_mode;
atomic64_t num_flows;
+ u64 num_block_encap;
enum devlink_eswitch_encap_mode encap;
struct ida vport_metadata_ida;
unsigned int host_number; /* ECPF supports one external host */
@@ -748,6 +749,9 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
struct mlx5_eswitch *slave_esw);
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
+bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev);
+void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);
+
static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
{
if (mlx5_esw_allowed(esw))
@@ -761,6 +765,7 @@ mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw)
{
return esw->fdb_table.offloads.slow_fdb;
}
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -805,6 +810,15 @@ mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
{
return 0;
}
+
+static inline bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
+{
+ return true;
+}
+
+static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
+{
+}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
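The pair declared above is a counted veto on encap-mode changes: devlink refuses to change the mode while num_block_encap is non-zero, and taking the block fails if encap is already enabled outside legacy mode. A minimal sketch of the intended caller, assuming an IPsec offload path (the my_ipsec_* names are illustrative):

    static int my_ipsec_add_sa(struct mlx5_core_dev *dev)
    {
        if (!mlx5_eswitch_block_encap(dev))
            return -EOPNOTSUPP; /* encap active in offloads mode */
        /* ... install SA hardware state ... */
        return 0;
    }

    static void my_ipsec_del_sa(struct mlx5_core_dev *dev)
    {
        /* ... remove SA hardware state ... */
        mlx5_eswitch_unblock_encap(dev); /* drops num_block_encap */
    }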
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 48036dfddd5e..b6e2709c1371 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3586,6 +3586,47 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
return err;
}
+bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ struct mlx5_eswitch *esw;
+
+ devl_lock(devlink);
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw)) {
+ devl_unlock(devlink);
+ /* Failure means no eswitch => not possible to change encap */
+ return true;
+ }
+
+ down_write(&esw->mode_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY &&
+ esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
+ up_write(&esw->mode_lock);
+ devl_unlock(devlink);
+ return false;
+ }
+
+ esw->offloads.num_block_encap++;
+ up_write(&esw->mode_lock);
+ devl_unlock(devlink);
+ return true;
+}
+
+void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ struct mlx5_eswitch *esw;
+
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return;
+
+ down_write(&esw->mode_lock);
+ esw->offloads.num_block_encap--;
+ up_write(&esw->mode_lock);
+}
+
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
enum devlink_eswitch_encap_mode encap,
struct netlink_ext_ack *extack)
@@ -3627,6 +3668,13 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
goto unlock;
}
+ if (esw->offloads.num_block_encap) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't set encapsulation when IPsec SA and/or policies are configured");
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
esw_destroy_offloads_fdb_tables(esw);
esw->offloads.encap = encap;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 8e3da9d4fe1c..19da02c41616 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2997,7 +2997,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
goto out_err;
}
- maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 3);
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 4);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
goto out_err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 4c9a40211059..932fbc843c69 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -39,7 +39,7 @@
#include "clock.h"
enum {
- MLX5_CYCLES_SHIFT = 23
+ MLX5_CYCLES_SHIFT = 31
};
enum {
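For context on the 23 -> 31 bump: per the generic cyclecounter/timecounter scheme, conversion is roughly

    ns   = (cycles * mult) >> shift
    mult ~= (10^9 << shift) / counter_freq_hz

so eight more shift bits make the per-cycle nanosecond factor 256 times finer, at the cost of shrinking the maximum cycle delta that fits in the 64-bit multiplication by the same factor.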
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f95df73d1089..a95d1218def9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -100,15 +100,18 @@ enum {
static struct mlx5_profile profile[] = {
[0] = {
.mask = 0,
+ .num_cmd_caches = MLX5_NUM_COMMAND_CACHES,
},
[1] = {
.mask = MLX5_PROF_MASK_QP_SIZE,
.log_max_qp = 12,
+ .num_cmd_caches = MLX5_NUM_COMMAND_CACHES,
},
[2] = {
.mask = MLX5_PROF_MASK_QP_SIZE |
MLX5_PROF_MASK_MR_CACHE,
.log_max_qp = LOG_MAX_SUPPORTED_QPS,
+ .num_cmd_caches = MLX5_NUM_COMMAND_CACHES,
.mr_cache[0] = {
.size = 500,
.limit = 250
@@ -174,6 +178,11 @@ static struct mlx5_profile profile[] = {
.limit = 4
},
},
+ [3] = {
+ .mask = MLX5_PROF_MASK_QP_SIZE,
+ .log_max_qp = LOG_MAX_SUPPORTED_QPS,
+ .num_cmd_caches = 0,
+ },
};
static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index be0785f83083..5eaab99678ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -142,6 +142,7 @@ enum mlx5_semaphore_space_address {
};
#define MLX5_DEFAULT_PROF 2
+#define MLX5_SF_PROF 3
static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed,
size_t item_size, size_t num_items,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index a7377619ba6f..e2f26d0bc615 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -28,7 +28,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
mdev->priv.adev_idx = adev->id;
sf_dev->mdev = mdev;
- err = mlx5_mdev_init(mdev, MLX5_DEFAULT_PROF);
+ err = mlx5_mdev_init(mdev, MLX5_SF_PROF);
if (err) {
mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err);
goto mdev_err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index ee104cf04392..0eb9a8d7f282 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -819,14 +819,34 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
case DR_ACTION_TYP_TNL_L2_TO_L2:
break;
case DR_ACTION_TYP_TNL_L3_TO_L2:
- attr.decap_index = action->rewrite->index;
- attr.decap_actions = action->rewrite->num_of_actions;
- attr.decap_with_vlan =
- attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS;
+ if (action->rewrite->ptrn && action->rewrite->arg) {
+ attr.decap_index = mlx5dr_arg_get_obj_id(action->rewrite->arg);
+ attr.decap_actions = action->rewrite->ptrn->num_of_actions;
+ attr.decap_pat_idx = action->rewrite->ptrn->index;
+ } else {
+ attr.decap_index = action->rewrite->index;
+ attr.decap_actions = action->rewrite->num_of_actions;
+ attr.decap_with_vlan =
+ attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS;
+ attr.decap_pat_idx = MLX5DR_INVALID_PATTERN_INDEX;
+ }
break;
case DR_ACTION_TYP_MODIFY_HDR:
- attr.modify_index = action->rewrite->index;
- attr.modify_actions = action->rewrite->num_of_actions;
+ if (action->rewrite->single_action_opt) {
+ attr.modify_actions = action->rewrite->num_of_actions;
+ attr.single_modify_action = action->rewrite->data;
+ } else {
+ if (action->rewrite->ptrn && action->rewrite->arg) {
+ attr.modify_index =
+ mlx5dr_arg_get_obj_id(action->rewrite->arg);
+ attr.modify_actions = action->rewrite->ptrn->num_of_actions;
+ attr.modify_pat_idx = action->rewrite->ptrn->index;
+ } else {
+ attr.modify_index = action->rewrite->index;
+ attr.modify_actions = action->rewrite->num_of_actions;
+ attr.modify_pat_idx = MLX5DR_INVALID_PATTERN_INDEX;
+ }
+ }
if (action->rewrite->modify_ttl)
dr_action_modify_ttl_adjust(dmn, &attr, rx_rule,
&recalc_cs_required);
@@ -1365,8 +1385,6 @@ out_err:
return -EINVAL;
}
-#define ACTION_CACHE_LINE_SIZE 64
-
static int
dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
u8 reformat_param_0, u8 reformat_param_1,
@@ -1403,36 +1421,25 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
}
case DR_ACTION_TYP_TNL_L3_TO_L2:
{
- u8 hw_actions[ACTION_CACHE_LINE_SIZE] = {};
+ u8 hw_actions[DR_ACTION_CACHE_LINE_SIZE] = {};
int ret;
ret = mlx5dr_ste_set_action_decap_l3_list(dmn->ste_ctx,
data, data_sz,
hw_actions,
- ACTION_CACHE_LINE_SIZE,
+ DR_ACTION_CACHE_LINE_SIZE,
&action->rewrite->num_of_actions);
if (ret) {
mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n");
return ret;
}
- action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool,
- DR_CHUNK_SIZE_8);
- if (!action->rewrite->chunk) {
- mlx5dr_dbg(dmn, "Failed allocating modify header chunk\n");
- return -ENOMEM;
- }
-
- action->rewrite->data = (void *)hw_actions;
- action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr
- (action->rewrite->chunk) -
- dmn->info.caps.hdr_modify_icm_addr) /
- ACTION_CACHE_LINE_SIZE;
+ action->rewrite->data = hw_actions;
+ action->rewrite->dmn = dmn;
- ret = mlx5dr_send_postsend_action(dmn, action);
+ ret = mlx5dr_ste_alloc_modify_hdr(action);
if (ret) {
- mlx5dr_dbg(dmn, "Writing decap l3 actions to ICM failed\n");
- mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ mlx5dr_dbg(dmn, "Failed preparing reformat data\n");
return ret;
}
return 0;
@@ -1963,7 +1970,6 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn,
__be64 actions[],
struct mlx5dr_action *action)
{
- struct mlx5dr_icm_chunk *chunk;
u32 max_hw_actions;
u32 num_hw_actions;
u32 num_sw_actions;
@@ -1980,15 +1986,9 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn,
return -EINVAL;
}
- chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, DR_CHUNK_SIZE_16);
- if (!chunk)
- return -ENOMEM;
-
hw_actions = kcalloc(1, max_hw_actions * DR_MODIFY_ACTION_SIZE, GFP_KERNEL);
- if (!hw_actions) {
- ret = -ENOMEM;
- goto free_chunk;
- }
+ if (!hw_actions)
+ return -ENOMEM;
ret = dr_actions_convert_modify_header(action,
max_hw_actions,
@@ -2000,24 +2000,24 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn,
if (ret)
goto free_hw_actions;
- action->rewrite->chunk = chunk;
action->rewrite->modify_ttl = modify_ttl;
action->rewrite->data = (u8 *)hw_actions;
action->rewrite->num_of_actions = num_hw_actions;
- action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) -
- dmn->info.caps.hdr_modify_icm_addr) /
- ACTION_CACHE_LINE_SIZE;
- ret = mlx5dr_send_postsend_action(dmn, action);
- if (ret)
- goto free_hw_actions;
+ if (num_hw_actions == 1 &&
+ dmn->info.caps.sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) {
+ action->rewrite->single_action_opt = true;
+ } else {
+ action->rewrite->single_action_opt = false;
+ ret = mlx5dr_ste_alloc_modify_hdr(action);
+ if (ret)
+ goto free_hw_actions;
+ }
return 0;
free_hw_actions:
kfree(hw_actions);
-free_chunk:
- mlx5dr_icm_free_chunk(chunk);
return ret;
}
@@ -2162,7 +2162,8 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
refcount_dec(&action->reformat->dmn->refcount);
break;
case DR_ACTION_TYP_TNL_L3_TO_L2:
- mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ mlx5dr_ste_free_modify_hdr(action);
+ kfree(action->rewrite->data);
refcount_dec(&action->rewrite->dmn->refcount);
break;
case DR_ACTION_TYP_L2_TO_TNL_L2:
@@ -2173,7 +2174,8 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
refcount_dec(&action->reformat->dmn->refcount);
break;
case DR_ACTION_TYP_MODIFY_HDR:
- mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ if (!action->rewrite->single_action_opt)
+ mlx5dr_ste_free_modify_hdr(action);
kfree(action->rewrite->data);
refcount_dec(&action->rewrite->dmn->refcount);
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c
new file mode 100644
index 000000000000..01ed6442095d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "dr_types.h"
+
+#define DR_ICM_MODIFY_HDR_GRANULARITY_4K 12
+
+/* modify-header arg pool */
+enum dr_arg_chunk_size {
+ DR_ARG_CHUNK_SIZE_1,
+ DR_ARG_CHUNK_SIZE_MIN = DR_ARG_CHUNK_SIZE_1, /* keep updated when changing */
+ DR_ARG_CHUNK_SIZE_2,
+ DR_ARG_CHUNK_SIZE_3,
+ DR_ARG_CHUNK_SIZE_4,
+ DR_ARG_CHUNK_SIZE_MAX,
+};
+
+/* argument pool area */
+struct dr_arg_pool {
+ enum dr_arg_chunk_size log_chunk_size;
+ struct mlx5dr_domain *dmn;
+ struct list_head free_list;
+ struct mutex mutex; /* protect arg pool */
+};
+
+struct mlx5dr_arg_mgr {
+ struct mlx5dr_domain *dmn;
+ struct dr_arg_pool *pools[DR_ARG_CHUNK_SIZE_MAX];
+};
+
+static int dr_arg_pool_alloc_objs(struct dr_arg_pool *pool)
+{
+ struct mlx5dr_arg_obj *arg_obj, *tmp_arg;
+ struct list_head cur_list;
+ u16 object_range;
+ int num_of_objects;
+ u32 obj_id = 0;
+ int i, ret;
+
+ INIT_LIST_HEAD(&cur_list);
+
+ object_range =
+ max_t(u32, pool->dmn->info.caps.log_header_modify_argument_granularity,
+ DR_ICM_MODIFY_HDR_GRANULARITY_4K);
+ object_range =
+ min_t(u32, pool->dmn->info.caps.log_header_modify_argument_max_alloc,
+ object_range);
+
+ if (pool->log_chunk_size > object_range) {
+ mlx5dr_err(pool->dmn, "Required chunk size (%d) is not supported\n",
+ pool->log_chunk_size);
+ return -ENOMEM;
+ }
+
+ num_of_objects = (1 << (object_range - pool->log_chunk_size));
+ /* Only one devx object per range */
+ ret = mlx5dr_cmd_create_modify_header_arg(pool->dmn->mdev,
+ object_range,
+ pool->dmn->pdn,
+ &obj_id);
+ if (ret) {
+ mlx5dr_err(pool->dmn, "failed allocating object with range: %d:\n",
+ object_range);
+ return -EAGAIN;
+ }
+
+ for (i = 0; i < num_of_objects; i++) {
+ arg_obj = kzalloc(sizeof(*arg_obj), GFP_KERNEL);
+ if (!arg_obj) {
+ ret = -ENOMEM;
+ goto clean_arg_obj;
+ }
+
+ arg_obj->log_chunk_size = pool->log_chunk_size;
+
+ list_add_tail(&arg_obj->list_node, &cur_list);
+
+ arg_obj->obj_id = obj_id;
+ arg_obj->obj_offset = i * (1 << pool->log_chunk_size);
+ }
+ list_splice_tail_init(&cur_list, &pool->free_list);
+
+ return 0;
+
+clean_arg_obj:
+ mlx5dr_cmd_destroy_modify_header_arg(pool->dmn->mdev, obj_id);
+ list_for_each_entry_safe(arg_obj, tmp_arg, &cur_list, list_node) {
+ list_del(&arg_obj->list_node);
+ kfree(arg_obj);
+ }
+ return ret;
+}
+
+static struct mlx5dr_arg_obj *dr_arg_pool_get_arg_obj(struct dr_arg_pool *pool)
+{
+ struct mlx5dr_arg_obj *arg_obj = NULL;
+ int ret;
+
+ mutex_lock(&pool->mutex);
+ if (list_empty(&pool->free_list)) {
+ ret = dr_arg_pool_alloc_objs(pool);
+ if (ret)
+ goto out;
+ }
+
+ arg_obj = list_first_entry_or_null(&pool->free_list,
+ struct mlx5dr_arg_obj,
+ list_node);
+ WARN(!arg_obj, "couldn't get dr arg obj from pool");
+
+ if (arg_obj)
+ list_del_init(&arg_obj->list_node);
+
+out:
+ mutex_unlock(&pool->mutex);
+ return arg_obj;
+}
+
+static void dr_arg_pool_put_arg_obj(struct dr_arg_pool *pool,
+ struct mlx5dr_arg_obj *arg_obj)
+{
+ mutex_lock(&pool->mutex);
+ list_add(&arg_obj->list_node, &pool->free_list);
+ mutex_unlock(&pool->mutex);
+}
+
+static struct dr_arg_pool *dr_arg_pool_create(struct mlx5dr_domain *dmn,
+ enum dr_arg_chunk_size chunk_size)
+{
+ struct dr_arg_pool *pool;
+
+ pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return NULL;
+
+ pool->dmn = dmn;
+
+ INIT_LIST_HEAD(&pool->free_list);
+ mutex_init(&pool->mutex);
+
+ pool->log_chunk_size = chunk_size;
+ if (dr_arg_pool_alloc_objs(pool))
+ goto free_pool;
+
+ return pool;
+
+free_pool:
+ kfree(pool);
+
+ return NULL;
+}
+
+static void dr_arg_pool_destroy(struct dr_arg_pool *pool)
+{
+ struct mlx5dr_arg_obj *arg_obj, *tmp_arg;
+
+ list_for_each_entry_safe(arg_obj, tmp_arg, &pool->free_list, list_node) {
+ list_del(&arg_obj->list_node);
+ if (!arg_obj->obj_offset) /* the first in range */
+ mlx5dr_cmd_destroy_modify_header_arg(pool->dmn->mdev, arg_obj->obj_id);
+ kfree(arg_obj);
+ }
+
+ mutex_destroy(&pool->mutex);
+ kfree(pool);
+}
+
+static enum dr_arg_chunk_size dr_arg_get_chunk_size(u16 num_of_actions)
+{
+ if (num_of_actions <= 8)
+ return DR_ARG_CHUNK_SIZE_1;
+ if (num_of_actions <= 16)
+ return DR_ARG_CHUNK_SIZE_2;
+ if (num_of_actions <= 32)
+ return DR_ARG_CHUNK_SIZE_3;
+ if (num_of_actions <= 64)
+ return DR_ARG_CHUNK_SIZE_4;
+
+ return DR_ARG_CHUNK_SIZE_MAX;
+}
+
+u32 mlx5dr_arg_get_obj_id(struct mlx5dr_arg_obj *arg_obj)
+{
+ return (arg_obj->obj_id + arg_obj->obj_offset);
+}
+
+struct mlx5dr_arg_obj *mlx5dr_arg_get_obj(struct mlx5dr_arg_mgr *mgr,
+ u16 num_of_actions,
+ u8 *data)
+{
+ u32 size = dr_arg_get_chunk_size(num_of_actions);
+ struct mlx5dr_arg_obj *arg_obj;
+ int ret;
+
+ if (size >= DR_ARG_CHUNK_SIZE_MAX)
+ return NULL;
+
+ arg_obj = dr_arg_pool_get_arg_obj(mgr->pools[size]);
+ if (!arg_obj) {
+ mlx5dr_err(mgr->dmn, "Failed allocating args object for modify header\n");
+ return NULL;
+ }
+
+ /* write it into the hw */
+ ret = mlx5dr_send_postsend_args(mgr->dmn,
+ mlx5dr_arg_get_obj_id(arg_obj),
+ num_of_actions, data);
+ if (ret) {
+ mlx5dr_err(mgr->dmn, "Failed writing args object\n");
+ goto put_obj;
+ }
+
+ return arg_obj;
+
+put_obj:
+ mlx5dr_arg_put_obj(mgr, arg_obj);
+ return NULL;
+}
+
+void mlx5dr_arg_put_obj(struct mlx5dr_arg_mgr *mgr,
+ struct mlx5dr_arg_obj *arg_obj)
+{
+ dr_arg_pool_put_arg_obj(mgr->pools[arg_obj->log_chunk_size], arg_obj);
+}
+
+struct mlx5dr_arg_mgr*
+mlx5dr_arg_mgr_create(struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_arg_mgr *pool_mgr;
+ int i;
+
+ if (!mlx5dr_domain_is_support_ptrn_arg(dmn))
+ return NULL;
+
+ pool_mgr = kzalloc(sizeof(*pool_mgr), GFP_KERNEL);
+ if (!pool_mgr)
+ return NULL;
+
+ pool_mgr->dmn = dmn;
+
+ for (i = 0; i < DR_ARG_CHUNK_SIZE_MAX; i++) {
+ pool_mgr->pools[i] = dr_arg_pool_create(dmn, i);
+ if (!pool_mgr->pools[i])
+ goto clean_pools;
+ }
+
+ return pool_mgr;
+
+clean_pools:
+ for (i--; i >= 0; i--)
+ dr_arg_pool_destroy(pool_mgr->pools[i]);
+
+ kfree(pool_mgr);
+ return NULL;
+}
+
+void mlx5dr_arg_mgr_destroy(struct mlx5dr_arg_mgr *mgr)
+{
+ struct dr_arg_pool **pools;
+ int i;
+
+ if (!mgr)
+ return;
+
+ pools = mgr->pools;
+ for (i = 0; i < DR_ARG_CHUNK_SIZE_MAX; i++)
+ dr_arg_pool_destroy(pools[i]);
+
+ kfree(mgr);
+}
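A worked example of the sizing above (values illustrative, within the caps clamping): 12 modify actions map to DR_ARG_CHUNK_SIZE_2 in dr_arg_get_chunk_size(), since a chunk of log size k covers 8 << k actions (16 here). With the granularity clamped to at least DR_ICM_MODIFY_HDR_GRANULARITY_4K = 12, one devx object then backs

    num_of_objects = 1 << (12 - DR_ARG_CHUNK_SIZE_2) = 1 << 11 = 2048

chunks at obj_offset = 2 * i, and on teardown only the obj_offset == 0 entry destroys the shared devx object.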
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 07b6a6dcb92f..3835ba3f4dda 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -132,6 +132,17 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new);
+ caps->support_modify_argument =
+ MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_HEADER_MODIFY_ARGUMENT;
+
+ if (caps->support_modify_argument) {
+ caps->log_header_modify_argument_granularity =
+ MLX5_CAP_GEN(mdev, log_header_modify_argument_granularity);
+ caps->log_header_modify_argument_max_alloc =
+ MLX5_CAP_GEN(mdev, log_header_modify_argument_max_alloc);
+ }
+
/* geneve_tlv_option_0_exist is the indication of
* STE support for lookup type flex_parser_ok
*/
@@ -200,6 +211,12 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
caps->hdr_modify_icm_addr =
MLX5_CAP64_DEV_MEM(mdev, header_modify_sw_icm_start_address);
+ caps->log_modify_pattern_icm_size =
+ MLX5_CAP_DEV_MEM(mdev, log_header_modify_pattern_sw_icm_size);
+
+ caps->hdr_modify_pattern_icm_addr =
+ MLX5_CAP64_DEV_MEM(mdev, header_modify_pattern_sw_icm_start_address);
+
caps->roce_min_src_udp = MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port);
caps->is_ecpf = mlx5_core_is_ecpf_esw_manager(mdev);
@@ -676,6 +693,49 @@ int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
return 0;
}
+int mlx5dr_cmd_create_modify_header_arg(struct mlx5_core_dev *dev,
+ u16 log_obj_range, u32 pd,
+ u32 *obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_modify_header_arg_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ void *attr;
+ int ret;
+
+ attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, hdr);
+ MLX5_SET(general_obj_in_cmd_hdr, attr, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, attr, obj_type,
+ MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT);
+ MLX5_SET(general_obj_in_cmd_hdr, attr,
+ op_param.create.log_obj_range, log_obj_range);
+
+ attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, arg);
+ MLX5_SET(modify_header_arg, attr, access_pd, pd);
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ return 0;
+}
+
+void mlx5dr_cmd_destroy_modify_header_arg(struct mlx5_core_dev *dev,
+ u32 obj_id)
+{
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev,
struct mlx5dr_cmd_fte_info *fte,
bool *extended_dest)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
index db81d881d38e..1ff8bde90e1e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
@@ -140,10 +140,31 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
action->flow_tag->flow_tag);
break;
case DR_ACTION_TYP_MODIFY_HDR:
- seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ {
+ struct mlx5dr_ptrn_obj *ptrn = action->rewrite->ptrn;
+ struct mlx5dr_arg_obj *arg = action->rewrite->arg;
+ u8 *rewrite_data = action->rewrite->data;
+ bool ptrn_arg;
+ int i;
+
+ ptrn_arg = !action->rewrite->single_action_opt && ptrn && arg;
+
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,%d,0x%x,0x%x,0x%x",
DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id,
- rule_id, action->rewrite->index);
+ rule_id, action->rewrite->index,
+ action->rewrite->single_action_opt,
+ action->rewrite->num_of_actions,
+ ptrn_arg ? ptrn->index : 0,
+ ptrn_arg ? mlx5dr_arg_get_obj_id(arg) : 0);
+
+ for (i = 0; i < action->rewrite->num_of_actions; i++) {
+ seq_printf(file, ",0x%016llx",
+ be64_to_cpu(((__be64 *)rewrite_data)[i]));
+ }
+
+ seq_puts(file, "\n");
break;
+ }
case DR_ACTION_TYP_VPORT:
seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
DR_DUMP_REC_TYPE_ACTION_VPORT, action_id, rule_id,
@@ -157,7 +178,10 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
case DR_ACTION_TYP_TNL_L3_TO_L2:
seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
DR_DUMP_REC_TYPE_ACTION_DECAP_L3, action_id,
- rule_id, action->rewrite->index);
+ rule_id,
+ (action->rewrite->ptrn && action->rewrite->arg) ?
+ mlx5dr_arg_get_obj_id(action->rewrite->arg) :
+ action->rewrite->index);
break;
case DR_ACTION_TYP_L2_TO_TNL_L2:
seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 5b8bb2ca31e6..9a2dfe6ebe31 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -10,6 +10,46 @@
((dmn)->info.caps.dmn_type##_sw_owner_v2 && \
(dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_7))
+bool mlx5dr_domain_is_support_ptrn_arg(struct mlx5dr_domain *dmn)
+{
+ return dmn->info.caps.sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX &&
+ dmn->info.caps.support_modify_argument;
+}
+
+static int dr_domain_init_modify_header_resources(struct mlx5dr_domain *dmn)
+{
+ if (!mlx5dr_domain_is_support_ptrn_arg(dmn))
+ return 0;
+
+ dmn->ptrn_mgr = mlx5dr_ptrn_mgr_create(dmn);
+ if (!dmn->ptrn_mgr) {
+ mlx5dr_err(dmn, "Couldn't create ptrn_mgr\n");
+ return -ENOMEM;
+ }
+
+ /* create argument pool */
+ dmn->arg_mgr = mlx5dr_arg_mgr_create(dmn);
+ if (!dmn->arg_mgr) {
+ mlx5dr_err(dmn, "Couldn't create arg_mgr\n");
+ goto free_modify_header_pattern;
+ }
+
+ return 0;
+
+free_modify_header_pattern:
+ mlx5dr_ptrn_mgr_destroy(dmn->ptrn_mgr);
+ return -ENOMEM;
+}
+
+static void dr_domain_destroy_modify_header_resources(struct mlx5dr_domain *dmn)
+{
+ if (!mlx5dr_domain_is_support_ptrn_arg(dmn))
+ return;
+
+ mlx5dr_arg_mgr_destroy(dmn->arg_mgr);
+ mlx5dr_ptrn_mgr_destroy(dmn->ptrn_mgr);
+}
+
static void dr_domain_init_csum_recalc_fts(struct mlx5dr_domain *dmn)
{
/* Per vport cached FW FT for checksum recalculation, this
@@ -149,14 +189,22 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn)
goto clean_uar;
}
+ ret = dr_domain_init_modify_header_resources(dmn);
+ if (ret) {
+ mlx5dr_err(dmn, "Couldn't create modify-header-resources\n");
+ goto clean_mem_resources;
+ }
+
ret = mlx5dr_send_ring_alloc(dmn);
if (ret) {
mlx5dr_err(dmn, "Couldn't create send-ring\n");
- goto clean_mem_resources;
+ goto clean_modify_hdr;
}
return 0;
+clean_modify_hdr:
+ dr_domain_destroy_modify_header_resources(dmn);
clean_mem_resources:
dr_domain_uninit_mem_resources(dmn);
clean_uar:
@@ -170,6 +218,7 @@ clean_pd:
static void dr_domain_uninit_resources(struct mlx5dr_domain *dmn)
{
mlx5dr_send_ring_free(dmn, dmn->send_ring);
+ dr_domain_destroy_modify_header_resources(dmn);
dr_domain_uninit_mem_resources(dmn);
mlx5_put_uars_page(dmn->mdev, dmn->uar);
mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn);
@@ -215,7 +264,7 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn,
return 0;
}
-static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn)
+static int dr_domain_query_esw_mgr(struct mlx5dr_domain *dmn)
{
return dr_domain_query_vport(dmn, 0, false,
&dmn->info.caps.vports.esw_manager_caps);
@@ -321,7 +370,7 @@ static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev,
* vports (vport 0, VFs and SFs) will be queried dynamically.
*/
- ret = dr_domain_query_esw_mngr(dmn);
+ ret = dr_domain_query_esw_mgr(dmn);
if (ret) {
mlx5dr_err(dmn, "Failed to query eswitch manager vport caps (err: %d)", ret);
goto free_vports_caps_xa;
@@ -435,6 +484,9 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K;
dmn->info.max_log_sw_icm_sz = min_t(u32, DR_CHUNK_SIZE_1024K,
dmn->info.caps.log_icm_size);
+ dmn->info.max_log_modify_hdr_pattern_icm_sz =
+ min_t(u32, DR_CHUNK_SIZE_4K,
+ dmn->info.caps.log_modify_pattern_icm_size);
if (!dmn->info.supp_sw_steering) {
mlx5dr_err(dmn, "SW steering is not supported\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
index 3eb6719bc8eb..04fc170a6c16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
@@ -107,9 +107,9 @@ static struct mlx5dr_icm_mr *
dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
{
struct mlx5_core_dev *mdev = pool->dmn->mdev;
- enum mlx5_sw_icm_type dm_type;
+ enum mlx5_sw_icm_type dm_type = 0;
struct mlx5dr_icm_mr *icm_mr;
- size_t log_align_base;
+ size_t log_align_base = 0;
int err;
icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL);
@@ -121,14 +121,25 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
pool->icm_type);
- if (pool->icm_type == DR_ICM_TYPE_STE) {
+ switch (pool->icm_type) {
+ case DR_ICM_TYPE_STE:
dm_type = MLX5_SW_ICM_TYPE_STEERING;
log_align_base = ilog2(icm_mr->dm.length);
- } else {
+ break;
+ case DR_ICM_TYPE_MODIFY_ACTION:
dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY;
/* Align base is 64B */
log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE);
+ break;
+ case DR_ICM_TYPE_MODIFY_HDR_PTRN:
+ dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN;
+ /* Align base is 64B */
+ log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE);
+ break;
+ default:
+ WARN_ON(pool->icm_type);
}
+
icm_mr->dm.type = dm_type;
err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length,
@@ -493,27 +504,33 @@ struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
enum mlx5dr_icm_type icm_type)
{
u32 num_of_chunks, entry_size, max_hot_size;
- enum mlx5dr_icm_chunk_size max_log_chunk_sz;
struct mlx5dr_icm_pool *pool;
- if (icm_type == DR_ICM_TYPE_STE)
- max_log_chunk_sz = dmn->info.max_log_sw_icm_sz;
- else
- max_log_chunk_sz = dmn->info.max_log_action_icm_sz;
-
pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
if (!pool)
return NULL;
pool->dmn = dmn;
pool->icm_type = icm_type;
- pool->max_log_chunk_sz = max_log_chunk_sz;
pool->chunks_kmem_cache = dmn->chunks_kmem_cache;
INIT_LIST_HEAD(&pool->buddy_mem_list);
-
mutex_init(&pool->mutex);
+ switch (icm_type) {
+ case DR_ICM_TYPE_STE:
+ pool->max_log_chunk_sz = dmn->info.max_log_sw_icm_sz;
+ break;
+ case DR_ICM_TYPE_MODIFY_ACTION:
+ pool->max_log_chunk_sz = dmn->info.max_log_action_icm_sz;
+ break;
+ case DR_ICM_TYPE_MODIFY_HDR_PTRN:
+ pool->max_log_chunk_sz = dmn->info.max_log_modify_hdr_pattern_icm_sz;
+ break;
+ default:
+ WARN_ON(icm_type);
+ }
+
entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type);
max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
new file mode 100644
index 000000000000..13e06a6a6b22
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "dr_types.h"
+#include "mlx5_ifc_dr_ste_v1.h"
+
+enum dr_ptrn_modify_hdr_action_id {
+ DR_PTRN_MODIFY_HDR_ACTION_ID_NOP = 0x00,
+ DR_PTRN_MODIFY_HDR_ACTION_ID_COPY = 0x05,
+ DR_PTRN_MODIFY_HDR_ACTION_ID_SET = 0x06,
+ DR_PTRN_MODIFY_HDR_ACTION_ID_ADD = 0x07,
+ DR_PTRN_MODIFY_HDR_ACTION_ID_INSERT_INLINE = 0x0a,
+};
+
+struct mlx5dr_ptrn_mgr {
+ struct mlx5dr_domain *dmn;
+ struct mlx5dr_icm_pool *ptrn_icm_pool;
+ /* cache for modify_header ptrn */
+ struct list_head ptrn_list;
+ struct mutex modify_hdr_mutex; /* protect the pattern cache */
+};
+
+/* Cache structure and functions */
+static bool dr_ptrn_compare_modify_hdr(size_t cur_num_of_actions,
+ __be64 cur_hw_actions[],
+ size_t num_of_actions,
+ __be64 hw_actions[])
+{
+ int i;
+
+ if (cur_num_of_actions != num_of_actions)
+ return false;
+
+ for (i = 0; i < num_of_actions; i++) {
+ u8 action_id =
+ MLX5_GET(ste_double_action_set_v1, &hw_actions[i], action_id);
+
+ if (action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_COPY) {
+ if (hw_actions[i] != cur_hw_actions[i])
+ return false;
+ } else {
+ if ((__force __be32)hw_actions[i] !=
+ (__force __be32)cur_hw_actions[i])
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static struct mlx5dr_ptrn_obj *
+dr_ptrn_find_cached_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ size_t num_of_actions,
+ __be64 hw_actions[])
+{
+ struct mlx5dr_ptrn_obj *cached_pattern;
+ struct mlx5dr_ptrn_obj *tmp;
+
+ list_for_each_entry_safe(cached_pattern, tmp, &mgr->ptrn_list, list) {
+ if (dr_ptrn_compare_modify_hdr(cached_pattern->num_of_actions,
+ (__be64 *)cached_pattern->data,
+ num_of_actions,
+ hw_actions)) {
+ /* Put this pattern in the head of the list,
+ * as we will probably use it more.
+ */
+ list_del_init(&cached_pattern->list);
+ list_add(&cached_pattern->list, &mgr->ptrn_list);
+ return cached_pattern;
+ }
+ }
+
+ return NULL;
+}
+
+static struct mlx5dr_ptrn_obj *
+dr_ptrn_alloc_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ u16 num_of_actions, u8 *data)
+{
+ struct mlx5dr_ptrn_obj *pattern;
+ struct mlx5dr_icm_chunk *chunk;
+ u32 chunk_size;
+ u32 index;
+
+ chunk_size = ilog2(roundup_pow_of_two(num_of_actions));
+ /* HW modify action index granularity is at least 64B */
+ chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8);
+
+ chunk = mlx5dr_icm_alloc_chunk(mgr->ptrn_icm_pool, chunk_size);
+ if (!chunk)
+ return NULL;
+
+ index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) -
+ mgr->dmn->info.caps.hdr_modify_pattern_icm_addr) /
+ DR_ACTION_CACHE_LINE_SIZE;
+
+ pattern = kzalloc(sizeof(*pattern), GFP_KERNEL);
+ if (!pattern)
+ goto free_chunk;
+
+ pattern->data = kzalloc(num_of_actions * DR_MODIFY_ACTION_SIZE *
+ sizeof(*pattern->data), GFP_KERNEL);
+ if (!pattern->data)
+ goto free_pattern;
+
+ memcpy(pattern->data, data, num_of_actions * DR_MODIFY_ACTION_SIZE);
+ pattern->chunk = chunk;
+ pattern->index = index;
+ pattern->num_of_actions = num_of_actions;
+
+ list_add(&pattern->list, &mgr->ptrn_list);
+ refcount_set(&pattern->refcount, 1);
+
+ return pattern;
+
+free_pattern:
+ kfree(pattern);
+free_chunk:
+ mlx5dr_icm_free_chunk(chunk);
+ return NULL;
+}
+
+static void
+dr_ptrn_free_pattern(struct mlx5dr_ptrn_obj *pattern)
+{
+ list_del(&pattern->list);
+ mlx5dr_icm_free_chunk(pattern->chunk);
+ kfree(pattern->data);
+ kfree(pattern);
+}
+
+struct mlx5dr_ptrn_obj *
+mlx5dr_ptrn_cache_get_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ u16 num_of_actions,
+ u8 *data)
+{
+ struct mlx5dr_ptrn_obj *pattern;
+ u64 *hw_actions;
+ u8 action_id;
+ int i;
+
+ mutex_lock(&mgr->modify_hdr_mutex);
+ pattern = dr_ptrn_find_cached_pattern(mgr,
+ num_of_actions,
+ (__be64 *)data);
+ if (!pattern) {
+ /* Alloc and add new pattern to cache */
+ pattern = dr_ptrn_alloc_pattern(mgr, num_of_actions, data);
+ if (!pattern)
+ goto out_unlock;
+
+ hw_actions = (u64 *)pattern->data;
+ /* Here we mask the pattern data to create a valid pattern
+ * since we do an OR operation between the arg and pattern
+ */
+ for (i = 0; i < num_of_actions; i++) {
+ action_id = MLX5_GET(ste_double_action_set_v1, &hw_actions[i], action_id);
+
+ if (action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_SET ||
+ action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_ADD ||
+ action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_INSERT_INLINE)
+ MLX5_SET(ste_double_action_set_v1, &hw_actions[i], inline_data, 0);
+ }
+
+ if (mlx5dr_send_postsend_pattern(mgr->dmn, pattern->chunk,
+ num_of_actions, pattern->data)) {
+ refcount_dec(&pattern->refcount);
+ goto free_pattern;
+ }
+ } else {
+ refcount_inc(&pattern->refcount);
+ }
+
+ mutex_unlock(&mgr->modify_hdr_mutex);
+
+ return pattern;
+
+free_pattern:
+ dr_ptrn_free_pattern(pattern);
+out_unlock:
+ mutex_unlock(&mgr->modify_hdr_mutex);
+ return NULL;
+}
+
+void
+mlx5dr_ptrn_cache_put_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ struct mlx5dr_ptrn_obj *pattern)
+{
+ mutex_lock(&mgr->modify_hdr_mutex);
+
+ if (refcount_dec_and_test(&pattern->refcount))
+ dr_ptrn_free_pattern(pattern);
+
+ mutex_unlock(&mgr->modify_hdr_mutex);
+}
+
+struct mlx5dr_ptrn_mgr *mlx5dr_ptrn_mgr_create(struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_ptrn_mgr *mgr;
+
+ if (!mlx5dr_domain_is_support_ptrn_arg(dmn))
+ return NULL;
+
+ mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
+ if (!mgr)
+ return NULL;
+
+ mgr->dmn = dmn;
+ mgr->ptrn_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_MODIFY_HDR_PTRN);
+ if (!mgr->ptrn_icm_pool) {
+ mlx5dr_err(dmn, "Couldn't get modify-header-pattern memory\n");
+ goto free_mgr;
+ }
+
+ INIT_LIST_HEAD(&mgr->ptrn_list);
+ return mgr;
+
+free_mgr:
+ kfree(mgr);
+ return NULL;
+}
+
+void mlx5dr_ptrn_mgr_destroy(struct mlx5dr_ptrn_mgr *mgr)
+{
+ struct mlx5dr_ptrn_obj *pattern;
+ struct mlx5dr_ptrn_obj *tmp;
+
+ if (!mgr)
+ return;
+
+ WARN_ON(!list_empty(&mgr->ptrn_list));
+
+ list_for_each_entry_safe(pattern, tmp, &mgr->ptrn_list, list) {
+ list_del(&pattern->list);
+ kfree(pattern->data);
+ kfree(pattern);
+ }
+
+ mlx5dr_icm_pool_destroy(mgr->ptrn_icm_pool);
+ kfree(mgr);
+}
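The point of the masking in mlx5dr_ptrn_cache_get_pattern() above: SET/ADD/INSERT_INLINE actions get their inline_data zeroed before the pattern is written, so action lists that differ only in immediate values collapse onto one cached pattern and differ only in their argument objects. Illustratively, with a generic field F:

    list 1: SET(F, 0xAABBCCDD)  \  one shared pattern (immediates masked to 0),
    list 2: SET(F, 0x11223344)  /  two distinct args carry the actual values

COPY actions carry no immediate, which is why dr_ptrn_compare_modify_hdr() compares them in full but only half of the other action types.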
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index fd2d31cdbcf9..4a5ae86e2b62 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -18,7 +18,13 @@ struct dr_data_seg {
unsigned int send_flags;
};
+enum send_info_type {
+ WRITE_ICM = 0,
+ GTA_ARG = 1,
+};
+
struct postsend_info {
+ enum send_info_type type;
struct dr_data_seg write;
struct dr_data_seg read;
u64 remote_addr;
@@ -261,9 +267,10 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
dr_qp->rq.pc = 0;
dr_qp->rq.cc = 0;
- dr_qp->rq.wqe_cnt = 4;
+ dr_qp->rq.wqe_cnt = 256;
dr_qp->sq.pc = 0;
dr_qp->sq.cc = 0;
+ dr_qp->sq.head = 0;
dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
@@ -362,39 +369,113 @@ static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}
-static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
- u32 rkey, struct dr_data_seg *data_seg,
- u32 opcode, bool notify_hw)
+static void
+dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
+ u32 remote_addr,
+ struct dr_data_seg *data_seg,
+ int *size)
{
- struct mlx5_wqe_raddr_seg *wq_raddr;
- struct mlx5_wqe_ctrl_seg *wq_ctrl;
- struct mlx5_wqe_data_seg *wq_dseg;
- unsigned int size;
- unsigned int idx;
+ struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg;
+ struct mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg;
- size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
- sizeof(*wq_raddr) / 16;
+ wq_ctrl->general_id = cpu_to_be32(remote_addr);
+ wq_flow_seg = (void *)(wq_ctrl + 1);
- idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
+ /* mlx5_wqe_flow_update_ctrl_seg - all reserved */
+ memset(wq_flow_seg, 0, sizeof(*wq_flow_seg));
+ wq_arg_seg = (void *)(wq_flow_seg + 1);
+
+ memcpy(wq_arg_seg->argument_list,
+ (void *)(uintptr_t)data_seg->addr,
+ data_seg->length);
+
+ *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */
+ sizeof(*wq_flow_seg) + /* WQE flow update ctrl seg - reserved */
+ sizeof(*wq_arg_seg)) / /* WQE hdr modify arg seg - data */
+ MLX5_SEND_WQE_DS;
+}
+
+static void
+dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
+ u64 remote_addr,
+ u32 rkey,
+ struct dr_data_seg *data_seg,
+ unsigned int *size)
+{
+ struct mlx5_wqe_raddr_seg *wq_raddr;
+ struct mlx5_wqe_data_seg *wq_dseg;
- wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
- wq_ctrl->imm = 0;
- wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
- MLX5_WQE_CTRL_CQ_UPDATE : 0;
- wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
- opcode);
- wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
wq_raddr = (void *)(wq_ctrl + 1);
+
wq_raddr->raddr = cpu_to_be64(remote_addr);
wq_raddr->rkey = cpu_to_be32(rkey);
wq_raddr->reserved = 0;
wq_dseg = (void *)(wq_raddr + 1);
+
wq_dseg->byte_count = cpu_to_be32(data_seg->length);
wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
wq_dseg->addr = cpu_to_be64(data_seg->addr);
- dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
+ *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */
+ sizeof(*wq_dseg) + /* WQE data segment */
+ sizeof(*wq_raddr)) / /* WQE remote addr segment */
+ MLX5_SEND_WQE_DS;
+}
+
+static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
+ struct dr_data_seg *data_seg)
+{
+ wq_ctrl->signature = 0;
+ wq_ctrl->rsvd[0] = 0;
+ wq_ctrl->rsvd[1] = 0;
+ wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ?
+ MLX5_WQE_CTRL_CQ_UPDATE : 0;
+ wq_ctrl->imm = 0;
+}
+
+static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
+ u32 rkey, struct dr_data_seg *data_seg,
+ u32 opcode, bool notify_hw)
+{
+ struct mlx5_wqe_ctrl_seg *wq_ctrl;
+ int opcode_mod = 0;
+ unsigned int size;
+ unsigned int idx;
+
+ idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
+
+ wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
+ dr_set_ctrl_seg(wq_ctrl, data_seg);
+
+ switch (opcode) {
+ case MLX5_OPCODE_RDMA_READ:
+ case MLX5_OPCODE_RDMA_WRITE:
+ dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr,
+ rkey, data_seg, &size);
+ break;
+ case MLX5_OPCODE_FLOW_TBL_ACCESS:
+ opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT;
+ dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr,
+ data_seg, &size);
+ break;
+ default:
+ WARN(true, "illegal opcode %d", opcode);
+ return;
+ }
+
+ /* --------------------------------------------------------
+ * |opcode_mod (8 bits)|wqe_index (16 bits)|opcode (8 bits)|
+ * --------------------------------------------------------
+ */
+ wq_ctrl->opmod_idx_opcode =
+ cpu_to_be32((opcode_mod << 24) |
+ ((dr_qp->sq.pc & 0xffff) << 8) |
+ opcode);
+ wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
+
+ dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
+ dr_qp->sq.wqe_head[idx] = dr_qp->sq.head++;
if (notify_hw)
dr_cmd_notify_hw(dr_qp, wq_ctrl);
@@ -402,10 +483,16 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
{
- dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
- &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
- dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
- &send_info->read, MLX5_OPCODE_RDMA_READ, true);
+ if (send_info->type == WRITE_ICM) {
+ dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+ &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
+ dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+ &send_info->read, MLX5_OPCODE_RDMA_READ, true);
+ } else { /* GTA_ARG */
+ dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+ &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true);
+ }
}
/**
@@ -471,24 +558,54 @@ static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
} else if (ne == 1) {
send_ring->pending_wqe -= send_ring->signal_th;
}
- } while (is_drain && send_ring->pending_wqe);
+ } while (ne == 1 ||
+ (is_drain && send_ring->pending_wqe >= send_ring->signal_th));
return 0;
}
-static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
- struct postsend_info *send_info)
+static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
+ struct postsend_info *send_info)
{
send_ring->pending_wqe++;
if (send_ring->pending_wqe % send_ring->signal_th == 0)
send_info->write.send_flags |= IB_SEND_SIGNALED;
+ else
+ send_info->write.send_flags = 0;
+}
+
+static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
+ struct mlx5dr_send_ring *send_ring,
+ struct postsend_info *send_info)
+{
+ u32 buff_offset;
+
+ if (send_info->write.length > dmn->info.max_inline_size) {
+ buff_offset = (send_ring->tx_head &
+ (dmn->send_ring->signal_th - 1)) *
+ send_ring->max_post_send_size;
+ /* Copy to ring mr */
+ memcpy(send_ring->buf + buff_offset,
+ (void *)(uintptr_t)send_info->write.addr,
+ send_info->write.length);
+ send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
+ send_info->write.lkey = send_ring->mr->mkey;
+
+ send_ring->tx_head++;
+ }
+
+ send_ring->pending_wqe++;
+
+ if (send_ring->pending_wqe % send_ring->signal_th == 0)
+ send_info->write.send_flags |= IB_SEND_SIGNALED;
send_ring->pending_wqe++;
send_info->read.length = send_info->write.length;
- /* Read into the same write area */
- send_info->read.addr = (uintptr_t)send_info->write.addr;
- send_info->read.lkey = send_ring->mr->mkey;
+
+ /* Read into dedicated sync buffer */
+ send_info->read.addr = (uintptr_t)send_ring->sync_mr->dma_addr;
+ send_info->read.lkey = send_ring->sync_mr->mkey;
if (send_ring->pending_wqe % send_ring->signal_th == 0)
send_info->read.send_flags = IB_SEND_SIGNALED;
@@ -496,11 +613,20 @@ static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
send_info->read.send_flags = 0;
}
+static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
+ struct mlx5dr_send_ring *send_ring,
+ struct postsend_info *send_info)
+{
+ if (send_info->type == WRITE_ICM)
+ dr_fill_write_icm_segs(dmn, send_ring, send_info);
+ else /* args */
+ dr_fill_write_args_segs(send_ring, send_info);
+}
+
static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
struct postsend_info *send_info)
{
struct mlx5dr_send_ring *send_ring = dmn->send_ring;
- u32 buff_offset;
int ret;
if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
@@ -517,20 +643,7 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
if (ret)
goto out_unlock;
- if (send_info->write.length > dmn->info.max_inline_size) {
- buff_offset = (send_ring->tx_head &
- (dmn->send_ring->signal_th - 1)) *
- send_ring->max_post_send_size;
- /* Copy to ring mr */
- memcpy(send_ring->buf + buff_offset,
- (void *)(uintptr_t)send_info->write.addr,
- send_info->write.length);
- send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
- send_info->write.lkey = send_ring->mr->mkey;
- }
-
- send_ring->tx_head++;
- dr_fill_data_segs(send_ring, send_info);
+ dr_fill_data_segs(dmn, send_ring, send_info);
dr_post_send(send_ring->qp, send_info);
out_unlock:
@@ -736,6 +849,59 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
return dr_postsend_icm_data(dmn, &send_info);
}
+int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn,
+ struct mlx5dr_icm_chunk *chunk,
+ u16 num_of_actions,
+ u8 *data)
+{
+ struct postsend_info send_info = {};
+
+ send_info.write.addr = (uintptr_t)data;
+ send_info.write.length = num_of_actions * DR_MODIFY_ACTION_SIZE;
+ send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(chunk);
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(chunk);
+
+ return dr_postsend_icm_data(dmn, &send_info);
+}
+
+int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id,
+ u16 num_of_actions, u8 *actions_data)
+{
+ int data_len, iter = 0, cur_sent;
+ u64 addr;
+ int ret;
+
+ addr = (uintptr_t)actions_data;
+ data_len = num_of_actions * DR_MODIFY_ACTION_SIZE;
+
+ do {
+ struct postsend_info send_info = {};
+
+ send_info.type = GTA_ARG;
+ send_info.write.addr = addr;
+ cur_sent = min_t(u32, data_len, DR_ACTION_CACHE_LINE_SIZE);
+ send_info.write.length = cur_sent;
+ send_info.write.lkey = 0;
+ send_info.remote_addr = arg_id + iter;
+
+ ret = dr_postsend_icm_data(dmn, &send_info);
+ if (ret)
+ goto out;
+
+ iter++;
+ addr += cur_sent;
+ data_len -= cur_sent;
+ } while (data_len > 0);
+
+out:
+ return ret;
+}
+
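A quick walk through the chunking loop above, assuming DR_MODIFY_ACTION_SIZE is 8 bytes (the modify-header action size used elsewhere in this driver): 20 actions yield 160 bytes, split into 64 + 64 + 32 byte posts. Note that remote_addr advances by 1 per iteration, i.e. argument objects are addressed in cache-line units rather than bytes. A standalone sketch of the same loop:

#include <stdio.h>

#define DR_ACTION_CACHE_LINE_SIZE 64
#define DR_MODIFY_ACTION_SIZE 8 /* assumed per-action size in bytes */

int main(void)
{
	int data_len = 20 * DR_MODIFY_ACTION_SIZE; /* 160 bytes */
	int iter = 0, cur_sent;

	do {
		cur_sent = data_len < DR_ACTION_CACHE_LINE_SIZE ?
			   data_len : DR_ACTION_CACHE_LINE_SIZE;
		printf("post %d: %d bytes at arg_id + %d\n",
		       iter, cur_sent, iter);
		iter++;
		data_len -= cur_sent;
	} while (data_len > 0);

	return 0; /* three posts: 64, 64 and 32 bytes */
}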
static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
struct mlx5dr_qp *dr_qp,
int port)
@@ -1123,16 +1289,25 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
goto free_mem;
}
+ dmn->send_ring->sync_buff = kzalloc(dmn->send_ring->max_post_send_size,
+ GFP_KERNEL);
+ if (!dmn->send_ring->sync_buff) {
+ ret = -ENOMEM;
+ goto clean_mr;
+ }
+
dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
dmn->pdn, dmn->send_ring->sync_buff,
- MIN_READ_SYNC);
+ dmn->send_ring->max_post_send_size);
if (!dmn->send_ring->sync_mr) {
ret = -ENOMEM;
- goto clean_mr;
+ goto free_sync_mem;
}
return 0;
+free_sync_mem:
+ kfree(dmn->send_ring->sync_buff);
clean_mr:
dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
free_mem:
@@ -1155,6 +1330,7 @@ void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
dr_dereg_mr(dmn->mdev, send_ring->mr);
kfree(send_ring->buf);
+ kfree(send_ring->sync_buff);
kfree(send_ring);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 1e15f605df6e..9413aaf51251 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -633,6 +633,63 @@ int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx,
used_hw_action_num);
}
+static int
+dr_ste_alloc_modify_hdr_chunk(struct mlx5dr_action *action)
+{
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
+ u32 chunk_size;
+ int ret;
+
+ chunk_size = ilog2(roundup_pow_of_two(action->rewrite->num_of_actions));
+
+ /* HW modify action index granularity is at least 64B */
+ chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8);
+
+ action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool,
+ chunk_size);
+ if (!action->rewrite->chunk)
+ return -ENOMEM;
+
+ action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(action->rewrite->chunk) -
+ dmn->info.caps.hdr_modify_icm_addr) /
+ DR_ACTION_CACHE_LINE_SIZE;
+
+ ret = mlx5dr_send_postsend_action(action->rewrite->dmn, action);
+ if (ret)
+ goto free_chunk;
+
+ return 0;
+
+free_chunk:
+ mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ return ret;
+}
+
+static void dr_ste_free_modify_hdr_chunk(struct mlx5dr_action *action)
+{
+ mlx5dr_icm_free_chunk(action->rewrite->chunk);
+}
+
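The sizing math above rounds the action count up to a power of two, takes its log2 as the chunk order, and floors it at DR_CHUNK_SIZE_8 so every allocation covers at least one 64B-granular HW index step. Assuming DR_CHUNK_SIZE_8 denotes an order-3 (eight-entry) chunk, a rewrite with only 2 actions still gets an 8-entry chunk; a worked sketch:

#include <stdio.h>

#define DR_CHUNK_SIZE_8 3 /* assumed: order 3 == 8 entries */

static unsigned int order_for(unsigned int num_of_actions)
{
	unsigned int pow2 = 1, order = 0;

	while (pow2 < num_of_actions) { /* roundup_pow_of_two() */
		pow2 <<= 1;
		order++;                /* ilog2() of the result */
	}
	return order < DR_CHUNK_SIZE_8 ? DR_CHUNK_SIZE_8 : order;
}

int main(void)
{
	printf("2 actions  -> order %u\n", order_for(2));  /* 3 */
	printf("5 actions  -> order %u\n", order_for(5));  /* 3 */
	printf("20 actions -> order %u\n", order_for(20)); /* 5 */
	return 0;
}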
+int mlx5dr_ste_alloc_modify_hdr(struct mlx5dr_action *action)
+{
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
+
+ if (mlx5dr_domain_is_support_ptrn_arg(dmn))
+ return dmn->ste_ctx->alloc_modify_hdr_chunk(action);
+
+ return dr_ste_alloc_modify_hdr_chunk(action);
+}
+
+void mlx5dr_ste_free_modify_hdr(struct mlx5dr_action *action)
+{
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
+
+ if (mlx5dr_domain_is_support_ptrn_arg(dmn))
+ return dmn->ste_ctx->dealloc_modify_hdr_chunk(action);
+
+ return dr_ste_free_modify_hdr_chunk(action);
+}
+
static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn,
struct mlx5dr_match_spec *spec)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
index 7075142bcfb6..54a6619c3ecb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -195,6 +195,8 @@ struct mlx5dr_ste_ctx {
u8 *hw_action,
u32 hw_action_sz,
u16 *used_hw_action_num);
+ int (*alloc_modify_hdr_chunk)(struct mlx5dr_action *action);
+ void (*dealloc_modify_hdr_chunk)(struct mlx5dr_action *action);
/* Send */
void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index 084145f18084..4c0704ad166b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -495,21 +495,66 @@ static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action)
dr_ste_v1_set_reparse(hw_ste_p);
}
-static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p,
- u8 *s_action,
- u16 num_of_actions,
- u32 re_write_index)
+static void dr_ste_v1_set_accelerated_rewrite_actions(u8 *hw_ste_p,
+ u8 *d_action,
+ u16 num_of_actions,
+ u32 rewrite_pattern,
+ u32 rewrite_args,
+ u8 *action_data)
+{
+ if (action_data) {
+ memcpy(d_action, action_data, DR_MODIFY_ACTION_SIZE);
+ } else {
+ MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action,
+ action_id, DR_STE_V1_ACTION_ID_ACCELERATED_LIST);
+ MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action,
+ modify_actions_pattern_pointer, rewrite_pattern);
+ MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action,
+ number_of_modify_actions, num_of_actions);
+ MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action,
+ modify_actions_argument_pointer, rewrite_args);
+ }
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_basic_rewrite_actions(u8 *hw_ste_p,
+ u8 *s_action,
+ u16 num_of_actions,
+ u32 rewrite_index)
{
MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id,
DR_STE_V1_ACTION_ID_MODIFY_LIST);
MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions,
num_of_actions);
MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr,
- re_write_index);
+ rewrite_index);
dr_ste_v1_set_reparse(hw_ste_p);
}
+static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p,
+ u8 *action,
+ u16 num_of_actions,
+ u32 rewrite_pattern,
+ u32 rewrite_args,
+ u8 *action_data)
+{
+ if (rewrite_pattern != MLX5DR_INVALID_PATTERN_INDEX)
+ return dr_ste_v1_set_accelerated_rewrite_actions(hw_ste_p,
+ action,
+ num_of_actions,
+ rewrite_pattern,
+ rewrite_args,
+ action_data);
+
+ /* fall back to the code that doesn't support accelerated modify header */
+ return dr_ste_v1_set_basic_rewrite_actions(hw_ste_p,
+ action,
+ num_of_actions,
+ rewrite_args);
+}
+
static void dr_ste_v1_set_aso_flow_meter(u8 *d_action,
u32 object_id,
u32 offset,
@@ -604,9 +649,6 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
allow_modify_hdr = false;
}
- if (action_type_set[DR_ACTION_TYP_CTR])
- dr_ste_v1_set_counter_id(last_ste, attr->ctr_id);
-
if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
dr_ste_v1_arr_init_next_match(&last_ste, added_stes,
@@ -617,7 +659,9 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
}
dr_ste_v1_set_rewrite_actions(last_ste, action,
attr->modify_actions,
- attr->modify_index);
+ attr->modify_pat_idx,
+ attr->modify_index,
+ attr->single_modify_action);
action_sz -= DR_STE_ACTION_DOUBLE_SZ;
action += DR_STE_ACTION_DOUBLE_SZ;
allow_encap = false;
@@ -724,6 +768,10 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
attr->range.max);
}
+ /* set counter ID on the last STE to adhere to DMFS behavior */
+ if (action_type_set[DR_ACTION_TYP_CTR])
+ dr_ste_v1_set_counter_id(last_ste, attr->ctr_id);
+
dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1);
}
@@ -743,7 +791,9 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
dr_ste_v1_set_rewrite_actions(last_ste, action,
attr->decap_actions,
- attr->decap_index);
+ attr->decap_pat_idx,
+ attr->decap_index,
+ NULL);
action_sz -= DR_STE_ACTION_DOUBLE_SZ;
action += DR_STE_ACTION_DOUBLE_SZ;
allow_modify_hdr = false;
@@ -798,7 +848,9 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
}
dr_ste_v1_set_rewrite_actions(last_ste, action,
attr->modify_actions,
- attr->modify_index);
+ attr->modify_pat_idx,
+ attr->modify_index,
+ attr->single_modify_action);
action_sz -= DR_STE_ACTION_DOUBLE_SZ;
action += DR_STE_ACTION_DOUBLE_SZ;
}
@@ -2175,6 +2227,49 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag;
}
+int dr_ste_v1_alloc_modify_hdr_ptrn_arg(struct mlx5dr_action *action)
+{
+ struct mlx5dr_ptrn_mgr *ptrn_mgr;
+ int ret;
+
+ ptrn_mgr = action->rewrite->dmn->ptrn_mgr;
+ if (!ptrn_mgr)
+ return -EOPNOTSUPP;
+
+ action->rewrite->arg = mlx5dr_arg_get_obj(action->rewrite->dmn->arg_mgr,
+ action->rewrite->num_of_actions,
+ action->rewrite->data);
+ if (!action->rewrite->arg) {
+ mlx5dr_err(action->rewrite->dmn, "Failed allocating args for modify header\n");
+ return -EAGAIN;
+ }
+
+ action->rewrite->ptrn =
+ mlx5dr_ptrn_cache_get_pattern(ptrn_mgr,
+ action->rewrite->num_of_actions,
+ action->rewrite->data);
+ if (!action->rewrite->ptrn) {
+ mlx5dr_err(action->rewrite->dmn, "Failed to get pattern\n");
+ ret = -EAGAIN;
+ goto put_arg;
+ }
+
+ return 0;
+
+put_arg:
+ mlx5dr_arg_put_obj(action->rewrite->dmn->arg_mgr,
+ action->rewrite->arg);
+ return ret;
+}
+
+void dr_ste_v1_free_modify_hdr_ptrn_arg(struct mlx5dr_action *action)
+{
+ mlx5dr_ptrn_cache_put_pattern(action->rewrite->dmn->ptrn_mgr,
+ action->rewrite->ptrn);
+ mlx5dr_arg_put_obj(action->rewrite->dmn->arg_mgr,
+ action->rewrite->arg);
+}
+
static struct mlx5dr_ste_ctx ste_ctx_v1 = {
/* Builders */
.build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init,
@@ -2231,6 +2326,9 @@ static struct mlx5dr_ste_ctx ste_ctx_v1 = {
.set_action_add = &dr_ste_v1_set_action_add,
.set_action_copy = &dr_ste_v1_set_action_copy,
.set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list,
+ .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg,
+ .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg,
+
/* Send */
.prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
index b5c0f0f8392f..e2fc69867088 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
@@ -31,6 +31,8 @@ void dr_ste_v1_set_action_copy(u8 *d_action, u8 dst_hw_field, u8 dst_shifter,
u8 dst_len, u8 src_hw_field, u8 src_shifter);
int dr_ste_v1_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action,
u32 hw_action_sz, u16 *used_hw_action_num);
+int dr_ste_v1_alloc_modify_hdr_ptrn_arg(struct mlx5dr_action *action);
+void dr_ste_v1_free_modify_hdr_ptrn_arg(struct mlx5dr_action *action);
void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask);
void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
index cf1a3c9a1cf4..808b013cf48c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
@@ -221,6 +221,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v2 = {
.set_action_add = &dr_ste_v1_set_action_add,
.set_action_copy = &dr_ste_v1_set_action_copy,
.set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list,
+ .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg,
+ .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg,
/* Send */
.prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 2b769dcbd453..37b7b1a79f93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -21,11 +21,16 @@
#define DR_NUM_OF_FLEX_PARSERS 8
#define DR_STE_MAX_FLEX_0_ID 3
#define DR_STE_MAX_FLEX_1_ID 7
+#define DR_ACTION_CACHE_LINE_SIZE 64
#define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg)
#define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg)
#define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg)
+struct mlx5dr_ptrn_mgr;
+struct mlx5dr_arg_mgr;
+struct mlx5dr_arg_obj;
+
static inline bool dr_is_flex_parser_0_id(u8 parser_id)
{
return parser_id <= DR_STE_MAX_FLEX_0_ID;
@@ -66,6 +71,7 @@ enum mlx5dr_icm_chunk_size {
enum mlx5dr_icm_type {
DR_ICM_TYPE_STE,
DR_ICM_TYPE_MODIFY_ACTION,
+ DR_ICM_TYPE_MODIFY_HDR_PTRN,
};
static inline enum mlx5dr_icm_chunk_size
@@ -255,11 +261,15 @@ u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste);
struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste);
#define MLX5DR_MAX_VLANS 2
+#define MLX5DR_INVALID_PATTERN_INDEX 0xffffffff
struct mlx5dr_ste_actions_attr {
u32 modify_index;
+ u32 modify_pat_idx;
u16 modify_actions;
+ u8 *single_modify_action;
u32 decap_index;
+ u32 decap_pat_idx;
u16 decap_actions;
u8 decap_with_vlan:1;
u64 final_icm_addr;
@@ -331,6 +341,8 @@ int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx,
u8 *hw_action,
u32 hw_action_sz,
u16 *used_hw_action_num);
+int mlx5dr_ste_alloc_modify_hdr(struct mlx5dr_action *action);
+void mlx5dr_ste_free_modify_hdr(struct mlx5dr_action *action);
const struct mlx5dr_ste_action_modify_field *
mlx5dr_ste_conv_modify_hdr_sw_field(struct mlx5dr_ste_ctx *ste_ctx, u16 sw_field);
@@ -861,6 +873,8 @@ struct mlx5dr_cmd_caps {
u64 esw_tx_drop_address;
u32 log_icm_size;
u64 hdr_modify_icm_addr;
+ u32 log_modify_pattern_icm_size;
+ u64 hdr_modify_pattern_icm_addr;
u32 flex_protocols;
u8 flex_parser_id_icmp_dw0;
u8 flex_parser_id_icmp_dw1;
@@ -888,6 +902,9 @@ struct mlx5dr_cmd_caps {
struct mlx5dr_vports vports;
bool prio_tag_required;
struct mlx5dr_roce_cap roce_caps;
+ u16 log_header_modify_argument_granularity;
+ u16 log_header_modify_argument_max_alloc;
+ bool support_modify_argument;
u8 is_ecpf:1;
u8 isolate_vl_tc:1;
};
@@ -910,6 +927,7 @@ struct mlx5dr_domain_info {
u32 max_send_wr;
u32 max_log_sw_icm_sz;
u32 max_log_action_icm_sz;
+ u32 max_log_modify_hdr_pattern_icm_sz;
struct mlx5dr_domain_rx_tx rx;
struct mlx5dr_domain_rx_tx tx;
struct mlx5dr_cmd_caps caps;
@@ -928,6 +946,8 @@ struct mlx5dr_domain {
struct mlx5dr_send_info_pool *send_info_pool_tx;
struct kmem_cache *chunks_kmem_cache;
struct kmem_cache *htbls_kmem_cache;
+ struct mlx5dr_ptrn_mgr *ptrn_mgr;
+ struct mlx5dr_arg_mgr *arg_mgr;
struct mlx5dr_send_ring *send_ring;
struct mlx5dr_domain_info info;
struct xarray csum_fts_xa;
@@ -994,15 +1014,34 @@ struct mlx5dr_ste_action_modify_field {
u8 l4_type;
};
+struct mlx5dr_ptrn_obj {
+ struct mlx5dr_icm_chunk *chunk;
+ u8 *data;
+ u16 num_of_actions;
+ u32 index;
+ refcount_t refcount;
+ struct list_head list;
+};
+
+struct mlx5dr_arg_obj {
+ u32 obj_id;
+ u32 obj_offset;
+ struct list_head list_node;
+ u32 log_chunk_size;
+};
+
struct mlx5dr_action_rewrite {
struct mlx5dr_domain *dmn;
struct mlx5dr_icm_chunk *chunk;
u8 *data;
u16 num_of_actions;
u32 index;
+ u8 single_action_opt:1;
u8 allow_rx:1;
u8 allow_tx:1;
u8 modify_ttl:1;
+ struct mlx5dr_ptrn_obj *ptrn;
+ struct mlx5dr_arg_obj *arg;
};
struct mlx5dr_action_reformat {
@@ -1334,6 +1373,12 @@ struct mlx5dr_cmd_gid_attr {
int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
u16 index, struct mlx5dr_cmd_gid_attr *attr);
+int mlx5dr_cmd_create_modify_header_arg(struct mlx5_core_dev *dev,
+ u16 log_obj_range, u32 pd,
+ u32 *obj_id);
+void mlx5dr_cmd_destroy_modify_header_arg(struct mlx5_core_dev *dev,
+ u32 obj_id);
+
struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
enum mlx5dr_icm_type icm_type);
void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool);
@@ -1368,6 +1413,7 @@ struct mlx5dr_qp {
struct mlx5_wq_ctrl wq_ctrl;
u32 qpn;
struct {
+ unsigned int head;
unsigned int pc;
unsigned int cc;
unsigned int size;
@@ -1399,9 +1445,6 @@ struct mlx5dr_mr {
size_t size;
};
-#define MAX_SEND_CQE 64
-#define MIN_READ_SYNC 64
-
struct mlx5dr_send_ring {
struct mlx5dr_cq *cq;
struct mlx5dr_qp *qp;
@@ -1416,7 +1459,7 @@ struct mlx5dr_send_ring {
u32 tx_head;
void *buf;
u32 buf_size;
- u8 sync_buff[MIN_READ_SYNC];
+ u8 *sync_buff;
struct mlx5dr_mr *sync_mr;
spinlock_t lock; /* Protect the data path of the send ring */
bool err_state; /* send_ring is not usable in err state */
@@ -1440,6 +1483,12 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
bool update_hw_ste);
int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
struct mlx5dr_action *action);
+int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn,
+ struct mlx5dr_icm_chunk *chunk,
+ u16 num_of_actions,
+ u8 *data);
+int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id,
+ u16 num_of_actions, u8 *actions_data);
int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn);
void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn);
@@ -1526,4 +1575,20 @@ static inline bool mlx5dr_supp_match_ranges(struct mlx5_core_dev *dev)
(1ULL << MLX5_IFC_DEFINER_FORMAT_ID_SELECT));
}
+bool mlx5dr_domain_is_support_ptrn_arg(struct mlx5dr_domain *dmn);
+struct mlx5dr_ptrn_mgr *mlx5dr_ptrn_mgr_create(struct mlx5dr_domain *dmn);
+void mlx5dr_ptrn_mgr_destroy(struct mlx5dr_ptrn_mgr *mgr);
+struct mlx5dr_ptrn_obj *mlx5dr_ptrn_cache_get_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ u16 num_of_actions, u8 *data);
+void mlx5dr_ptrn_cache_put_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ struct mlx5dr_ptrn_obj *pattern);
+struct mlx5dr_arg_mgr *mlx5dr_arg_mgr_create(struct mlx5dr_domain *dmn);
+void mlx5dr_arg_mgr_destroy(struct mlx5dr_arg_mgr *mgr);
+struct mlx5dr_arg_obj *mlx5dr_arg_get_obj(struct mlx5dr_arg_mgr *mgr,
+ u16 num_of_actions,
+ u8 *data);
+void mlx5dr_arg_put_obj(struct mlx5dr_arg_mgr *mgr,
+ struct mlx5dr_arg_obj *arg_obj);
+u32 mlx5dr_arg_get_obj_id(struct mlx5dr_arg_obj *arg_obj);
+
#endif /* _DR_TYPES_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
index 790a17d6207f..ca3b0f1453a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
@@ -100,7 +100,7 @@ struct mlx5_ifc_ste_double_action_insert_with_ptr_v1_bits {
u8 pointer[0x20];
};
-struct mlx5_ifc_ste_double_action_modify_action_list_v1_bits {
+struct mlx5_ifc_ste_double_action_accelerated_modify_action_list_v1_bits {
u8 action_id[0x8];
u8 modify_actions_pattern_pointer[0x18];
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
index 017d68f1e123..972c571b4158 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
@@ -31,6 +31,8 @@ mlxfw_mfa2_tlv_next(const struct mlxfw_mfa2_file *mfa2_file,
if (tlv->type == MLXFW_MFA2_TLV_MULTI_PART) {
multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, tlv);
+ if (!multi)
+ return NULL;
tlv_len = NLA_ALIGN(tlv_len + be16_to_cpu(multi->total_len));
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index 48dbfea0a2a1..7cdf0ce24f28 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -26,7 +26,7 @@
#define MLXSW_PCI_CIR_TIMEOUT_MSECS 1000
#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000
-#define MLXSW_PCI_SW_RESET_WAIT_MSECS 200
+#define MLXSW_PCI_SW_RESET_WAIT_MSECS 400
#define MLXSW_PCI_FW_READY 0xA1844
#define MLXSW_PCI_FW_READY_MASK 0xFFFF
#define MLXSW_PCI_FW_READY_MAGIC 0x5E
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index f78e8ead8c36..c5aeeb964c17 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -1476,15 +1476,6 @@ static void hw_turn_on_intr(struct ksz_hw *hw, u32 bit)
hw_set_intr(hw, hw->intr_mask);
}
-static inline void hw_ena_intr_bit(struct ksz_hw *hw, uint interrupt)
-{
- u32 read_intr;
-
- read_intr = readl(hw->io + KS884X_INTERRUPTS_ENABLE);
- hw->intr_set = read_intr | interrupt;
- writel(hw->intr_set, hw->io + KS884X_INTERRUPTS_ENABLE);
-}
-
static inline void hw_read_intr(struct ksz_hw *hw, uint *status)
{
*status = readl(hw->io + KS884X_INTERRUPTS_STATUS);
@@ -1854,29 +1845,6 @@ static void port_init_cnt(struct ksz_hw *hw, int port)
*/
/**
- * port_chk - check port register bits
- * @hw: The hardware instance.
- * @port: The port index.
- * @offset: The offset of the port register.
- * @bits: The data bits to check.
- *
- * This function checks whether the specified bits of the port register are set
- * or not.
- *
- * Return 0 if the bits are not set.
- */
-static int port_chk(struct ksz_hw *hw, int port, int offset, u16 bits)
-{
- u32 addr;
- u16 data;
-
- PORT_CTRL_ADDR(port, addr);
- addr += offset;
- data = readw(hw->io + addr);
- return (data & bits) == bits;
-}
-
-/**
* port_cfg - set port register bits
* @hw: The hardware instance.
* @port: The port index.
@@ -1903,53 +1871,6 @@ static void port_cfg(struct ksz_hw *hw, int port, int offset, u16 bits,
}
/**
- * port_chk_shift - check port bit
- * @hw: The hardware instance.
- * @port: The port index.
- * @addr: The offset of the register.
- * @shift: Number of bits to shift.
- *
- * This function checks whether the specified port is set in the register or
- * not.
- *
- * Return 0 if the port is not set.
- */
-static int port_chk_shift(struct ksz_hw *hw, int port, u32 addr, int shift)
-{
- u16 data;
- u16 bit = 1 << port;
-
- data = readw(hw->io + addr);
- data >>= shift;
- return (data & bit) == bit;
-}
-
-/**
- * port_cfg_shift - set port bit
- * @hw: The hardware instance.
- * @port: The port index.
- * @addr: The offset of the register.
- * @shift: Number of bits to shift.
- * @set: The flag indicating whether the port is to be set or not.
- *
- * This routine sets or resets the specified port in the register.
- */
-static void port_cfg_shift(struct ksz_hw *hw, int port, u32 addr, int shift,
- int set)
-{
- u16 data;
- u16 bits = 1 << port;
-
- data = readw(hw->io + addr);
- bits <<= shift;
- if (set)
- data |= bits;
- else
- data &= ~bits;
- writew(data, hw->io + addr);
-}
-
-/**
* port_r8 - read byte from port register
* @hw: The hardware instance.
* @port: The port index.
@@ -2051,12 +1972,6 @@ static inline void port_cfg_broad_storm(struct ksz_hw *hw, int p, int set)
KS8842_PORT_CTRL_1_OFFSET, PORT_BROADCAST_STORM, set);
}
-static inline int port_chk_broad_storm(struct ksz_hw *hw, int p)
-{
- return port_chk(hw, p,
- KS8842_PORT_CTRL_1_OFFSET, PORT_BROADCAST_STORM);
-}
-
/* Driver set switch broadcast storm protection at 10% rate. */
#define BROADCAST_STORM_PROTECTION_RATE 10
@@ -2209,102 +2124,6 @@ static inline void port_cfg_back_pressure(struct ksz_hw *hw, int p, int set)
KS8842_PORT_CTRL_2_OFFSET, PORT_BACK_PRESSURE, set);
}
-static inline void port_cfg_force_flow_ctrl(struct ksz_hw *hw, int p, int set)
-{
- port_cfg(hw, p,
- KS8842_PORT_CTRL_2_OFFSET, PORT_FORCE_FLOW_CTRL, set);
-}
-
-static inline int port_chk_back_pressure(struct ksz_hw *hw, int p)
-{
- return port_chk(hw, p,
- KS8842_PORT_CTRL_2_OFFSET, PORT_BACK_PRESSURE);
-}
-
-static inline int port_chk_force_flow_ctrl(struct ksz_hw *hw, int p)
-{
- return port_chk(hw, p,
- KS8842_PORT_CTRL_2_OFFSET, PORT_FORCE_FLOW_CTRL);
-}
-
-/* Spanning Tree */
-
-static inline void port_cfg_rx(struct ksz_hw *hw, int p, int set)
-{
- port_cfg(hw, p,
- KS8842_PORT_CTRL_2_OFFSET, PORT_RX_ENABLE, set);
-}
-
-static inline void port_cfg_tx(struct ksz_hw *hw, int p, int set)
-{
- port_cfg(hw, p,
- KS8842_PORT_CTRL_2_OFFSET, PORT_TX_ENABLE, set);
-}
-
-static inline void sw_cfg_fast_aging(struct ksz_hw *hw, int set)
-{
- sw_cfg(hw, KS8842_SWITCH_CTRL_1_OFFSET, SWITCH_FAST_AGING, set);
-}
-
-static inline void sw_flush_dyn_mac_table(struct ksz_hw *hw)
-{
- if (!(hw->overrides & FAST_AGING)) {
- sw_cfg_fast_aging(hw, 1);
- mdelay(1);
- sw_cfg_fast_aging(hw, 0);
- }
-}
-
-/* VLAN */
-
-static inline void port_cfg_ins_tag(struct ksz_hw *hw, int p, int insert)
-{
- port_cfg(hw, p,
- KS8842_PORT_CTRL_1_OFFSET, PORT_INSERT_TAG, insert);
-}
-
-static inline void port_cfg_rmv_tag(struct ksz_hw *hw, int p, int remove)
-{
- port_cfg(hw, p,
- KS8842_PORT_CTRL_1_OFFSET, PORT_REMOVE_TAG, remove);
-}
-
-static inline int port_chk_ins_tag(struct ksz_hw *hw, int p)
-{
- return port_chk(hw, p,
- KS8842_PORT_CTRL_1_OFFSET, PORT_INSERT_TAG);
-}
-
-static inline int port_chk_rmv_tag(struct ksz_hw *hw, int p)
-{
- return port_chk(hw, p,
- KS8842_PORT_CTRL_1_OFFSET, PORT_REMOVE_TAG);
-}
-
-static inline void port_cfg_dis_non_vid(struct ksz_hw *hw, int p, int set)
-{
- port_cfg(hw, p,
- KS8842_PORT_CTRL_2_OFFSET, PORT_DISCARD_NON_VID, set);
-}
-
-static inline void port_cfg_in_filter(struct ksz_hw *hw, int p, int set)
-{
- port_cfg(hw, p,
- KS8842_PORT_CTRL_2_OFFSET, PORT_INGRESS_VLAN_FILTER, set);
-}
-
-static inline int port_chk_dis_non_vid(struct ksz_hw *hw, int p)
-{
- return port_chk(hw, p,
- KS8842_PORT_CTRL_2_OFFSET, PORT_DISCARD_NON_VID);
-}
-
-static inline int port_chk_in_filter(struct ksz_hw *hw, int p)
-{
- return port_chk(hw, p,
- KS8842_PORT_CTRL_2_OFFSET, PORT_INGRESS_VLAN_FILTER);
-}
-
/* Mirroring */
static inline void port_cfg_mirror_sniffer(struct ksz_hw *hw, int p, int set)
@@ -2342,28 +2161,6 @@ static void sw_init_mirror(struct ksz_hw *hw)
sw_cfg_mirror_rx_tx(hw, 0);
}
-static inline void sw_cfg_unk_def_deliver(struct ksz_hw *hw, int set)
-{
- sw_cfg(hw, KS8842_SWITCH_CTRL_7_OFFSET,
- SWITCH_UNK_DEF_PORT_ENABLE, set);
-}
-
-static inline int sw_cfg_chk_unk_def_deliver(struct ksz_hw *hw)
-{
- return sw_chk(hw, KS8842_SWITCH_CTRL_7_OFFSET,
- SWITCH_UNK_DEF_PORT_ENABLE);
-}
-
-static inline void sw_cfg_unk_def_port(struct ksz_hw *hw, int port, int set)
-{
- port_cfg_shift(hw, port, KS8842_SWITCH_CTRL_7_OFFSET, 0, set);
-}
-
-static inline int sw_chk_unk_def_port(struct ksz_hw *hw, int port)
-{
- return port_chk_shift(hw, port, KS8842_SWITCH_CTRL_7_OFFSET, 0);
-}
-
/* Priority */
static inline void port_cfg_diffserv(struct ksz_hw *hw, int p, int set)
@@ -2390,30 +2187,6 @@ static inline void port_cfg_prio(struct ksz_hw *hw, int p, int set)
KS8842_PORT_CTRL_1_OFFSET, PORT_PRIO_QUEUE_ENABLE, set);
}
-static inline int port_chk_diffserv(struct ksz_hw *hw, int p)
-{
- return port_chk(hw, p,
- KS8842_PORT_CTRL_1_OFFSET, PORT_DIFFSERV_ENABLE);
-}
-
-static inline int port_chk_802_1p(struct ksz_hw *hw, int p)
-{
- return port_chk(hw, p,
- KS8842_PORT_CTRL_1_OFFSET, PORT_802_1P_ENABLE);
-}
-
-static inline int port_chk_replace_vid(struct ksz_hw *hw, int p)
-{
- return port_chk(hw, p,
- KS8842_PORT_CTRL_2_OFFSET, PORT_USER_PRIORITY_CEILING);
-}
-
-static inline int port_chk_prio(struct ksz_hw *hw, int p)
-{
- return port_chk(hw, p,
- KS8842_PORT_CTRL_1_OFFSET, PORT_PRIO_QUEUE_ENABLE);
-}
-
/**
* sw_dis_diffserv - disable switch DiffServ priority
* @hw: The hardware instance.
@@ -2614,23 +2387,6 @@ static void sw_cfg_port_base_vlan(struct ksz_hw *hw, int port, u8 member)
}
/**
- * sw_get_addr - get the switch MAC address.
- * @hw: The hardware instance.
- * @mac_addr: Buffer to store the MAC address.
- *
- * This function retrieves the MAC address of the switch.
- */
-static inline void sw_get_addr(struct ksz_hw *hw, u8 *mac_addr)
-{
- int i;
-
- for (i = 0; i < 6; i += 2) {
- mac_addr[i] = readb(hw->io + KS8842_MAC_ADDR_0_OFFSET + i);
- mac_addr[1 + i] = readb(hw->io + KS8842_MAC_ADDR_1_OFFSET + i);
- }
-}
-
-/**
* sw_set_addr - configure switch MAC address
* @hw: The hardware instance.
* @mac_addr: The MAC address.
@@ -2828,56 +2584,6 @@ static inline void hw_w_phy_ctrl(struct ksz_hw *hw, int phy, u16 data)
writew(data, hw->io + phy + KS884X_PHY_CTRL_OFFSET);
}
-static inline void hw_r_phy_link_stat(struct ksz_hw *hw, int phy, u16 *data)
-{
- *data = readw(hw->io + phy + KS884X_PHY_STATUS_OFFSET);
-}
-
-static inline void hw_r_phy_auto_neg(struct ksz_hw *hw, int phy, u16 *data)
-{
- *data = readw(hw->io + phy + KS884X_PHY_AUTO_NEG_OFFSET);
-}
-
-static inline void hw_w_phy_auto_neg(struct ksz_hw *hw, int phy, u16 data)
-{
- writew(data, hw->io + phy + KS884X_PHY_AUTO_NEG_OFFSET);
-}
-
-static inline void hw_r_phy_rem_cap(struct ksz_hw *hw, int phy, u16 *data)
-{
- *data = readw(hw->io + phy + KS884X_PHY_REMOTE_CAP_OFFSET);
-}
-
-static inline void hw_r_phy_crossover(struct ksz_hw *hw, int phy, u16 *data)
-{
- *data = readw(hw->io + phy + KS884X_PHY_CTRL_OFFSET);
-}
-
-static inline void hw_w_phy_crossover(struct ksz_hw *hw, int phy, u16 data)
-{
- writew(data, hw->io + phy + KS884X_PHY_CTRL_OFFSET);
-}
-
-static inline void hw_r_phy_polarity(struct ksz_hw *hw, int phy, u16 *data)
-{
- *data = readw(hw->io + phy + KS884X_PHY_PHY_CTRL_OFFSET);
-}
-
-static inline void hw_w_phy_polarity(struct ksz_hw *hw, int phy, u16 data)
-{
- writew(data, hw->io + phy + KS884X_PHY_PHY_CTRL_OFFSET);
-}
-
-static inline void hw_r_phy_link_md(struct ksz_hw *hw, int phy, u16 *data)
-{
- *data = readw(hw->io + phy + KS884X_PHY_LINK_MD_OFFSET);
-}
-
-static inline void hw_w_phy_link_md(struct ksz_hw *hw, int phy, u16 data)
-{
- writew(data, hw->io + phy + KS884X_PHY_LINK_MD_OFFSET);
-}
-
/**
* hw_r_phy - read data from PHY register
* @hw: The hardware instance.
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 9be6462f1cc5..2b6e046e1d10 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -605,7 +605,7 @@ static u64 lan966x_ifh_get(u8 *ifh, size_t pos, size_t length)
v = ifh[IFH_LEN_BYTES - (j / 8) - 1];
if (v & (1 << k))
- val |= (1 << i);
+ val |= (1ULL << i);
}
return val;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
index 3caea631229c..23b1521c0df9 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
@@ -133,12 +133,6 @@ out:
return act;
}
-static unsigned int mana_xdp_fraglen(unsigned int len)
-{
- return SKB_DATA_ALIGN(len) +
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-}
-
struct bpf_prog *mana_xdp_get(struct mana_port_context *apc)
{
ASSERT_RTNL();
@@ -179,17 +173,18 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog,
{
struct mana_port_context *apc = netdev_priv(ndev);
struct bpf_prog *old_prog;
- int buf_max;
+ struct gdma_context *gc;
+
+ gc = apc->ac->gdma_dev->gdma_context;
old_prog = mana_xdp_get(apc);
if (!old_prog && !prog)
return 0;
- buf_max = XDP_PACKET_HEADROOM + mana_xdp_fraglen(ndev->mtu + ETH_HLEN);
- if (prog && buf_max > PAGE_SIZE) {
- netdev_err(ndev, "XDP: mtu:%u too large, buf_max:%u\n",
- ndev->mtu, buf_max);
+ if (prog && ndev->mtu > MANA_XDP_MTU_MAX) {
+ netdev_err(ndev, "XDP: mtu:%u too large, mtu_max:%lu\n",
+ ndev->mtu, MANA_XDP_MTU_MAX);
NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large");
return -EOPNOTSUPP;
@@ -206,6 +201,11 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog,
if (apc->port_is_up)
mana_chn_setxdp(apc, prog);
+ if (prog)
+ ndev->max_mtu = MANA_XDP_MTU_MAX;
+ else
+ ndev->max_mtu = gc->adapter_mtu - ETH_HLEN;
+
return 0;
}
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 492474b4d8aa..cabecbfa1102 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -427,6 +427,192 @@ static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb,
return txq;
}
+/* Release pre-allocated RX buffers */
+static void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc)
+{
+ struct device *dev;
+ int i;
+
+ dev = mpc->ac->gdma_dev->gdma_context->dev;
+
+ if (!mpc->rxbufs_pre)
+ goto out1;
+
+ if (!mpc->das_pre)
+ goto out2;
+
+ while (mpc->rxbpre_total) {
+ i = --mpc->rxbpre_total;
+ dma_unmap_single(dev, mpc->das_pre[i], mpc->rxbpre_datasize,
+ DMA_FROM_DEVICE);
+ put_page(virt_to_head_page(mpc->rxbufs_pre[i]));
+ }
+
+ kfree(mpc->das_pre);
+ mpc->das_pre = NULL;
+
+out2:
+ kfree(mpc->rxbufs_pre);
+ mpc->rxbufs_pre = NULL;
+
+out1:
+ mpc->rxbpre_datasize = 0;
+ mpc->rxbpre_alloc_size = 0;
+ mpc->rxbpre_headroom = 0;
+}
+
+/* Get a buffer from the pre-allocated RX buffers */
+static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da)
+{
+ struct net_device *ndev = rxq->ndev;
+ struct mana_port_context *mpc;
+ void *va;
+
+ mpc = netdev_priv(ndev);
+
+ if (!mpc->rxbufs_pre || !mpc->das_pre || !mpc->rxbpre_total) {
+ netdev_err(ndev, "No RX pre-allocated bufs\n");
+ return NULL;
+ }
+
+ /* Check sizes to catch unexpected coding errors */
+ if (mpc->rxbpre_datasize != rxq->datasize) {
+ netdev_err(ndev, "rxbpre_datasize mismatch: %u: %u\n",
+ mpc->rxbpre_datasize, rxq->datasize);
+ return NULL;
+ }
+
+ if (mpc->rxbpre_alloc_size != rxq->alloc_size) {
+ netdev_err(ndev, "rxbpre_alloc_size mismatch: %u: %u\n",
+ mpc->rxbpre_alloc_size, rxq->alloc_size);
+ return NULL;
+ }
+
+ if (mpc->rxbpre_headroom != rxq->headroom) {
+ netdev_err(ndev, "rxbpre_headroom mismatch: %u: %u\n",
+ mpc->rxbpre_headroom, rxq->headroom);
+ return NULL;
+ }
+
+ mpc->rxbpre_total--;
+
+ *da = mpc->das_pre[mpc->rxbpre_total];
+ va = mpc->rxbufs_pre[mpc->rxbpre_total];
+ mpc->rxbufs_pre[mpc->rxbpre_total] = NULL;
+
+ /* Deallocate the array after all buffers are gone */
+ if (!mpc->rxbpre_total)
+ mana_pre_dealloc_rxbufs(mpc);
+
+ return va;
+}
+
+/* Get RX buffer's data size, alloc size, XDP headroom based on MTU */
+static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,
+ u32 *headroom)
+{
+ if (mtu > MANA_XDP_MTU_MAX)
+ *headroom = 0; /* no support for XDP */
+ else
+ *headroom = XDP_PACKET_HEADROOM;
+
+ *alloc_size = mtu + MANA_RXBUF_PAD + *headroom;
+
+ *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN);
+}
+
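To see the helper above in numbers for a default MTU of 1500: headroom is XDP_PACKET_HEADROOM (256 in current kernels) since the MTU is XDP-capable, and datasize rounds mtu + ETH_HLEN up to the next 64B boundary, i.e. ALIGN(1514, 64) = 1536. A sketch with the pad value treated as an assumption (the real MANA_RXBUF_PAD is derived from skb_shared_info):

#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int mtu = 1500, eth_hlen = 14;
	unsigned int headroom = 256;  /* XDP_PACKET_HEADROOM */
	unsigned int rxbuf_pad = 384; /* assumed shared-info slack */

	printf("datasize   = %u\n", ALIGN_UP(mtu + eth_hlen, 64)); /* 1536 */
	printf("alloc_size = %u\n", mtu + rxbuf_pad + headroom);   /* 2140 */
	return 0;
}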
+static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
+{
+ struct device *dev;
+ struct page *page;
+ dma_addr_t da;
+ int num_rxb;
+ void *va;
+ int i;
+
+ mana_get_rxbuf_cfg(new_mtu, &mpc->rxbpre_datasize,
+ &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom);
+
+ dev = mpc->ac->gdma_dev->gdma_context->dev;
+
+ num_rxb = mpc->num_queues * RX_BUFFERS_PER_QUEUE;
+
+ WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n");
+ mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL);
+ if (!mpc->rxbufs_pre)
+ goto error;
+
+ mpc->das_pre = kmalloc_array(num_rxb, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!mpc->das_pre)
+ goto error;
+
+ mpc->rxbpre_total = 0;
+
+ for (i = 0; i < num_rxb; i++) {
+ if (mpc->rxbpre_alloc_size > PAGE_SIZE) {
+ va = netdev_alloc_frag(mpc->rxbpre_alloc_size);
+ if (!va)
+ goto error;
+ } else {
+ page = dev_alloc_page();
+ if (!page)
+ goto error;
+
+ va = page_to_virt(page);
+ }
+
+ da = dma_map_single(dev, va + mpc->rxbpre_headroom,
+ mpc->rxbpre_datasize, DMA_FROM_DEVICE);
+
+ if (dma_mapping_error(dev, da)) {
+ put_page(virt_to_head_page(va));
+ goto error;
+ }
+
+ mpc->rxbufs_pre[i] = va;
+ mpc->das_pre[i] = da;
+ mpc->rxbpre_total = i + 1;
+ }
+
+ return 0;
+
+error:
+ mana_pre_dealloc_rxbufs(mpc);
+ return -ENOMEM;
+}
+
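The pre-allocation above behaves like a simple stack: buffers are pushed as rxbpre_total grows, mana_get_rxbuf_pre() pops from the tail, and the arrays free themselves once drained. A toy model of that lifecycle (names are illustrative, not the driver's):

#include <stdio.h>
#include <stdlib.h>

struct pre_pool {
	void **bufs;
	int total;
};

/* Pop from the tail, like mana_get_rxbuf_pre() decrementing rxbpre_total */
static void *pool_pop(struct pre_pool *p)
{
	return p->total ? p->bufs[--p->total] : NULL;
}

int main(void)
{
	struct pre_pool p = { .bufs = calloc(4, sizeof(void *)), .total = 0 };
	void *buf;

	while (p.total < 4)             /* pre-allocate phase */
		p.bufs[p.total++] = malloc(2048);

	while ((buf = pool_pop(&p)))    /* consume phase */
		free(buf);

	free(p.bufs);                   /* drained: drop the array itself */
	return 0;
}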
+static int mana_change_mtu(struct net_device *ndev, int new_mtu)
+{
+ struct mana_port_context *mpc = netdev_priv(ndev);
+ unsigned int old_mtu = ndev->mtu;
+ int err;
+
+ /* Pre-allocate buffers to prevent failure in mana_attach later */
+ err = mana_pre_alloc_rxbufs(mpc, new_mtu);
+ if (err) {
+ netdev_err(ndev, "Insufficient memory for new MTU\n");
+ return err;
+ }
+
+ err = mana_detach(ndev, false);
+ if (err) {
+ netdev_err(ndev, "mana_detach failed: %d\n", err);
+ goto out;
+ }
+
+ ndev->mtu = new_mtu;
+
+ err = mana_attach(ndev);
+ if (err) {
+ netdev_err(ndev, "mana_attach failed: %d\n", err);
+ ndev->mtu = old_mtu;
+ }
+
+out:
+ mana_pre_dealloc_rxbufs(mpc);
+ return err;
+}
+
static const struct net_device_ops mana_devops = {
.ndo_open = mana_open,
.ndo_stop = mana_close,
@@ -436,6 +622,7 @@ static const struct net_device_ops mana_devops = {
.ndo_get_stats64 = mana_get_stats64,
.ndo_bpf = mana_bpf,
.ndo_xdp_xmit = mana_xdp_xmit,
+ .ndo_change_mtu = mana_change_mtu,
};
static void mana_cleanup_port_context(struct mana_port_context *apc)
@@ -625,6 +812,9 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG,
sizeof(req), sizeof(resp));
+
+ req.hdr.resp.msg_version = GDMA_MESSAGE_V2;
+
req.proto_major_ver = proto_major_ver;
req.proto_minor_ver = proto_minor_ver;
req.proto_micro_ver = proto_micro_ver;
@@ -647,6 +837,11 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
*max_num_vports = resp.max_num_vports;
+ if (resp.hdr.response.msg_version == GDMA_MESSAGE_V2)
+ gc->adapter_mtu = resp.adapter_mtu;
+ else
+ gc->adapter_mtu = ETH_FRAME_LEN;
+
return 0;
}
@@ -1185,10 +1380,10 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq)
WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
}
-static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
- struct xdp_buff *xdp)
+static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
+ uint pkt_len, struct xdp_buff *xdp)
{
- struct sk_buff *skb = build_skb(buf_va, PAGE_SIZE);
+ struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size);
if (!skb)
return NULL;
@@ -1196,11 +1391,12 @@ static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
if (xdp->data_hard_start) {
skb_reserve(skb, xdp->data - xdp->data_hard_start);
skb_put(skb, xdp->data_end - xdp->data);
- } else {
- skb_reserve(skb, XDP_PACKET_HEADROOM);
- skb_put(skb, pkt_len);
+ return skb;
}
+ skb_reserve(skb, rxq->headroom);
+ skb_put(skb, pkt_len);
+
return skb;
}
@@ -1233,7 +1429,7 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
if (act != XDP_PASS && act != XDP_TX)
goto drop_xdp;
- skb = mana_build_skb(buf_va, pkt_len, &xdp);
+ skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp);
if (!skb)
goto drop;
@@ -1282,14 +1478,72 @@ drop_xdp:
u64_stats_update_end(&rx_stats->syncp);
drop:
- WARN_ON_ONCE(rxq->xdp_save_page);
- rxq->xdp_save_page = virt_to_page(buf_va);
+ WARN_ON_ONCE(rxq->xdp_save_va);
+ /* Save for reuse */
+ rxq->xdp_save_va = buf_va;
++ndev->stats.rx_dropped;
return;
}
+static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
+ dma_addr_t *da, bool is_napi)
+{
+ struct page *page;
+ void *va;
+
+ /* Reuse XDP dropped page if available */
+ if (rxq->xdp_save_va) {
+ va = rxq->xdp_save_va;
+ rxq->xdp_save_va = NULL;
+ } else if (rxq->alloc_size > PAGE_SIZE) {
+ if (is_napi)
+ va = napi_alloc_frag(rxq->alloc_size);
+ else
+ va = netdev_alloc_frag(rxq->alloc_size);
+
+ if (!va)
+ return NULL;
+ } else {
+ page = dev_alloc_page();
+ if (!page)
+ return NULL;
+
+ va = page_to_virt(page);
+ }
+
+ *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
+ DMA_FROM_DEVICE);
+
+ if (dma_mapping_error(dev, *da)) {
+ put_page(virt_to_head_page(va));
+ return NULL;
+ }
+
+ return va;
+}
+
+/* Allocate frag for rx buffer, and save the old buf */
+static void mana_refill_rxoob(struct device *dev, struct mana_rxq *rxq,
+ struct mana_recv_buf_oob *rxoob, void **old_buf)
+{
+ dma_addr_t da;
+ void *va;
+
+ va = mana_get_rxfrag(rxq, dev, &da, true);
+
+ if (!va)
+ return;
+
+ dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
+ DMA_FROM_DEVICE);
+ *old_buf = rxoob->buf_va;
+
+ rxoob->buf_va = va;
+ rxoob->sgl[0].address = da;
+}
+
static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
struct gdma_comp *cqe)
{
@@ -1299,10 +1553,8 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
struct mana_recv_buf_oob *rxbuf_oob;
struct mana_port_context *apc;
struct device *dev = gc->dev;
- void *new_buf, *old_buf;
- struct page *new_page;
+ void *old_buf = NULL;
u32 curr, pktlen;
- dma_addr_t da;
apc = netdev_priv(ndev);
@@ -1345,40 +1597,11 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
rxbuf_oob = &rxq->rx_oobs[curr];
WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
- /* Reuse XDP dropped page if available */
- if (rxq->xdp_save_page) {
- new_page = rxq->xdp_save_page;
- rxq->xdp_save_page = NULL;
- } else {
- new_page = alloc_page(GFP_ATOMIC);
- }
-
- if (new_page) {
- da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize,
- DMA_FROM_DEVICE);
-
- if (dma_mapping_error(dev, da)) {
- __free_page(new_page);
- new_page = NULL;
- }
- }
-
- new_buf = new_page ? page_to_virt(new_page) : NULL;
-
- if (new_buf) {
- dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize,
- DMA_FROM_DEVICE);
-
- old_buf = rxbuf_oob->buf_va;
-
- /* refresh the rxbuf_oob with the new page */
- rxbuf_oob->buf_va = new_buf;
- rxbuf_oob->buf_dma_addr = da;
- rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr;
- } else {
- old_buf = NULL; /* drop the packet if no memory */
- }
+ mana_refill_rxoob(dev, rxq, rxbuf_oob, &old_buf);
+ /* Unsuccessful refill will have old_buf == NULL.
+ * In this case, mana_rx_skb() will drop the packet.
+ */
mana_rx_skb(old_buf, oob, rxq);
drop:
@@ -1659,8 +1882,8 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
mana_deinit_cq(apc, &rxq->rx_cq);
- if (rxq->xdp_save_page)
- __free_page(rxq->xdp_save_page);
+ if (rxq->xdp_save_va)
+ put_page(virt_to_head_page(rxq->xdp_save_va));
for (i = 0; i < rxq->num_rx_buf; i++) {
rx_oob = &rxq->rx_oobs[i];
@@ -1668,10 +1891,10 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
if (!rx_oob->buf_va)
continue;
- dma_unmap_page(dev, rx_oob->buf_dma_addr, rxq->datasize,
- DMA_FROM_DEVICE);
+ dma_unmap_single(dev, rx_oob->sgl[0].address,
+ rx_oob->sgl[0].size, DMA_FROM_DEVICE);
- free_page((unsigned long)rx_oob->buf_va);
+ put_page(virt_to_head_page(rx_oob->buf_va));
rx_oob->buf_va = NULL;
}
@@ -1681,6 +1904,30 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
kfree(rxq);
}
+static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key,
+ struct mana_rxq *rxq, struct device *dev)
+{
+ struct mana_port_context *mpc = netdev_priv(rxq->ndev);
+ dma_addr_t da;
+ void *va;
+
+ if (mpc->rxbufs_pre)
+ va = mana_get_rxbuf_pre(rxq, &da);
+ else
+ va = mana_get_rxfrag(rxq, dev, &da, false);
+
+ if (!va)
+ return -ENOMEM;
+
+ rx_oob->buf_va = va;
+
+ rx_oob->sgl[0].address = da;
+ rx_oob->sgl[0].size = rxq->datasize;
+ rx_oob->sgl[0].mem_key = mem_key;
+
+ return 0;
+}
+
#define MANA_WQE_HEADER_SIZE 16
#define MANA_WQE_SGE_SIZE 16
@@ -1690,11 +1937,10 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc,
struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
struct mana_recv_buf_oob *rx_oob;
struct device *dev = gc->dev;
- struct page *page;
- dma_addr_t da;
u32 buf_idx;
+ int ret;
- WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE);
+ WARN_ON(rxq->datasize == 0);
*rxq_size = 0;
*cq_size = 0;
@@ -1703,25 +1949,12 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc,
rx_oob = &rxq->rx_oobs[buf_idx];
memset(rx_oob, 0, sizeof(*rx_oob));
- page = alloc_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
-
- da = dma_map_page(dev, page, XDP_PACKET_HEADROOM, rxq->datasize,
- DMA_FROM_DEVICE);
-
- if (dma_mapping_error(dev, da)) {
- __free_page(page);
- return -ENOMEM;
- }
-
- rx_oob->buf_va = page_to_virt(page);
- rx_oob->buf_dma_addr = da;
-
rx_oob->num_sge = 1;
- rx_oob->sgl[0].address = rx_oob->buf_dma_addr;
- rx_oob->sgl[0].size = rxq->datasize;
- rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey;
+
+ ret = mana_fill_rx_oob(rx_oob, apc->ac->gdma_dev->gpa_mkey, rxq,
+ dev);
+ if (ret)
+ return ret;
rx_oob->wqe_req.sgl = rx_oob->sgl;
rx_oob->wqe_req.num_sge = rx_oob->num_sge;
@@ -1780,9 +2013,11 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
rxq->ndev = ndev;
rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
rxq->rxq_idx = rxq_idx;
- rxq->datasize = ALIGN(MAX_FRAME_SIZE, 64);
rxq->rxobj = INVALID_MANA_HANDLE;
+ mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
+ &rxq->headroom);
+
err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
if (err)
goto out;
@@ -2194,8 +2429,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
ndev->netdev_ops = &mana_devops;
ndev->ethtool_ops = &mana_ethtool_ops;
ndev->mtu = ETH_DATA_LEN;
- ndev->max_mtu = ndev->mtu;
- ndev->min_mtu = ndev->mtu;
+ ndev->max_mtu = gc->adapter_mtu - ETH_HLEN;
+ ndev->min_mtu = ETH_MIN_MTU;
ndev->needed_headroom = MANA_HEADROOM;
ndev->dev_port = port_idx;
SET_NETDEV_DEV(ndev, gc->dev);
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 1502bb2c8ea7..1f5f00b30441 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -8,6 +8,7 @@
#include <linux/if_bridge.h>
#include <linux/iopoll.h>
#include <linux/phy/phy.h>
+#include <net/pkt_sched.h>
#include <soc/mscc/ocelot_hsio.h>
#include <soc/mscc/ocelot_vcap.h>
#include "ocelot.h"
@@ -1005,7 +1006,12 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
*/
if (ocelot->ops->cut_through_fwd) {
mutex_lock(&ocelot->fwd_domain_lock);
- ocelot->ops->cut_through_fwd(ocelot);
+ /* Workaround for a hardware bug: frame preemption doesn't
+ * work at every link speed in every PHY mode. The function
+ * below also calls ocelot->ops->cut_through_fwd(), so we
+ * don't need to do it twice.
+ */
+ ocelot_port_update_active_preemptible_tcs(ocelot, port);
mutex_unlock(&ocelot->fwd_domain_lock);
}
@@ -2699,6 +2705,58 @@ void ocelot_port_mirror_del(struct ocelot *ocelot, int from, bool ingress)
}
EXPORT_SYMBOL_GPL(ocelot_port_mirror_del);
+static void ocelot_port_reset_mqprio(struct ocelot *ocelot, int port)
+{
+ struct net_device *dev = ocelot->ops->port_to_netdev(ocelot, port);
+
+ netdev_reset_tc(dev);
+ ocelot_port_change_fp(ocelot, port, 0);
+}
+
+int ocelot_port_mqprio(struct ocelot *ocelot, int port,
+ struct tc_mqprio_qopt_offload *mqprio)
+{
+ struct net_device *dev = ocelot->ops->port_to_netdev(ocelot, port);
+ struct netlink_ext_ack *extack = mqprio->extack;
+ struct tc_mqprio_qopt *qopt = &mqprio->qopt;
+ int num_tc = qopt->num_tc;
+ int tc, err;
+
+ if (!num_tc) {
+ ocelot_port_reset_mqprio(ocelot, port);
+ return 0;
+ }
+
+ err = netdev_set_num_tc(dev, num_tc);
+ if (err)
+ return err;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ if (qopt->count[tc] != 1) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only one TXQ per TC supported");
+ return -EINVAL;
+ }
+
+ err = netdev_set_tc_queue(dev, tc, 1, qopt->offset[tc]);
+ if (err)
+ goto err_reset_tc;
+ }
+
+ err = netif_set_real_num_tx_queues(dev, num_tc);
+ if (err)
+ goto err_reset_tc;
+
+ ocelot_port_change_fp(ocelot, port, mqprio->preemptible_tcs);
+
+ return 0;
+
+err_reset_tc:
+ ocelot_port_reset_mqprio(ocelot, port);
+ return err;
+}
+EXPORT_SYMBOL_GPL(ocelot_port_mqprio);
+
void ocelot_init_port(struct ocelot *ocelot, int port)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index e9a0179448bf..87f2055c242c 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -74,6 +74,15 @@ struct ocelot_multicast {
struct ocelot_pgid *pgid;
};
+static inline void ocelot_reg_to_target_addr(struct ocelot *ocelot,
+ enum ocelot_reg reg,
+ enum ocelot_target *target,
+ u32 *addr)
+{
+ *target = reg >> TARGET_OFFSET;
+ *addr = ocelot->map[*target][reg & REG_MASK];
+}
+
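The new inline above makes the register encoding explicit: the target unit lives in the high bits of the enum value, the per-target index in the low bits, and the final MMIO address comes from the per-SoC map. A self-contained sketch of that decode, assuming a TARGET_OFFSET of 24 bits (the shift used by the ocelot register layout):

#include <stdio.h>

#define TARGET_OFFSET 24 /* assumed shift; see soc/mscc definitions */
#define REG_MASK ((1u << TARGET_OFFSET) - 1)

int main(void)
{
	unsigned int sys = 2;                             /* hypothetical target id */
	unsigned int reg = (sys << TARGET_OFFSET) | 0x10; /* 17th reg of that target */

	printf("target %u, map index 0x%x\n",
	       reg >> TARGET_OFFSET, reg & REG_MASK);
	return 0;
}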
int ocelot_bridge_num_find(struct ocelot *ocelot,
const struct net_device *bridge);
@@ -85,9 +94,6 @@ int ocelot_mact_forget(struct ocelot *ocelot,
struct net_device *ocelot_port_to_netdev(struct ocelot *ocelot, int port);
int ocelot_netdev_to_port(struct net_device *dev);
-u32 ocelot_port_readl(struct ocelot_port *port, u32 reg);
-void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg);
-
int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target,
struct device_node *portnp);
void ocelot_release_port(struct ocelot_port *ocelot_port);
@@ -110,6 +116,9 @@ int ocelot_stats_init(struct ocelot *ocelot);
void ocelot_stats_deinit(struct ocelot *ocelot);
int ocelot_mm_init(struct ocelot *ocelot);
+void ocelot_port_change_fp(struct ocelot *ocelot, int port,
+ unsigned long preemptible_tcs);
+void ocelot_port_update_active_preemptible_tcs(struct ocelot *ocelot, int port);
extern struct notifier_block ocelot_netdevice_nb;
extern struct notifier_block ocelot_switchdev_nb;
diff --git a/drivers/net/ethernet/mscc/ocelot_io.c b/drivers/net/ethernet/mscc/ocelot_io.c
index 2067382d0ee1..3aa7dc29ebe1 100644
--- a/drivers/net/ethernet/mscc/ocelot_io.c
+++ b/drivers/net/ethernet/mscc/ocelot_io.c
@@ -10,57 +10,60 @@
#include "ocelot.h"
-int __ocelot_bulk_read_ix(struct ocelot *ocelot, u32 reg, u32 offset, void *buf,
- int count)
+int __ocelot_bulk_read_ix(struct ocelot *ocelot, enum ocelot_reg reg,
+ u32 offset, void *buf, int count)
{
- u16 target = reg >> TARGET_OFFSET;
+ enum ocelot_target target;
+ u32 addr;
+ ocelot_reg_to_target_addr(ocelot, reg, &target, &addr);
WARN_ON(!target);
- return regmap_bulk_read(ocelot->targets[target],
- ocelot->map[target][reg & REG_MASK] + offset,
+ return regmap_bulk_read(ocelot->targets[target], addr + offset,
buf, count);
}
EXPORT_SYMBOL_GPL(__ocelot_bulk_read_ix);
-u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset)
+u32 __ocelot_read_ix(struct ocelot *ocelot, enum ocelot_reg reg, u32 offset)
{
- u16 target = reg >> TARGET_OFFSET;
- u32 val;
+ enum ocelot_target target;
+ u32 addr, val;
+ ocelot_reg_to_target_addr(ocelot, reg, &target, &addr);
WARN_ON(!target);
- regmap_read(ocelot->targets[target],
- ocelot->map[target][reg & REG_MASK] + offset, &val);
+ regmap_read(ocelot->targets[target], addr + offset, &val);
return val;
}
EXPORT_SYMBOL_GPL(__ocelot_read_ix);
-void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset)
+void __ocelot_write_ix(struct ocelot *ocelot, u32 val, enum ocelot_reg reg,
+ u32 offset)
{
- u16 target = reg >> TARGET_OFFSET;
+ enum ocelot_target target;
+ u32 addr;
+ ocelot_reg_to_target_addr(ocelot, reg, &target, &addr);
WARN_ON(!target);
- regmap_write(ocelot->targets[target],
- ocelot->map[target][reg & REG_MASK] + offset, val);
+ regmap_write(ocelot->targets[target], addr + offset, val);
}
EXPORT_SYMBOL_GPL(__ocelot_write_ix);
-void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
- u32 offset)
+void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask,
+ enum ocelot_reg reg, u32 offset)
{
- u16 target = reg >> TARGET_OFFSET;
+ enum ocelot_target target;
+ u32 addr;
+ ocelot_reg_to_target_addr(ocelot, reg, &target, &addr);
WARN_ON(!target);
- regmap_update_bits(ocelot->targets[target],
- ocelot->map[target][reg & REG_MASK] + offset,
- mask, val);
+ regmap_update_bits(ocelot->targets[target], addr + offset, mask, val);
}
EXPORT_SYMBOL_GPL(__ocelot_rmw_ix);
-u32 ocelot_port_readl(struct ocelot_port *port, u32 reg)
+u32 ocelot_port_readl(struct ocelot_port *port, enum ocelot_reg reg)
{
struct ocelot *ocelot = port->ocelot;
u16 target = reg >> TARGET_OFFSET;
@@ -73,7 +76,7 @@ u32 ocelot_port_readl(struct ocelot_port *port, u32 reg)
}
EXPORT_SYMBOL_GPL(ocelot_port_readl);
-void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg)
+void ocelot_port_writel(struct ocelot_port *port, u32 val, enum ocelot_reg reg)
{
struct ocelot *ocelot = port->ocelot;
u16 target = reg >> TARGET_OFFSET;
@@ -84,7 +87,8 @@ void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg)
}
EXPORT_SYMBOL_GPL(ocelot_port_writel);
-void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg)
+void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask,
+ enum ocelot_reg reg)
{
u32 cur = ocelot_port_readl(port, reg);
diff --git a/drivers/net/ethernet/mscc/ocelot_mm.c b/drivers/net/ethernet/mscc/ocelot_mm.c
index 0a8f21ae23f0..fb3145118d68 100644
--- a/drivers/net/ethernet/mscc/ocelot_mm.c
+++ b/drivers/net/ethernet/mscc/ocelot_mm.c
@@ -49,14 +49,68 @@ static enum ethtool_mm_verify_status ocelot_mm_verify_status(u32 val)
}
}
-void ocelot_port_mm_irq(struct ocelot *ocelot, int port)
+void ocelot_port_update_active_preemptible_tcs(struct ocelot *ocelot, int port)
+{
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
+ struct ocelot_mm_state *mm = &ocelot->mm[port];
+ u32 val = 0;
+
+ lockdep_assert_held(&ocelot->fwd_domain_lock);
+
+ /* Only commit preemptible TCs when MAC Merge is active.
+ * On NXP LS1028A, when using QSGMII, the port hangs if transmitting
+ * preemptible frames at any other link speed than gigabit, so avoid
+ * preemption at lower speeds in this PHY mode.
+ */
+ if ((ocelot_port->phy_mode != PHY_INTERFACE_MODE_QSGMII ||
+ ocelot_port->speed == SPEED_1000) && mm->tx_active)
+ val = mm->preemptible_tcs;
+
+ /* Cut through switching doesn't work for preemptible priorities,
+ * so first make sure it is disabled.
+ */
+ mm->active_preemptible_tcs = val;
+ ocelot->ops->cut_through_fwd(ocelot);
+
+ dev_dbg(ocelot->dev,
+ "port %d %s/%s, MM TX %s, preemptible TCs 0x%x, active 0x%x\n",
+ port, phy_modes(ocelot_port->phy_mode),
+ phy_speed_to_str(ocelot_port->speed),
+ mm->tx_active ? "active" : "inactive", mm->preemptible_tcs,
+ mm->active_preemptible_tcs);
+
+ ocelot_rmw_rix(ocelot, QSYS_PREEMPTION_CFG_P_QUEUES(val),
+ QSYS_PREEMPTION_CFG_P_QUEUES_M,
+ QSYS_PREEMPTION_CFG, port);
+}
+
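The guard above only commits preemptible TCs when MAC Merge TX is verified active, and on QSGMII additionally requires gigabit speed. A small truth-table sketch of that condition (the enum values are stand-ins, not the kernel's phy_interface_t):

#include <stdbool.h>
#include <stdio.h>

enum phy_mode { MODE_SGMII, MODE_QSGMII }; /* hypothetical stand-ins */

/* Mirrors the guard in ocelot_port_update_active_preemptible_tcs():
 * QSGMII ports only preempt at 1000 Mbps; other modes at any speed.
 */
static unsigned long active_tcs(enum phy_mode mode, int speed,
				bool tx_active, unsigned long preemptible_tcs)
{
	if ((mode != MODE_QSGMII || speed == 1000) && tx_active)
		return preemptible_tcs;
	return 0;
}

int main(void)
{
	printf("QSGMII@100:  0x%lx\n", active_tcs(MODE_QSGMII, 100, true, 0x0f));
	printf("QSGMII@1000: 0x%lx\n", active_tcs(MODE_QSGMII, 1000, true, 0x0f));
	printf("SGMII@100:   0x%lx\n", active_tcs(MODE_SGMII, 100, true, 0x0f));
	return 0;
}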
+void ocelot_port_change_fp(struct ocelot *ocelot, int port,
+ unsigned long preemptible_tcs)
+{
+ struct ocelot_mm_state *mm = &ocelot->mm[port];
+
+ mutex_lock(&ocelot->fwd_domain_lock);
+
+ if (mm->preemptible_tcs == preemptible_tcs)
+ goto out_unlock;
+
+ mm->preemptible_tcs = preemptible_tcs;
+
+ ocelot_port_update_active_preemptible_tcs(ocelot, port);
+
+out_unlock:
+ mutex_unlock(&ocelot->fwd_domain_lock);
+}
+
+static void ocelot_mm_update_port_status(struct ocelot *ocelot, int port)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
struct ocelot_mm_state *mm = &ocelot->mm[port];
enum ethtool_mm_verify_status verify_status;
- u32 val;
+ u32 val, ack = 0;
- mutex_lock(&mm->lock);
+ if (!mm->tx_enabled)
+ return;
val = ocelot_port_readl(ocelot_port, DEV_MM_STATUS);
@@ -73,25 +127,43 @@ void ocelot_port_mm_irq(struct ocelot *ocelot, int port)
dev_dbg(ocelot->dev, "Port %d TX preemption %s\n",
port, mm->tx_active ? "active" : "inactive");
+ ocelot_port_update_active_preemptible_tcs(ocelot, port);
+
+ ack |= DEV_MM_STAT_MM_STATUS_PRMPT_ACTIVE_STICKY;
}
if (val & DEV_MM_STAT_MM_STATUS_UNEXP_RX_PFRM_STICKY) {
dev_err(ocelot->dev,
"Unexpected P-frame received on port %d while verification was unsuccessful or not yet verified\n",
port);
+
+ ack |= DEV_MM_STAT_MM_STATUS_UNEXP_RX_PFRM_STICKY;
}
if (val & DEV_MM_STAT_MM_STATUS_UNEXP_TX_PFRM_STICKY) {
dev_err(ocelot->dev,
"Unexpected P-frame requested to be transmitted on port %d while verification was unsuccessful or not yet verified, or MM_TX_ENA=0\n",
port);
+
+ ack |= DEV_MM_STAT_MM_STATUS_UNEXP_TX_PFRM_STICKY;
}
- ocelot_port_writel(ocelot_port, val, DEV_MM_STATUS);
+ if (ack)
+ ocelot_port_writel(ocelot_port, ack, DEV_MM_STATUS);
+}
- mutex_unlock(&mm->lock);
+void ocelot_mm_irq(struct ocelot *ocelot)
+{
+ int port;
+
+ mutex_lock(&ocelot->fwd_domain_lock);
+
+ for (port = 0; port < ocelot->num_phys_ports; port++)
+ ocelot_mm_update_port_status(ocelot, port);
+
+ mutex_unlock(&ocelot->fwd_domain_lock);
}
-EXPORT_SYMBOL_GPL(ocelot_port_mm_irq);
+EXPORT_SYMBOL_GPL(ocelot_mm_irq);
int ocelot_port_set_mm(struct ocelot *ocelot, int port,
struct ethtool_mm_cfg *cfg,
@@ -121,7 +193,7 @@ int ocelot_port_set_mm(struct ocelot *ocelot, int port,
if (!cfg->verify_enabled)
verify_disable = DEV_MM_CONFIG_VERIF_CONFIG_PRM_VERIFY_DIS;
- mutex_lock(&mm->lock);
+ mutex_lock(&ocelot->fwd_domain_lock);
ocelot_port_rmwl(ocelot_port, mm_enable,
DEV_MM_CONFIG_ENABLE_CONFIG_MM_TX_ENA |
@@ -140,7 +212,20 @@ int ocelot_port_set_mm(struct ocelot *ocelot, int port,
QSYS_PREEMPTION_CFG,
port);
- mutex_unlock(&mm->lock);
+ /* The switch will emit an IRQ when TX is disabled, to notify that it
+ * has become inactive. We optimize ocelot_mm_update_port_status() to
+ * not bother processing MM IRQs at all for ports with TX disabled,
+ * but we need to ACK this IRQ now, while mm->tx_enabled is still set,
+ * otherwise we get an IRQ storm.
+ */
+ if (mm->tx_enabled && !cfg->tx_enabled) {
+ ocelot_mm_update_port_status(ocelot, port);
+ WARN_ON(mm->tx_active);
+ }
+
+ mm->tx_enabled = cfg->tx_enabled;
+
+ mutex_unlock(&ocelot->fwd_domain_lock);
return 0;
}
@@ -158,7 +243,7 @@ int ocelot_port_get_mm(struct ocelot *ocelot, int port,
mm = &ocelot->mm[port];
- mutex_lock(&mm->lock);
+ mutex_lock(&ocelot->fwd_domain_lock);
val = ocelot_port_readl(ocelot_port, DEV_MM_ENABLE_CONFIG);
state->pmac_enabled = !!(val & DEV_MM_CONFIG_ENABLE_CONFIG_MM_RX_ENA);
@@ -174,10 +259,11 @@ int ocelot_port_get_mm(struct ocelot *ocelot, int port,
state->tx_min_frag_size = ethtool_mm_frag_size_add_to_min(add_frag_size);
state->rx_min_frag_size = ETH_ZLEN;
+ ocelot_mm_update_port_status(ocelot, port);
state->verify_status = mm->verify_status;
state->tx_active = mm->tx_active;
- mutex_unlock(&mm->lock);
+ mutex_unlock(&ocelot->fwd_domain_lock);
return 0;
}
@@ -201,7 +287,6 @@ int ocelot_mm_init(struct ocelot *ocelot)
u32 val;
mm = &ocelot->mm[port];
- mutex_init(&mm->lock);
ocelot_port = ocelot->ports[port];
/* Update initial status variable for the
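Worth noting about the ocelot MM rework above: the IRQ path now accumulates an ack mask and writes back only the sticky bits it actually inspected, the usual write-one-to-clear discipline. A minimal sketch of that pattern follows; the FOO_* registers and foo_priv layout are hypothetical, not the ocelot register map.

#include <linux/bits.h>
#include <linux/io.h>

#define FOO_STATUS	0x00
#define FOO_EVENT_A	BIT(0)
#define FOO_EVENT_B	BIT(1)

struct foo_priv {
	void __iomem *base;
};

static void foo_handle_events(struct foo_priv *priv)
{
	u32 val, ack = 0;

	val = readl(priv->base + FOO_STATUS);

	if (val & FOO_EVENT_A) {
		/* ... process event A ... */
		ack |= FOO_EVENT_A;
	}

	if (val & FOO_EVENT_B) {
		/* ... process event B ... */
		ack |= FOO_EVENT_B;
	}

	/* Sticky bits are write-one-to-clear: clear only what was seen,
	 * so an event latching after the read above is not lost.
	 */
	if (ack)
		writel(ack, priv->base + FOO_STATUS);
}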
diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c
index d0e6cd8dbe5c..5c55197c7327 100644
--- a/drivers/net/ethernet/mscc/ocelot_stats.c
+++ b/drivers/net/ethernet/mscc/ocelot_stats.c
@@ -145,7 +145,7 @@ enum ocelot_stat {
};
struct ocelot_stat_layout {
- u32 reg;
+ enum ocelot_reg reg;
char name[ETH_GSTRING_LEN];
};
@@ -257,7 +257,7 @@ struct ocelot_stat_layout {
struct ocelot_stats_region {
struct list_head node;
- u32 base;
+ enum ocelot_reg base;
enum ocelot_stat first_stat;
int count;
u32 *buf;
@@ -395,7 +395,7 @@ static void ocelot_check_stats_work(struct work_struct *work)
void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
{
const struct ocelot_stat_layout *layout;
- int i;
+ enum ocelot_stat i;
if (sset != ETH_SS_STATS)
return;
@@ -442,7 +442,8 @@ out_unlock:
int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
{
const struct ocelot_stat_layout *layout;
- int i, num_stats = 0;
+ enum ocelot_stat i;
+ int num_stats = 0;
if (sset != ETH_SS_STATS)
return -EOPNOTSUPP;
@@ -461,8 +462,8 @@ static void ocelot_port_ethtool_stats_cb(struct ocelot *ocelot, int port,
void *priv)
{
const struct ocelot_stat_layout *layout;
+ enum ocelot_stat i;
u64 *data = priv;
- int i;
layout = ocelot_get_stats_layout(ocelot);
@@ -889,8 +890,8 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
{
struct ocelot_stats_region *region = NULL;
const struct ocelot_stat_layout *layout;
- unsigned int last = 0;
- int i;
+ enum ocelot_reg last = 0;
+ enum ocelot_stat i;
INIT_LIST_HEAD(&ocelot->stats_regions);
@@ -900,6 +901,17 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
if (!layout[i].reg)
continue;
+ /* enum ocelot_stat must be kept sorted in the same order
+ * as the addresses behind layout[i].reg in order to have
+ * efficient bulking
+ */
+ if (last) {
+ WARN(ocelot->map[SYS][last & REG_MASK] >= ocelot->map[SYS][layout[i].reg & REG_MASK],
+ "reg 0x%x had address 0x%x but reg 0x%x has address 0x%x, bulking broken!",
+ last, ocelot->map[SYS][last & REG_MASK],
+ layout[i].reg, ocelot->map[SYS][layout[i].reg & REG_MASK]);
+ }
+
if (region && ocelot->map[SYS][layout[i].reg & REG_MASK] ==
ocelot->map[SYS][last & REG_MASK] + 4) {
region->count++;
@@ -909,12 +921,6 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
if (!region)
return -ENOMEM;
- /* enum ocelot_stat must be kept sorted in the same
- * order as layout[i].reg in order to have efficient
- * bulking
- */
- WARN_ON(last >= layout[i].reg);
-
region->base = layout[i].reg;
region->first_stat = i;
region->count = 1;
@@ -925,6 +931,15 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
}
list_for_each_entry(region, &ocelot->stats_regions, node) {
+ enum ocelot_target target;
+ u32 addr;
+
+ ocelot_reg_to_target_addr(ocelot, region->base, &target,
+ &addr);
+
+ dev_dbg(ocelot->dev,
+ "region of %d contiguous counters starting with SYS:STAT:CNT[0x%03x]\n",
+ region->count, addr / 4);
region->buf = devm_kcalloc(ocelot->dev, region->count,
sizeof(*region->buf), GFP_KERNEL);
if (!region->buf)
@@ -972,4 +987,3 @@ void ocelot_stats_deinit(struct ocelot *ocelot)
cancel_delayed_work(&ocelot->stats_work);
destroy_workqueue(ocelot->stats_queue);
}
-
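The region-building loop above depends on the stat layout being sorted by register address, so counters sitting 4 bytes apart coalesce into one {base, count} region that can be read in a single bulk transfer; the new WARN enforces that ordering against the per-SoC register map. A self-contained sketch of the coalescing step, using made-up addresses rather than the real ocelot map:

#include <stdio.h>

/* Coalesce a sorted list of 32-bit counter addresses (4 bytes apart)
 * into {base, count} regions, mirroring ocelot_prepare_stats_regions().
 */
int main(void)
{
	const unsigned int addr[] = { 0x100, 0x104, 0x108, 0x200, 0x204 };
	const int n = sizeof(addr) / sizeof(addr[0]);
	unsigned int base = addr[0];
	int count = 1;

	for (int i = 1; i <= n; i++) {
		if (i < n && addr[i] == addr[i - 1] + 4) {
			count++;	/* contiguous: grow the region */
			continue;
		}
		printf("region: base 0x%x, %d counters\n", base, count);
		if (i < n) {		/* start the next region */
			base = addr[i];
			count = 1;
		}
	}
	return 0;	/* prints regions 0x100 x3 and 0x200 x2 */
}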
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c b/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c
index 5cabb1aa9c0c..0d6c59d6d4ae 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c
@@ -115,7 +115,7 @@ static const struct hwmon_channel_info nfp_power = {
.config = nfp_power_config,
};
-static const struct hwmon_channel_info *nfp_hwmon_info[] = {
+static const struct hwmon_channel_info * const nfp_hwmon_info[] = {
&nfp_chip,
&nfp_temp,
&nfp_power,
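The nfp change is part of a tree-wide constification of hwmon channel tables. A hedged sketch of declaring such a fully-const table with the HWMON_CHANNEL_INFO() helper; the foo_* names are illustrative, not the nfp driver:

#include <linux/hwmon.h>

static umode_t foo_is_visible(const void *data, enum hwmon_sensor_types type,
			      u32 attr, int channel)
{
	return 0444;
}

static int foo_read(struct device *dev, enum hwmon_sensor_types type,
		    u32 attr, int channel, long *val)
{
	*val = 0;	/* a real driver reads the sensor here */
	return 0;
}

static const struct hwmon_ops foo_hwmon_ops = {
	.is_visible = foo_is_visible,
	.read = foo_read,
};

/* Both the array and the pointed-to entries are const */
static const struct hwmon_channel_info * const foo_hwmon_info[] = {
	HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
	HWMON_CHANNEL_INFO(power, HWMON_P_INPUT),
	NULL
};

static const struct hwmon_chip_info foo_chip_info = {
	.ops = &foo_hwmon_ops,
	.info = foo_hwmon_info,
};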
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c
index 4f2308570dcf..54640bcb70fb 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c
@@ -189,6 +189,7 @@ int nfp_port_init_phy_port(struct nfp_pf *pf, struct nfp_app *app,
port->eth_port = &pf->eth_tbl->ports[id];
port->eth_id = pf->eth_tbl->ports[id].index;
+ port->netdev->dev_port = id;
if (pf->mac_stats_mem)
port->eth_stats =
pf->mac_stats_mem + port->eth_id * NFP_MAC_STATS_SIZE;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
index eac2f0e3576e..7505efdff8e9 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
@@ -579,11 +579,10 @@ void ionic_lif_alloc_phc(struct ionic_lif *lif)
diff |= diff >> 16;
diff |= diff >> 32;
- /* constrain to the hardware bitmask, and use this as the bitmask */
+ /* constrain to the hardware bitmask */
diff &= phc->cc.mask;
- phc->cc.mask = diff;
- /* the wrap period is now defined by diff (or phc->cc.mask)
+ /* the wrap period is now defined by diff
*
* we will update the time basis at about 1/4 the wrap period, so
* should not see a difference of more than +/- diff/4.
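For context, the `diff |= diff >> n` cascade (its earlier stages sit just above this hunk) smears the highest set bit downward, rounding the measured value up to an all-ones mask of the form 2^k - 1 before it is clamped to the hardware counter width. A standalone illustration:

#include <stdint.h>
#include <stdio.h>

/* Smear the highest set bit downward so the value becomes an
 * all-ones mask (2^k - 1), as the ionic PHC code does.
 */
static uint64_t smear(uint64_t v)
{
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	v |= v >> 32;
	return v;
}

int main(void)
{
	/* prints 0x50000 -> 0x7ffff */
	printf("0x%llx -> 0x%llx\n", 0x50000ULL,
	       (unsigned long long)smear(0x50000ULL));
	return 0;
}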
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index 87f76bac2e46..eb827b86ecae 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -628,7 +628,13 @@ int qlcnic_fw_create_ctx(struct qlcnic_adapter *dev)
int i, err, ring;
if (dev->flags & QLCNIC_NEED_FLR) {
- pci_reset_function(dev->pdev);
+ err = pci_reset_function(dev->pdev);
+ if (err) {
+ dev_err(&dev->pdev->dev,
+ "Adapter reset failed (%d). Please reboot\n",
+ err);
+ return err;
+ }
dev->flags &= ~QLCNIC_NEED_FLR;
}
diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig
index a4434eb38950..9210ff360fdc 100644
--- a/drivers/net/ethernet/qualcomm/Kconfig
+++ b/drivers/net/ethernet/qualcomm/Kconfig
@@ -52,6 +52,7 @@ config QCOM_EMAC
depends on HAS_DMA && HAS_IOMEM
select CRC32
select PHYLIB
+ select MDIO_DEVRES
help
This driver supports the Qualcomm Technologies, Inc. Gigabit
Ethernet Media Access Controller (EMAC). The controller
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 9f8357bbc8a4..a7e376e7e689 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -30,6 +30,7 @@
#include <linux/ipv6.h>
#include <asm/unaligned.h>
#include <net/ip6_checksum.h>
+#include <net/netdev_queues.h>
#include "r8169.h"
#include "r8169_firmware.h"
@@ -68,6 +69,8 @@
#define NUM_RX_DESC 256 /* Number of Rx descriptor registers */
#define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc))
#define R8169_RX_RING_BYTES (NUM_RX_DESC * sizeof(struct RxDesc))
+#define R8169_TX_STOP_THRS (MAX_SKB_FRAGS + 1)
+#define R8169_TX_START_THRS (2 * R8169_TX_STOP_THRS)
#define OCP_STD_PHY_BASE 0xa400
@@ -4162,13 +4165,9 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
return true;
}
-static bool rtl_tx_slots_avail(struct rtl8169_private *tp)
+static unsigned int rtl_tx_slots_avail(struct rtl8169_private *tp)
{
- unsigned int slots_avail = READ_ONCE(tp->dirty_tx) + NUM_TX_DESC
- - READ_ONCE(tp->cur_tx);
-
- /* A skbuff with nr_frags needs nr_frags+1 entries in the tx queue */
- return slots_avail > MAX_SKB_FRAGS;
+ return READ_ONCE(tp->dirty_tx) + NUM_TX_DESC - READ_ONCE(tp->cur_tx);
}
/* Versions RTL8102e and from RTL8168c onwards support csum_v2 */
@@ -4245,27 +4244,10 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
WRITE_ONCE(tp->cur_tx, tp->cur_tx + frags + 1);
- stop_queue = !rtl_tx_slots_avail(tp);
- if (unlikely(stop_queue)) {
- /* Avoid wrongly optimistic queue wake-up: rtl_tx thread must
- * not miss a ring update when it notices a stopped queue.
- */
- smp_wmb();
- netif_stop_queue(dev);
- /* Sync with rtl_tx:
- * - publish queue status and cur_tx ring index (write barrier)
- * - refresh dirty_tx ring index (read barrier).
- * May the current thread have a pessimistic view of the ring
- * status and forget to wake up queue, a racing rtl_tx thread
- * can't.
- */
- smp_mb__after_atomic();
- if (rtl_tx_slots_avail(tp))
- netif_start_queue(dev);
- door_bell = true;
- }
-
- if (door_bell)
+ stop_queue = !netif_subqueue_maybe_stop(dev, 0, rtl_tx_slots_avail(tp),
+ R8169_TX_STOP_THRS,
+ R8169_TX_START_THRS);
+ if (door_bell || stop_queue)
rtl8169_doorbell(tp);
return NETDEV_TX_OK;
@@ -4389,19 +4371,12 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,
}
if (tp->dirty_tx != dirty_tx) {
- netdev_completed_queue(dev, pkts_compl, bytes_compl);
dev_sw_netstats_tx_add(dev, pkts_compl, bytes_compl);
+ WRITE_ONCE(tp->dirty_tx, dirty_tx);
- /* Sync with rtl8169_start_xmit:
- * - publish dirty_tx ring index (write barrier)
- * - refresh cur_tx ring index and queue status (read barrier)
- * May the current thread miss the stopped queue condition,
- * a racing xmit thread can only have a right view of the
- * ring status.
- */
- smp_store_mb(tp->dirty_tx, dirty_tx);
- if (netif_queue_stopped(dev) && rtl_tx_slots_avail(tp))
- netif_wake_queue(dev);
+ netif_subqueue_completed_wake(dev, 0, pkts_compl, bytes_compl,
+ rtl_tx_slots_avail(tp),
+ R8169_TX_START_THRS);
/*
* 8168 hack: TxPoll requests are lost when the Tx packets are
* too close. Let's kick an extra TxPoll request when a burst
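The r8169 conversion above replaces the hand-rolled smp_wmb()/smp_mb__after_atomic() queue-stop logic with the netif_subqueue_maybe_stop()/netif_subqueue_completed_wake() helpers from <net/netdev_queues.h>, which bundle the stop/recheck/restart dance and the BQL accounting. A hedged sketch of the resulting shape; the "foo" driver, FOO_NUM_DESC and the ring indices are hypothetical:

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/netdev_queues.h>

#define FOO_NUM_DESC		256
#define FOO_TX_STOP_THRS	(MAX_SKB_FRAGS + 1)
#define FOO_TX_START_THRS	(2 * FOO_TX_STOP_THRS)

struct foo_priv {
	unsigned int cur_tx;	/* producer index (xmit path) */
	unsigned int dirty_tx;	/* consumer index (completion path) */
};

static unsigned int foo_tx_slots_avail(struct foo_priv *priv)
{
	return READ_ONCE(priv->dirty_tx) + FOO_NUM_DESC -
	       READ_ONCE(priv->cur_tx);
}

/* xmit side: returns true when the queue was stopped and stayed
 * stopped, mirroring how r8169 decides to ring the doorbell.
 */
static bool foo_xmit_maybe_stop(struct net_device *dev,
				struct foo_priv *priv)
{
	/* The helper stops the queue, rechecks the ring, and restarts
	 * it if a racing completion freed space; a return of 0 means
	 * "still stopped".  All barriers are internal to the helper.
	 */
	return !netif_subqueue_maybe_stop(dev, 0, foo_tx_slots_avail(priv),
					  FOO_TX_STOP_THRS,
					  FOO_TX_START_THRS);
}

/* completion side: accounts the BQL packet/byte counts and wakes the
 * queue once FOO_TX_START_THRS descriptors are free again.
 */
static void foo_tx_complete(struct net_device *dev, struct foo_priv *priv,
			    unsigned int pkts, unsigned int bytes)
{
	netif_subqueue_completed_wake(dev, 0, pkts, bytes,
				      foo_tx_slots_avail(priv),
				      FOO_TX_START_THRS);
}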
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 746fd9164e30..a4f22d8e6ac7 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -540,7 +540,6 @@ int efx_net_open(struct net_device *net_dev)
else
efx->state = STATE_NET_UP;
- efx_selftest_async_start(efx);
return 0;
}
diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
index cc30524c2fe4..361687de308d 100644
--- a/drivers/net/ethernet/sfc/efx_common.c
+++ b/drivers/net/ethernet/sfc/efx_common.c
@@ -544,6 +544,8 @@ void efx_start_all(struct efx_nic *efx)
/* Start the hardware monitor if there is one */
efx_start_monitor(efx);
+ efx_selftest_async_start(efx);
+
/* Link state detection is normally event-driven; we have
* to poll now because we could have missed a change
*/
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index f77511fe4e87..5f5a997f21f3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -165,6 +165,18 @@ config DWMAC_SOCFPGA
for the stmmac device driver. This driver is used for
arria5 and cyclone5 FPGA SoCs.
+config DWMAC_STARFIVE
+ tristate "StarFive dwmac support"
+ depends on OF && (ARCH_STARFIVE || COMPILE_TEST)
+ select MFD_SYSCON
+ default m if ARCH_STARFIVE
+ help
+ Support for Ethernet controllers on StarFive RISC-V SoCs.
+
+ This selects the StarFive platform-specific glue layer support for
+ the stmmac device driver. This driver is used for the StarFive
+ JH7110 Ethernet controller.
+
config DWMAC_STI
tristate "STi GMAC support"
default ARCH_STI
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 057e4bab5c08..8738fdbb4b2d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_DWMAC_OXNAS) += dwmac-oxnas.o
obj-$(CONFIG_DWMAC_QCOM_ETHQOS) += dwmac-qcom-ethqos.o
obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o
obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o
+obj-$(CONFIG_DWMAC_STARFIVE) += dwmac-starfive.o
obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o
obj-$(CONFIG_DWMAC_STM32) += dwmac-stm32.o
obj-$(CONFIG_DWMAC_SUNXI) += dwmac-sunxi.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
index 2e8744ac6b91..fb55efd52240 100644
--- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
@@ -14,9 +14,9 @@
#include "stmmac.h"
-static int jumbo_frm(void *p, struct sk_buff *skb, int csum)
+static int jumbo_frm(struct stmmac_tx_queue *tx_q, struct sk_buff *skb,
+ int csum)
{
- struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)p;
unsigned int nopaged_len = skb_headlen(skb);
struct stmmac_priv *priv = tx_q->priv_data;
unsigned int entry = tx_q->cur_tx;
@@ -125,9 +125,8 @@ static void init_dma_chain(void *des, dma_addr_t phy_addr,
}
}
-static void refill_desc3(void *priv_ptr, struct dma_desc *p)
+static void refill_desc3(struct stmmac_rx_queue *rx_q, struct dma_desc *p)
{
- struct stmmac_rx_queue *rx_q = (struct stmmac_rx_queue *)priv_ptr;
struct stmmac_priv *priv = rx_q->priv_data;
if (priv->hwts_rx_en && !priv->extend_desc)
@@ -141,9 +140,8 @@ static void refill_desc3(void *priv_ptr, struct dma_desc *p)
sizeof(struct dma_desc)));
}
-static void clean_desc3(void *priv_ptr, struct dma_desc *p)
+static void clean_desc3(struct stmmac_tx_queue *tx_q, struct dma_desc *p)
{
- struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)priv_ptr;
struct stmmac_priv *priv = tx_q->priv_data;
unsigned int entry = tx_q->dirty_tx;
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 54bb072aeb2d..4ad692c4116c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -242,7 +242,7 @@ struct stmmac_safety_stats {
#define SF_DMA_MODE 1 /* DMA STORE-AND-FORWARD Operation Mode */
-/* DAM HW feature register fields */
+/* DMA HW feature register fields */
#define DMA_HW_FEAT_MIISEL 0x00000001 /* 10/100 Mbps Support */
#define DMA_HW_FEAT_GMIISEL 0x00000002 /* 1000 Mbps Support */
#define DMA_HW_FEAT_HDSEL 0x00000004 /* Half-Duplex Support */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
index dfbaea06d108..9354bf419112 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
@@ -20,18 +20,18 @@
#define GMAC_CONFIG_INTF_RGMII (0x1 << 0)
struct anarion_gmac {
- uintptr_t ctl_block;
+ void __iomem *ctl_block;
uint32_t phy_intf_sel;
};
static uint32_t gmac_read_reg(struct anarion_gmac *gmac, uint8_t reg)
{
- return readl((void *)(gmac->ctl_block + reg));
+ return readl(gmac->ctl_block + reg);
};
static void gmac_write_reg(struct anarion_gmac *gmac, uint8_t reg, uint32_t val)
{
- writel(val, (void *)(gmac->ctl_block + reg));
+ writel(val, gmac->ctl_block + reg);
}
static int anarion_gmac_init(struct platform_device *pdev, void *priv)
@@ -68,16 +68,16 @@ static struct anarion_gmac *anarion_config_dt(struct platform_device *pdev)
ctl_block = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(ctl_block)) {
- dev_err(&pdev->dev, "Cannot get reset region (%ld)!\n",
- PTR_ERR(ctl_block));
- return ctl_block;
+ err = PTR_ERR(ctl_block);
+ dev_err(&pdev->dev, "Cannot get reset region (%d)!\n", err);
+ return ERR_PTR(err);
}
gmac = devm_kzalloc(&pdev->dev, sizeof(*gmac), GFP_KERNEL);
if (!gmac)
return ERR_PTR(-ENOMEM);
- gmac->ctl_block = (uintptr_t)ctl_block;
+ gmac->ctl_block = ctl_block;
err = of_get_phy_mode(pdev->dev.of_node, &phy_mode);
if (err)
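The anarion cleanup keeps the MMIO base as void __iomem * throughout instead of laundering it through uintptr_t, so sparse can verify the address space and no casts are needed at the readl()/writel() call sites. A minimal sketch of the idiom, with hypothetical foo_* names:

#include <linux/io.h>
#include <linux/platform_device.h>

struct foo_gmac {
	void __iomem *ctl_block;	/* not uintptr_t: no casts needed */
};

static u32 foo_read_reg(struct foo_gmac *gmac, unsigned int reg)
{
	return readl(gmac->ctl_block + reg);
}

static int foo_map(struct platform_device *pdev, struct foo_gmac *gmac)
{
	gmac->ctl_block = devm_platform_ioremap_resource(pdev, 0);
	return PTR_ERR_OR_ZERO(gmac->ctl_block);
}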
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 13aa919633b4..ab9f876b6df7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -251,7 +251,6 @@ static void intel_speed_mode_2500(struct net_device *ndev, void *intel_data)
priv->plat->mdio_bus_data->xpcs_an_inband = false;
} else {
priv->plat->max_speed = 1000;
- priv->plat->mdio_bus_data->xpcs_an_inband = true;
}
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index e8b507f88fbc..f6754e3643f3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -263,6 +263,11 @@ static int meson_axg_set_phy_mode(struct meson8b_dwmac *dwmac)
return 0;
}
+static void meson8b_clk_disable_unprepare(void *data)
+{
+ clk_disable_unprepare(data);
+}
+
static int meson8b_devm_clk_prepare_enable(struct meson8b_dwmac *dwmac,
struct clk *clk)
{
@@ -273,8 +278,7 @@ static int meson8b_devm_clk_prepare_enable(struct meson8b_dwmac *dwmac,
return ret;
return devm_add_action_or_reset(dwmac->dev,
- (void(*)(void *))clk_disable_unprepare,
- clk);
+ meson8b_clk_disable_unprepare, clk);
}
static int meson8b_init_rgmii_delays(struct meson8b_dwmac *dwmac)
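Casting clk_disable_unprepare() to void (*)(void *) for devm_add_action_or_reset(), as meson8b did before, is an indirect call through a mismatched prototype and trips kCFI; the fix introduces a wrapper with the exact expected signature. The general pattern, sketched with hypothetical foo_* names:

#include <linux/clk.h>
#include <linux/device.h>

/* The devm action must really have the void (*)(void *) prototype */
static void foo_clk_disable_unprepare(void *data)
{
	clk_disable_unprepare(data);
}

static int foo_clk_prepare_enable(struct device *dev, struct clk *clk)
{
	int ret = clk_prepare_enable(clk);

	if (ret)
		return ret;

	/* undo the enable automatically on driver detach or error */
	return devm_add_action_or_reset(dev, foo_clk_disable_unprepare, clk);
}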
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 32763566c214..16a8c361283b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -11,6 +11,7 @@
#define RGMII_IO_MACRO_CONFIG 0x0
#define SDCC_HC_REG_DLL_CONFIG 0x4
+#define SDCC_TEST_CTL 0x8
#define SDCC_HC_REG_DDR_CONFIG 0xC
#define SDCC_HC_REG_DLL_CONFIG2 0x10
#define SDC4_STATUS 0x14
@@ -49,6 +50,7 @@
#define SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY GENMASK(26, 21)
#define SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE GENMASK(29, 27)
#define SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN BIT(30)
+#define SDCC_DDR_CONFIG_TCXO_CYCLES_CNT GENMASK(11, 9)
#define SDCC_DDR_CONFIG_PRG_RCLK_DLY GENMASK(8, 0)
/* SDCC_HC_REG_DLL_CONFIG2 fields */
@@ -78,7 +80,9 @@ struct ethqos_emac_por {
struct ethqos_emac_driver_data {
const struct ethqos_emac_por *por;
unsigned int num_por;
- bool rgmii_config_looback_en;
+ bool rgmii_config_loopback_en;
+ bool has_emac3;
+ struct dwmac4_addrs dwmac4_addrs;
};
struct qcom_ethqos {
@@ -91,7 +95,8 @@ struct qcom_ethqos {
const struct ethqos_emac_por *por;
unsigned int num_por;
- bool rgmii_config_looback_en;
+ bool rgmii_config_loopback_en;
+ bool has_emac3;
};
static int rgmii_readl(struct qcom_ethqos *ethqos, unsigned int offset)
@@ -183,7 +188,8 @@ static const struct ethqos_emac_por emac_v2_3_0_por[] = {
static const struct ethqos_emac_driver_data emac_v2_3_0_data = {
.por = emac_v2_3_0_por,
.num_por = ARRAY_SIZE(emac_v2_3_0_por),
- .rgmii_config_looback_en = true,
+ .rgmii_config_loopback_en = true,
+ .has_emac3 = false,
};
static const struct ethqos_emac_por emac_v2_1_0_por[] = {
@@ -198,7 +204,40 @@ static const struct ethqos_emac_por emac_v2_1_0_por[] = {
static const struct ethqos_emac_driver_data emac_v2_1_0_data = {
.por = emac_v2_1_0_por,
.num_por = ARRAY_SIZE(emac_v2_1_0_por),
- .rgmii_config_looback_en = false,
+ .rgmii_config_loopback_en = false,
+ .has_emac3 = false,
+};
+
+static const struct ethqos_emac_por emac_v3_0_0_por[] = {
+ { .offset = RGMII_IO_MACRO_CONFIG, .value = 0x40c01343 },
+ { .offset = SDCC_HC_REG_DLL_CONFIG, .value = 0x2004642c },
+ { .offset = SDCC_HC_REG_DDR_CONFIG, .value = 0x80040800 },
+ { .offset = SDCC_HC_REG_DLL_CONFIG2, .value = 0x00200000 },
+ { .offset = SDCC_USR_CTL, .value = 0x00010800 },
+ { .offset = RGMII_IO_MACRO_CONFIG2, .value = 0x00002060 },
+};
+
+static const struct ethqos_emac_driver_data emac_v3_0_0_data = {
+ .por = emac_v3_0_0_por,
+ .num_por = ARRAY_SIZE(emac_v3_0_0_por),
+ .rgmii_config_loopback_en = false,
+ .has_emac3 = true,
+ .dwmac4_addrs = {
+ .dma_chan = 0x00008100,
+ .dma_chan_offset = 0x1000,
+ .mtl_chan = 0x00008000,
+ .mtl_chan_offset = 0x1000,
+ .mtl_ets_ctrl = 0x00008010,
+ .mtl_ets_ctrl_offset = 0x1000,
+ .mtl_txq_weight = 0x00008018,
+ .mtl_txq_weight_offset = 0x1000,
+ .mtl_send_slp_cred = 0x0000801c,
+ .mtl_send_slp_cred_offset = 0x1000,
+ .mtl_high_cred = 0x00008020,
+ .mtl_high_cred_offset = 0x1000,
+ .mtl_low_cred = 0x00008024,
+ .mtl_low_cred_offset = 0x1000,
+ },
};
static int ethqos_dll_configure(struct qcom_ethqos *ethqos)
@@ -222,11 +261,13 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos)
rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_EN,
SDCC_DLL_CONFIG_DLL_EN, SDCC_HC_REG_DLL_CONFIG);
- rgmii_updatel(ethqos, SDCC_DLL_MCLK_GATING_EN,
- 0, SDCC_HC_REG_DLL_CONFIG);
+ if (!ethqos->has_emac3) {
+ rgmii_updatel(ethqos, SDCC_DLL_MCLK_GATING_EN,
+ 0, SDCC_HC_REG_DLL_CONFIG);
- rgmii_updatel(ethqos, SDCC_DLL_CDR_FINE_PHASE,
- 0, SDCC_HC_REG_DLL_CONFIG);
+ rgmii_updatel(ethqos, SDCC_DLL_CDR_FINE_PHASE,
+ 0, SDCC_HC_REG_DLL_CONFIG);
+ }
/* Wait for CK_OUT_EN clear */
do {
@@ -261,28 +302,48 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos)
rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_CAL_EN,
SDCC_DLL_CONFIG2_DDR_CAL_EN, SDCC_HC_REG_DLL_CONFIG2);
- rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DLL_CLOCK_DIS,
- 0, SDCC_HC_REG_DLL_CONFIG2);
+ if (!ethqos->has_emac3) {
+ rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DLL_CLOCK_DIS,
+ 0, SDCC_HC_REG_DLL_CONFIG2);
- rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_MCLK_FREQ_CALC,
- 0x1A << 10, SDCC_HC_REG_DLL_CONFIG2);
+ rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_MCLK_FREQ_CALC,
+ 0x1A << 10, SDCC_HC_REG_DLL_CONFIG2);
- rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SEL,
- BIT(2), SDCC_HC_REG_DLL_CONFIG2);
+ rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SEL,
+ BIT(2), SDCC_HC_REG_DLL_CONFIG2);
- rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SW,
- SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SW,
- SDCC_HC_REG_DLL_CONFIG2);
+ rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SW,
+ SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SW,
+ SDCC_HC_REG_DLL_CONFIG2);
+ }
return 0;
}
static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos)
{
+ int phase_shift;
+ int phy_mode;
+ int loopback;
+
+ /* Determine if the PHY adds a 2 ns TX delay or the MAC handles it */
+ phy_mode = device_get_phy_mode(&ethqos->pdev->dev);
+ if (phy_mode == PHY_INTERFACE_MODE_RGMII_ID ||
+ phy_mode == PHY_INTERFACE_MODE_RGMII_TXID)
+ phase_shift = 0;
+ else
+ phase_shift = RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN;
+
/* Disable loopback mode */
rgmii_updatel(ethqos, RGMII_CONFIG2_TX_TO_RX_LOOPBACK_EN,
0, RGMII_IO_MACRO_CONFIG2);
+ /* Determine if this platform wants loopback enabled after programming */
+ if (ethqos->rgmii_config_loopback_en)
+ loopback = RGMII_CONFIG_LOOPBACK_EN;
+ else
+ loopback = 0;
+
/* Select RGMII, write 0 to interface select */
rgmii_updatel(ethqos, RGMII_CONFIG_INTF_SEL,
0, RGMII_IO_MACRO_CONFIG);
@@ -300,27 +361,32 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos)
RGMII_CONFIG_PROG_SWAP, RGMII_IO_MACRO_CONFIG);
rgmii_updatel(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL,
0, RGMII_IO_MACRO_CONFIG2);
+
rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN,
- RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN,
- RGMII_IO_MACRO_CONFIG2);
+ phase_shift, RGMII_IO_MACRO_CONFIG2);
rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15,
0, RGMII_IO_MACRO_CONFIG2);
rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP,
RGMII_CONFIG2_RX_PROG_SWAP,
RGMII_IO_MACRO_CONFIG2);
- /* Set PRG_RCLK_DLY to 57 for 1.8 ns delay */
- rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_RCLK_DLY,
- 57, SDCC_HC_REG_DDR_CONFIG);
+ /* PRG_RCLK_DLY = TCXO period * TCXO_CYCLES_CNT / 2 * RX delay ns,
+ * in practice this becomes PRG_RCLK_DLY = 52 * 4 / 2 * RX delay ns
+ */
+ if (ethqos->has_emac3) {
+ /* 0.9 ns */
+ rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_RCLK_DLY,
+ 115, SDCC_HC_REG_DDR_CONFIG);
+ } else {
+ /* 1.8 ns */
+ rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_RCLK_DLY,
+ 57, SDCC_HC_REG_DDR_CONFIG);
+ }
rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_DLY_EN,
SDCC_DDR_CONFIG_PRG_DLY_EN,
SDCC_HC_REG_DDR_CONFIG);
- if (ethqos->rgmii_config_looback_en)
- rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN,
- RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG);
- else
- rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN,
- 0, RGMII_IO_MACRO_CONFIG);
+ rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN,
+ loopback, RGMII_IO_MACRO_CONFIG);
break;
case SPEED_100:
@@ -336,14 +402,20 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos)
rgmii_updatel(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL,
0, RGMII_IO_MACRO_CONFIG2);
rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN,
- RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN,
- RGMII_IO_MACRO_CONFIG2);
+ phase_shift, RGMII_IO_MACRO_CONFIG2);
rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_2,
BIT(6), RGMII_IO_MACRO_CONFIG);
rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15,
0, RGMII_IO_MACRO_CONFIG2);
- rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP,
- 0, RGMII_IO_MACRO_CONFIG2);
+
+ if (ethqos->has_emac3)
+ rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP,
+ RGMII_CONFIG2_RX_PROG_SWAP,
+ RGMII_IO_MACRO_CONFIG2);
+ else
+ rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP,
+ 0, RGMII_IO_MACRO_CONFIG2);
+
/* Write 0x5 to PRG_RCLK_DLY_CODE */
rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE,
(BIT(29) | BIT(27)), SDCC_HC_REG_DDR_CONFIG);
@@ -353,13 +425,8 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos)
rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN,
SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN,
SDCC_HC_REG_DDR_CONFIG);
- if (ethqos->rgmii_config_looback_en)
- rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN,
- RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG);
- else
- rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN,
- 0, RGMII_IO_MACRO_CONFIG);
-
+ rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN,
+ loopback, RGMII_IO_MACRO_CONFIG);
break;
case SPEED_10:
@@ -375,14 +442,19 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos)
rgmii_updatel(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL,
0, RGMII_IO_MACRO_CONFIG2);
rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN,
- 0, RGMII_IO_MACRO_CONFIG2);
+ phase_shift, RGMII_IO_MACRO_CONFIG2);
rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_9,
BIT(12) | GENMASK(9, 8),
RGMII_IO_MACRO_CONFIG);
rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15,
0, RGMII_IO_MACRO_CONFIG2);
- rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP,
- 0, RGMII_IO_MACRO_CONFIG2);
+ if (ethqos->has_emac3)
+ rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP,
+ RGMII_CONFIG2_RX_PROG_SWAP,
+ RGMII_IO_MACRO_CONFIG2);
+ else
+ rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP,
+ 0, RGMII_IO_MACRO_CONFIG2);
/* Write 0x5 to PRG_RCLK_DLY_CODE */
rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE,
(BIT(29) | BIT(27)), SDCC_HC_REG_DDR_CONFIG);
@@ -393,7 +465,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos)
SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN,
SDCC_HC_REG_DDR_CONFIG);
rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN,
- RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG);
+ loopback, RGMII_IO_MACRO_CONFIG);
break;
default:
dev_err(&ethqos->pdev->dev,
@@ -425,6 +497,17 @@ static int ethqos_configure(struct qcom_ethqos *ethqos)
rgmii_updatel(ethqos, SDCC_DLL_CONFIG_PDN,
SDCC_DLL_CONFIG_PDN, SDCC_HC_REG_DLL_CONFIG);
+ if (ethqos->has_emac3) {
+ if (ethqos->speed == SPEED_1000) {
+ rgmii_writel(ethqos, 0x1800000, SDCC_TEST_CTL);
+ rgmii_writel(ethqos, 0x2C010800, SDCC_USR_CTL);
+ rgmii_writel(ethqos, 0xA001, SDCC_HC_REG_DLL_CONFIG2);
+ } else {
+ rgmii_writel(ethqos, 0x40010800, SDCC_USR_CTL);
+ rgmii_writel(ethqos, 0xA001, SDCC_HC_REG_DLL_CONFIG2);
+ }
+ }
+
/* Clear DLL_RST */
rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_RST, 0,
SDCC_HC_REG_DLL_CONFIG);
@@ -444,7 +527,9 @@ static int ethqos_configure(struct qcom_ethqos *ethqos)
SDCC_HC_REG_DLL_CONFIG);
/* Set USR_CTL bit 26 with mask of 3 bits */
- rgmii_updatel(ethqos, GENMASK(26, 24), BIT(26), SDCC_USR_CTL);
+ if (!ethqos->has_emac3)
+ rgmii_updatel(ethqos, GENMASK(26, 24), BIT(26),
+ SDCC_USR_CTL);
/* wait for DLL LOCK */
do {
@@ -538,7 +623,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
data = of_device_get_match_data(&pdev->dev);
ethqos->por = data->por;
ethqos->num_por = data->num_por;
- ethqos->rgmii_config_looback_en = data->rgmii_config_looback_en;
+ ethqos->rgmii_config_loopback_en = data->rgmii_config_loopback_en;
+ ethqos->has_emac3 = data->has_emac3;
ethqos->rgmii_clk = devm_clk_get(&pdev->dev, "rgmii");
if (IS_ERR(ethqos->rgmii_clk)) {
@@ -558,6 +644,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
plat_dat->fix_mac_speed = ethqos_fix_mac_speed;
plat_dat->dump_debug_regs = rgmii_dump;
plat_dat->has_gmac4 = 1;
+ plat_dat->dwmac4_addrs = &data->dwmac4_addrs;
plat_dat->pmt = 1;
plat_dat->tso_en = of_property_read_bool(np, "snps,tso");
if (of_device_is_compatible(np, "qcom,qcs404-ethqos"))
@@ -595,6 +682,7 @@ static int qcom_ethqos_remove(struct platform_device *pdev)
static const struct of_device_id qcom_ethqos_match[] = {
{ .compatible = "qcom,qcs404-ethqos", .data = &emac_v2_3_0_data},
+ { .compatible = "qcom,sc8280xp-ethqos", .data = &emac_v3_0_0_data},
{ .compatible = "qcom,sm8150-ethqos", .data = &emac_v2_1_0_data},
{ }
};
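The emac_v3_0_0 support above follows the standard match-data pattern: per-SoC quirks live in a driver-data struct hung off the of_device_id table, and probe fetches them with of_device_get_match_data() instead of testing compatibles. A hedged, generic sketch with illustrative foo_* names:

#include <linux/of_device.h>
#include <linux/platform_device.h>

struct foo_driver_data {
	bool has_emac3;
};

static const struct foo_driver_data foo_v2_data = { .has_emac3 = false };
static const struct foo_driver_data foo_v3_data = { .has_emac3 = true };

static const struct of_device_id foo_match[] = {
	{ .compatible = "vendor,foo-v2", .data = &foo_v2_data },
	{ .compatible = "vendor,foo-v3", .data = &foo_v3_data },
	{ }
};

static int foo_probe(struct platform_device *pdev)
{
	const struct foo_driver_data *data;

	data = of_device_get_match_data(&pdev->dev);
	if (!data)
		return -EINVAL;

	/* branch on data->has_emac3 instead of sprinkling compatibles */
	return 0;
}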
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 4b8fd11563e4..4ea31ccf24d0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -39,6 +39,24 @@ struct rk_gmac_ops {
u32 regs[];
};
+static const char * const rk_clocks[] = {
+ "aclk_mac", "pclk_mac", "mac_clk_tx", "clk_mac_speed",
+};
+
+static const char * const rk_rmii_clocks[] = {
+ "mac_clk_rx", "clk_mac_ref", "clk_mac_refout",
+};
+
+enum rk_clocks_index {
+ RK_ACLK_MAC = 0,
+ RK_PCLK_MAC,
+ RK_MAC_CLK_TX,
+ RK_CLK_MAC_SPEED,
+ RK_MAC_CLK_RX,
+ RK_CLK_MAC_REF,
+ RK_CLK_MAC_REFOUT,
+};
+
struct rk_priv_data {
struct platform_device *pdev;
phy_interface_t phy_iface;
@@ -51,15 +69,9 @@ struct rk_priv_data {
bool clock_input;
bool integrated_phy;
+ struct clk_bulk_data *clks;
+ int num_clks;
struct clk *clk_mac;
- struct clk *gmac_clkin;
- struct clk *mac_clk_rx;
- struct clk *mac_clk_tx;
- struct clk *clk_mac_ref;
- struct clk *clk_mac_refout;
- struct clk *clk_mac_speed;
- struct clk *aclk_mac;
- struct clk *pclk_mac;
struct clk *clk_phy;
struct reset_control *phy_reset;
@@ -104,10 +116,11 @@ static void px30_set_to_rmii(struct rk_priv_data *bsp_priv)
static void px30_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
{
+ struct clk *clk_mac_speed = bsp_priv->clks[RK_CLK_MAC_SPEED].clk;
struct device *dev = &bsp_priv->pdev->dev;
int ret;
- if (IS_ERR(bsp_priv->clk_mac_speed)) {
+ if (!clk_mac_speed) {
dev_err(dev, "%s: Missing clk_mac_speed clock\n", __func__);
return;
}
@@ -116,7 +129,7 @@ static void px30_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
regmap_write(bsp_priv->grf, PX30_GRF_GMAC_CON1,
PX30_GMAC_SPEED_10M);
- ret = clk_set_rate(bsp_priv->clk_mac_speed, 2500000);
+ ret = clk_set_rate(clk_mac_speed, 2500000);
if (ret)
dev_err(dev, "%s: set clk_mac_speed rate 2500000 failed: %d\n",
__func__, ret);
@@ -124,7 +137,7 @@ static void px30_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
regmap_write(bsp_priv->grf, PX30_GRF_GMAC_CON1,
PX30_GMAC_SPEED_100M);
- ret = clk_set_rate(bsp_priv->clk_mac_speed, 25000000);
+ ret = clk_set_rate(clk_mac_speed, 25000000);
if (ret)
dev_err(dev, "%s: set clk_mac_speed rate 25000000 failed: %d\n",
__func__, ret);
@@ -1066,6 +1079,7 @@ static void rk3568_set_to_rmii(struct rk_priv_data *bsp_priv)
static void rk3568_set_gmac_speed(struct rk_priv_data *bsp_priv, int speed)
{
+ struct clk *clk_mac_speed = bsp_priv->clks[RK_CLK_MAC_SPEED].clk;
struct device *dev = &bsp_priv->pdev->dev;
unsigned long rate;
int ret;
@@ -1085,7 +1099,7 @@ static void rk3568_set_gmac_speed(struct rk_priv_data *bsp_priv, int speed)
return;
}
- ret = clk_set_rate(bsp_priv->clk_mac_speed, rate);
+ ret = clk_set_rate(clk_mac_speed, rate);
if (ret)
dev_err(dev, "%s: set clk_mac_speed rate %ld failed %d\n",
__func__, rate, ret);
@@ -1371,6 +1385,7 @@ static void rv1126_set_to_rmii(struct rk_priv_data *bsp_priv)
static void rv1126_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
{
+ struct clk *clk_mac_speed = bsp_priv->clks[RK_CLK_MAC_SPEED].clk;
struct device *dev = &bsp_priv->pdev->dev;
unsigned long rate;
int ret;
@@ -1390,7 +1405,7 @@ static void rv1126_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
return;
}
- ret = clk_set_rate(bsp_priv->clk_mac_speed, rate);
+ ret = clk_set_rate(clk_mac_speed, rate);
if (ret)
dev_err(dev, "%s: set clk_mac_speed rate %ld failed %d\n",
__func__, rate, ret);
@@ -1398,6 +1413,7 @@ static void rv1126_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
static void rv1126_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
{
+ struct clk *clk_mac_speed = bsp_priv->clks[RK_CLK_MAC_SPEED].clk;
struct device *dev = &bsp_priv->pdev->dev;
unsigned long rate;
int ret;
@@ -1414,7 +1430,7 @@ static void rv1126_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
return;
}
- ret = clk_set_rate(bsp_priv->clk_mac_speed, rate);
+ ret = clk_set_rate(clk_mac_speed, rate);
if (ret)
dev_err(dev, "%s: set clk_mac_speed rate %ld failed %d\n",
__func__, rate, ret);
@@ -1475,68 +1491,50 @@ static int rk_gmac_clk_init(struct plat_stmmacenet_data *plat)
{
struct rk_priv_data *bsp_priv = plat->bsp_priv;
struct device *dev = &bsp_priv->pdev->dev;
- int ret;
+ int phy_iface = bsp_priv->phy_iface;
+ int i, j, ret;
bsp_priv->clk_enabled = false;
- bsp_priv->mac_clk_rx = devm_clk_get(dev, "mac_clk_rx");
- if (IS_ERR(bsp_priv->mac_clk_rx))
- dev_err(dev, "cannot get clock %s\n",
- "mac_clk_rx");
+ bsp_priv->num_clks = ARRAY_SIZE(rk_clocks);
+ if (phy_iface == PHY_INTERFACE_MODE_RMII)
+ bsp_priv->num_clks += ARRAY_SIZE(rk_rmii_clocks);
- bsp_priv->mac_clk_tx = devm_clk_get(dev, "mac_clk_tx");
- if (IS_ERR(bsp_priv->mac_clk_tx))
- dev_err(dev, "cannot get clock %s\n",
- "mac_clk_tx");
+ bsp_priv->clks = devm_kcalloc(dev, bsp_priv->num_clks,
+ sizeof(*bsp_priv->clks), GFP_KERNEL);
+ if (!bsp_priv->clks)
+ return -ENOMEM;
- bsp_priv->aclk_mac = devm_clk_get(dev, "aclk_mac");
- if (IS_ERR(bsp_priv->aclk_mac))
- dev_err(dev, "cannot get clock %s\n",
- "aclk_mac");
+ for (i = 0; i < ARRAY_SIZE(rk_clocks); i++)
+ bsp_priv->clks[i].id = rk_clocks[i];
- bsp_priv->pclk_mac = devm_clk_get(dev, "pclk_mac");
- if (IS_ERR(bsp_priv->pclk_mac))
- dev_err(dev, "cannot get clock %s\n",
- "pclk_mac");
-
- bsp_priv->clk_mac = devm_clk_get(dev, "stmmaceth");
- if (IS_ERR(bsp_priv->clk_mac))
- dev_err(dev, "cannot get clock %s\n",
- "stmmaceth");
-
- if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII) {
- bsp_priv->clk_mac_ref = devm_clk_get(dev, "clk_mac_ref");
- if (IS_ERR(bsp_priv->clk_mac_ref))
- dev_err(dev, "cannot get clock %s\n",
- "clk_mac_ref");
-
- if (!bsp_priv->clock_input) {
- bsp_priv->clk_mac_refout =
- devm_clk_get(dev, "clk_mac_refout");
- if (IS_ERR(bsp_priv->clk_mac_refout))
- dev_err(dev, "cannot get clock %s\n",
- "clk_mac_refout");
- }
+ if (phy_iface == PHY_INTERFACE_MODE_RMII) {
+ for (j = 0; j < ARRAY_SIZE(rk_rmii_clocks); j++)
+ bsp_priv->clks[i++].id = rk_rmii_clocks[j];
}
- bsp_priv->clk_mac_speed = devm_clk_get(dev, "clk_mac_speed");
- if (IS_ERR(bsp_priv->clk_mac_speed))
- dev_err(dev, "cannot get clock %s\n", "clk_mac_speed");
+ ret = devm_clk_bulk_get_optional(dev, bsp_priv->num_clks,
+ bsp_priv->clks);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to get clocks\n");
+
+ /* "stmmaceth" will be enabled by the core */
+ bsp_priv->clk_mac = devm_clk_get(dev, "stmmaceth");
+ ret = PTR_ERR_OR_ZERO(bsp_priv->clk_mac);
+ if (ret)
+ return dev_err_probe(dev, ret, "Cannot get stmmaceth clock\n");
if (bsp_priv->clock_input) {
dev_info(dev, "clock input from PHY\n");
- } else {
- if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII)
- clk_set_rate(bsp_priv->clk_mac, 50000000);
+ } else if (phy_iface == PHY_INTERFACE_MODE_RMII) {
+ clk_set_rate(bsp_priv->clk_mac, 50000000);
}
if (plat->phy_node && bsp_priv->integrated_phy) {
bsp_priv->clk_phy = of_clk_get(plat->phy_node, 0);
- if (IS_ERR(bsp_priv->clk_phy)) {
- ret = PTR_ERR(bsp_priv->clk_phy);
- dev_err(dev, "Cannot get PHY clock: %d\n", ret);
- return -EINVAL;
- }
+ ret = PTR_ERR_OR_ZERO(bsp_priv->clk_phy);
+ if (ret)
+ return dev_err_probe(dev, ret, "Cannot get PHY clock\n");
clk_set_rate(bsp_priv->clk_phy, 50000000);
}
@@ -1545,77 +1543,36 @@ static int rk_gmac_clk_init(struct plat_stmmacenet_data *plat)
static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
{
- int phy_iface = bsp_priv->phy_iface;
+ int ret;
if (enable) {
if (!bsp_priv->clk_enabled) {
- if (phy_iface == PHY_INTERFACE_MODE_RMII) {
- if (!IS_ERR(bsp_priv->mac_clk_rx))
- clk_prepare_enable(
- bsp_priv->mac_clk_rx);
-
- if (!IS_ERR(bsp_priv->clk_mac_ref))
- clk_prepare_enable(
- bsp_priv->clk_mac_ref);
-
- if (!IS_ERR(bsp_priv->clk_mac_refout))
- clk_prepare_enable(
- bsp_priv->clk_mac_refout);
- }
-
- if (!IS_ERR(bsp_priv->clk_phy))
- clk_prepare_enable(bsp_priv->clk_phy);
+ ret = clk_bulk_prepare_enable(bsp_priv->num_clks,
+ bsp_priv->clks);
+ if (ret)
+ return ret;
- if (!IS_ERR(bsp_priv->aclk_mac))
- clk_prepare_enable(bsp_priv->aclk_mac);
-
- if (!IS_ERR(bsp_priv->pclk_mac))
- clk_prepare_enable(bsp_priv->pclk_mac);
-
- if (!IS_ERR(bsp_priv->mac_clk_tx))
- clk_prepare_enable(bsp_priv->mac_clk_tx);
-
- if (!IS_ERR(bsp_priv->clk_mac_speed))
- clk_prepare_enable(bsp_priv->clk_mac_speed);
+ ret = clk_prepare_enable(bsp_priv->clk_phy);
+ if (ret)
+ return ret;
if (bsp_priv->ops && bsp_priv->ops->set_clock_selection)
bsp_priv->ops->set_clock_selection(bsp_priv,
bsp_priv->clock_input, true);
- /**
- * if (!IS_ERR(bsp_priv->clk_mac))
- * clk_prepare_enable(bsp_priv->clk_mac);
- */
mdelay(5);
bsp_priv->clk_enabled = true;
}
} else {
if (bsp_priv->clk_enabled) {
- if (phy_iface == PHY_INTERFACE_MODE_RMII) {
- clk_disable_unprepare(bsp_priv->mac_clk_rx);
-
- clk_disable_unprepare(bsp_priv->clk_mac_ref);
-
- clk_disable_unprepare(bsp_priv->clk_mac_refout);
- }
-
+ clk_bulk_disable_unprepare(bsp_priv->num_clks,
+ bsp_priv->clks);
clk_disable_unprepare(bsp_priv->clk_phy);
- clk_disable_unprepare(bsp_priv->aclk_mac);
-
- clk_disable_unprepare(bsp_priv->pclk_mac);
-
- clk_disable_unprepare(bsp_priv->mac_clk_tx);
-
- clk_disable_unprepare(bsp_priv->clk_mac_speed);
-
if (bsp_priv->ops && bsp_priv->ops->set_clock_selection)
bsp_priv->ops->set_clock_selection(bsp_priv,
bsp_priv->clock_input, false);
- /**
- * if (!IS_ERR(bsp_priv->clk_mac))
- * clk_disable_unprepare(bsp_priv->clk_mac);
- */
+
bsp_priv->clk_enabled = false;
}
}
@@ -1629,9 +1586,6 @@ static int phy_power_on(struct rk_priv_data *bsp_priv, bool enable)
int ret;
struct device *dev = &bsp_priv->pdev->dev;
- if (!ldo)
- return 0;
-
if (enable) {
ret = regulator_enable(ldo);
if (ret)
@@ -1679,14 +1633,11 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
}
}
- bsp_priv->regulator = devm_regulator_get_optional(dev, "phy");
+ bsp_priv->regulator = devm_regulator_get(dev, "phy");
if (IS_ERR(bsp_priv->regulator)) {
- if (PTR_ERR(bsp_priv->regulator) == -EPROBE_DEFER) {
- dev_err(dev, "phy regulator is not available yet, deferred probing\n");
- return ERR_PTR(-EPROBE_DEFER);
- }
- dev_err(dev, "no regulator found\n");
- bsp_priv->regulator = NULL;
+ ret = PTR_ERR(bsp_priv->regulator);
+ dev_err_probe(dev, ret, "failed to get phy regulator\n");
+ return ERR_PTR(ret);
}
ret = of_property_read_string(dev->of_node, "clock_in_out", &strings);
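The dwmac-rk rework collapses eight individual devm_clk_get() calls into a clk_bulk_data array; devm_clk_bulk_get_optional() returns absent clocks as NULL rather than as error pointers, which is why px30_set_rmii_speed() now tests for NULL above. A hedged sketch of the pattern; the clock names and foo_priv are examples, not a binding:

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/kernel.h>

static const char * const foo_clk_names[] = {
	"aclk", "pclk", "tx",
};

struct foo_priv {
	struct clk_bulk_data clks[ARRAY_SIZE(foo_clk_names)];
};

static int foo_get_and_enable_clocks(struct device *dev,
				     struct foo_priv *priv)
{
	int i, ret;

	for (i = 0; i < ARRAY_SIZE(foo_clk_names); i++)
		priv->clks[i].id = foo_clk_names[i];

	/* Optional bulk get: missing clocks come back as NULL */
	ret = devm_clk_bulk_get_optional(dev, ARRAY_SIZE(foo_clk_names),
					 priv->clks);
	if (ret)
		return ret;

	return clk_bulk_prepare_enable(ARRAY_SIZE(foo_clk_names),
				       priv->clks);
}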
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
new file mode 100644
index 000000000000..4f51a7889642
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * StarFive DWMAC platform driver
+ *
+ * Copyright (C) 2021 Emil Renner Berthing <kernel@esmil.dk>
+ * Copyright (C) 2022 StarFive Technology Co., Ltd.
+ *
+ */
+
+#include <linux/mfd/syscon.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+
+#include "stmmac_platform.h"
+
+#define STARFIVE_DWMAC_PHY_INFT_RGMII 0x1
+#define STARFIVE_DWMAC_PHY_INFT_RMII 0x4
+#define STARFIVE_DWMAC_PHY_INFT_FIELD 0x7U
+
+struct starfive_dwmac {
+ struct device *dev;
+ struct clk *clk_tx;
+};
+
+static void starfive_dwmac_fix_mac_speed(void *priv, unsigned int speed)
+{
+ struct starfive_dwmac *dwmac = priv;
+ unsigned long rate;
+ int err;
+
+ rate = clk_get_rate(dwmac->clk_tx);
+
+ switch (speed) {
+ case SPEED_1000:
+ rate = 125000000;
+ break;
+ case SPEED_100:
+ rate = 25000000;
+ break;
+ case SPEED_10:
+ rate = 2500000;
+ break;
+ default:
+ dev_err(dwmac->dev, "invalid speed %u\n", speed);
+ break;
+ }
+
+ err = clk_set_rate(dwmac->clk_tx, rate);
+ if (err)
+ dev_err(dwmac->dev, "failed to set tx rate %lu\n", rate);
+}
+
+static int starfive_dwmac_set_mode(struct plat_stmmacenet_data *plat_dat)
+{
+ struct starfive_dwmac *dwmac = plat_dat->bsp_priv;
+ struct regmap *regmap;
+ unsigned int args[2];
+ unsigned int mode;
+ int err;
+
+ switch (plat_dat->interface) {
+ case PHY_INTERFACE_MODE_RMII:
+ mode = STARFIVE_DWMAC_PHY_INFT_RMII;
+ break;
+
+ case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ mode = STARFIVE_DWMAC_PHY_INFT_RGMII;
+ break;
+
+ default:
+ dev_err(dwmac->dev, "unsupported interface %d\n",
+ plat_dat->interface);
+ return -EINVAL;
+ }
+
+ regmap = syscon_regmap_lookup_by_phandle_args(dwmac->dev->of_node,
+ "starfive,syscon",
+ 2, args);
+ if (IS_ERR(regmap))
+ return dev_err_probe(dwmac->dev, PTR_ERR(regmap), "getting the regmap failed\n");
+
+ /* args[0]: offset, args[1]: shift */
+ err = regmap_update_bits(regmap, args[0],
+ STARFIVE_DWMAC_PHY_INFT_FIELD << args[1],
+ mode << args[1]);
+ if (err)
+ return dev_err_probe(dwmac->dev, err, "error setting phy mode\n");
+
+ return 0;
+}
+
+static int starfive_dwmac_probe(struct platform_device *pdev)
+{
+ struct plat_stmmacenet_data *plat_dat;
+ struct stmmac_resources stmmac_res;
+ struct starfive_dwmac *dwmac;
+ struct clk *clk_gtx;
+ int err;
+
+ err = stmmac_get_platform_resources(pdev, &stmmac_res);
+ if (err)
+ return dev_err_probe(&pdev->dev, err,
+ "failed to get resources\n");
+
+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ if (IS_ERR(plat_dat))
+ return dev_err_probe(&pdev->dev, PTR_ERR(plat_dat),
+ "dt configuration failed\n");
+
+ dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
+ if (!dwmac)
+ return -ENOMEM;
+
+ dwmac->clk_tx = devm_clk_get_enabled(&pdev->dev, "tx");
+ if (IS_ERR(dwmac->clk_tx))
+ return dev_err_probe(&pdev->dev, PTR_ERR(dwmac->clk_tx),
+ "error getting tx clock\n");
+
+ clk_gtx = devm_clk_get_enabled(&pdev->dev, "gtx");
+ if (IS_ERR(clk_gtx))
+ return dev_err_probe(&pdev->dev, PTR_ERR(clk_gtx),
+ "error getting gtx clock\n");
+
+ /* Normally the rgmii_tx clock is supplied internally and must be
+ * reprogrammed to match the link speed. If the rgmii_tx clock is
+ * instead driven by the external rgmii_rxin, no internal
+ * configuration is needed, because rgmii_rxin adapts to the link
+ * automatically.
+ */
+ if (!device_property_read_bool(&pdev->dev, "starfive,tx-use-rgmii-clk"))
+ plat_dat->fix_mac_speed = starfive_dwmac_fix_mac_speed;
+
+ dwmac->dev = &pdev->dev;
+ plat_dat->bsp_priv = dwmac;
+ plat_dat->dma_cfg->dche = true;
+
+ err = starfive_dwmac_set_mode(plat_dat);
+ if (err)
+ return err;
+
+ err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+ if (err) {
+ stmmac_remove_config_dt(pdev, plat_dat);
+ return err;
+ }
+
+ return 0;
+}
+
+static const struct of_device_id starfive_dwmac_match[] = {
+ { .compatible = "starfive,jh7110-dwmac" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, starfive_dwmac_match);
+
+static struct platform_driver starfive_dwmac_driver = {
+ .probe = starfive_dwmac_probe,
+ .remove = stmmac_pltfr_remove,
+ .driver = {
+ .name = "starfive-dwmac",
+ .pm = &stmmac_pltfr_pm_ops,
+ .of_match_table = starfive_dwmac_match,
+ },
+};
+module_platform_driver(starfive_dwmac_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("StarFive DWMAC platform driver");
+MODULE_AUTHOR("Emil Renner Berthing <kernel@esmil.dk>");
+MODULE_AUTHOR("Samin Guo <samin.guo@starfivetech.com>");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index be3b1ebc06ab..465ce66ef9c1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -35,7 +35,7 @@
#define IS_PHY_IF_MODE_GBIT(iface) (IS_PHY_IF_MODE_RGMII(iface) || \
iface == PHY_INTERFACE_MODE_GMII)
-/* STiH4xx register definitions (STiH415/STiH416/STiH407/STiH410 families)
+/* STiH4xx register definitions (STiH407/STiH410 families)
*
* Below table summarizes the clock requirement and clock sources for
* supported phy interface modes with link speeds.
@@ -75,27 +75,6 @@
#define STIH4XX_ETH_SEL_INTERNAL_NOTEXT_PHYCLK BIT(7)
#define STIH4XX_ETH_SEL_TXCLK_NOT_CLK125 BIT(6)
-/* STiD127 register definitions
- *-----------------------
- * src |BIT(6)| BIT(7)|
- *-----------------------
- * MII | 1 | n/a |
- *-----------------------
- * RMII | n/a | 1 |
- * clkgen| | |
- *-----------------------
- * RMII | n/a | 0 |
- * phyclk| | |
- *-----------------------
- * RGMII | 1 | n/a |
- * clkgen| | |
- *-----------------------
- */
-
-#define STID127_RETIME_SRC_MASK GENMASK(7, 6)
-#define STID127_ETH_SEL_INTERNAL_NOTEXT_PHYCLK BIT(7)
-#define STID127_ETH_SEL_INTERNAL_NOTEXT_TXCLK BIT(6)
-
#define ENMII_MASK GENMASK(5, 5)
#define ENMII BIT(5)
#define EN_MASK GENMASK(1, 1)
@@ -194,36 +173,6 @@ static void stih4xx_fix_retime_src(void *priv, u32 spd)
stih4xx_tx_retime_val[src]);
}
-static void stid127_fix_retime_src(void *priv, u32 spd)
-{
- struct sti_dwmac *dwmac = priv;
- u32 reg = dwmac->ctrl_reg;
- u32 freq = 0;
- u32 val = 0;
-
- if (dwmac->interface == PHY_INTERFACE_MODE_MII) {
- val = STID127_ETH_SEL_INTERNAL_NOTEXT_TXCLK;
- } else if (dwmac->interface == PHY_INTERFACE_MODE_RMII) {
- if (!dwmac->ext_phyclk) {
- val = STID127_ETH_SEL_INTERNAL_NOTEXT_PHYCLK;
- freq = DWMAC_50MHZ;
- }
- } else if (IS_PHY_IF_MODE_RGMII(dwmac->interface)) {
- val = STID127_ETH_SEL_INTERNAL_NOTEXT_TXCLK;
- if (spd == SPEED_1000)
- freq = DWMAC_125MHZ;
- else if (spd == SPEED_100)
- freq = DWMAC_25MHZ;
- else if (spd == SPEED_10)
- freq = DWMAC_2_5MHZ;
- }
-
- if (freq)
- clk_set_rate(dwmac->clk, freq);
-
- regmap_update_bits(dwmac->regmap, reg, STID127_RETIME_SRC_MASK, val);
-}
-
static int sti_dwmac_set_mode(struct sti_dwmac *dwmac)
{
struct regmap *regmap = dwmac->regmap;
@@ -408,14 +357,7 @@ static const struct sti_dwmac_of_data stih4xx_dwmac_data = {
.fix_retime_src = stih4xx_fix_retime_src,
};
-static const struct sti_dwmac_of_data stid127_dwmac_data = {
- .fix_retime_src = stid127_fix_retime_src,
-};
-
static const struct of_device_id sti_dwmac_match[] = {
- { .compatible = "st,stih415-dwmac", .data = &stih4xx_dwmac_data},
- { .compatible = "st,stih416-dwmac", .data = &stih4xx_dwmac_data},
- { .compatible = "st,stid127-dwmac", .data = &stid127_dwmac_data},
{ .compatible = "st,stih407-dwmac", .data = &stih4xx_dwmac_data},
{ }
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index f834472599f7..c2c592ba0eb8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -304,7 +304,8 @@ static void sun8i_dwmac_dma_init(void __iomem *ioaddr,
writel(0x1FFFFFF, ioaddr + EMAC_INT_STA);
}
-static void sun8i_dwmac_dma_init_rx(void __iomem *ioaddr,
+static void sun8i_dwmac_dma_init_rx(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg,
dma_addr_t dma_rx_phy, u32 chan)
{
@@ -312,7 +313,8 @@ static void sun8i_dwmac_dma_init_rx(void __iomem *ioaddr,
writel(lower_32_bits(dma_rx_phy), ioaddr + EMAC_RX_DESC_LIST);
}
-static void sun8i_dwmac_dma_init_tx(void __iomem *ioaddr,
+static void sun8i_dwmac_dma_init_tx(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg,
dma_addr_t dma_tx_phy, u32 chan)
{
@@ -324,7 +326,8 @@ static void sun8i_dwmac_dma_init_tx(void __iomem *ioaddr,
* Called from stmmac_dma_ops->dump_regs
* Used for ethtool
*/
-static void sun8i_dwmac_dump_regs(void __iomem *ioaddr, u32 *reg_space)
+static void sun8i_dwmac_dump_regs(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 *reg_space)
{
int i;
@@ -352,7 +355,8 @@ static void sun8i_dwmac_dump_mac_regs(struct mac_device_info *hw,
}
}
-static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan,
+static void sun8i_dwmac_enable_dma_irq(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 chan,
bool rx, bool tx)
{
u32 value = readl(ioaddr + EMAC_INT_EN);
@@ -365,7 +369,8 @@ static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan,
writel(value, ioaddr + EMAC_INT_EN);
}
-static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan,
+static void sun8i_dwmac_disable_dma_irq(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 chan,
bool rx, bool tx)
{
u32 value = readl(ioaddr + EMAC_INT_EN);
@@ -378,7 +383,8 @@ static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan,
writel(value, ioaddr + EMAC_INT_EN);
}
-static void sun8i_dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan)
+static void sun8i_dwmac_dma_start_tx(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 chan)
{
u32 v;
@@ -398,7 +404,8 @@ static void sun8i_dwmac_enable_dma_transmission(void __iomem *ioaddr)
writel(v, ioaddr + EMAC_TX_CTL1);
}
-static void sun8i_dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan)
+static void sun8i_dwmac_dma_stop_tx(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 chan)
{
u32 v;
@@ -407,7 +414,8 @@ static void sun8i_dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan)
writel(v, ioaddr + EMAC_TX_CTL1);
}
-static void sun8i_dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan)
+static void sun8i_dwmac_dma_start_rx(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 chan)
{
u32 v;
@@ -417,7 +425,8 @@ static void sun8i_dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan)
writel(v, ioaddr + EMAC_RX_CTL1);
}
-static void sun8i_dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan)
+static void sun8i_dwmac_dma_stop_rx(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 chan)
{
u32 v;
@@ -426,7 +435,8 @@ static void sun8i_dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan)
writel(v, ioaddr + EMAC_RX_CTL1);
}
-static int sun8i_dwmac_dma_interrupt(void __iomem *ioaddr,
+static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_extra_stats *x, u32 chan,
u32 dir)
{
@@ -492,7 +502,8 @@ static int sun8i_dwmac_dma_interrupt(void __iomem *ioaddr,
return ret;
}
-static void sun8i_dwmac_dma_operation_mode_rx(void __iomem *ioaddr, int mode,
+static void sun8i_dwmac_dma_operation_mode_rx(struct stmmac_priv *priv,
+ void __iomem *ioaddr, int mode,
u32 channel, int fifosz, u8 qmode)
{
u32 v;
@@ -515,7 +526,8 @@ static void sun8i_dwmac_dma_operation_mode_rx(void __iomem *ioaddr, int mode,
writel(v, ioaddr + EMAC_RX_CTL1);
}
-static void sun8i_dwmac_dma_operation_mode_tx(void __iomem *ioaddr, int mode,
+static void sun8i_dwmac_dma_operation_mode_tx(struct stmmac_priv *priv,
+ void __iomem *ioaddr, int mode,
u32 channel, int fifosz, u8 qmode)
{
u32 v;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 0e00dd83d027..3927609abc44 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -414,7 +414,8 @@ static void dwmac1000_get_adv_lp(void __iomem *ioaddr, struct rgmii_adv *adv)
dwmac_get_adv_lp(ioaddr, GMAC_PCS_BASE, adv);
}
-static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
+static void dwmac1000_debug(struct stmmac_priv *priv, void __iomem *ioaddr,
+ struct stmmac_extra_stats *x,
u32 rx_queues, u32 tx_queues)
{
u32 value = readl(ioaddr + GMAC_DEBUG);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index f5581db0ba9b..daf79cdbd3ec 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -110,7 +110,8 @@ static void dwmac1000_dma_init(void __iomem *ioaddr,
writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA);
}
-static void dwmac1000_dma_init_rx(void __iomem *ioaddr,
+static void dwmac1000_dma_init_rx(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg,
dma_addr_t dma_rx_phy, u32 chan)
{
@@ -118,7 +119,8 @@ static void dwmac1000_dma_init_rx(void __iomem *ioaddr,
writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_RCV_BASE_ADDR);
}
-static void dwmac1000_dma_init_tx(void __iomem *ioaddr,
+static void dwmac1000_dma_init_tx(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg,
dma_addr_t dma_tx_phy, u32 chan)
{
@@ -147,7 +149,8 @@ static u32 dwmac1000_configure_fc(u32 csr6, int rxfifosz)
return csr6;
}
-static void dwmac1000_dma_operation_mode_rx(void __iomem *ioaddr, int mode,
+static void dwmac1000_dma_operation_mode_rx(struct stmmac_priv *priv,
+ void __iomem *ioaddr, int mode,
u32 channel, int fifosz, u8 qmode)
{
u32 csr6 = readl(ioaddr + DMA_CONTROL);
@@ -175,7 +178,8 @@ static void dwmac1000_dma_operation_mode_rx(void __iomem *ioaddr, int mode,
writel(csr6, ioaddr + DMA_CONTROL);
}
-static void dwmac1000_dma_operation_mode_tx(void __iomem *ioaddr, int mode,
+static void dwmac1000_dma_operation_mode_tx(struct stmmac_priv *priv,
+ void __iomem *ioaddr, int mode,
u32 channel, int fifosz, u8 qmode)
{
u32 csr6 = readl(ioaddr + DMA_CONTROL);
@@ -208,7 +212,8 @@ static void dwmac1000_dma_operation_mode_tx(void __iomem *ioaddr, int mode,
writel(csr6, ioaddr + DMA_CONTROL);
}
-static void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
+static void dwmac1000_dump_dma_regs(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 *reg_space)
{
int i;
@@ -263,8 +268,8 @@ static int dwmac1000_get_hw_feature(void __iomem *ioaddr,
return 0;
}
-static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt,
- u32 queue)
+static void dwmac1000_rx_watchdog(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 riwt, u32 queue)
{
writel(riwt, ioaddr + DMA_RX_WATCHDOG);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
index 8f0d9bc7cab5..1c32b1788f02 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
@@ -29,7 +29,7 @@ static void dwmac100_dma_init(void __iomem *ioaddr,
writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA);
}
-static void dwmac100_dma_init_rx(void __iomem *ioaddr,
+static void dwmac100_dma_init_rx(struct stmmac_priv *priv, void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg,
dma_addr_t dma_rx_phy, u32 chan)
{
@@ -37,7 +37,7 @@ static void dwmac100_dma_init_rx(void __iomem *ioaddr,
writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_RCV_BASE_ADDR);
}
-static void dwmac100_dma_init_tx(void __iomem *ioaddr,
+static void dwmac100_dma_init_tx(struct stmmac_priv *priv, void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg,
dma_addr_t dma_tx_phy, u32 chan)
{
@@ -50,7 +50,8 @@ static void dwmac100_dma_init_tx(void __iomem *ioaddr,
* The transmit threshold can be programmed by setting the TTC bits in the DMA
* control register.
*/
-static void dwmac100_dma_operation_mode_tx(void __iomem *ioaddr, int mode,
+static void dwmac100_dma_operation_mode_tx(struct stmmac_priv *priv,
+ void __iomem *ioaddr, int mode,
u32 channel, int fifosz, u8 qmode)
{
u32 csr6 = readl(ioaddr + DMA_CONTROL);
@@ -65,7 +66,8 @@ static void dwmac100_dma_operation_mode_tx(void __iomem *ioaddr, int mode,
writel(csr6, ioaddr + DMA_CONTROL);
}
-static void dwmac100_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
+static void dwmac100_dump_dma_regs(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 *reg_space)
{
int i;
@@ -80,10 +82,10 @@ static void dwmac100_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
}
/* DMA controller has two counters to track the number of the missed frames. */
-static void dwmac100_dma_diagnostic_fr(void *data, struct stmmac_extra_stats *x,
+static void dwmac100_dma_diagnostic_fr(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
void __iomem *ioaddr)
{
- struct net_device_stats *stats = (struct net_device_stats *)data;
u32 csr8 = readl(ioaddr + DMA_MISSED_FRAME_CTR);
if (unlikely(csr8)) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index ccd49346d3b3..4538f334df57 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -336,14 +336,25 @@ enum power_event {
#define MTL_CHAN_BASE_ADDR 0x00000d00
#define MTL_CHAN_BASE_OFFSET 0x40
-#define MTL_CHANX_BASE_ADDR(x) (MTL_CHAN_BASE_ADDR + \
- (x * MTL_CHAN_BASE_OFFSET))
-#define MTL_CHAN_TX_OP_MODE(x) MTL_CHANX_BASE_ADDR(x)
-#define MTL_CHAN_TX_DEBUG(x) (MTL_CHANX_BASE_ADDR(x) + 0x8)
-#define MTL_CHAN_INT_CTRL(x) (MTL_CHANX_BASE_ADDR(x) + 0x2c)
-#define MTL_CHAN_RX_OP_MODE(x) (MTL_CHANX_BASE_ADDR(x) + 0x30)
-#define MTL_CHAN_RX_DEBUG(x) (MTL_CHANX_BASE_ADDR(x) + 0x38)
+static inline u32 mtl_chanx_base_addr(const struct dwmac4_addrs *addrs,
+ const u32 x)
+{
+ u32 addr;
+
+ if (addrs)
+ addr = addrs->mtl_chan + (x * addrs->mtl_chan_offset);
+ else
+ addr = MTL_CHAN_BASE_ADDR + (x * MTL_CHAN_BASE_OFFSET);
+
+ return addr;
+}
+
+#define MTL_CHAN_TX_OP_MODE(addrs, x) mtl_chanx_base_addr(addrs, x)
+#define MTL_CHAN_TX_DEBUG(addrs, x) (mtl_chanx_base_addr(addrs, x) + 0x8)
+#define MTL_CHAN_INT_CTRL(addrs, x) (mtl_chanx_base_addr(addrs, x) + 0x2c)
+#define MTL_CHAN_RX_OP_MODE(addrs, x) (mtl_chanx_base_addr(addrs, x) + 0x30)
+#define MTL_CHAN_RX_DEBUG(addrs, x) (mtl_chanx_base_addr(addrs, x) + 0x38)
#define MTL_OP_MODE_RSF BIT(5)
#define MTL_OP_MODE_TXQEN_MASK GENMASK(3, 2)
@@ -388,8 +399,19 @@ enum power_event {
/* MTL ETS Control register */
#define MTL_ETS_CTRL_BASE_ADDR 0x00000d10
#define MTL_ETS_CTRL_BASE_OFFSET 0x40
-#define MTL_ETSX_CTRL_BASE_ADDR(x) (MTL_ETS_CTRL_BASE_ADDR + \
- ((x) * MTL_ETS_CTRL_BASE_OFFSET))
+
+static inline u32 mtl_etsx_ctrl_base_addr(const struct dwmac4_addrs *addrs,
+ const u32 x)
+{
+ u32 addr;
+
+ if (addrs)
+ addr = addrs->mtl_ets_ctrl + (x * addrs->mtl_ets_ctrl_offset);
+ else
+ addr = MTL_ETS_CTRL_BASE_ADDR + (x * MTL_ETS_CTRL_BASE_OFFSET);
+
+ return addr;
+}
#define MTL_ETS_CTRL_CC BIT(3)
#define MTL_ETS_CTRL_AVALG BIT(2)
@@ -397,31 +419,76 @@ enum power_event {
/* MTL Queue Quantum Weight */
#define MTL_TXQ_WEIGHT_BASE_ADDR 0x00000d18
#define MTL_TXQ_WEIGHT_BASE_OFFSET 0x40
-#define MTL_TXQX_WEIGHT_BASE_ADDR(x) (MTL_TXQ_WEIGHT_BASE_ADDR + \
- ((x) * MTL_TXQ_WEIGHT_BASE_OFFSET))
+
+static inline u32 mtl_txqx_weight_base_addr(const struct dwmac4_addrs *addrs,
+ const u32 x)
+{
+ u32 addr;
+
+ if (addrs)
+ addr = addrs->mtl_txq_weight + (x * addrs->mtl_txq_weight_offset);
+ else
+ addr = MTL_TXQ_WEIGHT_BASE_ADDR + (x * MTL_TXQ_WEIGHT_BASE_OFFSET);
+
+ return addr;
+}
+
#define MTL_TXQ_WEIGHT_ISCQW_MASK GENMASK(20, 0)
/* MTL sendSlopeCredit register */
#define MTL_SEND_SLP_CRED_BASE_ADDR 0x00000d1c
#define MTL_SEND_SLP_CRED_OFFSET 0x40
-#define MTL_SEND_SLP_CREDX_BASE_ADDR(x) (MTL_SEND_SLP_CRED_BASE_ADDR + \
- ((x) * MTL_SEND_SLP_CRED_OFFSET))
+
+static inline u32 mtl_send_slp_credx_base_addr(const struct dwmac4_addrs *addrs,
+ const u32 x)
+{
+ u32 addr;
+
+ if (addrs)
+ addr = addrs->mtl_send_slp_cred + (x * addrs->mtl_send_slp_cred_offset);
+ else
+ addr = MTL_SEND_SLP_CRED_BASE_ADDR + (x * MTL_SEND_SLP_CRED_OFFSET);
+
+ return addr;
+}
#define MTL_SEND_SLP_CRED_SSC_MASK GENMASK(13, 0)
/* MTL hiCredit register */
#define MTL_HIGH_CRED_BASE_ADDR 0x00000d20
#define MTL_HIGH_CRED_OFFSET 0x40
-#define MTL_HIGH_CREDX_BASE_ADDR(x) (MTL_HIGH_CRED_BASE_ADDR + \
- ((x) * MTL_HIGH_CRED_OFFSET))
+
+static inline u32 mtl_high_credx_base_addr(const struct dwmac4_addrs *addrs,
+ const u32 x)
+{
+ u32 addr;
+
+ if (addrs)
+ addr = addrs->mtl_high_cred + (x * addrs->mtl_high_cred_offset);
+ else
+ addr = MTL_HIGH_CRED_BASE_ADDR + (x * MTL_HIGH_CRED_OFFSET);
+
+ return addr;
+}
#define MTL_HIGH_CRED_HC_MASK GENMASK(28, 0)
/* MTL loCredit register */
#define MTL_LOW_CRED_BASE_ADDR 0x00000d24
#define MTL_LOW_CRED_OFFSET 0x40
-#define MTL_LOW_CREDX_BASE_ADDR(x) (MTL_LOW_CRED_BASE_ADDR + \
- ((x) * MTL_LOW_CRED_OFFSET))
+
+static inline u32 mtl_low_credx_base_addr(const struct dwmac4_addrs *addrs,
+ const u32 x)
+{
+ u32 addr;
+
+ if (addrs)
+ addr = addrs->mtl_low_cred + (x * addrs->mtl_low_cred_offset);
+ else
+ addr = MTL_LOW_CRED_BASE_ADDR + (x * MTL_LOW_CRED_OFFSET);
+
+ return addr;
+}
#define MTL_HIGH_CRED_LC_MASK GENMASK(28, 0)
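
The helpers above all share one dispatch rule: a populated struct dwmac4_addrs overrides the stock base/offset pair, while a NULL pointer keeps the default layout. A sketch of how a platform with a remapped MTL block might describe itself -- the field names follow the accessor usage above, but the struct body (defined elsewhere in the patch) and the offsets are illustrative only:

	/* Sketch only: stand-in for struct dwmac4_addrs, reconstructed from
	 * the accessors above; values are hypothetical.
	 */
	struct dwmac4_addrs_sketch {
		unsigned int mtl_chan;
		unsigned int mtl_chan_offset;
		unsigned int mtl_ets_ctrl;
		unsigned int mtl_ets_ctrl_offset;
		unsigned int mtl_txq_weight;
		unsigned int mtl_txq_weight_offset;
	};

	static const struct dwmac4_addrs_sketch quirky_soc_addrs = {
		.mtl_chan		= 0x00008000,	/* hypothetical remapped base */
		.mtl_chan_offset	= 0x1000,	/* hypothetical wider stride */
		.mtl_ets_ctrl		= 0x00008010,
		.mtl_ets_ctrl_offset	= 0x1000,
		.mtl_txq_weight		= 0x00008018,
		.mtl_txq_weight_offset	= 0x1000,
	};

Leaving the addrs pointer NULL keeps the MTL_*_BASE_ADDR defaults, which is why every helper tests if (addrs) first.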
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 36251ec2589c..afaec3fb9ab6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -198,15 +198,18 @@ static void dwmac4_prog_mtl_tx_algorithms(struct mac_device_info *hw,
writel(value, ioaddr + MTL_OPERATION_MODE);
}
-static void dwmac4_set_mtl_tx_queue_weight(struct mac_device_info *hw,
+static void dwmac4_set_mtl_tx_queue_weight(struct stmmac_priv *priv,
+ struct mac_device_info *hw,
u32 weight, u32 queue)
{
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
void __iomem *ioaddr = hw->pcsr;
- u32 value = readl(ioaddr + MTL_TXQX_WEIGHT_BASE_ADDR(queue));
+ u32 value = readl(ioaddr + mtl_txqx_weight_base_addr(dwmac4_addrs,
+ queue));
value &= ~MTL_TXQ_WEIGHT_ISCQW_MASK;
value |= weight & MTL_TXQ_WEIGHT_ISCQW_MASK;
- writel(value, ioaddr + MTL_TXQX_WEIGHT_BASE_ADDR(queue));
+ writel(value, ioaddr + mtl_txqx_weight_base_addr(dwmac4_addrs, queue));
}
static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan)
@@ -227,10 +230,12 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan)
}
}
-static void dwmac4_config_cbs(struct mac_device_info *hw,
+static void dwmac4_config_cbs(struct stmmac_priv *priv,
+ struct mac_device_info *hw,
u32 send_slope, u32 idle_slope,
u32 high_credit, u32 low_credit, u32 queue)
{
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
void __iomem *ioaddr = hw->pcsr;
u32 value;
@@ -241,31 +246,33 @@ static void dwmac4_config_cbs(struct mac_device_info *hw,
pr_debug("\tlow_credit: 0x%08x\n", low_credit);
/* enable AV algorithm */
- value = readl(ioaddr + MTL_ETSX_CTRL_BASE_ADDR(queue));
+ value = readl(ioaddr + mtl_etsx_ctrl_base_addr(dwmac4_addrs, queue));
value |= MTL_ETS_CTRL_AVALG;
value |= MTL_ETS_CTRL_CC;
- writel(value, ioaddr + MTL_ETSX_CTRL_BASE_ADDR(queue));
+ writel(value, ioaddr + mtl_etsx_ctrl_base_addr(dwmac4_addrs, queue));
/* configure send slope */
- value = readl(ioaddr + MTL_SEND_SLP_CREDX_BASE_ADDR(queue));
+ value = readl(ioaddr + mtl_send_slp_credx_base_addr(dwmac4_addrs,
+ queue));
value &= ~MTL_SEND_SLP_CRED_SSC_MASK;
value |= send_slope & MTL_SEND_SLP_CRED_SSC_MASK;
- writel(value, ioaddr + MTL_SEND_SLP_CREDX_BASE_ADDR(queue));
+ writel(value, ioaddr + mtl_send_slp_credx_base_addr(dwmac4_addrs,
+ queue));
/* configure idle slope (same register as tx weight) */
- dwmac4_set_mtl_tx_queue_weight(hw, idle_slope, queue);
+ dwmac4_set_mtl_tx_queue_weight(priv, hw, idle_slope, queue);
/* configure high credit */
- value = readl(ioaddr + MTL_HIGH_CREDX_BASE_ADDR(queue));
+ value = readl(ioaddr + mtl_high_credx_base_addr(dwmac4_addrs, queue));
value &= ~MTL_HIGH_CRED_HC_MASK;
value |= high_credit & MTL_HIGH_CRED_HC_MASK;
- writel(value, ioaddr + MTL_HIGH_CREDX_BASE_ADDR(queue));
+ writel(value, ioaddr + mtl_high_credx_base_addr(dwmac4_addrs, queue));
/* configure low credit */
- value = readl(ioaddr + MTL_LOW_CREDX_BASE_ADDR(queue));
+ value = readl(ioaddr + mtl_low_credx_base_addr(dwmac4_addrs, queue));
value &= ~MTL_HIGH_CRED_LC_MASK;
value |= low_credit & MTL_HIGH_CRED_LC_MASK;
- writel(value, ioaddr + MTL_LOW_CREDX_BASE_ADDR(queue));
+ writel(value, ioaddr + mtl_low_credx_base_addr(dwmac4_addrs, queue));
}
static void dwmac4_dump_regs(struct mac_device_info *hw, u32 *reg_space)
@@ -759,8 +766,10 @@ static void dwmac4_phystatus(void __iomem *ioaddr, struct stmmac_extra_stats *x)
}
}
-static int dwmac4_irq_mtl_status(struct mac_device_info *hw, u32 chan)
+static int dwmac4_irq_mtl_status(struct stmmac_priv *priv,
+ struct mac_device_info *hw, u32 chan)
{
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
void __iomem *ioaddr = hw->pcsr;
u32 mtl_int_qx_status;
int ret = 0;
@@ -770,12 +779,13 @@ static int dwmac4_irq_mtl_status(struct mac_device_info *hw, u32 chan)
/* Check MTL Interrupt */
if (mtl_int_qx_status & MTL_INT_QX(chan)) {
/* read Queue x Interrupt status */
- u32 status = readl(ioaddr + MTL_CHAN_INT_CTRL(chan));
+ u32 status = readl(ioaddr + MTL_CHAN_INT_CTRL(dwmac4_addrs,
+ chan));
if (status & MTL_RX_OVERFLOW_INT) {
/* clear Interrupt */
writel(status | MTL_RX_OVERFLOW_INT,
- ioaddr + MTL_CHAN_INT_CTRL(chan));
+ ioaddr + MTL_CHAN_INT_CTRL(dwmac4_addrs, chan));
ret = CORE_IRQ_MTL_RX_OVERFLOW;
}
}
@@ -833,14 +843,16 @@ static int dwmac4_irq_status(struct mac_device_info *hw,
return ret;
}
-static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
+static void dwmac4_debug(struct stmmac_priv *priv, void __iomem *ioaddr,
+ struct stmmac_extra_stats *x,
u32 rx_queues, u32 tx_queues)
{
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
u32 value;
u32 queue;
for (queue = 0; queue < tx_queues; queue++) {
- value = readl(ioaddr + MTL_CHAN_TX_DEBUG(queue));
+ value = readl(ioaddr + MTL_CHAN_TX_DEBUG(dwmac4_addrs, queue));
if (value & MTL_DEBUG_TXSTSFSTS)
x->mtl_tx_status_fifo_full++;
@@ -865,7 +877,7 @@ static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
}
for (queue = 0; queue < rx_queues; queue++) {
- value = readl(ioaddr + MTL_CHAN_RX_DEBUG(queue));
+ value = readl(ioaddr + MTL_CHAN_RX_DEBUG(dwmac4_addrs, queue));
if (value & MTL_DEBUG_RXFSTS_MASK) {
u32 rxfsts = (value & MTL_DEBUG_RXFSTS_MASK)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 8cc80b1db4cb..6a011d8633e8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -13,11 +13,11 @@
#include "dwmac4.h"
#include "dwmac4_descs.h"
-static int dwmac4_wrback_get_tx_status(void *data, struct stmmac_extra_stats *x,
+static int dwmac4_wrback_get_tx_status(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
struct dma_desc *p,
void __iomem *ioaddr)
{
- struct net_device_stats *stats = (struct net_device_stats *)data;
unsigned int tdes3;
int ret = tx_done;
@@ -73,10 +73,10 @@ static int dwmac4_wrback_get_tx_status(void *data, struct stmmac_extra_stats *x,
return ret;
}
-static int dwmac4_wrback_get_rx_status(void *data, struct stmmac_extra_stats *x,
+static int dwmac4_wrback_get_rx_status(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
struct dma_desc *p)
{
- struct net_device_stats *stats = (struct net_device_stats *)data;
unsigned int rdes1 = le32_to_cpu(p->des1);
unsigned int rdes2 = le32_to_cpu(p->des2);
unsigned int rdes3 = le32_to_cpu(p->des3);
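
Alongside the priv plumbing, the descriptor status callbacks here trade the untyped void *data parameter for a typed struct net_device_stats *, which deletes the cast at the top of each function and lets the compiler check every call site. In miniature, with hypothetical names:

	/* Sketch only: the type-safety gain from dropping the void * parameter. */
	struct stats_sketch {
		unsigned long tx_errors;
	};

	/* before: the stats pointer arrived untyped and had to be cast back */
	static int status_before(void *data)
	{
		struct stats_sketch *stats = (struct stats_sketch *)data;

		stats->tx_errors++;
		return 0;
	}

	/* after: a wrong argument type is now a compile-time error */
	static int status_after(struct stats_sketch *stats)
	{
		stats->tx_errors++;
		return 0;
	}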
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index d99fa028c646..84d3a8551b03 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -13,6 +13,7 @@
#include <linux/io.h>
#include "dwmac4.h"
#include "dwmac4_dma.h"
+#include "stmmac.h"
static void dwmac4_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
{
@@ -68,77 +69,87 @@ static void dwmac4_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
writel(value, ioaddr + DMA_SYS_BUS_MODE);
}
-static void dwmac4_dma_init_rx_chan(void __iomem *ioaddr,
+static void dwmac4_dma_init_rx_chan(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg,
dma_addr_t dma_rx_phy, u32 chan)
{
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
u32 value;
u32 rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
- value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
+ value = readl(ioaddr + DMA_CHAN_RX_CONTROL(dwmac4_addrs, chan));
value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
- writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
+ writel(value, ioaddr + DMA_CHAN_RX_CONTROL(dwmac4_addrs, chan));
if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && likely(dma_cfg->eame))
writel(upper_32_bits(dma_rx_phy),
- ioaddr + DMA_CHAN_RX_BASE_ADDR_HI(chan));
+ ioaddr + DMA_CHAN_RX_BASE_ADDR_HI(dwmac4_addrs, chan));
- writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_CHAN_RX_BASE_ADDR(chan));
+ writel(lower_32_bits(dma_rx_phy),
+ ioaddr + DMA_CHAN_RX_BASE_ADDR(dwmac4_addrs, chan));
}
-static void dwmac4_dma_init_tx_chan(void __iomem *ioaddr,
+static void dwmac4_dma_init_tx_chan(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg,
dma_addr_t dma_tx_phy, u32 chan)
{
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
u32 value;
u32 txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
- value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
+ value = readl(ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, chan));
value = value | (txpbl << DMA_BUS_MODE_PBL_SHIFT);
/* Enable OSP to get best performance */
value |= DMA_CONTROL_OSP;
- writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
+ writel(value, ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, chan));
if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && likely(dma_cfg->eame))
writel(upper_32_bits(dma_tx_phy),
- ioaddr + DMA_CHAN_TX_BASE_ADDR_HI(chan));
+ ioaddr + DMA_CHAN_TX_BASE_ADDR_HI(dwmac4_addrs, chan));
- writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_CHAN_TX_BASE_ADDR(chan));
+ writel(lower_32_bits(dma_tx_phy),
+ ioaddr + DMA_CHAN_TX_BASE_ADDR(dwmac4_addrs, chan));
}
-static void dwmac4_dma_init_channel(void __iomem *ioaddr,
+static void dwmac4_dma_init_channel(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg, u32 chan)
{
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
u32 value;
/* common channel control register config */
- value = readl(ioaddr + DMA_CHAN_CONTROL(chan));
+ value = readl(ioaddr + DMA_CHAN_CONTROL(dwmac4_addrs, chan));
if (dma_cfg->pblx8)
value = value | DMA_BUS_MODE_PBL;
- writel(value, ioaddr + DMA_CHAN_CONTROL(chan));
+ writel(value, ioaddr + DMA_CHAN_CONTROL(dwmac4_addrs, chan));
/* Mask interrupts by writing to CSR7 */
writel(DMA_CHAN_INTR_DEFAULT_MASK,
- ioaddr + DMA_CHAN_INTR_ENA(chan));
+ ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
}
-static void dwmac410_dma_init_channel(void __iomem *ioaddr,
+static void dwmac410_dma_init_channel(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg, u32 chan)
{
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
u32 value;
/* common channel control register config */
- value = readl(ioaddr + DMA_CHAN_CONTROL(chan));
+ value = readl(ioaddr + DMA_CHAN_CONTROL(dwmac4_addrs, chan));
if (dma_cfg->pblx8)
value = value | DMA_BUS_MODE_PBL;
- writel(value, ioaddr + DMA_CHAN_CONTROL(chan));
+ writel(value, ioaddr + DMA_CHAN_CONTROL(dwmac4_addrs, chan));
/* Mask interrupts by writing to CSR7 */
writel(DMA_CHAN_INTR_DEFAULT_MASK_4_10,
- ioaddr + DMA_CHAN_INTR_ENA(chan));
+ ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
}
static void dwmac4_dma_init(void __iomem *ioaddr,
@@ -176,65 +187,78 @@ static void dwmac4_dma_init(void __iomem *ioaddr,
}
-static void _dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 channel,
+static void _dwmac4_dump_dma_regs(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 channel,
u32 *reg_space)
{
- reg_space[DMA_CHAN_CONTROL(channel) / 4] =
- readl(ioaddr + DMA_CHAN_CONTROL(channel));
- reg_space[DMA_CHAN_TX_CONTROL(channel) / 4] =
- readl(ioaddr + DMA_CHAN_TX_CONTROL(channel));
- reg_space[DMA_CHAN_RX_CONTROL(channel) / 4] =
- readl(ioaddr + DMA_CHAN_RX_CONTROL(channel));
- reg_space[DMA_CHAN_TX_BASE_ADDR(channel) / 4] =
- readl(ioaddr + DMA_CHAN_TX_BASE_ADDR(channel));
- reg_space[DMA_CHAN_RX_BASE_ADDR(channel) / 4] =
- readl(ioaddr + DMA_CHAN_RX_BASE_ADDR(channel));
- reg_space[DMA_CHAN_TX_END_ADDR(channel) / 4] =
- readl(ioaddr + DMA_CHAN_TX_END_ADDR(channel));
- reg_space[DMA_CHAN_RX_END_ADDR(channel) / 4] =
- readl(ioaddr + DMA_CHAN_RX_END_ADDR(channel));
- reg_space[DMA_CHAN_TX_RING_LEN(channel) / 4] =
- readl(ioaddr + DMA_CHAN_TX_RING_LEN(channel));
- reg_space[DMA_CHAN_RX_RING_LEN(channel) / 4] =
- readl(ioaddr + DMA_CHAN_RX_RING_LEN(channel));
- reg_space[DMA_CHAN_INTR_ENA(channel) / 4] =
- readl(ioaddr + DMA_CHAN_INTR_ENA(channel));
- reg_space[DMA_CHAN_RX_WATCHDOG(channel) / 4] =
- readl(ioaddr + DMA_CHAN_RX_WATCHDOG(channel));
- reg_space[DMA_CHAN_SLOT_CTRL_STATUS(channel) / 4] =
- readl(ioaddr + DMA_CHAN_SLOT_CTRL_STATUS(channel));
- reg_space[DMA_CHAN_CUR_TX_DESC(channel) / 4] =
- readl(ioaddr + DMA_CHAN_CUR_TX_DESC(channel));
- reg_space[DMA_CHAN_CUR_RX_DESC(channel) / 4] =
- readl(ioaddr + DMA_CHAN_CUR_RX_DESC(channel));
- reg_space[DMA_CHAN_CUR_TX_BUF_ADDR(channel) / 4] =
- readl(ioaddr + DMA_CHAN_CUR_TX_BUF_ADDR(channel));
- reg_space[DMA_CHAN_CUR_RX_BUF_ADDR(channel) / 4] =
- readl(ioaddr + DMA_CHAN_CUR_RX_BUF_ADDR(channel));
- reg_space[DMA_CHAN_STATUS(channel) / 4] =
- readl(ioaddr + DMA_CHAN_STATUS(channel));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+ const struct dwmac4_addrs *default_addrs = NULL;
+
+ /* Purposely save the registers in the "normal" layout, regardless of
+ * platform modifications, to keep reg_space size constant
+ */
+ reg_space[DMA_CHAN_CONTROL(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_CONTROL(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_TX_CONTROL(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_RX_CONTROL(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_RX_CONTROL(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_TX_BASE_ADDR(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_TX_BASE_ADDR(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_RX_BASE_ADDR(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_RX_BASE_ADDR(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_TX_END_ADDR(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_TX_END_ADDR(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_RX_END_ADDR(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_RX_END_ADDR(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_TX_RING_LEN(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_TX_RING_LEN(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_RX_RING_LEN(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_RX_RING_LEN(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_INTR_ENA(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_RX_WATCHDOG(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_RX_WATCHDOG(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_SLOT_CTRL_STATUS(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_SLOT_CTRL_STATUS(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_CUR_TX_DESC(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_CUR_TX_DESC(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_CUR_RX_DESC(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_CUR_RX_DESC(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_CUR_TX_BUF_ADDR(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_CUR_TX_BUF_ADDR(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_CUR_RX_BUF_ADDR(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_CUR_RX_BUF_ADDR(dwmac4_addrs, channel));
+ reg_space[DMA_CHAN_STATUS(default_addrs, channel) / 4] =
+ readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, channel));
}
-static void dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
+static void dwmac4_dump_dma_regs(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 *reg_space)
{
int i;
for (i = 0; i < DMA_CHANNEL_NB_MAX; i++)
- _dwmac4_dump_dma_regs(ioaddr, i, reg_space);
+ _dwmac4_dump_dma_regs(priv, ioaddr, i, reg_space);
}
-static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 queue)
+static void dwmac4_rx_watchdog(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 riwt, u32 queue)
{
- writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(queue));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+
+ writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(dwmac4_addrs, queue));
}
-static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
+static void dwmac4_dma_rx_chan_op_mode(struct stmmac_priv *priv,
+ void __iomem *ioaddr, int mode,
u32 channel, int fifosz, u8 qmode)
{
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
unsigned int rqs = fifosz / 256 - 1;
u32 mtl_rx_op;
- mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(channel));
+ mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(dwmac4_addrs, channel));
if (mode == SF_DMA_MODE) {
pr_debug("GMAC: enable RX store and forward mode\n");
@@ -292,13 +316,16 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
mtl_rx_op |= rfa << MTL_OP_MODE_RFA_SHIFT;
}
- writel(mtl_rx_op, ioaddr + MTL_CHAN_RX_OP_MODE(channel));
+ writel(mtl_rx_op, ioaddr + MTL_CHAN_RX_OP_MODE(dwmac4_addrs, channel));
}
-static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode,
+static void dwmac4_dma_tx_chan_op_mode(struct stmmac_priv *priv,
+ void __iomem *ioaddr, int mode,
u32 channel, int fifosz, u8 qmode)
{
- u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+ u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(dwmac4_addrs,
+ channel));
unsigned int tqs = fifosz / 256 - 1;
if (mode == SF_DMA_MODE) {
@@ -344,7 +371,7 @@ static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode,
mtl_tx_op &= ~MTL_OP_MODE_TQS_MASK;
mtl_tx_op |= tqs << MTL_OP_MODE_TQS_SHIFT;
- writel(mtl_tx_op, ioaddr + MTL_CHAN_TX_OP_MODE(channel));
+ writel(mtl_tx_op, ioaddr + MTL_CHAN_TX_OP_MODE(dwmac4_addrs, channel));
}
static int dwmac4_get_hw_feature(void __iomem *ioaddr,
@@ -442,26 +469,31 @@ static int dwmac4_get_hw_feature(void __iomem *ioaddr,
}
/* Enable/disable TSO feature and set MSS */
-static void dwmac4_enable_tso(void __iomem *ioaddr, bool en, u32 chan)
+static void dwmac4_enable_tso(struct stmmac_priv *priv, void __iomem *ioaddr,
+ bool en, u32 chan)
{
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
u32 value;
if (en) {
/* enable TSO */
- value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
+ value = readl(ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, chan));
writel(value | DMA_CONTROL_TSE,
- ioaddr + DMA_CHAN_TX_CONTROL(chan));
+ ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, chan));
} else {
/* disable TSO */
- value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
+ value = readl(ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, chan));
writel(value & ~DMA_CONTROL_TSE,
- ioaddr + DMA_CHAN_TX_CONTROL(chan));
+ ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, chan));
}
}
-static void dwmac4_qmode(void __iomem *ioaddr, u32 channel, u8 qmode)
+static void dwmac4_qmode(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 channel, u8 qmode)
{
- u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+ u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(dwmac4_addrs,
+ channel));
mtl_tx_op &= ~MTL_OP_MODE_TXQEN_MASK;
if (qmode != MTL_QUEUE_AVB)
@@ -469,47 +501,54 @@ static void dwmac4_qmode(void __iomem *ioaddr, u32 channel, u8 qmode)
else
mtl_tx_op |= MTL_OP_MODE_TXQEN_AV;
- writel(mtl_tx_op, ioaddr + MTL_CHAN_TX_OP_MODE(channel));
+ writel(mtl_tx_op, ioaddr + MTL_CHAN_TX_OP_MODE(dwmac4_addrs, channel));
}
-static void dwmac4_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan)
+static void dwmac4_set_bfsize(struct stmmac_priv *priv, void __iomem *ioaddr,
+ int bfsize, u32 chan)
{
- u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+ u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(dwmac4_addrs, chan));
value &= ~DMA_RBSZ_MASK;
value |= (bfsize << DMA_RBSZ_SHIFT) & DMA_RBSZ_MASK;
- writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
+ writel(value, ioaddr + DMA_CHAN_RX_CONTROL(dwmac4_addrs, chan));
}
-static void dwmac4_enable_sph(void __iomem *ioaddr, bool en, u32 chan)
+static void dwmac4_enable_sph(struct stmmac_priv *priv, void __iomem *ioaddr,
+ bool en, u32 chan)
{
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
u32 value = readl(ioaddr + GMAC_EXT_CONFIG);
value &= ~GMAC_CONFIG_HDSMS;
value |= GMAC_CONFIG_HDSMS_256; /* Segment max 256 bytes */
writel(value, ioaddr + GMAC_EXT_CONFIG);
- value = readl(ioaddr + DMA_CHAN_CONTROL(chan));
+ value = readl(ioaddr + DMA_CHAN_CONTROL(dwmac4_addrs, chan));
if (en)
value |= DMA_CONTROL_SPH;
else
value &= ~DMA_CONTROL_SPH;
- writel(value, ioaddr + DMA_CHAN_CONTROL(chan));
+ writel(value, ioaddr + DMA_CHAN_CONTROL(dwmac4_addrs, chan));
}
-static int dwmac4_enable_tbs(void __iomem *ioaddr, bool en, u32 chan)
+static int dwmac4_enable_tbs(struct stmmac_priv *priv, void __iomem *ioaddr,
+ bool en, u32 chan)
{
- u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+ u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, chan));
if (en)
value |= DMA_CONTROL_EDSE;
else
value &= ~DMA_CONTROL_EDSE;
- writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
+ writel(value, ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, chan));
- value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan)) & DMA_CONTROL_EDSE;
+ value = readl(ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs,
+ chan)) & DMA_CONTROL_EDSE;
if (en && !value)
return -EIO;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
index 9321879b599c..358e7dcb6a9a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
@@ -95,29 +95,41 @@
/* The following DMA defines are channel-oriented */
#define DMA_CHAN_BASE_ADDR 0x00001100
#define DMA_CHAN_BASE_OFFSET 0x80
-#define DMA_CHANX_BASE_ADDR(x) (DMA_CHAN_BASE_ADDR + \
- (x * DMA_CHAN_BASE_OFFSET))
+
+static inline u32 dma_chanx_base_addr(const struct dwmac4_addrs *addrs,
+ const u32 x)
+{
+ u32 addr;
+
+ if (addrs)
+ addr = addrs->dma_chan + (x * addrs->dma_chan_offset);
+ else
+ addr = DMA_CHAN_BASE_ADDR + (x * DMA_CHAN_BASE_OFFSET);
+
+ return addr;
+}
+
#define DMA_CHAN_REG_NUMBER 17
-#define DMA_CHAN_CONTROL(x) DMA_CHANX_BASE_ADDR(x)
-#define DMA_CHAN_TX_CONTROL(x) (DMA_CHANX_BASE_ADDR(x) + 0x4)
-#define DMA_CHAN_RX_CONTROL(x) (DMA_CHANX_BASE_ADDR(x) + 0x8)
-#define DMA_CHAN_TX_BASE_ADDR_HI(x) (DMA_CHANX_BASE_ADDR(x) + 0x10)
-#define DMA_CHAN_TX_BASE_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x14)
-#define DMA_CHAN_RX_BASE_ADDR_HI(x) (DMA_CHANX_BASE_ADDR(x) + 0x18)
-#define DMA_CHAN_RX_BASE_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x1c)
-#define DMA_CHAN_TX_END_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x20)
-#define DMA_CHAN_RX_END_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x28)
-#define DMA_CHAN_TX_RING_LEN(x) (DMA_CHANX_BASE_ADDR(x) + 0x2c)
-#define DMA_CHAN_RX_RING_LEN(x) (DMA_CHANX_BASE_ADDR(x) + 0x30)
-#define DMA_CHAN_INTR_ENA(x) (DMA_CHANX_BASE_ADDR(x) + 0x34)
-#define DMA_CHAN_RX_WATCHDOG(x) (DMA_CHANX_BASE_ADDR(x) + 0x38)
-#define DMA_CHAN_SLOT_CTRL_STATUS(x) (DMA_CHANX_BASE_ADDR(x) + 0x3c)
-#define DMA_CHAN_CUR_TX_DESC(x) (DMA_CHANX_BASE_ADDR(x) + 0x44)
-#define DMA_CHAN_CUR_RX_DESC(x) (DMA_CHANX_BASE_ADDR(x) + 0x4c)
-#define DMA_CHAN_CUR_TX_BUF_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x54)
-#define DMA_CHAN_CUR_RX_BUF_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x5c)
-#define DMA_CHAN_STATUS(x) (DMA_CHANX_BASE_ADDR(x) + 0x60)
+#define DMA_CHAN_CONTROL(addrs, x) dma_chanx_base_addr(addrs, x)
+#define DMA_CHAN_TX_CONTROL(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x4)
+#define DMA_CHAN_RX_CONTROL(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x8)
+#define DMA_CHAN_TX_BASE_ADDR_HI(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x10)
+#define DMA_CHAN_TX_BASE_ADDR(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x14)
+#define DMA_CHAN_RX_BASE_ADDR_HI(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x18)
+#define DMA_CHAN_RX_BASE_ADDR(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x1c)
+#define DMA_CHAN_TX_END_ADDR(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x20)
+#define DMA_CHAN_RX_END_ADDR(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x28)
+#define DMA_CHAN_TX_RING_LEN(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x2c)
+#define DMA_CHAN_RX_RING_LEN(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x30)
+#define DMA_CHAN_INTR_ENA(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x34)
+#define DMA_CHAN_RX_WATCHDOG(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x38)
+#define DMA_CHAN_SLOT_CTRL_STATUS(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x3c)
+#define DMA_CHAN_CUR_TX_DESC(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x44)
+#define DMA_CHAN_CUR_RX_DESC(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x4c)
+#define DMA_CHAN_CUR_TX_BUF_ADDR(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x54)
+#define DMA_CHAN_CUR_RX_BUF_ADDR(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x5c)
+#define DMA_CHAN_STATUS(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x60)
/* DMA Control X */
#define DMA_CONTROL_SPH BIT(24)
@@ -220,19 +232,31 @@
#define DMA_CHAN0_DBG_STAT_RPS_SHIFT 8
int dwmac4_dma_reset(void __iomem *ioaddr);
-void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
-void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
-void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
-void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
-void dwmac4_dma_start_tx(void __iomem *ioaddr, u32 chan);
-void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan);
-void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan);
-void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan);
-int dwmac4_dma_interrupt(void __iomem *ioaddr,
+void dwmac4_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx);
+void dwmac410_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx);
+void dwmac4_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx);
+void dwmac410_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx);
+void dwmac4_dma_start_tx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan);
+void dwmac4_dma_stop_tx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan);
+void dwmac4_dma_start_rx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan);
+void dwmac4_dma_stop_rx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan);
+int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
struct stmmac_extra_stats *x, u32 chan, u32 dir);
-void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan);
-void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan);
-void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
-void dwmac4_set_tx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
+void dwmac4_set_rx_ring_len(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 len, u32 chan);
+void dwmac4_set_tx_ring_len(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 len, u32 chan);
+void dwmac4_set_rx_tail_ptr(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 tail_ptr, u32 chan);
+void dwmac4_set_tx_tail_ptr(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 tail_ptr, u32 chan);
#endif /* __DWMAC4_DMA_H__ */
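
The dma_chanx_base_addr() helper just added keeps every channel accessor (and the fixed-layout register dump above) working on unmodified hardware, because NULL selects the stock layout. A self-contained sketch of that dispatch, with a stand-in struct and the default constants copied from the header:

	#include <assert.h>
	#include <stddef.h>

	struct addrs_sketch {			/* stand-in for struct dwmac4_addrs */
		unsigned int dma_chan;
		unsigned int dma_chan_offset;
	};

	static unsigned int chan_base(const struct addrs_sketch *addrs, unsigned int x)
	{
		if (addrs)
			return addrs->dma_chan + x * addrs->dma_chan_offset;

		/* DMA_CHAN_BASE_ADDR + x * DMA_CHAN_BASE_OFFSET */
		return 0x1100 + x * 0x80;
	}

	int main(void)
	{
		const struct addrs_sketch quirk = {
			.dma_chan = 0x3000, .dma_chan_offset = 0x200
		};

		assert(chan_base(NULL, 2) == 0x1100 + 2 * 0x80);	/* default layout */
		assert(chan_base(&quirk, 2) == 0x3000 + 2 * 0x200);	/* platform override */
		return 0;
	}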
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index d1c605777985..df41eac54058 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -11,6 +11,7 @@
#include "common.h"
#include "dwmac4_dma.h"
#include "dwmac4.h"
+#include "stmmac.h"
int dwmac4_dma_reset(void __iomem *ioaddr)
{
@@ -25,120 +26,151 @@ int dwmac4_dma_reset(void __iomem *ioaddr)
10000, 1000000);
}
-void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan)
+void dwmac4_set_rx_tail_ptr(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 tail_ptr, u32 chan)
{
- writel(tail_ptr, ioaddr + DMA_CHAN_RX_END_ADDR(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+
+ writel(tail_ptr, ioaddr + DMA_CHAN_RX_END_ADDR(dwmac4_addrs, chan));
}
-void dwmac4_set_tx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan)
+void dwmac4_set_tx_tail_ptr(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 tail_ptr, u32 chan)
{
- writel(tail_ptr, ioaddr + DMA_CHAN_TX_END_ADDR(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+
+ writel(tail_ptr, ioaddr + DMA_CHAN_TX_END_ADDR(dwmac4_addrs, chan));
}
-void dwmac4_dma_start_tx(void __iomem *ioaddr, u32 chan)
+void dwmac4_dma_start_tx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan)
{
- u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+ u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, chan));
value |= DMA_CONTROL_ST;
- writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
+ writel(value, ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, chan));
value = readl(ioaddr + GMAC_CONFIG);
value |= GMAC_CONFIG_TE;
writel(value, ioaddr + GMAC_CONFIG);
}
-void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan)
+void dwmac4_dma_stop_tx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan)
{
- u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+
+ u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, chan));
value &= ~DMA_CONTROL_ST;
- writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
+ writel(value, ioaddr + DMA_CHAN_TX_CONTROL(dwmac4_addrs, chan));
}
-void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan)
+void dwmac4_dma_start_rx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan)
{
- u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+
+ u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(dwmac4_addrs, chan));
value |= DMA_CONTROL_SR;
- writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
+ writel(value, ioaddr + DMA_CHAN_RX_CONTROL(dwmac4_addrs, chan));
value = readl(ioaddr + GMAC_CONFIG);
value |= GMAC_CONFIG_RE;
writel(value, ioaddr + GMAC_CONFIG);
}
-void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan)
+void dwmac4_dma_stop_rx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan)
{
- u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+ u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(dwmac4_addrs, chan));
value &= ~DMA_CONTROL_SR;
- writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
+ writel(value, ioaddr + DMA_CHAN_RX_CONTROL(dwmac4_addrs, chan));
}
-void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
+void dwmac4_set_tx_ring_len(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 len, u32 chan)
{
- writel(len, ioaddr + DMA_CHAN_TX_RING_LEN(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+
+ writel(len, ioaddr + DMA_CHAN_TX_RING_LEN(dwmac4_addrs, chan));
}
-void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
+void dwmac4_set_rx_ring_len(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 len, u32 chan)
{
- writel(len, ioaddr + DMA_CHAN_RX_RING_LEN(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+
+ writel(len, ioaddr + DMA_CHAN_RX_RING_LEN(dwmac4_addrs, chan));
}
-void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
+void dwmac4_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx)
{
- u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+ u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
if (rx)
value |= DMA_CHAN_INTR_DEFAULT_RX;
if (tx)
value |= DMA_CHAN_INTR_DEFAULT_TX;
- writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan));
+ writel(value, ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
}
-void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
+void dwmac410_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx)
{
- u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+ u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
if (rx)
value |= DMA_CHAN_INTR_DEFAULT_RX_4_10;
if (tx)
value |= DMA_CHAN_INTR_DEFAULT_TX_4_10;
- writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan));
+ writel(value, ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
}
-void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
+void dwmac4_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx)
{
- u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+ u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
if (rx)
value &= ~DMA_CHAN_INTR_DEFAULT_RX;
if (tx)
value &= ~DMA_CHAN_INTR_DEFAULT_TX;
- writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan));
+ writel(value, ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
}
-void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
+void dwmac410_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx)
{
- u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+ u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
if (rx)
value &= ~DMA_CHAN_INTR_DEFAULT_RX_4_10;
if (tx)
value &= ~DMA_CHAN_INTR_DEFAULT_TX_4_10;
- writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan));
+ writel(value, ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
}
-int dwmac4_dma_interrupt(void __iomem *ioaddr,
+int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
struct stmmac_extra_stats *x, u32 chan, u32 dir)
{
- u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(chan));
- u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
+ const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
+ u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan));
+ u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
int ret = 0;
if (dir == DMA_DIR_RX)
@@ -183,7 +215,8 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr,
if (unlikely(intr_status & DMA_CHAN_STATUS_ERI))
x->rx_early_irq++;
- writel(intr_status & intr_en, ioaddr + DMA_CHAN_STATUS(chan));
+ writel(intr_status & intr_en,
+ ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan));
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
index acd70b9a3173..72672391675f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
@@ -153,14 +153,20 @@
#define NUM_DWMAC4_DMA_REGS 27
void dwmac_enable_dma_transmission(void __iomem *ioaddr);
-void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
-void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
-void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan);
-void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan);
-void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan);
-void dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan);
-int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x,
- u32 chan, u32 dir);
+void dwmac_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx);
+void dwmac_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx);
+void dwmac_dma_start_tx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan);
+void dwmac_dma_stop_tx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan);
+void dwmac_dma_start_rx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan);
+void dwmac_dma_stop_rx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan);
+int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
+ struct stmmac_extra_stats *x, u32 chan, u32 dir);
int dwmac_dma_reset(void __iomem *ioaddr);
#endif /* __DWMAC_DMA_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index 9b6138b11776..0b6f999a8305 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -32,7 +32,8 @@ void dwmac_enable_dma_transmission(void __iomem *ioaddr)
writel(1, ioaddr + DMA_XMT_POLL_DEMAND);
}
-void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
+void dwmac_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx)
{
u32 value = readl(ioaddr + DMA_INTR_ENA);
@@ -44,7 +45,8 @@ void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
writel(value, ioaddr + DMA_INTR_ENA);
}
-void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
+void dwmac_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx)
{
u32 value = readl(ioaddr + DMA_INTR_ENA);
@@ -56,28 +58,30 @@ void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
writel(value, ioaddr + DMA_INTR_ENA);
}
-void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan)
+void dwmac_dma_start_tx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan)
{
u32 value = readl(ioaddr + DMA_CONTROL);
value |= DMA_CONTROL_ST;
writel(value, ioaddr + DMA_CONTROL);
}
-void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan)
+void dwmac_dma_stop_tx(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan)
{
u32 value = readl(ioaddr + DMA_CONTROL);
value &= ~DMA_CONTROL_ST;
writel(value, ioaddr + DMA_CONTROL);
}
-void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan)
+void dwmac_dma_start_rx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan)
{
u32 value = readl(ioaddr + DMA_CONTROL);
value |= DMA_CONTROL_SR;
writel(value, ioaddr + DMA_CONTROL);
}
-void dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan)
+void dwmac_dma_stop_rx(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan)
{
u32 value = readl(ioaddr + DMA_CONTROL);
value &= ~DMA_CONTROL_SR;
@@ -154,7 +158,7 @@ static void show_rx_process_state(unsigned int status)
}
#endif
-int dwmac_dma_interrupt(void __iomem *ioaddr,
+int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
struct stmmac_extra_stats *x, u32 chan, u32 dir)
{
int ret = 0;
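
On these older single-DMA-channel cores the new priv (and the existing chan) arguments go unused: they exist so that every core, old or new, fits the same function-pointer slot in the shared ops table that hwif.h defines below. A hypothetical sketch of why a uniform signature forces the extra parameters:

	/* Sketch only: uniform callback signatures across core generations. */
	struct priv_sketch;			/* opaque; only the signature matters */

	struct dma_ops_sketch {
		void (*start_tx)(struct priv_sketch *priv, unsigned long ioaddr,
				 unsigned int chan);
	};

	/* legacy core: priv and chan are accepted but ignored, because the
	 * hardware has a single global DMA control register
	 */
	static void legacy_start_tx(struct priv_sketch *priv, unsigned long ioaddr,
				    unsigned int chan)
	{
		(void)priv;
		(void)chan;
		/* would set the start bit in the one DMA_CONTROL register */
		(void)ioaddr;
	}

	static const struct dma_ops_sketch legacy_ops = {
		.start_tx = legacy_start_tx,
	};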
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index c6c4d7948fe5..a0c2ef8bb0ac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -187,7 +187,8 @@ static void dwxgmac2_prog_mtl_tx_algorithms(struct mac_device_info *hw,
}
}
-static void dwxgmac2_set_mtl_tx_queue_weight(struct mac_device_info *hw,
+static void dwxgmac2_set_mtl_tx_queue_weight(struct stmmac_priv *priv,
+ struct mac_device_info *hw,
u32 weight, u32 queue)
{
void __iomem *ioaddr = hw->pcsr;
@@ -212,7 +213,8 @@ static void dwxgmac2_map_mtl_to_dma(struct mac_device_info *hw, u32 queue,
writel(value, ioaddr + reg);
}
-static void dwxgmac2_config_cbs(struct mac_device_info *hw,
+static void dwxgmac2_config_cbs(struct stmmac_priv *priv,
+ struct mac_device_info *hw,
u32 send_slope, u32 idle_slope,
u32 high_credit, u32 low_credit, u32 queue)
{
@@ -276,7 +278,8 @@ static int dwxgmac2_host_irq_status(struct mac_device_info *hw,
return ret;
}
-static int dwxgmac2_host_mtl_irq_status(struct mac_device_info *hw, u32 chan)
+static int dwxgmac2_host_mtl_irq_status(struct stmmac_priv *priv,
+ struct mac_device_info *hw, u32 chan)
{
void __iomem *ioaddr = hw->pcsr;
int ret = 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index b1f0c3984a09..13c347ee8be9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -8,7 +8,8 @@
#include "common.h"
#include "dwxgmac2.h"
-static int dwxgmac2_get_tx_status(void *data, struct stmmac_extra_stats *x,
+static int dwxgmac2_get_tx_status(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
struct dma_desc *p, void __iomem *ioaddr)
{
unsigned int tdes3 = le32_to_cpu(p->des3);
@@ -22,7 +23,8 @@ static int dwxgmac2_get_tx_status(void *data, struct stmmac_extra_stats *x,
return ret;
}
-static int dwxgmac2_get_rx_status(void *data, struct stmmac_extra_stats *x,
+static int dwxgmac2_get_rx_status(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
struct dma_desc *p)
{
unsigned int rdes3 = le32_to_cpu(p->des3);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 5e98355f422b..dfd53264e036 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -33,7 +33,8 @@ static void dwxgmac2_dma_init(void __iomem *ioaddr,
writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE);
}
-static void dwxgmac2_dma_init_chan(void __iomem *ioaddr,
+static void dwxgmac2_dma_init_chan(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg, u32 chan)
{
u32 value = readl(ioaddr + XGMAC_DMA_CH_CONTROL(chan));
@@ -45,7 +46,8 @@ static void dwxgmac2_dma_init_chan(void __iomem *ioaddr,
writel(XGMAC_DMA_INT_DEFAULT_EN, ioaddr + XGMAC_DMA_CH_INT_EN(chan));
}
-static void dwxgmac2_dma_init_rx_chan(void __iomem *ioaddr,
+static void dwxgmac2_dma_init_rx_chan(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg,
dma_addr_t phy, u32 chan)
{
@@ -61,7 +63,8 @@ static void dwxgmac2_dma_init_rx_chan(void __iomem *ioaddr,
writel(lower_32_bits(phy), ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan));
}
-static void dwxgmac2_dma_init_tx_chan(void __iomem *ioaddr,
+static void dwxgmac2_dma_init_tx_chan(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg,
dma_addr_t phy, u32 chan)
{
@@ -131,7 +134,8 @@ static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
writel(XGMAC_RDPS, ioaddr + XGMAC_RX_EDMA_CTRL);
}
-static void dwxgmac2_dma_dump_regs(void __iomem *ioaddr, u32 *reg_space)
+static void dwxgmac2_dma_dump_regs(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 *reg_space)
{
int i;
@@ -139,8 +143,8 @@ static void dwxgmac2_dma_dump_regs(void __iomem *ioaddr, u32 *reg_space)
reg_space[i] = readl(ioaddr + i * 4);
}
-static void dwxgmac2_dma_rx_mode(void __iomem *ioaddr, int mode,
- u32 channel, int fifosz, u8 qmode)
+static void dwxgmac2_dma_rx_mode(struct stmmac_priv *priv, void __iomem *ioaddr,
+ int mode, u32 channel, int fifosz, u8 qmode)
{
u32 value = readl(ioaddr + XGMAC_MTL_RXQ_OPMODE(channel));
unsigned int rqs = fifosz / 256 - 1;
@@ -205,8 +209,8 @@ static void dwxgmac2_dma_rx_mode(void __iomem *ioaddr, int mode,
writel(value | XGMAC_RXOIE, ioaddr + XGMAC_MTL_QINTEN(channel));
}
-static void dwxgmac2_dma_tx_mode(void __iomem *ioaddr, int mode,
- u32 channel, int fifosz, u8 qmode)
+static void dwxgmac2_dma_tx_mode(struct stmmac_priv *priv, void __iomem *ioaddr,
+ int mode, u32 channel, int fifosz, u8 qmode)
{
u32 value = readl(ioaddr + XGMAC_MTL_TXQ_OPMODE(channel));
unsigned int tqs = fifosz / 256 - 1;
@@ -248,7 +252,8 @@ static void dwxgmac2_dma_tx_mode(void __iomem *ioaddr, int mode,
writel(value, ioaddr + XGMAC_MTL_TXQ_OPMODE(channel));
}
-static void dwxgmac2_enable_dma_irq(void __iomem *ioaddr, u32 chan,
+static void dwxgmac2_enable_dma_irq(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 chan,
bool rx, bool tx)
{
u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
@@ -261,7 +266,8 @@ static void dwxgmac2_enable_dma_irq(void __iomem *ioaddr, u32 chan,
writel(value, ioaddr + XGMAC_DMA_CH_INT_EN(chan));
}
-static void dwxgmac2_disable_dma_irq(void __iomem *ioaddr, u32 chan,
+static void dwxgmac2_disable_dma_irq(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 chan,
bool rx, bool tx)
{
u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
@@ -274,7 +280,8 @@ static void dwxgmac2_disable_dma_irq(void __iomem *ioaddr, u32 chan,
writel(value, ioaddr + XGMAC_DMA_CH_INT_EN(chan));
}
-static void dwxgmac2_dma_start_tx(void __iomem *ioaddr, u32 chan)
+static void dwxgmac2_dma_start_tx(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 chan)
{
u32 value;
@@ -287,7 +294,8 @@ static void dwxgmac2_dma_start_tx(void __iomem *ioaddr, u32 chan)
writel(value, ioaddr + XGMAC_TX_CONFIG);
}
-static void dwxgmac2_dma_stop_tx(void __iomem *ioaddr, u32 chan)
+static void dwxgmac2_dma_stop_tx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan)
{
u32 value;
@@ -300,7 +308,8 @@ static void dwxgmac2_dma_stop_tx(void __iomem *ioaddr, u32 chan)
writel(value, ioaddr + XGMAC_TX_CONFIG);
}
-static void dwxgmac2_dma_start_rx(void __iomem *ioaddr, u32 chan)
+static void dwxgmac2_dma_start_rx(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 chan)
{
u32 value;
@@ -313,7 +322,8 @@ static void dwxgmac2_dma_start_rx(void __iomem *ioaddr, u32 chan)
writel(value, ioaddr + XGMAC_RX_CONFIG);
}
-static void dwxgmac2_dma_stop_rx(void __iomem *ioaddr, u32 chan)
+static void dwxgmac2_dma_stop_rx(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan)
{
u32 value;
@@ -322,7 +332,8 @@ static void dwxgmac2_dma_stop_rx(void __iomem *ioaddr, u32 chan)
writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
}
-static int dwxgmac2_dma_interrupt(void __iomem *ioaddr,
+static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
+ void __iomem *ioaddr,
struct stmmac_extra_stats *x, u32 chan,
u32 dir)
{
@@ -449,32 +460,38 @@ static int dwxgmac2_get_hw_feature(void __iomem *ioaddr,
return 0;
}
-static void dwxgmac2_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 queue)
+static void dwxgmac2_rx_watchdog(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 riwt, u32 queue)
{
writel(riwt & XGMAC_RWT, ioaddr + XGMAC_DMA_CH_Rx_WATCHDOG(queue));
}
-static void dwxgmac2_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
+static void dwxgmac2_set_rx_ring_len(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 len, u32 chan)
{
writel(len, ioaddr + XGMAC_DMA_CH_RxDESC_RING_LEN(chan));
}
-static void dwxgmac2_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
+static void dwxgmac2_set_tx_ring_len(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 len, u32 chan)
{
writel(len, ioaddr + XGMAC_DMA_CH_TxDESC_RING_LEN(chan));
}
-static void dwxgmac2_set_rx_tail_ptr(void __iomem *ioaddr, u32 ptr, u32 chan)
+static void dwxgmac2_set_rx_tail_ptr(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 ptr, u32 chan)
{
writel(ptr, ioaddr + XGMAC_DMA_CH_RxDESC_TAIL_LPTR(chan));
}
-static void dwxgmac2_set_tx_tail_ptr(void __iomem *ioaddr, u32 ptr, u32 chan)
+static void dwxgmac2_set_tx_tail_ptr(struct stmmac_priv *priv,
+ void __iomem *ioaddr, u32 ptr, u32 chan)
{
writel(ptr, ioaddr + XGMAC_DMA_CH_TxDESC_TAIL_LPTR(chan));
}
-static void dwxgmac2_enable_tso(void __iomem *ioaddr, bool en, u32 chan)
+static void dwxgmac2_enable_tso(struct stmmac_priv *priv, void __iomem *ioaddr,
+ bool en, u32 chan)
{
u32 value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan));
@@ -486,7 +503,8 @@ static void dwxgmac2_enable_tso(void __iomem *ioaddr, bool en, u32 chan)
writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan));
}
-static void dwxgmac2_qmode(void __iomem *ioaddr, u32 channel, u8 qmode)
+static void dwxgmac2_qmode(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 channel, u8 qmode)
{
u32 value = readl(ioaddr + XGMAC_MTL_TXQ_OPMODE(channel));
u32 flow = readl(ioaddr + XGMAC_RX_FLOW_CTRL);
@@ -503,7 +521,8 @@ static void dwxgmac2_qmode(void __iomem *ioaddr, u32 channel, u8 qmode)
writel(value, ioaddr + XGMAC_MTL_TXQ_OPMODE(channel));
}
-static void dwxgmac2_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan)
+static void dwxgmac2_set_bfsize(struct stmmac_priv *priv, void __iomem *ioaddr,
+ int bfsize, u32 chan)
{
u32 value;
@@ -513,7 +532,8 @@ static void dwxgmac2_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan)
writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
}
-static void dwxgmac2_enable_sph(void __iomem *ioaddr, bool en, u32 chan)
+static void dwxgmac2_enable_sph(struct stmmac_priv *priv, void __iomem *ioaddr,
+ bool en, u32 chan)
{
u32 value = readl(ioaddr + XGMAC_RX_CONFIG);
@@ -529,7 +549,8 @@ static void dwxgmac2_enable_sph(void __iomem *ioaddr, bool en, u32 chan)
writel(value, ioaddr + XGMAC_DMA_CH_CONTROL(chan));
}
-static int dwxgmac2_enable_tbs(void __iomem *ioaddr, bool en, u32 chan)
+static int dwxgmac2_enable_tbs(struct stmmac_priv *priv, void __iomem *ioaddr,
+ bool en, u32 chan)
{
u32 value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan));
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 1bcbbd724fb5..a91d8f13a931 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -12,10 +12,10 @@
#include "common.h"
#include "descs_com.h"
-static int enh_desc_get_tx_status(void *data, struct stmmac_extra_stats *x,
+static int enh_desc_get_tx_status(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
struct dma_desc *p, void __iomem *ioaddr)
{
- struct net_device_stats *stats = (struct net_device_stats *)data;
unsigned int tdes0 = le32_to_cpu(p->des0);
int ret = tx_done;
@@ -117,7 +117,8 @@ static int enh_desc_coe_rdes0(int ipc_err, int type, int payload_err)
return ret;
}
-static void enh_desc_get_ext_status(void *data, struct stmmac_extra_stats *x,
+static void enh_desc_get_ext_status(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
struct dma_extended_desc *p)
{
unsigned int rdes0 = le32_to_cpu(p->basic.des0);
@@ -181,10 +182,10 @@ static void enh_desc_get_ext_status(void *data, struct stmmac_extra_stats *x,
}
}
-static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
+static int enh_desc_get_rx_status(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
struct dma_desc *p)
{
- struct net_device_stats *stats = (struct net_device_stats *)data;
unsigned int rdes0 = le32_to_cpu(p->des0);
int ret = good_frame;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 1cc286b000b6..6ee7cf07cfd7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -26,6 +26,7 @@
})
struct stmmac_extra_stats;
+struct stmmac_priv;
struct stmmac_safety_stats;
struct dma_desc;
struct dma_extended_desc;
@@ -56,8 +57,9 @@ struct stmmac_desc_ops {
/* Last tx segment reports the transmit status */
int (*get_tx_ls)(struct dma_desc *p);
/* Return the transmit status looking at the TDES1 */
- int (*tx_status)(void *data, struct stmmac_extra_stats *x,
- struct dma_desc *p, void __iomem *ioaddr);
+ int (*tx_status)(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
+ struct dma_desc *p, void __iomem *ioaddr);
/* Get the buffer size from the descriptor */
int (*get_tx_len)(struct dma_desc *p);
/* Handle extra events on specific interrupts hw dependent */
@@ -65,10 +67,12 @@ struct stmmac_desc_ops {
/* Get the receive frame size */
int (*get_rx_frame_len)(struct dma_desc *p, int rx_coe_type);
/* Return the reception status looking at the RDES1 */
- int (*rx_status)(void *data, struct stmmac_extra_stats *x,
- struct dma_desc *p);
- void (*rx_extended_status)(void *data, struct stmmac_extra_stats *x,
- struct dma_extended_desc *p);
+ int (*rx_status)(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
+ struct dma_desc *p);
+ void (*rx_extended_status)(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
+ struct dma_extended_desc *p);
/* Set tx timestamp enable bit */
void (*enable_tx_timestamp) (struct dma_desc *p);
/* get tx timestamp status */
@@ -168,108 +172,125 @@ struct stmmac_dma_ops {
int (*reset)(void __iomem *ioaddr);
void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg,
int atds);
- void (*init_chan)(void __iomem *ioaddr,
+ void (*init_chan)(struct stmmac_priv *priv, void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg, u32 chan);
- void (*init_rx_chan)(void __iomem *ioaddr,
+ void (*init_rx_chan)(struct stmmac_priv *priv, void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg,
dma_addr_t phy, u32 chan);
- void (*init_tx_chan)(void __iomem *ioaddr,
+ void (*init_tx_chan)(struct stmmac_priv *priv, void __iomem *ioaddr,
struct stmmac_dma_cfg *dma_cfg,
dma_addr_t phy, u32 chan);
/* Configure the AXI Bus Mode Register */
void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi);
/* Dump DMA registers */
- void (*dump_regs)(void __iomem *ioaddr, u32 *reg_space);
- void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel,
- int fifosz, u8 qmode);
- void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel,
+ void (*dump_regs)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 *reg_space);
+ void (*dma_rx_mode)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ int mode, u32 channel,
int fifosz, u8 qmode);
+ void (*dma_tx_mode)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ int mode, u32 channel, int fifosz, u8 qmode);
/* To track extra statistic (if supported) */
- void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x,
- void __iomem *ioaddr);
+ void (*dma_diagnostic_fr)(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
+ void __iomem *ioaddr);
void (*enable_dma_transmission) (void __iomem *ioaddr);
- void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan,
- bool rx, bool tx);
- void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan,
- bool rx, bool tx);
- void (*start_tx)(void __iomem *ioaddr, u32 chan);
- void (*stop_tx)(void __iomem *ioaddr, u32 chan);
- void (*start_rx)(void __iomem *ioaddr, u32 chan);
- void (*stop_rx)(void __iomem *ioaddr, u32 chan);
- int (*dma_interrupt) (void __iomem *ioaddr,
- struct stmmac_extra_stats *x, u32 chan, u32 dir);
+ void (*enable_dma_irq)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx);
+ void (*disable_dma_irq)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan, bool rx, bool tx);
+ void (*start_tx)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan);
+ void (*stop_tx)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan);
+ void (*start_rx)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan);
+ void (*stop_rx)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 chan);
+ int (*dma_interrupt)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ struct stmmac_extra_stats *x, u32 chan, u32 dir);
/* If supported then get the optional core features */
int (*get_hw_feature)(void __iomem *ioaddr,
struct dma_features *dma_cap);
/* Program the HW RX Watchdog */
- void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 queue);
- void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
- void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
- void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
- void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
- void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
- void (*qmode)(void __iomem *ioaddr, u32 channel, u8 qmode);
- void (*set_bfsize)(void __iomem *ioaddr, int bfsize, u32 chan);
- void (*enable_sph)(void __iomem *ioaddr, bool en, u32 chan);
- int (*enable_tbs)(void __iomem *ioaddr, bool en, u32 chan);
+ void (*rx_watchdog)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 riwt, u32 queue);
+ void (*set_tx_ring_len)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 len, u32 chan);
+ void (*set_rx_ring_len)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 len, u32 chan);
+ void (*set_rx_tail_ptr)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 tail_ptr, u32 chan);
+ void (*set_tx_tail_ptr)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 tail_ptr, u32 chan);
+ void (*enable_tso)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ bool en, u32 chan);
+ void (*qmode)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ u32 channel, u8 qmode);
+ void (*set_bfsize)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ int bfsize, u32 chan);
+ void (*enable_sph)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ bool en, u32 chan);
+ int (*enable_tbs)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ bool en, u32 chan);
};
#define stmmac_dma_init(__priv, __args...) \
stmmac_do_void_callback(__priv, dma, init, __args)
#define stmmac_init_chan(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, init_chan, __args)
+ stmmac_do_void_callback(__priv, dma, init_chan, __priv, __args)
#define stmmac_init_rx_chan(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, init_rx_chan, __args)
+ stmmac_do_void_callback(__priv, dma, init_rx_chan, __priv, __args)
#define stmmac_init_tx_chan(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, init_tx_chan, __args)
+ stmmac_do_void_callback(__priv, dma, init_tx_chan, __priv, __args)
#define stmmac_axi(__priv, __args...) \
stmmac_do_void_callback(__priv, dma, axi, __args)
#define stmmac_dump_dma_regs(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, dump_regs, __args)
+ stmmac_do_void_callback(__priv, dma, dump_regs, __priv, __args)
#define stmmac_dma_rx_mode(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, dma_rx_mode, __args)
+ stmmac_do_void_callback(__priv, dma, dma_rx_mode, __priv, __args)
#define stmmac_dma_tx_mode(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, dma_tx_mode, __args)
+ stmmac_do_void_callback(__priv, dma, dma_tx_mode, __priv, __args)
#define stmmac_dma_diagnostic_fr(__priv, __args...) \
stmmac_do_void_callback(__priv, dma, dma_diagnostic_fr, __args)
#define stmmac_enable_dma_transmission(__priv, __args...) \
stmmac_do_void_callback(__priv, dma, enable_dma_transmission, __args)
#define stmmac_enable_dma_irq(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, enable_dma_irq, __args)
+ stmmac_do_void_callback(__priv, dma, enable_dma_irq, __priv, __args)
#define stmmac_disable_dma_irq(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, disable_dma_irq, __args)
+ stmmac_do_void_callback(__priv, dma, disable_dma_irq, __priv, __args)
#define stmmac_start_tx(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, start_tx, __args)
+ stmmac_do_void_callback(__priv, dma, start_tx, __priv, __args)
#define stmmac_stop_tx(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, stop_tx, __args)
+ stmmac_do_void_callback(__priv, dma, stop_tx, __priv, __args)
#define stmmac_start_rx(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, start_rx, __args)
+ stmmac_do_void_callback(__priv, dma, start_rx, __priv, __args)
#define stmmac_stop_rx(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, stop_rx, __args)
+ stmmac_do_void_callback(__priv, dma, stop_rx, __priv, __args)
#define stmmac_dma_interrupt_status(__priv, __args...) \
- stmmac_do_callback(__priv, dma, dma_interrupt, __args)
+ stmmac_do_callback(__priv, dma, dma_interrupt, __priv, __args)
#define stmmac_get_hw_feature(__priv, __args...) \
stmmac_do_callback(__priv, dma, get_hw_feature, __args)
#define stmmac_rx_watchdog(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, rx_watchdog, __args)
+ stmmac_do_void_callback(__priv, dma, rx_watchdog, __priv, __args)
#define stmmac_set_tx_ring_len(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, set_tx_ring_len, __args)
+ stmmac_do_void_callback(__priv, dma, set_tx_ring_len, __priv, __args)
#define stmmac_set_rx_ring_len(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, set_rx_ring_len, __args)
+ stmmac_do_void_callback(__priv, dma, set_rx_ring_len, __priv, __args)
#define stmmac_set_rx_tail_ptr(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, set_rx_tail_ptr, __args)
+ stmmac_do_void_callback(__priv, dma, set_rx_tail_ptr, __priv, __args)
#define stmmac_set_tx_tail_ptr(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, set_tx_tail_ptr, __args)
+ stmmac_do_void_callback(__priv, dma, set_tx_tail_ptr, __priv, __args)
#define stmmac_enable_tso(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, enable_tso, __args)
+ stmmac_do_void_callback(__priv, dma, enable_tso, __priv, __args)
#define stmmac_dma_qmode(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, qmode, __args)
+ stmmac_do_void_callback(__priv, dma, qmode, __priv, __args)
#define stmmac_set_dma_bfsize(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, set_bfsize, __args)
+ stmmac_do_void_callback(__priv, dma, set_bfsize, __priv, __args)
#define stmmac_enable_sph(__priv, __args...) \
- stmmac_do_void_callback(__priv, dma, enable_sph, __args)
+ stmmac_do_void_callback(__priv, dma, enable_sph, __priv, __args)
#define stmmac_enable_tbs(__priv, __args...) \
- stmmac_do_callback(__priv, dma, enable_tbs, __args)
+ stmmac_do_callback(__priv, dma, enable_tbs, __priv, __args)
struct mac_device_info;
struct net_device;
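
The hunk above replaces the opaque void * cookies in the stmmac ops tables with typed context arguments, and each wrapper macro now splices __priv back in as the first callback argument. A minimal, self-contained sketch of that variadic-macro pattern, using hypothetical demo_* names rather than the kernel's definitions:

#include <stdio.h>

struct demo_priv;

struct demo_dma_ops {
	/* the callback now takes the typed context as its first argument */
	void (*start_tx)(struct demo_priv *priv, void *ioaddr, unsigned int chan);
};

struct demo_priv {
	const struct demo_dma_ops *ops;
	void *ioaddr;
};

/* guarded indirect call; forwards whatever arguments the caller built */
#define demo_do_void_callback(__priv, __op, __args...)	\
	do {						\
		if ((__priv)->ops->__op)		\
			(__priv)->ops->__op(__args);	\
	} while (0)

/* the wrapper prepends __priv, which is exactly the shape of the change above */
#define demo_start_tx(__priv, __args...) \
	demo_do_void_callback(__priv, start_tx, __priv, __args)

static void demo_start_tx_impl(struct demo_priv *priv, void *ioaddr,
			       unsigned int chan)
{
	(void)priv; (void)ioaddr;
	printf("start tx on chan %u\n", chan);
}

int main(void)
{
	const struct demo_dma_ops ops = { .start_tx = demo_start_tx_impl };
	struct demo_priv priv = { .ops = &ops, .ioaddr = NULL };

	demo_start_tx(&priv, priv.ioaddr, 0u);	/* callee receives priv first */
	return 0;
}
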
@@ -301,21 +322,23 @@ struct stmmac_ops {
/* Program TX Algorithms */
void (*prog_mtl_tx_algorithms)(struct mac_device_info *hw, u32 tx_alg);
/* Set MTL TX queues weight */
- void (*set_mtl_tx_queue_weight)(struct mac_device_info *hw,
+ void (*set_mtl_tx_queue_weight)(struct stmmac_priv *priv,
+ struct mac_device_info *hw,
u32 weight, u32 queue);
/* RX MTL queue to RX dma mapping */
void (*map_mtl_to_dma)(struct mac_device_info *hw, u32 queue, u32 chan);
/* Configure AV Algorithm */
- void (*config_cbs)(struct mac_device_info *hw, u32 send_slope,
- u32 idle_slope, u32 high_credit, u32 low_credit,
- u32 queue);
+ void (*config_cbs)(struct stmmac_priv *priv, struct mac_device_info *hw,
+ u32 send_slope, u32 idle_slope, u32 high_credit,
+ u32 low_credit, u32 queue);
/* Dump MAC registers */
void (*dump_regs)(struct mac_device_info *hw, u32 *reg_space);
/* Handle extra events on specific interrupts hw dependent */
int (*host_irq_status)(struct mac_device_info *hw,
struct stmmac_extra_stats *x);
/* Handle MTL interrupts */
- int (*host_mtl_irq_status)(struct mac_device_info *hw, u32 chan);
+ int (*host_mtl_irq_status)(struct stmmac_priv *priv,
+ struct mac_device_info *hw, u32 chan);
/* Multicast filter setting */
void (*set_filter)(struct mac_device_info *hw, struct net_device *dev);
/* Flow control setting */
@@ -335,8 +358,9 @@ struct stmmac_ops {
void (*set_eee_lpi_entry_timer)(struct mac_device_info *hw, int et);
void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw);
void (*set_eee_pls)(struct mac_device_info *hw, int link);
- void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x,
- u32 rx_queues, u32 tx_queues);
+ void (*debug)(struct stmmac_priv *priv, void __iomem *ioaddr,
+ struct stmmac_extra_stats *x, u32 rx_queues,
+ u32 tx_queues);
/* PCS calls */
void (*pcs_ctrl_ane)(void __iomem *ioaddr, bool ane, bool srgmi_ral,
bool loopback);
@@ -416,17 +440,17 @@ struct stmmac_ops {
#define stmmac_prog_mtl_tx_algorithms(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, prog_mtl_tx_algorithms, __args)
#define stmmac_set_mtl_tx_queue_weight(__priv, __args...) \
- stmmac_do_void_callback(__priv, mac, set_mtl_tx_queue_weight, __args)
+ stmmac_do_void_callback(__priv, mac, set_mtl_tx_queue_weight, __priv, __args)
#define stmmac_map_mtl_to_dma(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, map_mtl_to_dma, __args)
#define stmmac_config_cbs(__priv, __args...) \
- stmmac_do_void_callback(__priv, mac, config_cbs, __args)
+ stmmac_do_void_callback(__priv, mac, config_cbs, __priv, __args)
#define stmmac_dump_mac_regs(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, dump_regs, __args)
#define stmmac_host_irq_status(__priv, __args...) \
stmmac_do_callback(__priv, mac, host_irq_status, __args)
#define stmmac_host_mtl_irq_status(__priv, __args...) \
- stmmac_do_callback(__priv, mac, host_mtl_irq_status, __args)
+ stmmac_do_callback(__priv, mac, host_mtl_irq_status, __priv, __args)
#define stmmac_set_filter(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, set_filter, __args)
#define stmmac_flow_ctrl(__priv, __args...) \
@@ -448,11 +472,11 @@ struct stmmac_ops {
#define stmmac_set_eee_pls(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, set_eee_pls, __args)
#define stmmac_mac_debug(__priv, __args...) \
- stmmac_do_void_callback(__priv, mac, debug, __args)
+ stmmac_do_void_callback(__priv, mac, debug, __priv, __args)
#define stmmac_pcs_ctrl_ane(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, pcs_ctrl_ane, __args)
#define stmmac_pcs_rane(__priv, __args...) \
- stmmac_do_void_callback(__priv, mac, pcs_rane, __args)
+ stmmac_do_void_callback(__priv, mac, pcs_rane, __priv, __args)
#define stmmac_pcs_get_adv_lp(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, pcs_get_adv_lp, __args)
#define stmmac_safety_feat_config(__priv, __args...) \
@@ -500,8 +524,6 @@ struct stmmac_ops {
#define stmmac_fpe_irq_status(__priv, __args...) \
stmmac_do_callback(__priv, mac, fpe_irq_status, __args)
-struct stmmac_priv;
-
/* PTP and HW Timer helpers */
struct stmmac_hwtimestamp {
void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data);
@@ -533,16 +555,20 @@ struct stmmac_hwtimestamp {
#define stmmac_timestamp_interrupt(__priv, __args...) \
stmmac_do_void_callback(__priv, ptp, timestamp_interrupt, __args)
+struct stmmac_tx_queue;
+struct stmmac_rx_queue;
+
/* Helpers to manage the descriptors for chain and ring modes */
struct stmmac_mode_ops {
void (*init) (void *des, dma_addr_t phy_addr, unsigned int size,
unsigned int extend_desc);
unsigned int (*is_jumbo_frm) (int len, int ehn_desc);
- int (*jumbo_frm)(void *priv, struct sk_buff *skb, int csum);
+ int (*jumbo_frm)(struct stmmac_tx_queue *tx_q, struct sk_buff *skb,
+ int csum);
int (*set_16kib_bfsize)(int mtu);
void (*init_desc3)(struct dma_desc *p);
- void (*refill_desc3) (void *priv, struct dma_desc *p);
- void (*clean_desc3) (void *priv, struct dma_desc *p);
+ void (*refill_desc3)(struct stmmac_rx_queue *rx_q, struct dma_desc *p);
+ void (*clean_desc3)(struct stmmac_tx_queue *tx_q, struct dma_desc *p);
};
#define stmmac_mode_init(__priv, __args...) \
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index e3da4da242ee..350e6670a576 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -12,10 +12,10 @@
#include "common.h"
#include "descs_com.h"
-static int ndesc_get_tx_status(void *data, struct stmmac_extra_stats *x,
+static int ndesc_get_tx_status(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
struct dma_desc *p, void __iomem *ioaddr)
{
- struct net_device_stats *stats = (struct net_device_stats *)data;
unsigned int tdes0 = le32_to_cpu(p->des0);
unsigned int tdes1 = le32_to_cpu(p->des1);
int ret = tx_done;
@@ -70,12 +70,12 @@ static int ndesc_get_tx_len(struct dma_desc *p)
* and, if required, updates the multicast statistics.
* In case of success, it returns good_frame because the GMAC device
* is supposed to be able to compute the csum in HW. */
-static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x,
+static int ndesc_get_rx_status(struct net_device_stats *stats,
+ struct stmmac_extra_stats *x,
struct dma_desc *p)
{
int ret = good_frame;
unsigned int rdes0 = le32_to_cpu(p->des0);
- struct net_device_stats *stats = (struct net_device_stats *)data;
if (unlikely(rdes0 & RDES0_OWN))
return dma_own;
diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
index 2b5b17d8b8a0..d218412ca832 100644
--- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
@@ -14,9 +14,9 @@
#include "stmmac.h"
-static int jumbo_frm(void *p, struct sk_buff *skb, int csum)
+static int jumbo_frm(struct stmmac_tx_queue *tx_q, struct sk_buff *skb,
+ int csum)
{
- struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)p;
unsigned int nopaged_len = skb_headlen(skb);
struct stmmac_priv *priv = tx_q->priv_data;
unsigned int entry = tx_q->cur_tx;
@@ -101,9 +101,8 @@ static unsigned int is_jumbo_frm(int len, int enh_desc)
return ret;
}
-static void refill_desc3(void *priv_ptr, struct dma_desc *p)
+static void refill_desc3(struct stmmac_rx_queue *rx_q, struct dma_desc *p)
{
- struct stmmac_rx_queue *rx_q = priv_ptr;
struct stmmac_priv *priv = rx_q->priv_data;
/* Fill DES3 in case of RING mode */
@@ -117,9 +116,8 @@ static void init_desc3(struct dma_desc *p)
p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB);
}
-static void clean_desc3(void *priv_ptr, struct dma_desc *p)
+static void clean_desc3(struct stmmac_tx_queue *tx_q, struct dma_desc *p)
{
- struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)priv_ptr;
struct stmmac_priv *priv = tx_q->priv_data;
unsigned int entry = tx_q->dirty_tx;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 3d15e1e92e18..07ea5ab0a60b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -92,6 +92,13 @@ struct stmmac_rx_buffer {
dma_addr_t sec_addr;
};
+struct stmmac_xdp_buff {
+ struct xdp_buff xdp;
+ struct stmmac_priv *priv;
+ struct dma_desc *desc;
+ struct dma_desc *ndesc;
+};
+
struct stmmac_rx_queue {
u32 rx_count_frames;
u32 queue_index;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 35c8dd92d369..2ae73ab842d4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -393,19 +393,10 @@ stmmac_ethtool_set_link_ksettings(struct net_device *dev,
if (priv->hw->pcs & STMMAC_PCS_RGMII ||
priv->hw->pcs & STMMAC_PCS_SGMII) {
- u32 mask = ADVERTISED_Autoneg | ADVERTISED_Pause;
-
/* Only support ANE */
if (cmd->base.autoneg != AUTONEG_ENABLE)
return -EINVAL;
- mask &= (ADVERTISED_1000baseT_Half |
- ADVERTISED_1000baseT_Full |
- ADVERTISED_100baseT_Half |
- ADVERTISED_100baseT_Full |
- ADVERTISED_10baseT_Half |
- ADVERTISED_10baseT_Full);
-
mutex_lock(&priv->lock);
stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, priv->hw->ps, 0);
mutex_unlock(&priv->lock);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 17310ade88dd..f116e4ae293b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1134,20 +1134,26 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv)
static int stmmac_init_phy(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
+ struct fwnode_handle *phy_fwnode;
struct fwnode_handle *fwnode;
int ret;
+ if (!phylink_expects_phy(priv->phylink))
+ return 0;
+
fwnode = of_fwnode_handle(priv->plat->phylink_node);
if (!fwnode)
fwnode = dev_fwnode(priv->device);
if (fwnode)
- ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0);
+ phy_fwnode = fwnode_get_phy_node(fwnode);
+ else
+ phy_fwnode = NULL;
/* Some DT bindings do not set-up the PHY handle. Let's try to
* manually parse it
*/
- if (!fwnode || ret) {
+ if (!phy_fwnode || IS_ERR(phy_fwnode)) {
int addr = priv->plat->phy_addr;
struct phy_device *phydev;
@@ -1163,6 +1169,9 @@ static int stmmac_init_phy(struct net_device *dev)
}
ret = phylink_connect_phy(priv->phylink, phydev);
+ } else {
+ fwnode_handle_put(phy_fwnode);
+ ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0);
}
if (!priv->plat->pmt) {
@@ -1605,6 +1614,12 @@ static int stmmac_alloc_rx_buffers_zc(struct stmmac_priv *priv,
struct stmmac_rx_queue *rx_q = &dma_conf->rx_queue[queue];
int i;
+ /* struct stmmac_xdp_buff uses the cb field (maximum size of 24 bytes)
+ * in struct xdp_buff_xsk to stash driver-specific information. Thus,
+ * use this macro to make sure there are no size violations.
+ */
+ XSK_CHECK_PRIV_TYPE(struct stmmac_xdp_buff);
+
for (i = 0; i < dma_conf->dma_rx_size; i++) {
struct stmmac_rx_buffer *buf;
dma_addr_t dma_addr;
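
XSK_CHECK_PRIV_TYPE() used above is a compile-time guard: the driver context overlaid on struct xdp_buff_xsk must fit inside the 24-byte cb[] scratch area mentioned in the comment. A standalone sketch of the same idea, with stand-in types and sizes rather than the kernel's:

#include <stddef.h>

#define DEMO_CB_SIZE 24	/* models the cb[] scratch area in struct xdp_buff_xsk */

struct xdp_buff_demo { void *data; void *data_end; };

struct drv_xdp_ctx {
	struct xdp_buff_demo xdp;	/* overlays the embedded xdp_buff */
	void *priv;			/* stashed in the cb[] area */
	void *desc;
};

/* mirrors the intent of XSK_CHECK_PRIV_TYPE(): break the build if the
 * driver-specific tail of the context would overflow the scratch area */
_Static_assert(sizeof(struct drv_xdp_ctx) - sizeof(struct xdp_buff_demo) <=
	       DEMO_CB_SIZE, "driver context exceeds cb[] scratch space");

int main(void) { return 0; }
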
@@ -4989,6 +5004,16 @@ static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
return ret;
}
+static struct stmmac_xdp_buff *xsk_buff_to_stmmac_ctx(struct xdp_buff *xdp)
+{
+ /* In the XDP zero-copy data path, the xdp field in struct xdp_buff_xsk
+ * represents the incoming packet, whereas the cb field in the same
+ * structure stores driver-specific info. Thus, struct stmmac_xdp_buff
+ * is laid on top of the xdp and cb fields of struct xdp_buff_xsk.
+ */
+ return (struct stmmac_xdp_buff *)xdp;
+}
+
static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
{
struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue];
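
xsk_buff_to_stmmac_ctx() relies on struct stmmac_xdp_buff keeping struct xdp_buff as its first member, so a pointer to the inner xdp_buff is also a valid pointer to the outer context. A simplified standalone illustration, with stand-in types rather than the kernel's:

#include <assert.h>
#include <stddef.h>

struct xdp_buff_demo { void *data; };

struct drv_ctx {
	struct xdp_buff_demo xdp;	/* must remain the first member */
	int queue;
};

int main(void)
{
	struct drv_ctx ctx = { .queue = 3 };
	struct xdp_buff_demo *xdp = &ctx.xdp;
	struct drv_ctx *back;

	/* the cast back is valid only because xdp sits at offset zero */
	_Static_assert(offsetof(struct drv_ctx, xdp) == 0, "xdp must be first");

	back = (struct drv_ctx *)xdp;
	assert(back->queue == 3);
	return 0;
}
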
@@ -5018,6 +5043,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
}
while (count < limit) {
struct stmmac_rx_buffer *buf;
+ struct stmmac_xdp_buff *ctx;
unsigned int buf1_len = 0;
struct dma_desc *np, *p;
int entry;
@@ -5103,6 +5129,11 @@ read_again:
goto read_again;
}
+ ctx = xsk_buff_to_stmmac_ctx(buf->xdp);
+ ctx->priv = priv;
+ ctx->desc = p;
+ ctx->ndesc = np;
+
/* XDP ZC Frame only support primary buffers for now */
buf1_len = stmmac_rx_buf1_len(priv, p, status, len);
len += buf1_len;
@@ -5181,7 +5212,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
enum dma_data_direction dma_dir;
unsigned int desc_size;
struct sk_buff *skb = NULL;
- struct xdp_buff xdp;
+ struct stmmac_xdp_buff ctx;
int xdp_status = 0;
int buf_sz;
@@ -5302,17 +5333,22 @@ read_again:
dma_sync_single_for_cpu(priv->device, buf->addr,
buf1_len, dma_dir);
- xdp_init_buff(&xdp, buf_sz, &rx_q->xdp_rxq);
- xdp_prepare_buff(&xdp, page_address(buf->page),
- buf->page_offset, buf1_len, false);
+ xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq);
+ xdp_prepare_buff(&ctx.xdp, page_address(buf->page),
+ buf->page_offset, buf1_len, true);
- pre_len = xdp.data_end - xdp.data_hard_start -
+ pre_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
buf->page_offset;
- skb = stmmac_xdp_run_prog(priv, &xdp);
+
+ ctx.priv = priv;
+ ctx.desc = p;
+ ctx.ndesc = np;
+
+ skb = stmmac_xdp_run_prog(priv, &ctx.xdp);
/* Due xdp_adjust_tail: DMA sync for_device
* cover max len CPU touch
*/
- sync_len = xdp.data_end - xdp.data_hard_start -
+ sync_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
buf->page_offset;
sync_len = max(sync_len, pre_len);
@@ -5322,7 +5358,7 @@ read_again:
if (xdp_res & STMMAC_XDP_CONSUMED) {
page_pool_put_page(rx_q->page_pool,
- virt_to_head_page(xdp.data),
+ virt_to_head_page(ctx.xdp.data),
sync_len, true);
buf->page = NULL;
priv->dev->stats.rx_dropped++;
@@ -5350,7 +5386,7 @@ read_again:
if (!skb) {
/* XDP program may expand or reduce tail */
- buf1_len = xdp.data_end - xdp.data;
+ buf1_len = ctx.xdp.data_end - ctx.xdp.data;
skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
if (!skb) {
@@ -5360,7 +5396,7 @@ read_again:
}
/* XDP program may adjust header */
- skb_copy_to_linear_data(skb, xdp.data, buf1_len);
+ skb_copy_to_linear_data(skb, ctx.xdp.data, buf1_len);
skb_put(skb, buf1_len);
/* Data payload copied into SKB, page ready for recycle */
@@ -6341,6 +6377,10 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
bool is_double = false;
int ret;
+ ret = pm_runtime_resume_and_get(priv->device);
+ if (ret < 0)
+ return ret;
+
if (be16_to_cpu(proto) == ETH_P_8021AD)
is_double = true;
@@ -6348,16 +6388,18 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
ret = stmmac_vlan_update(priv, is_double);
if (ret) {
clear_bit(vid, priv->active_vlans);
- return ret;
+ goto err_pm_put;
}
if (priv->hw->num_vlan) {
ret = stmmac_add_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid);
if (ret)
- return ret;
+ goto err_pm_put;
}
+err_pm_put:
+ pm_runtime_put(priv->device);
- return 0;
+ return ret;
}
static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid)
@@ -6622,6 +6664,8 @@ int stmmac_xdp_open(struct net_device *dev)
goto init_error;
}
+ stmmac_reset_queues_param(priv);
+
/* DMA CSR Channel configuration */
for (chan = 0; chan < dma_csr_ch; chan++) {
stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan);
@@ -6948,7 +6992,7 @@ static void stmmac_napi_del(struct net_device *dev)
int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
{
struct stmmac_priv *priv = netdev_priv(dev);
- int ret = 0;
+ int ret = 0, i;
if (netif_running(dev))
stmmac_release(dev);
@@ -6957,6 +7001,10 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
priv->plat->rx_queues_to_use = rx_cnt;
priv->plat->tx_queues_to_use = tx_cnt;
+ if (!netif_is_rxfh_configured(dev))
+ for (i = 0; i < ARRAY_SIZE(priv->rss.table); i++)
+ priv->rss.table[i] = ethtool_rxfh_indir_default(i,
+ rx_cnt);
stmmac_napi_add(dev);
@@ -7045,6 +7093,37 @@ void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable)
}
}
+static int stmmac_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp)
+{
+ const struct stmmac_xdp_buff *ctx = (void *)_ctx;
+ struct dma_desc *desc_contains_ts = ctx->desc;
+ struct stmmac_priv *priv = ctx->priv;
+ struct dma_desc *ndesc = ctx->ndesc;
+ struct dma_desc *desc = ctx->desc;
+ u64 ns = 0;
+
+ if (!priv->hwts_rx_en)
+ return -ENODATA;
+
+ /* For GMAC4, the valid timestamp is held in the CTX next descriptor. */
+ if (priv->plat->has_gmac4 || priv->plat->has_xgmac)
+ desc_contains_ts = ndesc;
+
+ /* Check if timestamp is available */
+ if (stmmac_get_rx_timestamp_status(priv, desc, ndesc, priv->adv_ts)) {
+ stmmac_get_timestamp(priv, desc_contains_ts, priv->adv_ts, &ns);
+ ns -= priv->plat->cdc_error_adj;
+ *timestamp = ns_to_ktime(ns);
+ return 0;
+ }
+
+ return -ENODATA;
+}
+
+static const struct xdp_metadata_ops stmmac_xdp_metadata_ops = {
+ .xmo_rx_timestamp = stmmac_xdp_rx_timestamp,
+};
+
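
On the consumer side, an XDP program would read this timestamp through the bpf_xdp_metadata_rx_timestamp() kfunc, which is what ends up calling stmmac_xdp_rx_timestamp() above. A rough sketch of such a program, assuming a clang/libbpf BPF build environment; treat the exact kfunc declaration as an assumption modeled on the kernel's XDP metadata selftests:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* kfunc backed by the driver's xmo_rx_timestamp hook */
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
					 __u64 *timestamp) __ksym;

SEC("xdp")
int read_rx_tstamp(struct xdp_md *ctx)
{
	__u64 ts = 0;

	/* returns -ENODATA when hwts_rx_en is off or no timestamp latched */
	if (bpf_xdp_metadata_rx_timestamp(ctx, &ts) == 0)
		bpf_printk("rx timestamp: %llu", ts);

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
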
/**
* stmmac_dvr_probe
* @device: device pointer
@@ -7152,6 +7231,8 @@ int stmmac_dvr_probe(struct device *device,
ndev->netdev_ops = &stmmac_netdev_ops;
+ ndev->xdp_metadata_ops = &stmmac_xdp_metadata_ops;
+
ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_RXCSUM;
ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
@@ -7238,6 +7319,10 @@ int stmmac_dvr_probe(struct device *device,
if (priv->dma_cap.rssen && priv->plat->rss_en)
ndev->features |= NETIF_F_RXHASH;
+ ndev->vlan_features |= ndev->features;
+ /* TSO doesn't work on VLANs yet */
+ ndev->vlan_features &= ~NETIF_F_TSO;
+
/* MTU range: 46 - hw-specific max */
ndev->min_mtu = ETH_ZLEN - ETH_HLEN;
if (priv->plat->has_xgmac)
@@ -7260,6 +7345,8 @@ int stmmac_dvr_probe(struct device *device,
if (flow_ctrl)
priv->flow_ctrl = FLOW_AUTO; /* RX/TX pause on */
+ ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
/* Setup channels NAPI */
stmmac_napi_add(ndev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 21aaa2730ac8..6807c4c1a0a2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -281,9 +281,8 @@ static int stmmac_mdio_read_c22(struct mii_bus *bus, int phyaddr, int phyreg)
value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift)
& priv->hw->mii.clk_csr_mask;
- if (priv->plat->has_gmac4) {
+ if (priv->plat->has_gmac4)
value |= MII_GMAC4_READ;
- }
data = stmmac_mdio_read(priv, data, value);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 067a40fe0a23..eb0b2898daa3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -519,7 +519,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
if (of_device_is_compatible(np, "snps,dwmac-4.00") ||
of_device_is_compatible(np, "snps,dwmac-4.10a") ||
of_device_is_compatible(np, "snps,dwmac-4.20a") ||
- of_device_is_compatible(np, "snps,dwmac-5.10a")) {
+ of_device_is_compatible(np, "snps,dwmac-5.10a") ||
+ of_device_is_compatible(np, "snps,dwmac-5.20")) {
plat->has_gmac4 = 1;
plat->has_gmac = 0;
plat->pmt = 1;
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index ab8b09a9ef61..7a2e76776297 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -4522,7 +4522,7 @@ static int niu_alloc_channels(struct niu *np)
err = niu_rbr_fill(np, rp, GFP_KERNEL);
if (err)
- return err;
+ goto out_err;
}
tx_rings = kcalloc(num_tx_rings, sizeof(struct tx_ring_info),
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index ec85aef35bf9..b93613cd1994 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -14,9 +14,6 @@
* argument : macaddr=0x00,0x10,0x20,0x30,0x40,0x50
*/
-#include <asm/byteorder.h>
-#include <asm/dma.h>
-#include <asm/irq.h>
#include <linux/bitops.h>
#include <linux/crc32.h>
#include <linux/delay.h>
@@ -45,6 +42,10 @@
#include <linux/types.h>
#include <linux/uaccess.h>
+#include <asm/byteorder.h>
+#include <asm/dma.h>
+#include <asm/irq.h>
+
#ifdef CONFIG_SPARC
#include <asm/auxio.h>
#include <asm/idprom.h>
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index d17757ecbf42..11cbcd9e2c72 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -1506,9 +1506,21 @@ static void am65_cpsw_nuss_mac_config(struct phylink_config *config, unsigned in
struct am65_cpsw_common *common = port->common;
if (common->pdata.extra_modes & BIT(state->interface)) {
- if (state->interface == PHY_INTERFACE_MODE_SGMII)
+ if (state->interface == PHY_INTERFACE_MODE_SGMII) {
writel(ADVERTISE_SGMII,
port->sgmii_base + AM65_CPSW_SGMII_MR_ADV_ABILITY_REG);
+ cpsw_sl_ctl_set(port->slave.mac_sl, CPSW_SL_CTL_EXT_EN);
+ } else {
+ cpsw_sl_ctl_clr(port->slave.mac_sl, CPSW_SL_CTL_EXT_EN);
+ }
+
+ if (state->interface == PHY_INTERFACE_MODE_USXGMII) {
+ cpsw_sl_ctl_set(port->slave.mac_sl,
+ CPSW_SL_CTL_XGIG | CPSW_SL_CTL_XGMII_EN);
+ } else {
+ cpsw_sl_ctl_clr(port->slave.mac_sl,
+ CPSW_SL_CTL_XGIG | CPSW_SL_CTL_XGMII_EN);
+ }
writel(AM65_CPSW_SGMII_CONTROL_MR_AN_ENABLE,
port->sgmii_base + AM65_CPSW_SGMII_CONTROL_REG);
@@ -1523,6 +1535,7 @@ static void am65_cpsw_nuss_mac_link_down(struct phylink_config *config, unsigned
struct am65_cpsw_port *port = container_of(slave, struct am65_cpsw_port, slave);
struct am65_cpsw_common *common = port->common;
struct net_device *ndev = port->ndev;
+ u32 mac_control;
int tmo;
/* disable forwarding */
@@ -1534,7 +1547,14 @@ static void am65_cpsw_nuss_mac_link_down(struct phylink_config *config, unsigned
dev_dbg(common->dev, "down msc_sl %08x tmo %d\n",
cpsw_sl_reg_read(port->slave.mac_sl, CPSW_SL_MACSTATUS), tmo);
- cpsw_sl_ctl_reset(port->slave.mac_sl);
+ /* All the bits that am65_cpsw_nuss_mac_link_up() can possibly set */
+ mac_control = CPSW_SL_CTL_GMII_EN | CPSW_SL_CTL_GIG | CPSW_SL_CTL_IFCTL_A |
+ CPSW_SL_CTL_FULLDUPLEX | CPSW_SL_CTL_RX_FLOW_EN | CPSW_SL_CTL_TX_FLOW_EN;
+ /* If interface mode is RGMII, CPSW_SL_CTL_EXT_EN might have been set for 10 Mbps */
+ if (phy_interface_mode_is_rgmii(interface))
+ mac_control |= CPSW_SL_CTL_EXT_EN;
+ /* Only clear those bits that can be set by am65_cpsw_nuss_mac_link_up() */
+ cpsw_sl_ctl_clr(port->slave.mac_sl, mac_control);
am65_cpsw_qos_link_down(ndev);
netif_tx_stop_all_queues(ndev);
@@ -1551,10 +1571,12 @@ static void am65_cpsw_nuss_mac_link_up(struct phylink_config *config, struct phy
u32 mac_control = CPSW_SL_CTL_GMII_EN;
struct net_device *ndev = port->ndev;
+ /* Bring the port out of idle state */
+ cpsw_sl_ctl_clr(port->slave.mac_sl, CPSW_SL_CTL_CMD_IDLE);
+
if (speed == SPEED_1000)
mac_control |= CPSW_SL_CTL_GIG;
- if (interface == PHY_INTERFACE_MODE_SGMII)
- mac_control |= CPSW_SL_CTL_EXT_EN;
+ /* TODO: Verify whether in-band is necessary for 10 Mbps RGMII */
if (speed == SPEED_10 && phy_interface_mode_is_rgmii(interface))
/* Can be used with in band mode only */
mac_control |= CPSW_SL_CTL_EXT_EN;
@@ -2157,7 +2179,8 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx)
/* Configuring Phylink */
port->slave.phylink_config.dev = &port->ndev->dev;
port->slave.phylink_config.type = PHYLINK_NETDEV;
- port->slave.phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD;
+ port->slave.phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+ MAC_1000FD | MAC_5000FD;
port->slave.phylink_config.mac_managed_pm = true; /* MAC does PM */
switch (port->slave.phy_if) {
@@ -2175,6 +2198,7 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx)
case PHY_INTERFACE_MODE_QSGMII:
case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_USXGMII:
if (common->pdata.extra_modes & BIT(port->slave.phy_if)) {
__set_bit(port->slave.phy_if,
port->slave.phylink_config.supported_interfaces);
@@ -2796,12 +2820,20 @@ static const struct am65_cpsw_pdata j721e_cpswxg_pdata = {
.extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII) | BIT(PHY_INTERFACE_MODE_SGMII),
};
+static const struct am65_cpsw_pdata j784s4_cpswxg_pdata = {
+ .quirks = 0,
+ .ale_dev_id = "am64-cpswxg",
+ .fdqring_mode = K3_RINGACC_RING_MODE_MESSAGE,
+ .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII) | BIT(PHY_INTERFACE_MODE_USXGMII),
+};
+
static const struct of_device_id am65_cpsw_nuss_of_mtable[] = {
{ .compatible = "ti,am654-cpsw-nuss", .data = &am65x_sr1_0},
{ .compatible = "ti,j721e-cpsw-nuss", .data = &j721e_pdata},
{ .compatible = "ti,am642-cpsw-nuss", .data = &am64x_cpswxg_pdata},
{ .compatible = "ti,j7200-cpswxg-nuss", .data = &j7200_cpswxg_pdata},
{ .compatible = "ti,j721e-cpswxg-nuss", .data = &j721e_cpswxg_pdata},
+ { .compatible = "ti,j784s4-cpswxg-nuss", .data = &j784s4_cpswxg_pdata},
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, am65_cpsw_nuss_of_mtable);
@@ -2957,7 +2989,8 @@ err_free_phylink:
am65_cpsw_nuss_phylink_cleanup(common);
am65_cpts_release(common->cpts);
err_of_clear:
- of_platform_device_destroy(common->mdio_dev, NULL);
+ if (common->mdio_dev)
+ of_platform_device_destroy(common->mdio_dev, NULL);
err_pm_clear:
pm_runtime_put_sync(dev);
pm_runtime_disable(dev);
@@ -2987,7 +3020,8 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev)
am65_cpts_release(common->cpts);
am65_cpsw_disable_serdes_phy(common);
- of_platform_device_destroy(common->mdio_dev, NULL);
+ if (common->mdio_dev)
+ of_platform_device_destroy(common->mdio_dev, NULL);
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 37f0b62ec5d6..f9cd566d1c9b 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -27,7 +27,7 @@
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/of_net.h>
-#include <linux/of_device.h>
+#include <linux/of_platform.h>
#include <linux/if_vlan.h>
#include <linux/kmemleak.h>
#include <linux/sys_soc.h>
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 35128dd45ffc..c61e4e44a78f 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -7,6 +7,7 @@
#include <linux/io.h>
#include <linux/clk.h>
+#include <linux/platform_device.h>
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/irqreturn.h>
@@ -23,7 +24,7 @@
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/of_net.h>
-#include <linux/of_device.h>
+#include <linux/of_platform.h>
#include <linux/if_vlan.h>
#include <linux/kmemleak.h>
#include <linux/sys_soc.h>
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index eb89a274083e..1e8d8b7b0c62 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -1798,10 +1798,13 @@ static int wx_setup_rx_resources(struct wx_ring *rx_ring)
ret = wx_alloc_page_pool(rx_ring);
if (ret < 0) {
dev_err(rx_ring->dev, "Page pool creation failed: %d\n", ret);
- goto err;
+ goto err_desc;
}
return 0;
+
+err_desc:
+ dma_free_coherent(dev, rx_ring->size, rx_ring->desc, rx_ring->dma);
err:
kvfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
diff --git a/drivers/net/fddi/skfp/rmt.c b/drivers/net/fddi/skfp/rmt.c
index 37a89675dbeb..314623650a84 100644
--- a/drivers/net/fddi/skfp/rmt.c
+++ b/drivers/net/fddi/skfp/rmt.c
@@ -234,9 +234,9 @@ static void rmt_fsm(struct s_smc *smc, int cmd)
if (smc->r.rm_join) {
smc->r.sm_ma_avail = TRUE ;
if (smc->mib.m[MAC0].fddiMACMA_UnitdataEnable)
- smc->mib.m[MAC0].fddiMACMA_UnitdataAvailable = TRUE ;
- else
- smc->mib.m[MAC0].fddiMACMA_UnitdataAvailable = FALSE ;
+ smc->mib.m[MAC0].fddiMACMA_UnitdataAvailable = TRUE;
+ else
+ smc->mib.m[MAC0].fddiMACMA_UnitdataAvailable = FALSE;
}
DB_RMTN(1, "RMT : RING UP");
RS_CLEAR(smc,RS_NORINGOP) ;
diff --git a/drivers/net/hamradio/Kconfig b/drivers/net/hamradio/Kconfig
index a9c44f08199d..a94c7bd5db2e 100644
--- a/drivers/net/hamradio/Kconfig
+++ b/drivers/net/hamradio/Kconfig
@@ -47,7 +47,7 @@ config BPQETHER
config SCC
tristate "Z8530 SCC driver"
- depends on ISA && AX25 && ISA_DMA_API
+ depends on ISA && AX25
help
These cards are used to connect your Linux box to an amateur radio
in order to communicate with other computers. If you want to use
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 25616247d7a5..3427993f94f7 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1021,8 +1021,12 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb)
* the SecTAG, so we have to deduce which port to deliver to.
*/
if (macsec_is_offloaded(macsec) && netif_running(ndev)) {
- if (md_dst && md_dst->type == METADATA_MACSEC &&
- (!find_rx_sc(&macsec->secy, md_dst->u.macsec_info.sci)))
+ struct macsec_rx_sc *rx_sc = NULL;
+
+ if (md_dst && md_dst->type == METADATA_MACSEC)
+ rx_sc = find_rx_sc(&macsec->secy, md_dst->u.macsec_info.sci);
+
+ if (md_dst && md_dst->type == METADATA_MACSEC && !rx_sc)
continue;
if (ether_addr_equal_64bits(hdr->h_dest,
@@ -1047,7 +1051,13 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb)
nskb->pkt_type = PACKET_MULTICAST;
__netif_rx(nskb);
+ } else if (rx_sc || ndev->flags & IFF_PROMISC) {
+ skb->dev = ndev;
+ skb->pkt_type = PACKET_HOST;
+ ret = RX_HANDLER_ANOTHER;
+ goto out;
}
+
continue;
}
diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig
index 90309980686e..9ff2e6f22f3f 100644
--- a/drivers/net/mdio/Kconfig
+++ b/drivers/net/mdio/Kconfig
@@ -65,6 +65,7 @@ config MDIO_ASPEED
tristate "ASPEED MDIO bus controller"
depends on ARCH_ASPEED || COMPILE_TEST
depends on OF_MDIO && HAS_IOMEM
+ depends on MDIO_DEVRES
help
This module provides a driver for the independent MDIO bus
controllers found in the ASPEED AST2600 SoC. This is a driver for the
@@ -170,6 +171,7 @@ config MDIO_IPQ4019
tristate "Qualcomm IPQ4019 MDIO interface support"
depends on HAS_IOMEM && OF_MDIO
depends on COMMON_CLK
+ depends on MDIO_DEVRES
help
This driver supports the MDIO interface found in Qualcomm
IPQ40xx, IPQ60xx, IPQ807x and IPQ50xx series Soc-s.
@@ -178,6 +180,7 @@ config MDIO_IPQ8064
tristate "Qualcomm IPQ8064 MDIO interface support"
depends on HAS_IOMEM && OF_MDIO
depends on MFD_SYSCON
+ depends on MDIO_DEVRES
help
This driver supports the MDIO interface found in the network
interface units of the IPQ8064 SoC
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 6b9525def973..513675ae4dd2 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -44,6 +44,14 @@ config LED_TRIGGER_PHY
<Speed in megabits>Mbps OR <Speed in gigabits>Gbps OR link
for any speed known to the PHY.
+config PHYLIB_LEDS
+ bool "Support probing LEDs from device tree"
+ depends on LEDS_CLASS=y || LEDS_CLASS=PHYLIB
+ depends on OF
+ default y
+ help
+ When LED class support is enabled, phylib can automatically
+ probe LED settings from the device tree.
config FIXED_PHY
tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs"
diff --git a/drivers/net/phy/aquantia_hwmon.c b/drivers/net/phy/aquantia_hwmon.c
index 19c4c280a6cd..0da451e46f69 100644
--- a/drivers/net/phy/aquantia_hwmon.c
+++ b/drivers/net/phy/aquantia_hwmon.c
@@ -210,7 +210,7 @@ static const struct hwmon_channel_info aqr_hwmon_temp = {
.config = aqr_hwmon_temp_config,
};
-static const struct hwmon_channel_info *aqr_hwmon_info[] = {
+static const struct hwmon_channel_info * const aqr_hwmon_info[] = {
&aqr_hwmon_chip,
&aqr_hwmon_temp,
NULL,
diff --git a/drivers/net/phy/bcm54140.c b/drivers/net/phy/bcm54140.c
index d8f3024860dc..d43076592f81 100644
--- a/drivers/net/phy/bcm54140.c
+++ b/drivers/net/phy/bcm54140.c
@@ -364,7 +364,7 @@ static int bcm54140_hwmon_write(struct device *dev,
}
}
-static const struct hwmon_channel_info *bcm54140_hwmon_info[] = {
+static const struct hwmon_channel_info * const bcm54140_hwmon_info[] = {
HWMON_CHANNEL_INFO(temp,
HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX |
HWMON_T_ALARM),
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 63a3644d86c9..cd5d0ed9c5e5 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -144,11 +144,15 @@
/* WOL Event Interrupt Enable */
#define MII_88E1318S_PHY_CSIER_WOL_EIE BIT(7)
-/* LED Timer Control Register */
-#define MII_88E1318S_PHY_LED_TCR 0x12
-#define MII_88E1318S_PHY_LED_TCR_FORCE_INT BIT(15)
-#define MII_88E1318S_PHY_LED_TCR_INTn_ENABLE BIT(7)
-#define MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW BIT(11)
+#define MII_88E1318S_PHY_LED_FUNC 0x10
+#define MII_88E1318S_PHY_LED_FUNC_OFF (0x8)
+#define MII_88E1318S_PHY_LED_FUNC_ON (0x9)
+#define MII_88E1318S_PHY_LED_FUNC_HI_Z (0xa)
+#define MII_88E1318S_PHY_LED_FUNC_BLINK (0xb)
+#define MII_88E1318S_PHY_LED_TCR 0x12
+#define MII_88E1318S_PHY_LED_TCR_FORCE_INT BIT(15)
+#define MII_88E1318S_PHY_LED_TCR_INTn_ENABLE BIT(7)
+#define MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW BIT(11)
/* Magic Packet MAC address registers */
#define MII_88E1318S_PHY_MAGIC_PACKET_WORD2 0x17
@@ -2735,7 +2739,7 @@ static const struct hwmon_channel_info marvell_hwmon_temp = {
.config = marvell_hwmon_temp_config,
};
-static const struct hwmon_channel_info *marvell_hwmon_info[] = {
+static const struct hwmon_channel_info * const marvell_hwmon_info[] = {
&marvell_hwmon_chip,
&marvell_hwmon_temp,
NULL
@@ -2832,6 +2836,63 @@ static int marvell_hwmon_probe(struct phy_device *phydev)
}
#endif
+static int m88e1318_led_brightness_set(struct phy_device *phydev,
+ u8 index, enum led_brightness value)
+{
+ int reg;
+
+ reg = phy_read_paged(phydev, MII_MARVELL_LED_PAGE,
+ MII_88E1318S_PHY_LED_FUNC);
+ if (reg < 0)
+ return reg;
+
+ switch (index) {
+ case 0:
+ case 1:
+ case 2:
+ reg &= ~(0xf << (4 * index));
+ if (value == LED_OFF)
+ reg |= MII_88E1318S_PHY_LED_FUNC_OFF << (4 * index);
+ else
+ reg |= MII_88E1318S_PHY_LED_FUNC_ON << (4 * index);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return phy_write_paged(phydev, MII_MARVELL_LED_PAGE,
+ MII_88E1318S_PHY_LED_FUNC, reg);
+}
+
+static int m88e1318_led_blink_set(struct phy_device *phydev, u8 index,
+ unsigned long *delay_on,
+ unsigned long *delay_off)
+{
+ int reg;
+
+ reg = phy_read_paged(phydev, MII_MARVELL_LED_PAGE,
+ MII_88E1318S_PHY_LED_FUNC);
+ if (reg < 0)
+ return reg;
+
+ switch (index) {
+ case 0:
+ case 1:
+ case 2:
+ reg &= ~(0xf << (4 * index));
+ reg |= MII_88E1318S_PHY_LED_FUNC_BLINK << (4 * index);
+ /* Reset default is 84ms */
+ *delay_on = 84 / 2;
+ *delay_off = 84 / 2;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return phy_write_paged(phydev, MII_MARVELL_LED_PAGE,
+ MII_88E1318S_PHY_LED_FUNC, reg);
+}
+
static int marvell_probe(struct phy_device *phydev)
{
struct marvell_priv *priv;
@@ -3081,6 +3142,8 @@ static struct phy_driver marvell_drivers[] = {
.get_sset_count = marvell_get_sset_count,
.get_strings = marvell_get_strings,
.get_stats = marvell_get_stats,
+ .led_brightness_set = m88e1318_led_brightness_set,
+ .led_blink_set = m88e1318_led_blink_set,
},
{
.phy_id = MARVELL_PHY_ID_88E1145,
@@ -3187,6 +3250,8 @@ static struct phy_driver marvell_drivers[] = {
.cable_test_start = marvell_vct7_cable_test_start,
.cable_test_tdr_start = marvell_vct5_cable_test_tdr_start,
.cable_test_get_status = marvell_vct7_cable_test_get_status,
+ .led_brightness_set = m88e1318_led_brightness_set,
+ .led_blink_set = m88e1318_led_blink_set,
},
{
.phy_id = MARVELL_PHY_ID_88E1540,
@@ -3213,6 +3278,8 @@ static struct phy_driver marvell_drivers[] = {
.cable_test_start = marvell_vct7_cable_test_start,
.cable_test_tdr_start = marvell_vct5_cable_test_tdr_start,
.cable_test_get_status = marvell_vct7_cable_test_get_status,
+ .led_brightness_set = m88e1318_led_brightness_set,
+ .led_blink_set = m88e1318_led_blink_set,
},
{
.phy_id = MARVELL_PHY_ID_88E1545,
@@ -3239,6 +3306,8 @@ static struct phy_driver marvell_drivers[] = {
.cable_test_start = marvell_vct7_cable_test_start,
.cable_test_tdr_start = marvell_vct5_cable_test_tdr_start,
.cable_test_get_status = marvell_vct7_cable_test_get_status,
+ .led_brightness_set = m88e1318_led_brightness_set,
+ .led_blink_set = m88e1318_led_blink_set,
},
{
.phy_id = MARVELL_PHY_ID_88E3016,
@@ -3380,6 +3449,8 @@ static struct phy_driver marvell_drivers[] = {
.get_stats = marvell_get_stats,
.get_tunable = m88e1540_get_tunable,
.set_tunable = m88e1540_set_tunable,
+ .led_brightness_set = m88e1318_led_brightness_set,
+ .led_blink_set = m88e1318_led_blink_set,
},
};
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 383a9c9f36e5..55d9d7acc32e 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -243,7 +243,7 @@ static const struct hwmon_channel_info mv3310_hwmon_temp = {
.config = mv3310_hwmon_temp_config,
};
-static const struct hwmon_channel_info *mv3310_hwmon_info[] = {
+static const struct hwmon_channel_info * const mv3310_hwmon_info[] = {
&mv3310_hwmon_chip,
&mv3310_hwmon_temp,
NULL,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 2184b1e859ae..3f81bb8dac44 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -10,13 +10,13 @@
* Copyright (c) 2014 Johan Hovold <johan@kernel.org>
*
* Support : Micrel Phys:
- * Giga phys: ksz9021, ksz9031, ksz9131
+ * Giga phys: ksz9021, ksz9031, ksz9131, lan8841, lan8814
* 100/10 Phys : ksz8001, ksz8721, ksz8737, ksz8041
* ksz8021, ksz8031, ksz8051,
* ksz8081, ksz8091,
* ksz8061,
* Switch : ksz8873, ksz886x
- * ksz9477
+ * ksz9477, lan8804
*/
#include <linux/bitfield.h>
@@ -3761,7 +3761,7 @@ static int lan8841_ptp_update_target(struct kszphy_ptp_priv *ptp_priv,
const struct timespec64 *ts)
{
return lan8841_ptp_set_target(ptp_priv, LAN8841_EVENT_A,
- ts->tv_sec + LAN8841_BUFFER_TIME, ts->tv_nsec);
+ ts->tv_sec + LAN8841_BUFFER_TIME, 0);
}
#define LAN8841_PTP_LTC_TARGET_RELOAD_SEC_HI(event) ((event) == LAN8841_EVENT_A ? 282 : 292)
diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c
index 8e6bb97b5f85..6301a9abfb95 100644
--- a/drivers/net/phy/mxl-gpy.c
+++ b/drivers/net/phy/mxl-gpy.c
@@ -182,7 +182,7 @@ static umode_t gpy_hwmon_is_visible(const void *data,
return 0444;
}
-static const struct hwmon_channel_info *gpy_hwmon_info[] = {
+static const struct hwmon_channel_info * const gpy_hwmon_info[] = {
HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
NULL
};
diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index 5813b07242ce..029875a59ff8 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -191,7 +191,7 @@
#define MAX_ID_PS 2260U
#define DEFAULT_ID_PS 2000U
-#define PPM_TO_SUBNS_INC(ppb) div_u64(GENMASK(31, 0) * (ppb) * \
+#define PPM_TO_SUBNS_INC(ppb) div_u64(GENMASK_ULL(31, 0) * (ppb) * \
PTP_CLK_PERIOD_100BT1, NSEC_PER_SEC)
#define NXP_C45_SKB_CB(skb) ((struct nxp_c45_skb_cb *)(skb)->cb)
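
The one-line change above swaps GENMASK(31, 0) for GENMASK_ULL(31, 0): the former has the width of unsigned long, so on 32-bit builds the multiplication by ppb wraps before div_u64 is ever reached. A plain C stand-in showing the difference:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ppb = 1000;

	/* 32-bit arithmetic, as with GENMASK(31, 0) on 32-bit: wraps */
	uint32_t bad = (uint32_t)0xffffffffu * ppb;

	/* 64-bit arithmetic, what GENMASK_ULL(31, 0) guarantees */
	uint64_t good = (uint64_t)0xffffffffull * ppb;

	printf("32-bit: %u, 64-bit: %llu\n", bad,
	       (unsigned long long)good);
	return 0;
}
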
@@ -1337,6 +1337,17 @@ no_ptp_support:
return ret;
}
+static void nxp_c45_remove(struct phy_device *phydev)
+{
+ struct nxp_c45_phy *priv = phydev->priv;
+
+ if (priv->ptp_clock)
+ ptp_clock_unregister(priv->ptp_clock);
+
+ skb_queue_purge(&priv->tx_queue);
+ skb_queue_purge(&priv->rx_queue);
+}
+
static struct phy_driver nxp_c45_driver[] = {
{
PHY_ID_MATCH_MODEL(PHY_ID_TJA_1103),
@@ -1359,6 +1370,7 @@ static struct phy_driver nxp_c45_driver[] = {
.set_loopback = genphy_c45_loopback,
.get_sqi = nxp_c45_get_sqi,
.get_sqi_max = nxp_c45_get_sqi_max,
+ .remove = nxp_c45_remove,
},
};
diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index ec91e671f8aa..b13e15310feb 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -477,7 +477,7 @@ static umode_t tja11xx_hwmon_is_visible(const void *data,
return 0;
}
-static const struct hwmon_channel_info *tja11xx_hwmon_info[] = {
+static const struct hwmon_channel_info * const tja11xx_hwmon_info[] = {
HWMON_CHANNEL_INFO(in, HWMON_I_LCRIT_ALARM),
HWMON_CHANNEL_INFO(temp, HWMON_T_CRIT_ALARM),
NULL
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 917ba84105fc..d373446ab5ac 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -19,10 +19,12 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/mdio.h>
#include <linux/mii.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/netdevice.h>
#include <linux/phy.h>
#include <linux/phy_led_triggers.h>
@@ -674,6 +676,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id,
device_initialize(&mdiodev->dev);
dev->state = PHY_DOWN;
+ INIT_LIST_HEAD(&dev->leds);
mutex_init(&dev->lock);
INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine);
@@ -2988,6 +2991,101 @@ static bool phy_drv_supports_irq(struct phy_driver *phydrv)
return phydrv->config_intr && phydrv->handle_interrupt;
}
+static int phy_led_set_brightness(struct led_classdev *led_cdev,
+ enum led_brightness value)
+{
+ struct phy_led *phyled = to_phy_led(led_cdev);
+ struct phy_device *phydev = phyled->phydev;
+ int err;
+
+ mutex_lock(&phydev->lock);
+ err = phydev->drv->led_brightness_set(phydev, phyled->index, value);
+ mutex_unlock(&phydev->lock);
+
+ return err;
+}
+
+static int phy_led_blink_set(struct led_classdev *led_cdev,
+ unsigned long *delay_on,
+ unsigned long *delay_off)
+{
+ struct phy_led *phyled = to_phy_led(led_cdev);
+ struct phy_device *phydev = phyled->phydev;
+ int err;
+
+ mutex_lock(&phydev->lock);
+ err = phydev->drv->led_blink_set(phydev, phyled->index,
+ delay_on, delay_off);
+ mutex_unlock(&phydev->lock);
+
+ return err;
+}
+
+static int of_phy_led(struct phy_device *phydev,
+ struct device_node *led)
+{
+ struct device *dev = &phydev->mdio.dev;
+ struct led_init_data init_data = {};
+ struct led_classdev *cdev;
+ struct phy_led *phyled;
+ int err;
+
+ phyled = devm_kzalloc(dev, sizeof(*phyled), GFP_KERNEL);
+ if (!phyled)
+ return -ENOMEM;
+
+ cdev = &phyled->led_cdev;
+ phyled->phydev = phydev;
+
+ err = of_property_read_u8(led, "reg", &phyled->index);
+ if (err)
+ return err;
+
+ if (phydev->drv->led_brightness_set)
+ cdev->brightness_set_blocking = phy_led_set_brightness;
+ if (phydev->drv->led_blink_set)
+ cdev->blink_set = phy_led_blink_set;
+ cdev->max_brightness = 1;
+ init_data.devicename = dev_name(&phydev->mdio.dev);
+ init_data.fwnode = of_fwnode_handle(led);
+ init_data.devname_mandatory = true;
+
+ err = devm_led_classdev_register_ext(dev, cdev, &init_data);
+ if (err)
+ return err;
+
+ list_add(&phyled->list, &phydev->leds);
+
+ return 0;
+}
+
+static int of_phy_leds(struct phy_device *phydev)
+{
+ struct device_node *node = phydev->mdio.dev.of_node;
+ struct device_node *leds, *led;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_OF_MDIO))
+ return 0;
+
+ if (!node)
+ return 0;
+
+ leds = of_get_child_by_name(node, "leds");
+ if (!leds)
+ return 0;
+
+ for_each_available_child_of_node(leds, led) {
+ err = of_phy_led(phydev, led);
+ if (err) {
+ of_node_put(led);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
/**
* fwnode_mdio_find_device - Given a fwnode, find the mdio_device
* @fwnode: pointer to the mdio_device's fwnode
@@ -3183,6 +3281,12 @@ static int phy_probe(struct device *dev)
/* Set the state to READY by default */
phydev->state = PHY_READY;
+ /* Get the LEDs from the device tree, and instantiate standard
+ * LEDs for them.
+ */
+ if (IS_ENABLED(CONFIG_PHYLIB_LEDS))
+ err = of_phy_leds(phydev);
+
out:
/* Re-assert the reset signal on error */
if (err)
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index f7da96f0c75b..a4111f1be375 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1583,6 +1583,25 @@ void phylink_destroy(struct phylink *pl)
}
EXPORT_SYMBOL_GPL(phylink_destroy);
+/**
+ * phylink_expects_phy() - Determine if phylink expects a phy to be attached
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ *
+ * When using fixed-link mode, or in-band mode with 1000base-X or 2500base-X,
+ * no PHY is needed.
+ *
+ * Returns true if phylink will be expecting a PHY.
+ */
+bool phylink_expects_phy(struct phylink *pl)
+{
+ if (pl->cfg_link_an_mode == MLO_AN_FIXED ||
+ (pl->cfg_link_an_mode == MLO_AN_INBAND &&
+ phy_interface_mode_is_8023z(pl->link_config.interface)))
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(phylink_expects_phy);
+
static void phylink_phy_change(struct phy_device *phydev, bool up)
{
struct phylink *pl = phydev->phylink;
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index f0fcb06fbe82..89636dc71e48 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -210,6 +210,12 @@ static const enum gpiod_flags gpio_flags[] = {
#define SFP_PHY_ADDR 22
#define SFP_PHY_ADDR_ROLLBALL 17
+/* SFP_EEPROM_BLOCK_SIZE is the size of the data chunk read from the
+ * EEPROM at a time. Some SFP modules, and also some Linux I2C drivers,
+ * do not like reads longer than 16 bytes.
+ */
+#define SFP_EEPROM_BLOCK_SIZE 16
+
struct sff_data {
unsigned int gpios;
bool (*module_supported)(const struct sfp_eeprom_id *id);
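
SFP_EEPROM_BLOCK_SIZE above caps each transfer at 16 bytes, so larger reads have to be split into block-sized chunks. A self-contained sketch of such a loop, with sfp_read_demo() as a hypothetical stand-in for the driver's I2C accessor, not a real kernel API:

#include <stdio.h>
#include <string.h>

#define DEMO_BLOCK_SIZE 16	/* mirrors SFP_EEPROM_BLOCK_SIZE */

static unsigned char eeprom[256];

/* hypothetical stand-in for the driver's I2C read path */
static int sfp_read_demo(unsigned int off, unsigned char *buf, size_t len)
{
	if (len > DEMO_BLOCK_SIZE || off + len > sizeof(eeprom))
		return -1;
	memcpy(buf, eeprom + off, len);
	return 0;
}

static int read_eeprom_chunked(unsigned int off, unsigned char *buf,
			       size_t len)
{
	while (len) {
		size_t chunk = len < DEMO_BLOCK_SIZE ? len : DEMO_BLOCK_SIZE;

		if (sfp_read_demo(off, buf, chunk) < 0)
			return -1;
		off += chunk;
		buf += chunk;
		len -= chunk;
	}
	return 0;
}

int main(void)
{
	unsigned char id[96];
	size_t i;

	for (i = 0; i < sizeof(eeprom); i++)
		eeprom[i] = (unsigned char)i;

	if (read_eeprom_chunked(0, id, sizeof(id)) == 0)
		printf("first 0x%02x, last 0x%02x\n", id[0], id[95]);
	return 0;
}
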
@@ -406,6 +412,10 @@ static const struct sfp_quirk sfp_quirks[] = {
SFP_QUIRK_F("HALNy", "HL-GSFP", sfp_fixup_halny_gsfp),
+ // HG MXPD-483II-F 2.5G supports 2500Base-X, but incorrectly reports
+ // 2600MBd in their EEPROM
+ SFP_QUIRK_M("HG GENUINE", "MXPD-483II", sfp_quirk_2500basex),
+
// Huawei MA5671A can operate at 2500base-X, but report 1.2GBd NRZ in
// their EEPROM
SFP_QUIRK("HUAWEI", "MA5671A", sfp_quirk_2500basex,
@@ -1378,7 +1388,7 @@ static const struct hwmon_ops sfp_hwmon_ops = {
.read_string = sfp_hwmon_read_string,
};
-static const struct hwmon_channel_info *sfp_hwmon_info[] = {
+static const struct hwmon_channel_info * const sfp_hwmon_info[] = {
HWMON_CHANNEL_INFO(chip,
HWMON_C_REGISTER_TZ),
HWMON_CHANNEL_INFO(in,
@@ -1947,11 +1957,7 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
u8 check;
int ret;
- /* Some SFP modules and also some Linux I2C drivers do not like reads
- * longer than 16 bytes, so read the EEPROM in chunks of 16 bytes at
- * a time.
- */
- sfp->i2c_block_size = 16;
+ sfp->i2c_block_size = SFP_EEPROM_BLOCK_SIZE;
ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base));
if (ret < 0) {
@@ -2509,6 +2515,9 @@ static int sfp_module_eeprom(struct sfp *sfp, struct ethtool_eeprom *ee,
unsigned int first, last, len;
int ret;
+ if (!(sfp->state & SFP_F_PRESENT))
+ return -ENODEV;
+
if (ee->len == 0)
return -EINVAL;
@@ -2541,6 +2550,9 @@ static int sfp_module_eeprom_by_page(struct sfp *sfp,
const struct ethtool_module_eeprom *page,
struct netlink_ext_ack *extack)
{
+ if (!(sfp->state & SFP_F_PRESENT))
+ return -ENODEV;
+
if (page->bank) {
NL_SET_ERR_MSG(extack, "Banks not supported");
return -EOPNOTSUPP;
@@ -2645,6 +2657,7 @@ static struct sfp *sfp_alloc(struct device *dev)
return ERR_PTR(-ENOMEM);
sfp->dev = dev;
+ sfp->i2c_block_size = SFP_EEPROM_BLOCK_SIZE;
mutex_init(&sfp->sm_mutex);
mutex_init(&sfp->st_mutex);
diff --git a/drivers/net/thunderbolt/main.c b/drivers/net/thunderbolt/main.c
index 26ef3706445e..0c1e8970ee58 100644
--- a/drivers/net/thunderbolt/main.c
+++ b/drivers/net/thunderbolt/main.c
@@ -148,7 +148,7 @@ struct tbnet_ring {
/**
* struct tbnet - ThunderboltIP network driver private data
* @svc: XDomain service the driver is bound to
- * @xd: XDomain the service blongs to
+ * @xd: XDomain the service belongs to
* @handler: ThunderboltIP configuration protocol handler
* @dev: Networking device
* @napi: NAPI structure for Rx polling
@@ -764,7 +764,7 @@ static bool tbnet_check_frame(struct tbnet *net, const struct tbnet_frame *tf,
*/
if (net->skb && net->rx_hdr.frame_count) {
/* Check the frame count fits the count field */
- if (frame_count != net->rx_hdr.frame_count) {
+ if (frame_count != le32_to_cpu(net->rx_hdr.frame_count)) {
net->stats.rx_length_errors++;
return false;
}
@@ -772,8 +772,8 @@ static bool tbnet_check_frame(struct tbnet *net, const struct tbnet_frame *tf,
/* Check the frame identifiers are incremented correctly,
* and id is matching.
*/
- if (frame_index != net->rx_hdr.frame_index + 1 ||
- frame_id != net->rx_hdr.frame_id) {
+ if (frame_index != le16_to_cpu(net->rx_hdr.frame_index) + 1 ||
+ frame_id != le16_to_cpu(net->rx_hdr.frame_id)) {
net->stats.rx_missed_errors++;
return false;
}
@@ -873,11 +873,12 @@ static int tbnet_poll(struct napi_struct *napi, int budget)
TBNET_RX_PAGE_SIZE - hdr_size);
}
- net->rx_hdr.frame_size = frame_size;
- net->rx_hdr.frame_count = le32_to_cpu(hdr->frame_count);
- net->rx_hdr.frame_index = le16_to_cpu(hdr->frame_index);
- net->rx_hdr.frame_id = le16_to_cpu(hdr->frame_id);
- last = net->rx_hdr.frame_index == net->rx_hdr.frame_count - 1;
+ net->rx_hdr.frame_size = hdr->frame_size;
+ net->rx_hdr.frame_count = hdr->frame_count;
+ net->rx_hdr.frame_index = hdr->frame_index;
+ net->rx_hdr.frame_id = hdr->frame_id;
+ last = le16_to_cpu(net->rx_hdr.frame_index) ==
+ le32_to_cpu(net->rx_hdr.frame_count) - 1;
rx_packets++;
net->stats.rx_bytes += frame_size;
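
The hunks above all follow one rule: rx_hdr fields are kept in wire
(little-endian) order in memory and converted with le16_to_cpu()/le32_to_cpu()
only at the point of comparison. A hedged sketch of that convention, with
hypothetical type names:

	struct hdr_sketch {
		__le32 frame_count;
		__le16 frame_index;
	};

	static bool is_last_frame(const struct hdr_sketch *h)
	{
		return le16_to_cpu(h->frame_index) ==
		       le32_to_cpu(h->frame_count) - 1;
	}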
@@ -990,8 +991,10 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb,
{
struct thunderbolt_ip_frame_header *hdr = page_address(frames[0]->page);
struct device *dma_dev = tb_ring_dma_device(net->tx_ring.ring);
- __wsum wsum = htonl(skb->len - skb_transport_offset(skb));
unsigned int i, len, offset = skb_transport_offset(skb);
+ /* Remove payload length from checksum */
+ u32 paylen = skb->len - skb_transport_offset(skb);
+ __wsum wsum = (__force __wsum)htonl(paylen);
__be16 protocol = skb->protocol;
void *data = skb->data;
void *dest = hdr + 1;
@@ -1027,7 +1030,7 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb,
/* Data points to the beginning of the packet.
* Compute the checksum's absolute offset within the packet.
* ipcso will update IP checksum.
- * tucso will update TCP/UPD checksum.
+ * tucso will update TCP/UDP checksum.
*/
if (protocol == htons(ETH_P_IP)) {
__sum16 *ipcso = dest + ((void *)&(ip_hdr(skb)->check) - data);
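
The ipcso/tucso pointers are derived by translating a field's address in the
original packet into the same byte offset inside the copied frame; a sketch of
that pointer arithmetic (names illustrative, not driver code):

	static __sum16 *csum_field_in_copy(void *dest, void *data,
					   __sum16 *field)
	{
		/* same offset, measured from the start of the copy */
		return dest + ((void *)field - data);
	}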
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index decb5ba56a25..0fc4b959edc1 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -1943,7 +1943,7 @@ static struct rx_agg *alloc_rx_agg(struct r8152 *tp, gfp_t mflags)
if (!rx_agg)
return NULL;
- rx_agg->page = alloc_pages(mflags | __GFP_COMP, order);
+ rx_agg->page = alloc_pages(mflags | __GFP_COMP | __GFP_NOWARN, order);
if (!rx_agg->page)
goto free_rx;
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index c1178915496d..4b3c6647edc6 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1262,11 +1262,12 @@ static void veth_set_xdp_features(struct net_device *dev)
peer = rtnl_dereference(priv->peer);
if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
+ struct veth_priv *priv_peer = netdev_priv(peer);
xdp_features_t val = NETDEV_XDP_ACT_BASIC |
NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_RX_SG;
- if (priv->_xdp_prog || veth_gro_requested(dev))
+ if (priv_peer->_xdp_prog || veth_gro_requested(peer))
val |= NETDEV_XDP_ACT_NDO_XMIT |
NETDEV_XDP_ACT_NDO_XMIT_SG;
xdp_set_features_flag(dev, val);
@@ -1504,19 +1505,23 @@ static int veth_set_features(struct net_device *dev,
{
netdev_features_t changed = features ^ dev->features;
struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer;
int err;
if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog)
return 0;
+ peer = rtnl_dereference(priv->peer);
if (features & NETIF_F_GRO) {
err = veth_napi_enable(dev);
if (err)
return err;
- xdp_features_set_redirect_target(dev, true);
+ if (peer)
+ xdp_features_set_redirect_target(peer, true);
} else {
- xdp_features_clear_redirect_target(dev);
+ if (peer)
+ xdp_features_clear_redirect_target(peer);
veth_napi_del(dev);
}
return 0;
@@ -1598,13 +1603,13 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
peer->max_mtu = max_mtu;
}
- xdp_features_set_redirect_target(dev, true);
+ xdp_features_set_redirect_target(peer, true);
}
if (old_prog) {
if (!prog) {
- if (!veth_gro_requested(dev))
- xdp_features_clear_redirect_target(dev);
+ if (peer && !veth_gro_requested(dev))
+ xdp_features_clear_redirect_target(peer);
if (dev->flags & IFF_UP)
veth_disable_xdp(dev);
@@ -1648,14 +1653,18 @@ static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
return 0;
}
-static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
+static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type)
{
struct veth_xdp_buff *_ctx = (void *)ctx;
+ struct sk_buff *skb = _ctx->skb;
- if (!_ctx->skb)
+ if (!skb)
return -ENODATA;
- *hash = skb_get_hash(_ctx->skb);
+ *hash = skb_get_hash(skb);
+ *rss_type = skb->l4_hash ? XDP_RSS_TYPE_L4_ANY : XDP_RSS_TYPE_NONE;
+
return 0;
}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e2560b6f7980..8d8038538fc4 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -815,8 +815,13 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
int page_off,
unsigned int *len)
{
- struct page *page = alloc_page(GFP_ATOMIC);
+ int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ struct page *page;
+
+ if (page_off + *len + tailroom > PAGE_SIZE)
+ return NULL;
+ page = alloc_page(GFP_ATOMIC);
if (!page)
return NULL;
@@ -824,7 +829,6 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
page_off += *len;
while (--*num_buf) {
- int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
unsigned int buflen;
void *buf;
int off;
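
The early bounds check added above encodes the invariant that a linearized XDP
buffer plus the skb_shared_info tailroom must fit within a single page; a
compact restatement of that check (sketch, not the driver code):

	static bool fits_one_page(unsigned int page_off, unsigned int len)
	{
		unsigned int tailroom =
			SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

		return page_off + len + tailroom <= PAGE_SIZE;
	}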
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index da488cbb0542..f2b76ee866a4 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1504,7 +1504,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
goto rcd_done;
}
- if (rxDataRingUsed) {
+ if (rxDataRingUsed && adapter->rxdataring_enabled) {
size_t sz;
BUG_ON(rcd->len > rq->data_ring.desc_size);
diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
index 86995e8dc913..a62ee05c5409 100644
--- a/drivers/net/wireless/ath/ath11k/mhi.c
+++ b/drivers/net/wireless/ath/ath11k/mhi.c
@@ -16,7 +16,7 @@
#include "pci.h"
#include "pcic.h"
-#define MHI_TIMEOUT_DEFAULT_MS 90000
+#define MHI_TIMEOUT_DEFAULT_MS 20000
#define RDDM_DUMP_SIZE 0x420000
static struct mhi_channel_config ath11k_mhi_channels_qca6390[] = {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index f5dc3bb11b64..ff710b0b5071 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -1001,15 +1001,34 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = {
MODULE_DEVICE_TABLE(sdio, brcmf_sdmmc_ids);
-static void brcmf_sdiod_acpi_set_power_manageable(struct device *dev,
- int val)
+static void brcmf_sdiod_acpi_save_power_manageable(struct brcmf_sdio_dev *sdiodev)
{
#if IS_ENABLED(CONFIG_ACPI)
struct acpi_device *adev;
- adev = ACPI_COMPANION(dev);
+ adev = ACPI_COMPANION(&sdiodev->func1->dev);
if (adev)
- adev->flags.power_manageable = 0;
+ sdiodev->func1_power_manageable = adev->flags.power_manageable;
+
+ adev = ACPI_COMPANION(&sdiodev->func2->dev);
+ if (adev)
+ sdiodev->func2_power_manageable = adev->flags.power_manageable;
+#endif
+}
+
+static void brcmf_sdiod_acpi_set_power_manageable(struct brcmf_sdio_dev *sdiodev,
+ int enable)
+{
+#if IS_ENABLED(CONFIG_ACPI)
+ struct acpi_device *adev;
+
+ adev = ACPI_COMPANION(&sdiodev->func1->dev);
+ if (adev)
+ adev->flags.power_manageable = enable ? sdiodev->func1_power_manageable : 0;
+
+ adev = ACPI_COMPANION(&sdiodev->func2->dev);
+ if (adev)
+ adev->flags.power_manageable = enable ? sdiodev->func2_power_manageable : 0;
#endif
}
@@ -1019,7 +1038,6 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func,
int err;
struct brcmf_sdio_dev *sdiodev;
struct brcmf_bus *bus_if;
- struct device *dev;
brcmf_dbg(SDIO, "Enter\n");
brcmf_dbg(SDIO, "Class=%x\n", func->class);
@@ -1027,14 +1045,9 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func,
brcmf_dbg(SDIO, "sdio device ID: 0x%04x\n", func->device);
brcmf_dbg(SDIO, "Function#: %d\n", func->num);
- dev = &func->dev;
-
/* Set MMC_QUIRK_LENIENT_FN0 for this card */
func->card->quirks |= MMC_QUIRK_LENIENT_FN0;
- /* prohibit ACPI power management for this device */
- brcmf_sdiod_acpi_set_power_manageable(dev, 0);
-
/* Consume func num 1 but dont do anything with it. */
if (func->num == 1)
return 0;
@@ -1066,6 +1079,7 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func,
dev_set_drvdata(&sdiodev->func1->dev, bus_if);
sdiodev->dev = &sdiodev->func1->dev;
+ brcmf_sdiod_acpi_save_power_manageable(sdiodev);
brcmf_sdiod_change_state(sdiodev, BRCMF_SDIOD_DOWN);
brcmf_dbg(SDIO, "F2 found, calling brcmf_sdiod_probe...\n");
@@ -1131,6 +1145,8 @@ void brcmf_sdio_wowl_config(struct device *dev, bool enabled)
if (sdiodev->settings->bus.sdio.oob_irq_supported ||
pm_caps & MMC_PM_WAKE_SDIO_IRQ) {
+ /* Stop ACPI from turning off the device when wowl is enabled */
+ brcmf_sdiod_acpi_set_power_manageable(sdiodev, !enabled);
sdiodev->wowl_enabled = enabled;
brcmf_dbg(SDIO, "Configuring WOWL, enabled=%d\n", enabled);
return;
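
The reworked helpers implement a save-then-toggle pattern: the ACPI
power_manageable flags are captured once at probe and later switched between 0
and the saved values as WoWLAN is enabled or disabled, instead of being
clobbered permanently. A generic sketch of the pattern (field names
hypothetical):

	struct pm_flag_sketch {
		bool saved;	/* captured once at probe time */
		bool *live;	/* the flag ACPI actually consults */
	};

	static void pm_flag_set(struct pm_flag_sketch *f, bool enable)
	{
		/* restore the original value, or force it off */
		*f->live = enable ? f->saved : false;
	}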
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
index b76d34d36bde..0d18ed15b403 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
@@ -188,6 +188,8 @@ struct brcmf_sdio_dev {
char nvram_name[BRCMF_FW_NAME_LEN];
char clm_name[BRCMF_FW_NAME_LEN];
bool wowl_enabled;
+ bool func1_power_manageable;
+ bool func2_power_manageable;
enum brcmf_sdiod_state state;
struct brcmf_sdiod_freezer *freezer;
const struct firmware *clm_fw;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
index ca50feb0b3a9..1b1358c6bb46 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
@@ -512,15 +512,15 @@ mt7603_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE))
return -EOPNOTSUPP;
- if (cmd == SET_KEY) {
- key->hw_key_idx = wcid->idx;
- wcid->hw_key_idx = idx;
- } else {
+ if (cmd != SET_KEY) {
if (idx == wcid->hw_key_idx)
wcid->hw_key_idx = -1;
- key = NULL;
+ return 0;
}
+
+ key->hw_key_idx = wcid->idx;
+ wcid->hw_key_idx = idx;
mt76_wcid_key_setup(&dev->mt76, wcid, key);
return mt7603_wtbl_set_key(dev, wcid->idx, key);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index d90378a30d15..da1d17b73a25 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1188,8 +1188,7 @@ EXPORT_SYMBOL_GPL(mt7615_mac_enable_rtscts);
static int
mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
struct ieee80211_key_conf *key,
- enum mt76_cipher_type cipher, u16 cipher_mask,
- enum set_key_cmd cmd)
+ enum mt76_cipher_type cipher, u16 cipher_mask)
{
u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx) + 30 * 4;
u8 data[32] = {};
@@ -1198,27 +1197,18 @@ mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
return -EINVAL;
mt76_rr_copy(dev, addr, data, sizeof(data));
- if (cmd == SET_KEY) {
- if (cipher == MT_CIPHER_TKIP) {
- /* Rx/Tx MIC keys are swapped */
- memcpy(data, key->key, 16);
- memcpy(data + 16, key->key + 24, 8);
- memcpy(data + 24, key->key + 16, 8);
- } else {
- if (cipher_mask == BIT(cipher))
- memcpy(data, key->key, key->keylen);
- else if (cipher != MT_CIPHER_BIP_CMAC_128)
- memcpy(data, key->key, 16);
- if (cipher == MT_CIPHER_BIP_CMAC_128)
- memcpy(data + 16, key->key, 16);
- }
+ if (cipher == MT_CIPHER_TKIP) {
+ /* Rx/Tx MIC keys are swapped */
+ memcpy(data, key->key, 16);
+ memcpy(data + 16, key->key + 24, 8);
+ memcpy(data + 24, key->key + 16, 8);
} else {
+ if (cipher_mask == BIT(cipher))
+ memcpy(data, key->key, key->keylen);
+ else if (cipher != MT_CIPHER_BIP_CMAC_128)
+ memcpy(data, key->key, 16);
if (cipher == MT_CIPHER_BIP_CMAC_128)
- memset(data + 16, 0, 16);
- else if (cipher_mask)
- memset(data, 0, 16);
- if (!cipher_mask)
- memset(data, 0, sizeof(data));
+ memcpy(data + 16, key->key, 16);
}
mt76_wr_copy(dev, addr, data, sizeof(data));
@@ -1229,7 +1219,7 @@ mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
static int
mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
enum mt76_cipher_type cipher, u16 cipher_mask,
- int keyidx, enum set_key_cmd cmd)
+ int keyidx)
{
u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx), w0, w1;
@@ -1248,9 +1238,7 @@ mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
else
w0 &= ~MT_WTBL_W0_RX_IK_VALID;
- if (cmd == SET_KEY &&
- (cipher != MT_CIPHER_BIP_CMAC_128 ||
- cipher_mask == BIT(cipher))) {
+ if (cipher != MT_CIPHER_BIP_CMAC_128 || cipher_mask == BIT(cipher)) {
w0 &= ~MT_WTBL_W0_KEY_IDX;
w0 |= FIELD_PREP(MT_WTBL_W0_KEY_IDX, keyidx);
}
@@ -1267,19 +1255,10 @@ mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
static void
mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev, struct mt76_wcid *wcid,
- enum mt76_cipher_type cipher, u16 cipher_mask,
- enum set_key_cmd cmd)
+ enum mt76_cipher_type cipher, u16 cipher_mask)
{
u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx);
- if (!cipher_mask) {
- mt76_clear(dev, addr + 2 * 4, MT_WTBL_W2_KEY_TYPE);
- return;
- }
-
- if (cmd != SET_KEY)
- return;
-
if (cipher == MT_CIPHER_BIP_CMAC_128 &&
cipher_mask & ~BIT(MT_CIPHER_BIP_CMAC_128))
return;
@@ -1290,8 +1269,7 @@ mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev, struct mt76_wcid *wcid,
int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
struct mt76_wcid *wcid,
- struct ieee80211_key_conf *key,
- enum set_key_cmd cmd)
+ struct ieee80211_key_conf *key)
{
enum mt76_cipher_type cipher;
u16 cipher_mask = wcid->cipher;
@@ -1301,19 +1279,14 @@ int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
if (cipher == MT_CIPHER_NONE)
return -EOPNOTSUPP;
- if (cmd == SET_KEY)
- cipher_mask |= BIT(cipher);
- else
- cipher_mask &= ~BIT(cipher);
-
- mt7615_mac_wtbl_update_cipher(dev, wcid, cipher, cipher_mask, cmd);
- err = mt7615_mac_wtbl_update_key(dev, wcid, key, cipher, cipher_mask,
- cmd);
+ cipher_mask |= BIT(cipher);
+ mt7615_mac_wtbl_update_cipher(dev, wcid, cipher, cipher_mask);
+ err = mt7615_mac_wtbl_update_key(dev, wcid, key, cipher, cipher_mask);
if (err < 0)
return err;
err = mt7615_mac_wtbl_update_pk(dev, wcid, cipher, cipher_mask,
- key->keyidx, cmd);
+ key->keyidx);
if (err < 0)
return err;
@@ -1324,13 +1297,12 @@ int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
int mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
struct mt76_wcid *wcid,
- struct ieee80211_key_conf *key,
- enum set_key_cmd cmd)
+ struct ieee80211_key_conf *key)
{
int err;
spin_lock_bh(&dev->mt76.lock);
- err = __mt7615_mac_wtbl_set_key(dev, wcid, key, cmd);
+ err = __mt7615_mac_wtbl_set_key(dev, wcid, key);
spin_unlock_bh(&dev->mt76.lock);
return err;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index ab4c1b4478aa..dadb13f2ca09 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -391,18 +391,17 @@ static int mt7615_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
if (cmd == SET_KEY)
*wcid_keyidx = idx;
- else if (idx == *wcid_keyidx)
- *wcid_keyidx = -1;
- else
+ else {
+ if (idx == *wcid_keyidx)
+ *wcid_keyidx = -1;
goto out;
+ }
- mt76_wcid_key_setup(&dev->mt76, wcid,
- cmd == SET_KEY ? key : NULL);
-
+ mt76_wcid_key_setup(&dev->mt76, wcid, key);
if (mt76_is_mmio(&dev->mt76))
- err = mt7615_mac_wtbl_set_key(dev, wcid, key, cmd);
+ err = mt7615_mac_wtbl_set_key(dev, wcid, key);
else
- err = __mt7615_mac_wtbl_set_key(dev, wcid, key, cmd);
+ err = __mt7615_mac_wtbl_set_key(dev, wcid, key);
out:
mt7615_mutex_release(dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 0381c53bc96a..582d1b5b7cb3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -482,11 +482,9 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi,
void mt7615_mac_set_timing(struct mt7615_phy *phy);
int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
struct mt76_wcid *wcid,
- struct ieee80211_key_conf *key,
- enum set_key_cmd cmd);
+ struct ieee80211_key_conf *key);
int mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
- struct ieee80211_key_conf *key,
- enum set_key_cmd cmd);
+ struct ieee80211_key_conf *key);
void mt7615_mac_reset_work(struct work_struct *work);
u32 mt7615_mac_get_sta_tid_sn(struct mt7615_dev *dev, int wcid, u8 tid);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
index 7451a63206a5..dcbb5c605dfe 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
@@ -454,20 +454,20 @@ int mt76x02_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
msta = sta ? (struct mt76x02_sta *)sta->drv_priv : NULL;
wcid = msta ? &msta->wcid : &mvif->group_wcid;
- if (cmd == SET_KEY) {
- key->hw_key_idx = wcid->idx;
- wcid->hw_key_idx = idx;
- if (key->flags & IEEE80211_KEY_FLAG_RX_MGMT) {
- key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX;
- wcid->sw_iv = true;
- }
- } else {
+ if (cmd != SET_KEY) {
if (idx == wcid->hw_key_idx) {
wcid->hw_key_idx = -1;
wcid->sw_iv = false;
}
- key = NULL;
+ return 0;
+ }
+
+ key->hw_key_idx = wcid->idx;
+ wcid->hw_key_idx = idx;
+ if (key->flags & IEEE80211_KEY_FLAG_RX_MGMT) {
+ key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX;
+ wcid->sw_iv = true;
}
mt76_wcid_key_setup(&dev->mt76, wcid, key);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 2ada2806de66..1b361199c061 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -409,16 +409,15 @@ static int mt7915_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
mt7915_mcu_add_bss_info(phy, vif, true);
}
- if (cmd == SET_KEY)
+ if (cmd == SET_KEY) {
*wcid_keyidx = idx;
- else if (idx == *wcid_keyidx)
- *wcid_keyidx = -1;
- else
+ } else {
+ if (idx == *wcid_keyidx)
+ *wcid_keyidx = -1;
goto out;
+ }
- mt76_wcid_key_setup(&dev->mt76, wcid,
- cmd == SET_KEY ? key : NULL);
-
+ mt76_wcid_key_setup(&dev->mt76, wcid, key);
err = mt76_connac_mcu_add_key(&dev->mt76, vif, &msta->bip,
key, MCU_EXT_CMD(STA_REC_UPDATE),
&msta->wcid, cmd);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index e929f6eb65ce..bf1da9fddfab 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -176,12 +176,12 @@ mt7921_mac_init_band(struct mt7921_dev *dev, u8 band)
static u8
mt7921_get_offload_capability(struct device *dev, const char *fw_wm)
{
- struct mt7921_fw_features *features = NULL;
const struct mt76_connac2_fw_trailer *hdr;
struct mt7921_realease_info *rel_info;
const struct firmware *fw;
int ret, i, offset = 0;
const u8 *data, *end;
+ u8 offload_caps = 0;
ret = request_firmware(&fw, fw_wm, dev);
if (ret)
@@ -213,7 +213,10 @@ mt7921_get_offload_capability(struct device *dev, const char *fw_wm)
data += sizeof(*rel_info);
if (rel_info->tag == MT7921_FW_TAG_FEATURE) {
+ struct mt7921_fw_features *features;
+
features = (struct mt7921_fw_features *)data;
+ offload_caps = features->data;
break;
}
@@ -223,7 +226,7 @@ mt7921_get_offload_capability(struct device *dev, const char *fw_wm)
out:
release_firmware(fw);
- return features ? features->data : 0;
+ return offload_caps;
}
struct ieee80211_ops *
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 0c9a472bc81a..3b6adb29cbef 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -548,16 +548,15 @@ static int mt7921_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
mt7921_mutex_acquire(dev);
- if (cmd == SET_KEY)
+ if (cmd == SET_KEY) {
*wcid_keyidx = idx;
- else if (idx == *wcid_keyidx)
- *wcid_keyidx = -1;
- else
+ } else {
+ if (idx == *wcid_keyidx)
+ *wcid_keyidx = -1;
goto out;
+ }
- mt76_wcid_key_setup(&dev->mt76, wcid,
- cmd == SET_KEY ? key : NULL);
-
+ mt76_wcid_key_setup(&dev->mt76, wcid, key);
err = mt76_connac_mcu_add_key(&dev->mt76, vif, &msta->bip,
key, MCU_UNI_CMD(STA_REC_UPDATE),
&msta->wcid, cmd);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index 1c727870bbdb..ddb1fa4ee01d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -20,7 +20,7 @@ static const struct pci_device_id mt7921_pci_device_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x0608),
.driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM },
{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x0616),
- .driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM },
+ .driver_data = (kernel_ulong_t)MT7922_FIRMWARE_WM },
{ },
};
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c
index 0975774fe244..f306e9c50ea3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c
@@ -356,16 +356,15 @@ static int mt7996_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
mt7996_mcu_add_bss_info(phy, vif, true);
}
- if (cmd == SET_KEY)
+ if (cmd == SET_KEY) {
*wcid_keyidx = idx;
- else if (idx == *wcid_keyidx)
- *wcid_keyidx = -1;
- else
+ } else {
+ if (idx == *wcid_keyidx)
+ *wcid_keyidx = -1;
goto out;
+ }
- mt76_wcid_key_setup(&dev->mt76, wcid,
- cmd == SET_KEY ? key : NULL);
-
+ mt76_wcid_key_setup(&dev->mt76, wcid, key);
err = mt7996_mcu_add_key(&dev->mt76, vif, &msta->bip,
key, MCU_WMWA_UNI_CMD(STA_REC_UPDATE),
&msta->wcid, cmd);
diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
index 5bf5a93937c9..04517bd3325a 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
@@ -295,7 +295,7 @@ static int ipc_pcie_probe(struct pci_dev *pci,
ret = dma_set_mask(ipc_pcie->dev, DMA_BIT_MASK(64));
if (ret) {
dev_err(ipc_pcie->dev, "Could not set PCI DMA mask: %d", ret);
- return ret;
+ goto set_mask_fail;
}
ipc_pcie_config_aspm(ipc_pcie);
@@ -323,6 +323,7 @@ static int ipc_pcie_probe(struct pci_dev *pci,
imem_init_fail:
ipc_pcie_resources_release(ipc_pcie);
resources_req_fail:
+set_mask_fail:
pci_disable_device(pci);
pci_enable_fail:
kfree(ipc_pcie);
diff --git a/drivers/net/wwan/rpmsg_wwan_ctrl.c b/drivers/net/wwan/rpmsg_wwan_ctrl.c
index 06f4b02f1552..86b60aadfa11 100644
--- a/drivers/net/wwan/rpmsg_wwan_ctrl.c
+++ b/drivers/net/wwan/rpmsg_wwan_ctrl.c
@@ -149,6 +149,7 @@ static const struct rpmsg_device_id rpmsg_wwan_ctrl_id_table[] = {
/* RPMSG channels for Qualcomm SoCs with integrated modem */
{ .name = "DATA5_CNTL", .driver_data = WWAN_PORT_QMI },
{ .name = "DATA4", .driver_data = WWAN_PORT_AT },
+ { .name = "DATA1", .driver_data = WWAN_PORT_AT },
{},
};
MODULE_DEVICE_TABLE(rpmsg, rpmsg_wwan_ctrl_id_table);
diff --git a/drivers/net/wwan/t7xx/Makefile b/drivers/net/wwan/t7xx/Makefile
index 268ff9e87e5b..2652cd00504e 100644
--- a/drivers/net/wwan/t7xx/Makefile
+++ b/drivers/net/wwan/t7xx/Makefile
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y += -Werror
-
obj-${CONFIG_MTK_T7XX} := mtk_t7xx.o
mtk_t7xx-y:= t7xx_pci.o \
t7xx_pcie_mac.o \
diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 2e1c01cf00a9..aa54fa6d5f90 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -492,6 +492,7 @@ struct wwan_port *wwan_create_port(struct device *parent,
if (err)
goto error_put_device;
+ dev_info(&wwandev->dev, "port %s attached\n", dev_name(&port->dev));
return port;
error_put_device:
@@ -517,6 +518,8 @@ void wwan_remove_port(struct wwan_port *port)
skb_queue_purge(&port->rxq);
dev_set_drvdata(&port->dev, NULL);
+
+ dev_info(&wwandev->dev, "port %s disconnected\n", dev_name(&port->dev));
device_unregister(&port->dev);
/* Release related wwan device */
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 53ef028596c6..d6a9bac91a4c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1674,6 +1674,9 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
struct request_queue *queue = disk->queue;
u32 size = queue_logical_block_size(queue);
+ if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
+ ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
+
if (ctrl->max_discard_sectors == 0) {
blk_queue_max_discard_sectors(queue, 0);
return;
@@ -1688,9 +1691,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
if (queue->limits.max_discard_sectors)
return;
- if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
- ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
-
blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b615906263f3..cd7873de3121 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3441,6 +3441,9 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(0x1d97, 0x1d97), /* Lexar NM620 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */
+ .driver_data = NVME_QUIRK_BOGUS_NID |
+ NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x10ec, 0x5763), /* TEAMGROUP T-FORCE CARDEA ZERO Z330 SSD */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 42c0598c31f2..49c9e7bc9116 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1620,22 +1620,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
if (ret)
goto err_init_connect;
- queue->rd_enabled = true;
set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
- nvme_tcp_init_recv_ctx(queue);
-
- write_lock_bh(&queue->sock->sk->sk_callback_lock);
- queue->sock->sk->sk_user_data = queue;
- queue->state_change = queue->sock->sk->sk_state_change;
- queue->data_ready = queue->sock->sk->sk_data_ready;
- queue->write_space = queue->sock->sk->sk_write_space;
- queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
- queue->sock->sk->sk_state_change = nvme_tcp_state_change;
- queue->sock->sk->sk_write_space = nvme_tcp_write_space;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- queue->sock->sk->sk_ll_usec = 1;
-#endif
- write_unlock_bh(&queue->sock->sk->sk_callback_lock);
return 0;
@@ -1655,7 +1640,7 @@ err_destroy_mutex:
return ret;
}
-static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
+static void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue)
{
struct socket *sock = queue->sock;
@@ -1670,7 +1655,7 @@ static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
{
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
- nvme_tcp_restore_sock_calls(queue);
+ nvme_tcp_restore_sock_ops(queue);
cancel_work_sync(&queue->io_work);
}
@@ -1688,21 +1673,42 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
mutex_unlock(&queue->queue_lock);
}
+static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue)
+{
+ write_lock_bh(&queue->sock->sk->sk_callback_lock);
+ queue->sock->sk->sk_user_data = queue;
+ queue->state_change = queue->sock->sk->sk_state_change;
+ queue->data_ready = queue->sock->sk->sk_data_ready;
+ queue->write_space = queue->sock->sk->sk_write_space;
+ queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
+ queue->sock->sk->sk_state_change = nvme_tcp_state_change;
+ queue->sock->sk->sk_write_space = nvme_tcp_write_space;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ queue->sock->sk->sk_ll_usec = 1;
+#endif
+ write_unlock_bh(&queue->sock->sk->sk_callback_lock);
+}
+
static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ struct nvme_tcp_queue *queue = &ctrl->queues[idx];
int ret;
+ queue->rd_enabled = true;
+ nvme_tcp_init_recv_ctx(queue);
+ nvme_tcp_setup_sock_ops(queue);
+
if (idx)
ret = nvmf_connect_io_queue(nctrl, idx);
else
ret = nvmf_connect_admin_queue(nctrl);
if (!ret) {
- set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
+ set_bit(NVME_TCP_Q_LIVE, &queue->flags);
} else {
- if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
- __nvme_tcp_stop_queue(&ctrl->queues[idx]);
+ if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
+ __nvme_tcp_stop_queue(queue);
dev_err(nctrl->device,
"failed to connect queue: %d ret=%d\n", idx, ret);
}
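
Moving the callback setup into queue start pairs it with
nvme_tcp_restore_sock_ops() on the stop path. A minimal sketch of the swap
itself, saving the previous handler under sk_callback_lock (struct and names
illustrative):

	struct queue_sketch {
		struct socket *sock;
		void (*saved_data_ready)(struct sock *sk);
	};

	static void install_data_ready(struct queue_sketch *q,
				       void (*cb)(struct sock *sk))
	{
		write_lock_bh(&q->sock->sk->sk_callback_lock);
		q->saved_data_ready = q->sock->sk->sk_data_ready;
		q->sock->sk->sk_data_ready = cb;
		q->sock->sk->sk_user_data = q;
		write_unlock_bh(&q->sock->sk->sk_callback_lock);
	}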
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 07d93753b12f..e311d406b170 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -226,6 +226,7 @@ static void __of_attach_node(struct device_node *np)
np->sibling = np->parent->child;
np->parent->child = np;
of_node_clear_flag(np, OF_DETACHED);
+ np->fwnode.flags |= FWNODE_FLAG_NOT_DEVICE;
}
/**
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index b2bd2e783445..78ae84187449 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -737,6 +737,11 @@ static int of_platform_notify(struct notifier_block *nb,
if (of_node_check_flag(rd->dn, OF_POPULATED))
return NOTIFY_OK;
+ /*
+ * Clear the flag before adding the device so that fw_devlink
+ * doesn't skip adding consumers to this device.
+ */
+ rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE;
/* pdev_parent may be NULL when no bus platform device */
pdev_parent = of_find_device_by_node(rd->dn->parent);
pdev = of_platform_device_create(rd->dn, NULL,
diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index 53a16b8b6ac2..8e33e6e59e68 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -1001,11 +1001,6 @@ void dw_pcie_setup(struct dw_pcie *pci)
dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
}
- val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL);
- val &= ~PORT_LINK_FAST_LINK_MODE;
- val |= PORT_LINK_DLL_LINK_EN;
- dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val);
-
if (dw_pcie_cap_is(pci, CDM_CHECK)) {
val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS |
@@ -1013,6 +1008,11 @@ void dw_pcie_setup(struct dw_pcie *pci)
dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val);
}
+ val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL);
+ val &= ~PORT_LINK_FAST_LINK_MODE;
+ val |= PORT_LINK_DLL_LINK_EN;
+ dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val);
+
if (!pci->num_lanes) {
dev_dbg(pci->dev, "Using h/w default number of lanes\n");
return;
diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
index 66d9ab288646..e5e9b287b976 100644
--- a/drivers/pci/doe.c
+++ b/drivers/pci/doe.c
@@ -128,7 +128,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
return -EIO;
/* Length is 2 DW of header + length of payload in DW */
- length = 2 + task->request_pl_sz / sizeof(u32);
+ length = 2 + task->request_pl_sz / sizeof(__le32);
if (length > PCI_DOE_MAX_LENGTH)
return -EIO;
if (length == PCI_DOE_MAX_LENGTH)
@@ -141,9 +141,9 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH,
length));
- for (i = 0; i < task->request_pl_sz / sizeof(u32); i++)
+ for (i = 0; i < task->request_pl_sz / sizeof(__le32); i++)
pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
- task->request_pl[i]);
+ le32_to_cpu(task->request_pl[i]));
pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_GO);
@@ -195,11 +195,11 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
/* First 2 dwords have already been read */
length -= 2;
- payload_length = min(length, task->response_pl_sz / sizeof(u32));
+ payload_length = min(length, task->response_pl_sz / sizeof(__le32));
/* Read the rest of the response payload */
for (i = 0; i < payload_length; i++) {
- pci_read_config_dword(pdev, offset + PCI_DOE_READ,
- &task->response_pl[i]);
+ pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
+ task->response_pl[i] = cpu_to_le32(val);
/* Prior to the last ack, ensure Data Object Ready */
if (i == (payload_length - 1) && !pci_doe_data_obj_ready(doe_mb))
return -EIO;
@@ -217,13 +217,14 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
return -EIO;
- return min(length, task->response_pl_sz / sizeof(u32)) * sizeof(u32);
+ return min(length, task->response_pl_sz / sizeof(__le32)) * sizeof(__le32);
}
static void signal_task_complete(struct pci_doe_task *task, int rv)
{
task->rv = rv;
task->complete(task);
+ destroy_work_on_stack(&task->work);
}
static void signal_task_abort(struct pci_doe_task *task, int rv)
@@ -317,14 +318,16 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
{
u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX,
*index);
+ __le32 request_pl_le = cpu_to_le32(request_pl);
+ __le32 response_pl_le;
u32 response_pl;
DECLARE_COMPLETION_ONSTACK(c);
struct pci_doe_task task = {
.prot.vid = PCI_VENDOR_ID_PCI_SIG,
.prot.type = PCI_DOE_PROTOCOL_DISCOVERY,
- .request_pl = &request_pl,
+ .request_pl = &request_pl_le,
.request_pl_sz = sizeof(request_pl),
- .response_pl = &response_pl,
+ .response_pl = &response_pl_le,
.response_pl_sz = sizeof(response_pl),
.complete = pci_doe_task_complete,
.private = &c,
@@ -340,6 +343,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
if (task.rv != sizeof(response_pl))
return -EIO;
+ response_pl = le32_to_cpu(response_pl_le);
*vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl);
*protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL,
response_pl);
@@ -520,6 +524,8 @@ EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
* task->complete will be called when the state machine is done processing this
* task.
*
+ * @task must be allocated on the stack.
+ *
* Excess data will be discarded.
*
* RETURNS: 0 when task has been successfully queued, -ERRNO on error
@@ -533,15 +539,15 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
* DOE requests must be a whole number of DW and the response needs to
* be big enough for at least 1 DW
*/
- if (task->request_pl_sz % sizeof(u32) ||
- task->response_pl_sz < sizeof(u32))
+ if (task->request_pl_sz % sizeof(__le32) ||
+ task->response_pl_sz < sizeof(__le32))
return -EINVAL;
if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
return -EIO;
task->doe_mb = doe_mb;
- INIT_WORK(&task->work, doe_statemachine_work);
+ INIT_WORK_ONSTACK(&task->work, doe_statemachine_work);
queue_work(doe_mb->work_queue, &task->work);
return 0;
}
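
After this change the DOE payload type is __le32 end to end; CPU-order values
cross the config-space boundary one dword at a time. A sketch of that
conversion rule (helper name hypothetical):

	static void payload_to_wire(const u32 *cpu, __le32 *wire, size_t ndw)
	{
		size_t i;

		for (i = 0; i < ndw; i++)
			wire[i] = cpu_to_le32(cpu[i]);
	}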
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 0145aef1b930..22d39e12b236 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -157,8 +157,6 @@ void pci_remove_root_bus(struct pci_bus *bus)
list_for_each_entry_safe(child, tmp,
&bus->devices, bus_list)
pci_remove_bus_device(child);
- pci_remove_bus(bus);
- host_bridge->bus = NULL;
#ifdef CONFIG_PCI_DOMAINS_GENERIC
/* Release domain_nr if it was dynamically allocated */
@@ -166,6 +164,9 @@ void pci_remove_root_bus(struct pci_bus *bus)
pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
#endif
+ pci_remove_bus(bus);
+ host_bridge->bus = NULL;
+
/* remove the host bridge */
device_del(&host_bridge->dev);
}
diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
index a78fdb15e26c..8b643888d503 100644
--- a/drivers/perf/amlogic/meson_g12_ddr_pmu.c
+++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
@@ -21,23 +21,23 @@
#define DMC_QOS_IRQ BIT(30)
/* DMC bandwidth monitor register address offset */
-#define DMC_MON_G12_CTRL0 (0x20 << 2)
-#define DMC_MON_G12_CTRL1 (0x21 << 2)
-#define DMC_MON_G12_CTRL2 (0x22 << 2)
-#define DMC_MON_G12_CTRL3 (0x23 << 2)
-#define DMC_MON_G12_CTRL4 (0x24 << 2)
-#define DMC_MON_G12_CTRL5 (0x25 << 2)
-#define DMC_MON_G12_CTRL6 (0x26 << 2)
-#define DMC_MON_G12_CTRL7 (0x27 << 2)
-#define DMC_MON_G12_CTRL8 (0x28 << 2)
-
-#define DMC_MON_G12_ALL_REQ_CNT (0x29 << 2)
-#define DMC_MON_G12_ALL_GRANT_CNT (0x2a << 2)
-#define DMC_MON_G12_ONE_GRANT_CNT (0x2b << 2)
-#define DMC_MON_G12_SEC_GRANT_CNT (0x2c << 2)
-#define DMC_MON_G12_THD_GRANT_CNT (0x2d << 2)
-#define DMC_MON_G12_FOR_GRANT_CNT (0x2e << 2)
-#define DMC_MON_G12_TIMER (0x2f << 2)
+#define DMC_MON_G12_CTRL0 (0x0 << 2)
+#define DMC_MON_G12_CTRL1 (0x1 << 2)
+#define DMC_MON_G12_CTRL2 (0x2 << 2)
+#define DMC_MON_G12_CTRL3 (0x3 << 2)
+#define DMC_MON_G12_CTRL4 (0x4 << 2)
+#define DMC_MON_G12_CTRL5 (0x5 << 2)
+#define DMC_MON_G12_CTRL6 (0x6 << 2)
+#define DMC_MON_G12_CTRL7 (0x7 << 2)
+#define DMC_MON_G12_CTRL8 (0x8 << 2)
+
+#define DMC_MON_G12_ALL_REQ_CNT (0x9 << 2)
+#define DMC_MON_G12_ALL_GRANT_CNT (0xa << 2)
+#define DMC_MON_G12_ONE_GRANT_CNT (0xb << 2)
+#define DMC_MON_G12_SEC_GRANT_CNT (0xc << 2)
+#define DMC_MON_G12_THD_GRANT_CNT (0xd << 2)
+#define DMC_MON_G12_FOR_GRANT_CNT (0xe << 2)
+#define DMC_MON_G12_TIMER (0xf << 2)
/* Each bit represents an AXI line */
PMU_FORMAT_ATTR(event, "config:0-7");
diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig
index f20c28334bcb..a71874fed3d6 100644
--- a/drivers/pinctrl/mediatek/Kconfig
+++ b/drivers/pinctrl/mediatek/Kconfig
@@ -45,35 +45,35 @@ config PINCTRL_MTK_PARIS
# For ARMv7 SoCs
config PINCTRL_MT2701
- bool "Mediatek MT2701 pin control"
+ bool "MediaTek MT2701 pin control"
depends on MACH_MT7623 || MACH_MT2701 || COMPILE_TEST
depends on OF
default MACH_MT2701
select PINCTRL_MTK
config PINCTRL_MT7623
- bool "Mediatek MT7623 pin control with generic binding"
+ bool "MediaTek MT7623 pin control with generic binding"
depends on MACH_MT7623 || COMPILE_TEST
depends on OF
default MACH_MT7623
select PINCTRL_MTK_MOORE
config PINCTRL_MT7629
- bool "Mediatek MT7629 pin control"
+ bool "MediaTek MT7629 pin control"
depends on MACH_MT7629 || COMPILE_TEST
depends on OF
default MACH_MT7629
select PINCTRL_MTK_MOORE
config PINCTRL_MT8135
- bool "Mediatek MT8135 pin control"
+ bool "MediaTek MT8135 pin control"
depends on MACH_MT8135 || COMPILE_TEST
depends on OF
default MACH_MT8135
select PINCTRL_MTK
config PINCTRL_MT8127
- bool "Mediatek MT8127 pin control"
+ bool "MediaTek MT8127 pin control"
depends on MACH_MT8127 || COMPILE_TEST
depends on OF
default MACH_MT8127
@@ -88,33 +88,33 @@ config PINCTRL_MT2712
select PINCTRL_MTK
config PINCTRL_MT6765
- tristate "Mediatek MT6765 pin control"
+ tristate "MediaTek MT6765 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
select PINCTRL_MTK_PARIS
config PINCTRL_MT6779
- tristate "Mediatek MT6779 pin control"
+ tristate "MediaTek MT6779 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
select PINCTRL_MTK_PARIS
help
Say yes here to support pin controller and gpio driver
- on Mediatek MT6779 SoC.
+ on MediaTek MT6779 SoC.
On MTK platforms, we support virtual gpio and use it to
map a specific eint which doesn't have a real gpio pin.
config PINCTRL_MT6795
- bool "Mediatek MT6795 pin control"
+ bool "MediaTek MT6795 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
select PINCTRL_MTK_PARIS
config PINCTRL_MT6797
- bool "Mediatek MT6797 pin control"
+ bool "MediaTek MT6797 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
@@ -128,40 +128,42 @@ config PINCTRL_MT7622
select PINCTRL_MTK_MOORE
config PINCTRL_MT7981
- bool "Mediatek MT7981 pin control"
+ bool "MediaTek MT7981 pin control"
depends on OF
+ depends on ARM64 || COMPILE_TEST
+ default ARM64 && ARCH_MEDIATEK
select PINCTRL_MTK_MOORE
config PINCTRL_MT7986
- bool "Mediatek MT7986 pin control"
+ bool "MediaTek MT7986 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
select PINCTRL_MTK_MOORE
config PINCTRL_MT8167
- bool "Mediatek MT8167 pin control"
+ bool "MediaTek MT8167 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
select PINCTRL_MTK
config PINCTRL_MT8173
- bool "Mediatek MT8173 pin control"
+ bool "MediaTek MT8173 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
select PINCTRL_MTK
config PINCTRL_MT8183
- bool "Mediatek MT8183 pin control"
+ bool "MediaTek MT8183 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
select PINCTRL_MTK_PARIS
config PINCTRL_MT8186
- bool "Mediatek MT8186 pin control"
+ bool "MediaTek MT8186 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
@@ -180,28 +182,28 @@ config PINCTRL_MT8188
map specific eint which doesn't have real gpio pin.
config PINCTRL_MT8192
- bool "Mediatek MT8192 pin control"
+ bool "MediaTek MT8192 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
select PINCTRL_MTK_PARIS
config PINCTRL_MT8195
- bool "Mediatek MT8195 pin control"
+ bool "MediaTek MT8195 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
select PINCTRL_MTK_PARIS
config PINCTRL_MT8365
- bool "Mediatek MT8365 pin control"
+ bool "MediaTek MT8365 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
select PINCTRL_MTK
config PINCTRL_MT8516
- bool "Mediatek MT8516 pin control"
+ bool "MediaTek MT8516 pin control"
depends on OF
depends on ARM64 || COMPILE_TEST
default ARM64 && ARCH_MEDIATEK
@@ -209,7 +211,7 @@ config PINCTRL_MT8516
# For PMIC
config PINCTRL_MT6397
- bool "Mediatek MT6397 pin control"
+ bool "MediaTek MT6397 pin control"
depends on MFD_MT6397 || COMPILE_TEST
depends on OF
default MFD_MT6397
diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c
index 373eed8bc4be..c775d239444a 100644
--- a/drivers/pinctrl/pinctrl-at91-pio4.c
+++ b/drivers/pinctrl/pinctrl-at91-pio4.c
@@ -1206,7 +1206,6 @@ static int atmel_pinctrl_probe(struct platform_device *pdev)
dev_err(dev, "can't add the irq domain\n");
return -ENODEV;
}
- atmel_pioctrl->irq_domain->name = "atmel gpio";
for (i = 0; i < atmel_pioctrl->npins; i++) {
int irq = irq_create_mapping(atmel_pioctrl->irq_domain, i);
diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c
index 29e4a6282a64..1dcbd0937ef5 100644
--- a/drivers/pinctrl/pinctrl-ocelot.c
+++ b/drivers/pinctrl/pinctrl-ocelot.c
@@ -1204,7 +1204,7 @@ static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev,
regmap_update_bits(info->map, REG_ALT(0, info, pin->pin),
BIT(p), f << p);
regmap_update_bits(info->map, REG_ALT(1, info, pin->pin),
- BIT(p), f << (p - 1));
+ BIT(p), (f >> 1) << p);
return 0;
}
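
The ocelot fix programs the two ALT registers as a bit-sliced 2-bit function
number: bit p of ALT(0) holds f's bit 0, and bit p of ALT(1) holds f's bit 1.
Written out in isolation (the BIT(p) mask is applied by regmap_update_bits):

	u32 alt0_val = (f & 1) << p;		/* f bit 0 -> ALT(0) bit p */
	u32 alt1_val = ((f >> 1) & 1) << p;	/* f bit 1 -> ALT(1) bit p */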
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index cb33a23ab0c1..04ace4c7bd58 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -1330,7 +1330,7 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, struct fwnode
if (fwnode_property_read_u32(fwnode, "st,bank-ioport", &bank_ioport_nr))
bank_ioport_nr = bank_nr;
- bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK;
+ bank->gpio_chip.base = -1;
bank->gpio_chip.ngpio = npins;
bank->gpio_chip.fwnode = fwnode;
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index cb15acdf14a3..e2c9a68d12df 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -464,7 +464,8 @@ static const struct dmi_system_id asus_quirks[] = {
.ident = "ASUS ROG FLOW X13",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "GV301Q"),
+ /* Match GV301** */
+ DMI_MATCH(DMI_PRODUCT_NAME, "GV301"),
},
.driver_data = &quirk_asus_tablet_mode,
},
diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index 4dd39ab6ecfa..2a426040f749 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -151,6 +151,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550I AORUS PRO AX"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B650 AORUS ELITE AX"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660 GAMING X DDR4"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660I AORUS PRO DDR4"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"),
@@ -160,6 +161,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570S AORUS ELITE"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z690M AORUS ELITE AX DDR4"),
{ }
};
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 0eb5bfdd823a..959ec3c5f376 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -1170,7 +1170,6 @@ static const struct key_entry ideapad_keymap[] = {
{ KE_KEY, 65, { KEY_PROG4 } },
{ KE_KEY, 66, { KEY_TOUCHPAD_OFF } },
{ KE_KEY, 67, { KEY_TOUCHPAD_ON } },
- { KE_KEY, 68, { KEY_TOUCHPAD_TOGGLE } },
{ KE_KEY, 128, { KEY_ESC } },
/*
@@ -1526,18 +1525,16 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_
if (priv->features.ctrl_ps2_aux_port)
i8042_command(&param, value ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE);
- if (send_events) {
- /*
- * On older models the EC controls the touchpad and toggles it
- * on/off itself, in this case we report KEY_TOUCHPAD_ON/_OFF.
- * If the EC did not toggle, report KEY_TOUCHPAD_TOGGLE.
- */
- if (value != priv->r_touchpad_val) {
- ideapad_input_report(priv, value ? 67 : 66);
- sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad");
- } else {
- ideapad_input_report(priv, 68);
- }
+ /*
+ * On older models the EC controls the touchpad and toggles it on/off
+ * itself; in this case we report KEY_TOUCHPAD_ON/_OFF. Some models do
+ * an acpi-notify with VPC bit 5 set on resume, so this function gets
+ * called with send_events=true on every resume. Therefore if the EC did
+ * not toggle, do nothing to avoid sending spurious KEY_TOUCHPAD_TOGGLE.
+ */
+ if (send_events && value != priv->r_touchpad_val) {
+ ideapad_input_report(priv, value ? 67 : 66);
+ sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad");
}
priv->r_touchpad_val = value;
diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
index 3a15d32d7644..b9591969e0fa 100644
--- a/drivers/platform/x86/intel/pmc/core.c
+++ b/drivers/platform/x86/intel/pmc/core.c
@@ -66,7 +66,18 @@ static inline void pmc_core_reg_write(struct pmc_dev *pmcdev, int reg_offset,
static inline u64 pmc_core_adjust_slp_s0_step(struct pmc_dev *pmcdev, u32 value)
{
- return (u64)value * pmcdev->map->slp_s0_res_counter_step;
+ /*
+ * ADL PCH does not have the SLP_S0 counter and LPM Residency counters are
+ * used as a workaround, which uses a 30.5 usec tick. All other client
+ * platforms have the legacy SLP_S0 residency counter that uses the 122
+ * usec tick.
+ */
+ const int lpm_adj_x2 = pmcdev->map->lpm_res_counter_step_x2;
+
+ if (pmcdev->map == &adl_reg_map)
+ return (u64)value * GET_X2_COUNTER((u64)lpm_adj_x2);
+ else
+ return (u64)value * pmcdev->map->slp_s0_res_counter_step;
}
static int set_etr3(struct pmc_dev *pmcdev)
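
A worked example of the two granularities described in the comment above: with
the legacy 122 usec tick, a raw count of 1000 corresponds to 122,000 usec,
while the ADL LPM counters step at 30.5 usec, carried as an x2 value (61) to
stay in integer math. Values below are illustrative only:

	u64 raw = 1000;
	u64 legacy_us = raw * 122;	/* 122,000 usec */
	u64 adl_us = (raw * 61) / 2;	/* 30,500 usec */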
diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index c816646eb661..78dc82bda4dd 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -920,7 +920,7 @@ static ssize_t display_name_show(struct kobject *kobj, struct kobj_attribute *at
static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
- char *item, *value;
+ char *item, *value, *p;
int ret;
ret = tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID);
@@ -930,10 +930,15 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a
/* validate and split from `item,value` -> `value` */
value = strpbrk(item, ",");
if (!value || value == item || !strlen(value + 1))
- return -EINVAL;
-
- ret = sysfs_emit(buf, "%s\n", value + 1);
+ ret = -EINVAL;
+ else {
+ /* On Workstations remove the Options part after the value */
+ p = strchrnul(value, ';');
+ *p = '\0';
+ ret = sysfs_emit(buf, "%s\n", value + 1);
+ }
kfree(item);
+
return ret;
}
@@ -1457,10 +1462,10 @@ static int tlmi_analyze(void)
* name string.
* Try and pull that out if it's available.
*/
- char *item, *optstart, *optend;
+ char *optitem, *optstart, *optend;
- if (!tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID)) {
- optstart = strstr(item, "[Optional:");
+ if (!tlmi_setting(setting->index, &optitem, LENOVO_BIOS_SETTING_GUID)) {
+ optstart = strstr(optitem, "[Optional:");
if (optstart) {
optstart += strlen("[Optional:");
optend = strstr(optstart, "]");
@@ -1469,6 +1474,7 @@ static int tlmi_analyze(void)
kstrndup(optstart, optend - optstart,
GFP_KERNEL);
}
+ kfree(optitem);
}
}
/*
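
The reworked show path parses strings of the form "Item,Value;Options": split
at the first ',', then truncate at ';' so Workstation option lists do not leak
into the reported value. A condensed sketch of that parse (function name
hypothetical):

	static const char *extract_value(char *item)
	{
		char *value = strpbrk(item, ",");
		char *p;

		if (!value || value == item || !strlen(value + 1))
			return NULL;		/* malformed setting */

		p = strchrnul(value, ';');	/* drop ";Options..." */
		*p = '\0';
		return value + 1;
	}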
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 32c10457399e..7191ff2625b1 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -4479,6 +4479,14 @@ static const struct dmi_system_id fwbug_list[] __initconst = {
}
},
{
+ .ident = "T14s Gen1 AMD",
+ .driver_data = &quirk_s2idle_bug,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "20UJ"),
+ }
+ },
+ {
.ident = "P14s Gen1 AMD",
.driver_data = &quirk_s2idle_bug,
.matches = {
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index e01147f66e15..474725714a05 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -115,7 +115,14 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
}
if (pwm->chip->ops->get_state) {
- struct pwm_state state;
+ /*
+ * Zero-initialize state because most drivers are unaware of
+ * .usage_power. The other members of state are supposed to be
+ * set by lowlevel drivers. We still initialize the whole
+ * structure for simplicity even though this might paper over
+ * faulty implementations of .get_state().
+ */
+ struct pwm_state state = { 0, };
err = pwm->chip->ops->get_state(pwm->chip, pwm, &state);
trace_pwm_get(pwm, &state, err);
@@ -448,7 +455,7 @@ static void pwm_apply_state_debug(struct pwm_device *pwm,
{
struct pwm_state *last = &pwm->last;
struct pwm_chip *chip = pwm->chip;
- struct pwm_state s1, s2;
+ struct pwm_state s1 = { 0 }, s2 = { 0 };
int err;
if (!IS_ENABLED(CONFIG_PWM_DEBUG))
@@ -530,6 +537,7 @@ static void pwm_apply_state_debug(struct pwm_device *pwm,
return;
}
+ *last = (struct pwm_state){ 0 };
err = chip->ops->get_state(chip, pwm, last);
trace_pwm_get(pwm, last, err);
if (err)
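
The core now hands .get_state() a zeroed pwm_state, and the driver fixes that
follow in this series fill in state->polarity explicitly; together they leave
every member well defined even for drivers unaware of newer fields. A sketch of
the resulting contract, restating the code above:

	struct pwm_state state = { 0 };	/* caller zeroes every member */

	err = chip->ops->get_state(chip, pwm, &state);
	/* the driver sets enabled/period/duty_cycle/polarity as it reads hw */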
diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c
index 86df6702cb83..ad18b0ebe3f1 100644
--- a/drivers/pwm/pwm-cros-ec.c
+++ b/drivers/pwm/pwm-cros-ec.c
@@ -198,6 +198,7 @@ static int cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
state->enabled = (ret > 0);
state->period = EC_PWM_MAX_DUTY;
+ state->polarity = PWM_POLARITY_NORMAL;
/*
* Note that "disabled" and "duty cycle == 0" are treated the same. If
diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c
index 12c05c155cab..1b9274c5ad87 100644
--- a/drivers/pwm/pwm-hibvt.c
+++ b/drivers/pwm/pwm-hibvt.c
@@ -146,6 +146,7 @@ static int hibvt_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
value = readl(base + PWM_CTRL_ADDR(pwm->hwpwm));
state->enabled = (PWM_ENABLE_MASK & value);
+ state->polarity = (PWM_POLARITY_MASK & value) ? PWM_POLARITY_INVERSED : PWM_POLARITY_NORMAL;
return 0;
}
diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c
index 8362b4870c66..47b3141135f3 100644
--- a/drivers/pwm/pwm-iqs620a.c
+++ b/drivers/pwm/pwm-iqs620a.c
@@ -126,6 +126,7 @@ static int iqs620_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
mutex_unlock(&iqs620_pwm->lock);
state->period = IQS620_PWM_PERIOD_NS;
+ state->polarity = PWM_POLARITY_NORMAL;
return 0;
}
diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c
index 16d79ca5d8f5..5cd7b90872c6 100644
--- a/drivers/pwm/pwm-meson.c
+++ b/drivers/pwm/pwm-meson.c
@@ -162,6 +162,12 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm,
duty = state->duty_cycle;
period = state->period;
+ /*
+ * Note this is wrong. The result is an output wave that isn't really
+ * inverted and so is wrongly identified by .get_state as normal.
+ * Fixing this needs some care however as some machines might rely on
+ * this.
+ */
if (state->polarity == PWM_POLARITY_INVERSED)
duty = period - duty;
@@ -358,6 +364,8 @@ static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
state->duty_cycle = 0;
}
+ state->polarity = PWM_POLARITY_NORMAL;
+
return 0;
}
diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c
index d866ce345f97..bde579a338c2 100644
--- a/drivers/pwm/pwm-sprd.c
+++ b/drivers/pwm/pwm-sprd.c
@@ -109,6 +109,7 @@ static int sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
duty = val & SPRD_PWM_DUTY_MSK;
tmp = (prescale + 1) * NSEC_PER_SEC * duty;
state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate);
+ state->polarity = PWM_POLARITY_NORMAL;
/* Disable PWM clocks if the PWM channel is not in enable state. */
if (!state->enabled)
diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index 529963a7e4f5..41537c45f036 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -8,18 +8,19 @@
// Copyright (c) 2012 Marvell Technology Ltd.
// Yunfan Zhang <yfzhang@marvell.com>
+#include <linux/bits.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
#include <linux/module.h>
+#include <linux/of_device.h>
#include <linux/param.h>
-#include <linux/err.h>
#include <linux/platform_device.h>
+#include <linux/regmap.h>
#include <linux/regulator/driver.h>
+#include <linux/regulator/fan53555.h>
#include <linux/regulator/machine.h>
#include <linux/regulator/of_regulator.h>
-#include <linux/of_device.h>
-#include <linux/i2c.h>
#include <linux/slab.h>
-#include <linux/regmap.h>
-#include <linux/regulator/fan53555.h>
/* Voltage setting */
#define FAN53555_VSEL0 0x00
@@ -60,7 +61,7 @@
#define TCS_VSEL1_MODE (1 << 6)
#define TCS_SLEW_SHIFT 3
-#define TCS_SLEW_MASK (0x3 < 3)
+#define TCS_SLEW_MASK GENMASK(4, 3)
enum fan53555_vendor {
FAN53526_VENDOR_FAIRCHILD = 0,
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index 2a9867abba20..e6724a229d23 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -215,7 +215,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
drvdata->enable_clock = devm_clk_get(dev, NULL);
if (IS_ERR(drvdata->enable_clock)) {
dev_err(dev, "Can't get enable-clock from devicetree\n");
- return -ENOENT;
+ return PTR_ERR(drvdata->enable_clock);
}
} else if (drvtype && drvtype->has_performance_state) {
drvdata->desc.ops = &fixed_voltage_domain_ops;
diff --git a/drivers/regulator/sm5703-regulator.c b/drivers/regulator/sm5703-regulator.c
index 05ad28fc4da8..229df7170792 100644
--- a/drivers/regulator/sm5703-regulator.c
+++ b/drivers/regulator/sm5703-regulator.c
@@ -42,6 +42,7 @@ static const int sm5703_buck_voltagemap[] = {
.type = REGULATOR_VOLTAGE, \
.id = SM5703_USBLDO ## _id, \
.ops = &sm5703_regulator_ops_fixed, \
+ .n_voltages = 1, \
.fixed_uV = SM5703_USBLDO_MICROVOLT, \
.enable_reg = SM5703_REG_USBLDO12, \
.enable_mask = SM5703_REG_EN_USBLDO ##_id, \
@@ -56,6 +57,7 @@ static const int sm5703_buck_voltagemap[] = {
.type = REGULATOR_VOLTAGE, \
.id = SM5703_VBUS, \
.ops = &sm5703_regulator_ops_fixed, \
+ .n_voltages = 1, \
.fixed_uV = SM5703_VBUS_MICROVOLT, \
.enable_reg = SM5703_REG_CNTL, \
.enable_mask = SM5703_OPERATION_MODE_MASK, \
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index c76f82fb8b63..15f452908926 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -771,13 +771,12 @@ static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn,
iscsi_set_param(cls_conn, param, buf, buflen);
break;
case ISCSI_PARAM_DATADGST_EN:
- iscsi_set_param(cls_conn, param, buf, buflen);
-
mutex_lock(&tcp_sw_conn->sock_lock);
if (!tcp_sw_conn->sock) {
mutex_unlock(&tcp_sw_conn->sock_lock);
return -ENOTCONN;
}
+ iscsi_set_param(cls_conn, param, buf, buflen);
tcp_sw_conn->sendpage = conn->datadgst_en ?
sock_no_sendpage : tcp_sw_conn->sock->ops->sendpage;
mutex_unlock(&tcp_sw_conn->sock_lock);
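The iscsi_tcp change above reorders the DATADGST handler so the parameter is only stored once the socket is known to exist, with both the check and the update inside sock_lock. A sketch of that validate-then-apply-under-one-lock pattern with pthreads; the struct and function names are illustrative, not the driver's real types:

    #include <errno.h>
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct conn {
            pthread_mutex_t lock;
            bool connected;       /* stands in for tcp_sw_conn->sock */
            int digest_enabled;   /* stands in for the data-digest parameter */
    };

    static int set_data_digest(struct conn *c, int enable)
    {
            pthread_mutex_lock(&c->lock);
            if (!c->connected) {
                    /* Bail out before touching the parameter, as the fix does. */
                    pthread_mutex_unlock(&c->lock);
                    return -ENOTCONN;
            }
            c->digest_enabled = enable;
            pthread_mutex_unlock(&c->lock);
            return 0;
    }

    int main(void)
    {
            struct conn c = { PTHREAD_MUTEX_INITIALIZER, false, 0 };

            printf("disconnected: %d\n", set_data_digest(&c, 1)); /* -ENOTCONN */
            c.connected = true;
            printf("connected:    %d\n", set_data_digest(&c, 1)); /* 0 */
            return 0;
    }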
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 3ceece988338..c895189375e2 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -3298,7 +3298,7 @@ fw_crash_buffer_show(struct device *cdev,
spin_lock_irqsave(&instance->crashdump_lock, flags);
buff_offset = instance->fw_crash_buffer_offset;
- if (!instance->crash_dump_buf &&
+ if (!instance->crash_dump_buf ||
!((instance->fw_crash_state == AVAILABLE) ||
(instance->fw_crash_state == COPYING))) {
dev_err(&instance->pdev->dev,
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 84c9a55a5794..8a83f3fc2b86 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -4771,7 +4771,7 @@ int megasas_task_abort_fusion(struct scsi_cmnd *scmd)
devhandle = megasas_get_tm_devhandle(scmd->device);
if (devhandle == (u16)ULONG_MAX) {
- ret = SUCCESS;
+ ret = FAILED;
sdev_printk(KERN_INFO, scmd->device,
"task abort issued for invalid devhandle\n");
mutex_unlock(&instance->reset_mutex);
@@ -4841,7 +4841,7 @@ int megasas_reset_target_fusion(struct scsi_cmnd *scmd)
devhandle = megasas_get_tm_devhandle(scmd->device);
if (devhandle == (u16)ULONG_MAX) {
- ret = SUCCESS;
+ ret = FAILED;
sdev_printk(KERN_INFO, scmd->device,
"target reset issued for invalid devhandle\n");
mutex_unlock(&instance->reset_mutex);
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index a565817aa56d..d109a4ceb72b 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -2526,7 +2526,7 @@ static void mpi3mr_watchdog_work(struct work_struct *work)
mrioc->unrecoverable = 1;
goto schedule_work;
case MPI3_SYSIF_FAULT_CODE_SOFT_RESET_IN_PROGRESS:
- return;
+ goto schedule_work;
case MPI3_SYSIF_FAULT_CODE_CI_ACTIVATION_RESET:
reset_reason = MPI3MR_RESET_FROM_CIACTIV_FAULT;
break;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 2ee9ea57554d..14ae0a9c5d3d 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -6616,11 +6616,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
else if (rc == -EAGAIN)
goto try_32bit_dma;
total_sz += sense_sz;
- ioc_info(ioc,
- "sense pool(0x%p)- dma(0x%llx): depth(%d),"
- "element_size(%d), pool_size(%d kB)\n",
- ioc->sense, (unsigned long long)ioc->sense_dma, ioc->scsiio_depth,
- SCSI_SENSE_BUFFERSIZE, sz / 1024);
/* reply pool, 4 byte align */
sz = ioc->reply_free_queue_depth * ioc->reply_sz;
rc = _base_allocate_reply_pool(ioc, sz);
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index bee1b8a82020..d0cdbfe771a9 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -3617,6 +3617,7 @@ skip_dpc:
probe_failed:
qla_enode_stop(base_vha);
qla_edb_stop(base_vha);
+ vfree(base_vha->scan.l);
if (base_vha->gnl.l) {
dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size,
base_vha->gnl.l, base_vha->gnl.ldma);
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 5cce1ba70fc6..09ef0b31dfc0 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -314,11 +314,18 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer,
if (result)
return -EIO;
- /* Sanity check that we got the page back that we asked for */
+ /*
+ * Sanity check that we got the page back that we asked for and that
+ * the page size is not 0.
+ */
if (buffer[1] != page)
return -EIO;
- return get_unaligned_be16(&buffer[2]) + 4;
+ result = get_unaligned_be16(&buffer[2]);
+ if (!result)
+ return -EIO;
+
+ return result + 4;
}
static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page)
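The scsi_vpd_inquiry() hunk above now rejects a zero page length in addition to a mismatched page code. A userspace sketch of the 4-byte VPD header check, run against a synthetic INQUIRY response instead of a real device:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    static int vpd_size(const uint8_t *buf, uint8_t page)
    {
            uint16_t len;

            if (buf[1] != page)                      /* wrong page echoed back */
                    return -EIO;

            len = (uint16_t)(buf[2] << 8) | buf[3];  /* big-endian page length */
            if (!len)                                /* zero-length page is bogus */
                    return -EIO;

            return len + 4;                          /* payload plus 4-byte header */
    }

    int main(void)
    {
            uint8_t good[4] = { 0x00, 0x83, 0x00, 0x3c }; /* page 0x83, 60 bytes */
            uint8_t bad[4]  = { 0x00, 0x83, 0x00, 0x00 }; /* zero-length page */

            printf("good: %d\n", vpd_size(good, 0x83)); /* 64 */
            printf("bad:  %d\n", vpd_size(bad, 0x83));  /* -EIO */
            return 0;
    }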
diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c
index b11a9162e73a..b54f2c6c08c3 100644
--- a/drivers/scsi/ses.c
+++ b/drivers/scsi/ses.c
@@ -509,9 +509,6 @@ static int ses_enclosure_find_by_addr(struct enclosure_device *edev,
int i;
struct ses_component *scomp;
- if (!edev->component[0].scratch)
- return 0;
-
for (i = 0; i < edev->components; i++) {
scomp = edev->component[i].scratch;
if (scomp->addr != efd->addr)
@@ -602,8 +599,10 @@ static void ses_enclosure_data_process(struct enclosure_device *edev,
components++,
type_ptr[0],
name);
- else
+ else if (components < edev->components)
ecomp = &edev->component[components++];
+ else
+ ecomp = ERR_PTR(-EINVAL);
if (!IS_ERR(ecomp)) {
if (addl_desc_ptr) {
@@ -734,11 +733,6 @@ static int ses_intf_add(struct device *cdev,
components += type_ptr[1];
}
- if (components == 0) {
- sdev_printk(KERN_WARNING, sdev, "enclosure has no enumerated components\n");
- goto err_free;
- }
-
ses_dev->page1 = buf;
ses_dev->page1_len = len;
buf = NULL;
@@ -780,9 +774,11 @@ static int ses_intf_add(struct device *cdev,
buf = NULL;
}
page2_not_supported:
- scomp = kcalloc(components, sizeof(struct ses_component), GFP_KERNEL);
- if (!scomp)
- goto err_free;
+ if (components > 0) {
+ scomp = kcalloc(components, sizeof(struct ses_component), GFP_KERNEL);
+ if (!scomp)
+ goto err_free;
+ }
edev = enclosure_register(cdev->parent, dev_name(&sdev->sdev_gendev),
components, &ses_enclosure_callbacks);
diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c
index bd87d3c92dd3..69347b6bf60c 100644
--- a/drivers/spi/spi-rockchip-sfc.c
+++ b/drivers/spi/spi-rockchip-sfc.c
@@ -632,7 +632,7 @@ static int rockchip_sfc_probe(struct platform_device *pdev)
if (ret) {
dev_err(dev, "Failed to request irq\n");
- return ret;
+ goto err_irq;
}
ret = rockchip_sfc_init(sfc);
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 44b85a8d47f1..7bc14fb309a6 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -4456,6 +4456,11 @@ static int of_spi_notify(struct notifier_block *nb, unsigned long action,
return NOTIFY_OK;
}
+ /*
+ * Clear the flag before adding the device so that fw_devlink
+ * doesn't skip adding consumers to this device.
+ */
+ rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE;
spi = of_register_spi_device(ctlr, rd->dn);
put_device(&ctlr->dev);
diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c
index 290b1bb0e9cd..df5fb5410b72 100644
--- a/drivers/tee/optee/call.c
+++ b/drivers/tee/optee/call.c
@@ -488,7 +488,7 @@ static bool is_normal_memory(pgprot_t p)
#elif defined(CONFIG_ARM64)
return (pgprot_val(p) & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL);
#else
-#error "Unuspported architecture"
+#error "Unsupported architecture"
#endif
}
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index b1c6231defad..673cf0359494 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -32,7 +32,7 @@ static int shm_get_kernel_pages(unsigned long start, size_t page_count,
is_kmap_addr((void *)start)))
return -EINVAL;
- page = virt_to_page(start);
+ page = virt_to_page((void *)start);
for (n = 0; n < page_count; n++) {
pages[n] = page + n;
get_page(pages[n]);
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
index 90526f46c9b1..d71ee50e7878 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
@@ -153,7 +153,6 @@ static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp
cancel_delayed_work_sync(&pci_info->work);
proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0);
proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, 0);
- thermal_zone_device_disable(tzd);
pci_info->stored_thres = 0;
return 0;
}
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index c7ba5680cd48..91fc7e239497 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -235,6 +235,12 @@ static int max_idle_set(const char *arg, const struct kernel_param *kp)
goto skip_limit_set;
}
+ if (!cpumask_available(idle_injection_cpu_mask)) {
+ ret = allocate_copy_idle_injection_mask(cpu_present_mask);
+ if (ret)
+ goto skip_limit_set;
+ }
+
if (check_invalid(idle_injection_cpu_mask, new_max_idle)) {
ret = -EINVAL;
goto skip_limit_set;
@@ -791,7 +797,8 @@ static int __init powerclamp_init(void)
return retval;
mutex_lock(&powerclamp_lock);
- retval = allocate_copy_idle_injection_mask(cpu_present_mask);
+ if (!cpumask_available(idle_injection_cpu_mask))
+ retval = allocate_copy_idle_injection_mask(cpu_present_mask);
mutex_unlock(&powerclamp_lock);
if (retval)
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index 2e22bb82b738..e69868e868eb 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -193,8 +193,67 @@ static const struct attribute_group thermal_attr_group = {
#define THERM_THROT_POLL_INTERVAL HZ
#define THERM_STATUS_PROCHOT_LOG BIT(1)
-#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
-#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))
+static u64 therm_intr_core_clear_mask;
+static u64 therm_intr_pkg_clear_mask;
+
+static void thermal_intr_init_core_clear_mask(void)
+{
+ if (therm_intr_core_clear_mask)
+ return;
+
+ /*
+ * Reference: Intel SDM Volume 4
+ * "Table 2-2. IA-32 Architectural MSRs", MSR 0x19C
+ * IA32_THERM_STATUS.
+ */
+
+ /*
+	 * Bits 1, 3, 5: present if CPUID.01H:EDX[22] = 1. When that bit is
+	 * 0, this driver will not enable interrupts, as it checks for
+	 * X86_FEATURE_ACPI.
+ */
+ therm_intr_core_clear_mask = (BIT(1) | BIT(3) | BIT(5));
+
+ /*
+ * Bit 7 and 9: Thermal Threshold #1 and #2 log
+ * If CPUID.01H:ECX[8] = 1
+ */
+ if (boot_cpu_has(X86_FEATURE_TM2))
+ therm_intr_core_clear_mask |= (BIT(7) | BIT(9));
+
+ /* Bit 11: Power Limitation log (R/WC0) If CPUID.06H:EAX[4] = 1 */
+ if (boot_cpu_has(X86_FEATURE_PLN))
+ therm_intr_core_clear_mask |= BIT(11);
+
+ /*
+ * Bit 13: Current Limit log (R/WC0) If CPUID.06H:EAX[7] = 1
+ * Bit 15: Cross Domain Limit log (R/WC0) If CPUID.06H:EAX[7] = 1
+ */
+ if (boot_cpu_has(X86_FEATURE_HWP))
+ therm_intr_core_clear_mask |= (BIT(13) | BIT(15));
+}
+
+static void thermal_intr_init_pkg_clear_mask(void)
+{
+ if (therm_intr_pkg_clear_mask)
+ return;
+
+ /*
+ * Reference: Intel SDM Volume 4
+ * "Table 2-2. IA-32 Architectural MSRs", MSR 0x1B1
+ * IA32_PACKAGE_THERM_STATUS.
+ */
+
+ /* All bits except BIT 26 depend on CPUID.06H: EAX[6] = 1 */
+ if (boot_cpu_has(X86_FEATURE_PTS))
+ therm_intr_pkg_clear_mask = (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11));
+
+ /*
+ * Intel SDM Volume 2A: Thermal and Power Management Leaf
+ * Bit 26: CPUID.06H: EAX[19] = 1
+ */
+ if (boot_cpu_has(X86_FEATURE_HFI))
+ therm_intr_pkg_clear_mask |= BIT(26);
+}
/*
* Clear the bits in package thermal status register for bit = 1
@@ -207,13 +266,10 @@ void thermal_clear_package_intr_status(int level, u64 bit_mask)
if (level == CORE_LEVEL) {
msr = MSR_IA32_THERM_STATUS;
- msr_val = THERM_STATUS_CLEAR_CORE_MASK;
+ msr_val = therm_intr_core_clear_mask;
} else {
msr = MSR_IA32_PACKAGE_THERM_STATUS;
- msr_val = THERM_STATUS_CLEAR_PKG_MASK;
- if (boot_cpu_has(X86_FEATURE_HFI))
- msr_val |= BIT(26);
-
+ msr_val = therm_intr_pkg_clear_mask;
}
msr_val &= ~bit_mask;
@@ -708,6 +764,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
apic_write(APIC_LVTTHMR, h);
+ thermal_intr_init_core_clear_mask();
+ thermal_intr_init_pkg_clear_mask();
+
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
wrmsr(MSR_IA32_THERM_INTERRUPT,
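The therm_throt rework above replaces two fixed clear masks with masks assembled at init time from the CPU's actual feature bits. A sketch of how the core mask is built, with plain booleans standing in for the boot_cpu_has() feature checks:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BIT(n) (1ULL << (n))

    static uint64_t core_clear_mask(bool has_tm2, bool has_pln, bool has_hwp)
    {
            uint64_t mask = BIT(1) | BIT(3) | BIT(5); /* always-present log bits */

            if (has_tm2)
                    mask |= BIT(7) | BIT(9);   /* thermal threshold #1/#2 logs */
            if (has_pln)
                    mask |= BIT(11);           /* power limitation log */
            if (has_hwp)
                    mask |= BIT(13) | BIT(15); /* current/cross-domain limit logs */

            return mask;
    }

    int main(void)
    {
            printf("all features: 0x%llx\n",
                   (unsigned long long)core_clear_mask(true, true, true));
            printf("TM2 only:     0x%llx\n",
                   (unsigned long long)core_clear_mask(true, false, false));
            return 0;
    }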
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index a4aba7b8bb8b..6c20c9f90a05 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -876,8 +876,6 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
unsigned long states = cdev->max_state + 1;
int var;
- lockdep_assert_held(&cdev->lock);
-
var = sizeof(*stats);
var += sizeof(*stats->time_in_state) * states;
var += sizeof(*stats->trans_table) * states * states;
@@ -903,8 +901,6 @@ out:
static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev)
{
- lockdep_assert_held(&cdev->lock);
-
kfree(cdev->stats);
cdev->stats = NULL;
}
@@ -931,6 +927,8 @@ void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev)
void thermal_cooling_device_stats_reinit(struct thermal_cooling_device *cdev)
{
+ lockdep_assert_held(&cdev->lock);
+
cooling_device_stats_destroy(cdev);
cooling_device_stats_setup(cdev);
}
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index fa43df05342b..3ba9c8b93ae6 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1903,6 +1903,17 @@ EXPORT_SYMBOL_GPL(serial8250_modem_status);
static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir)
{
switch (iir & 0x3f) {
+ case UART_IIR_THRI:
+ /*
+		 * Postpone the DMA-or-not decision to IIR_RDI or IIR_RX_TIMEOUT,
+		 * because it's impossible to make an informed decision about
+		 * that with IIR_THRI alone.
+ *
+ * This also fixes one known DMA Rx corruption issue where
+ * DR is asserted but DMA Rx only gets a corrupted zero byte
+ * (too early DR?).
+ */
+ return false;
case UART_IIR_RDI:
if (!up->dma->rx_running)
break;
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 56e6ba3250cd..074bfed57fc9 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -858,11 +858,17 @@ static unsigned int lpuart32_tx_empty(struct uart_port *port)
struct lpuart_port, port);
unsigned long stat = lpuart32_read(port, UARTSTAT);
unsigned long sfifo = lpuart32_read(port, UARTFIFO);
+ unsigned long ctrl = lpuart32_read(port, UARTCTRL);
if (sport->dma_tx_in_progress)
return 0;
- if (stat & UARTSTAT_TC && sfifo & UARTFIFO_TXEMPT)
+ /*
+ * LPUART Transmission Complete Flag may never be set while queuing a break
+ * character, so avoid checking for transmission complete when UARTCTRL_SBK
+ * is asserted.
+ */
+ if ((stat & UARTSTAT_TC && sfifo & UARTFIFO_TXEMPT) || ctrl & UARTCTRL_SBK)
return TIOCSER_TEMT;
return 0;
@@ -2942,7 +2948,7 @@ static bool lpuart_uport_is_active(struct lpuart_port *sport)
tty = tty_port_tty_get(port);
if (tty) {
tty_dev = tty->dev;
- may_wake = device_may_wakeup(tty_dev);
+ may_wake = tty_dev && device_may_wakeup(tty_dev);
tty_kref_put(tty);
}
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 7bd080720929..caa09a0c48f4 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -31,6 +31,7 @@
#include <linux/ioport.h>
#include <linux/ktime.h>
#include <linux/major.h>
+#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/of.h>
@@ -2864,6 +2865,13 @@ static int sci_init_single(struct platform_device *dev,
sci_port->irqs[i] = platform_get_irq(dev, i);
}
+ /*
+	 * The fourth interrupt on an SCI port is the transmit end interrupt,
+	 * so shuffle the interrupts.
+ */
+ if (p->type == PORT_SCI)
+ swap(sci_port->irqs[SCIx_BRI_IRQ], sci_port->irqs[SCIx_TEI_IRQ]);
+
/* The SCI generates several interrupts. They can be muxed together or
* connected to different interrupt lines. In the muxed case only one
* interrupt resource is specified as there is only one interrupt ID.
@@ -2929,7 +2937,7 @@ static int sci_init_single(struct platform_device *dev,
port->flags = UPF_FIXED_PORT | UPF_BOOT_AUTOCONF | p->flags;
port->fifosize = sci_port->params->fifosize;
- if (port->type == PORT_SCI) {
+ if (port->type == PORT_SCI && !dev->dev.of_node) {
if (sci_port->reg_size >= 0x20)
port->regshift = 2;
else
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 37e178a9ac47..70b112038792 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -1409,13 +1409,6 @@ static int ufshcd_devfreq_target(struct device *dev,
struct ufs_clk_info *clki;
unsigned long irq_flags;
- /*
- * Skip devfreq if UFS initialization is not finished.
- * Otherwise ufs could be in a inconsistent state.
- */
- if (!smp_load_acquire(&hba->logical_unit_scan_finished))
- return 0;
-
if (!ufshcd_is_clkscaling_supported(hba))
return -EINVAL;
@@ -8399,6 +8392,22 @@ static int ufshcd_add_lus(struct ufs_hba *hba)
if (ret)
goto out;
+ /* Initialize devfreq after UFS device is detected */
+ if (ufshcd_is_clkscaling_supported(hba)) {
+ memcpy(&hba->clk_scaling.saved_pwr_info.info,
+ &hba->pwr_info,
+ sizeof(struct ufs_pa_layer_attr));
+ hba->clk_scaling.saved_pwr_info.is_valid = true;
+ hba->clk_scaling.is_allowed = true;
+
+ ret = ufshcd_devfreq_init(hba);
+ if (ret)
+ goto out;
+
+ hba->clk_scaling.is_enabled = true;
+ ufshcd_init_clk_scaling_sysfs(hba);
+ }
+
ufs_bsg_probe(hba);
ufshpb_init(hba);
scsi_scan_host(hba->host);
@@ -8670,12 +8679,6 @@ out:
if (ret) {
pm_runtime_put_sync(hba->dev);
ufshcd_hba_exit(hba);
- } else {
- /*
- * Make sure that when reader code sees UFS initialization has finished,
- * all initialization steps have really been executed.
- */
- smp_store_release(&hba->logical_unit_scan_finished, true);
}
}
@@ -10316,30 +10319,12 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
*/
ufshcd_set_ufs_dev_active(hba);
- /* Initialize devfreq */
- if (ufshcd_is_clkscaling_supported(hba)) {
- memcpy(&hba->clk_scaling.saved_pwr_info.info,
- &hba->pwr_info,
- sizeof(struct ufs_pa_layer_attr));
- hba->clk_scaling.saved_pwr_info.is_valid = true;
- hba->clk_scaling.is_allowed = true;
-
- err = ufshcd_devfreq_init(hba);
- if (err)
- goto rpm_put_sync;
-
- hba->clk_scaling.is_enabled = true;
- ufshcd_init_clk_scaling_sysfs(hba);
- }
-
async_schedule(ufshcd_async_scan, hba);
ufs_sysfs_add_nodes(hba->dev);
device_enable_async_suspend(dev);
return 0;
-rpm_put_sync:
- pm_runtime_put_sync(dev);
free_tmf_queue:
blk_mq_destroy_queue(hba->tmf_queue);
blk_put_queue(hba->tmf_queue);
diff --git a/drivers/usb/cdns3/cdnsp-ep0.c b/drivers/usb/cdns3/cdnsp-ep0.c
index d63d5d92f255..f317d3c84781 100644
--- a/drivers/usb/cdns3/cdnsp-ep0.c
+++ b/drivers/usb/cdns3/cdnsp-ep0.c
@@ -414,7 +414,7 @@ static int cdnsp_ep0_std_request(struct cdnsp_device *pdev,
void cdnsp_setup_analyze(struct cdnsp_device *pdev)
{
struct usb_ctrlrequest *ctrl = &pdev->setup;
- int ret = 0;
+ int ret = -EINVAL;
u16 len;
trace_cdnsp_ctrl_req(ctrl);
@@ -424,7 +424,6 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev)
if (pdev->gadget.state == USB_STATE_NOTATTACHED) {
dev_err(pdev->dev, "ERR: Setup detected in unattached state\n");
- ret = -EINVAL;
goto out;
}
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index a23ddbb81979..560793545362 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -49,6 +49,7 @@
#define PCI_DEVICE_ID_INTEL_RPLS 0x7a61
#define PCI_DEVICE_ID_INTEL_MTLM 0x7eb1
#define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1
+#define PCI_DEVICE_ID_INTEL_MTLS 0x7f6f
#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e
#define PCI_DEVICE_ID_INTEL_TGL 0x9a15
#define PCI_DEVICE_ID_AMD_MR 0x163a
@@ -474,6 +475,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP),
(kernel_ulong_t) &dwc3_pci_intel_swnode, },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLS),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTL),
(kernel_ulong_t) &dwc3_pci_intel_swnode, },
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index ddfc537c7526..56cdfb2e4211 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1251,7 +1251,7 @@ static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to)
p->kiocb = kiocb;
if (p->aio) {
p->to_free = dup_iter(&p->data, to, GFP_KERNEL);
- if (!p->to_free) {
+ if (!iter_is_ubuf(&p->data) && !p->to_free) {
kfree(p);
return -ENOMEM;
}
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index d605bc2e7e8f..28249d0bf062 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -614,7 +614,7 @@ ep_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (!priv)
goto fail;
priv->to_free = dup_iter(&priv->to, to, GFP_KERNEL);
- if (!priv->to_free) {
+ if (!iter_is_ubuf(&priv->to) && !priv->to_free) {
kfree(priv);
goto fail;
}
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index fb988e4ea924..6db07ca419c3 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -771,12 +771,11 @@ static struct pci_driver xhci_pci_driver = {
/* suspend and resume implemented later */
.shutdown = usb_hcd_pci_shutdown,
- .driver = {
#ifdef CONFIG_PM
- .pm = &usb_hcd_pci_pm_ops,
-#endif
- .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+ .driver = {
+ .pm = &usb_hcd_pci_pm_ops
},
+#endif
};
static int __init xhci_pci_init(void)
diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index 1ff22f675930..a88c39e525c2 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -1360,6 +1360,9 @@ static void tegra_xhci_id_work(struct work_struct *work)
mutex_unlock(&tegra->lock);
+ tegra->otg_usb3_port = tegra_xusb_padctl_get_usb3_companion(tegra->padctl,
+ tegra->otg_usb2_port);
+
if (tegra->host_mode) {
/* switch to host mode */
if (tegra->otg_usb3_port >= 0) {
@@ -1474,9 +1477,6 @@ static int tegra_xhci_id_notify(struct notifier_block *nb,
}
tegra->otg_usb2_port = tegra_xusb_get_usb2_port(tegra, usbphy);
- tegra->otg_usb3_port = tegra_xusb_padctl_get_usb3_companion(
- tegra->padctl,
- tegra->otg_usb2_port);
tegra->host_mode = (usbphy->last_event == USB_EVENT_ID) ? true : false;
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 6183ce8574b1..6307bae9cddf 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -9,6 +9,7 @@
*/
#include <linux/pci.h>
+#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/irq.h>
#include <linux/log2.h>
@@ -228,6 +229,7 @@ int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us)
static void xhci_zero_64b_regs(struct xhci_hcd *xhci)
{
struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
+ struct iommu_domain *domain;
int err, i;
u64 val;
u32 intrs;
@@ -246,7 +248,9 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci)
* an iommu. Doing anything when there is no iommu is definitely
* unsafe...
*/
- if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !device_iommu_mapped(dev))
+ domain = iommu_get_domain_for_dev(dev);
+ if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !domain ||
+ domain->type == IOMMU_DOMAIN_IDENTITY)
return;
xhci_info(xhci, "Zeroing 64bit base registers, expecting fault\n");
@@ -4438,6 +4442,7 @@ static int __maybe_unused xhci_change_max_exit_latency(struct xhci_hcd *xhci,
if (!virt_dev || max_exit_latency == virt_dev->current_mel) {
spin_unlock_irqrestore(&xhci->lock, flags);
+ xhci_free_command(xhci, command);
return 0;
}
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 832ad592b7ef..cdea1bff3b70 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -120,6 +120,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */
{ USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */
{ USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */
+ { USB_DEVICE(0x10C4, 0x82AA) }, /* Silicon Labs IFS-USB-DATACABLE used with Quint UPS */
{ USB_DEVICE(0x10C4, 0x82EF) }, /* CESINEL FALCO 6105 AC Power Supply */
{ USB_DEVICE(0x10C4, 0x82F1) }, /* CESINEL MEDCAL EFD Earth Fault Detector */
{ USB_DEVICE(0x10C4, 0x82F2) }, /* CESINEL MEDCAL ST Network Analyzer */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index e6d8d9b35ad0..f31cc3c76329 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1198,6 +1198,8 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0900, 0xff, 0, 0), /* RM500U-CN */
+ .driver_info = ZLP },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200U, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) },
@@ -1300,6 +1302,14 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(0) | RSVD(1) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990 (PCIe) */
.driver_info = RSVD(0) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990 (rmnet) */
+ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1081, 0xff), /* Telit FE990 (MBIM) */
+ .driver_info = NCTRL(0) | RSVD(1) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1082, 0xff), /* Telit FE990 (RNDIS) */
+ .driver_info = NCTRL(2) | RSVD(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */
+ .driver_info = NCTRL(0) | RSVD(1) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
.driver_info = NCTRL(0) | RSVD(1) | RSVD(3) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
index 662cd043b50e..8f3e884222ad 100644
--- a/drivers/usb/typec/altmodes/displayport.c
+++ b/drivers/usb/typec/altmodes/displayport.c
@@ -112,8 +112,12 @@ static int dp_altmode_configure(struct dp_altmode *dp, u8 con)
if (dp->data.status & DP_STATUS_PREFER_MULTI_FUNC &&
pin_assign & DP_PIN_ASSIGN_MULTI_FUNC_MASK)
pin_assign &= DP_PIN_ASSIGN_MULTI_FUNC_MASK;
- else if (pin_assign & DP_PIN_ASSIGN_DP_ONLY_MASK)
+ else if (pin_assign & DP_PIN_ASSIGN_DP_ONLY_MASK) {
pin_assign &= DP_PIN_ASSIGN_DP_ONLY_MASK;
+ /* Default to pin assign C if available */
+ if (pin_assign & BIT(DP_PIN_ASSIGN_C))
+ pin_assign = BIT(DP_PIN_ASSIGN_C);
+ }
if (!pin_assign)
return -EINVAL;
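The displayport hunk above narrows a DP-only capability mask down to a single choice, preferring pin assignment C when the partner offers it. A sketch of that selection; the bit positions mirror the typec DP alt-mode defines but should be treated as assumptions here:

    #include <stdint.h>
    #include <stdio.h>

    #define BIT(n)          (1U << (n))
    #define DP_PIN_ASSIGN_C 2
    #define DP_PIN_ASSIGN_E 4
    #define DP_PIN_ASSIGN_DP_ONLY_MASK (BIT(DP_PIN_ASSIGN_C) | BIT(DP_PIN_ASSIGN_E))

    static uint32_t pick_dp_only(uint32_t pin_assign)
    {
            pin_assign &= DP_PIN_ASSIGN_DP_ONLY_MASK;
            /* Default to pin assignment C if available. */
            if (pin_assign & BIT(DP_PIN_ASSIGN_C))
                    pin_assign = BIT(DP_PIN_ASSIGN_C);
            return pin_assign;
    }

    int main(void)
    {
            printf("C and E offered -> 0x%x\n",
                   pick_dp_only(BIT(DP_PIN_ASSIGN_C) | BIT(DP_PIN_ASSIGN_E)));
            printf("E only offered  -> 0x%x\n",
                   pick_dp_only(BIT(DP_PIN_ASSIGN_E)));
            return 0;
    }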
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 520646ae7fa0..195963b82b63 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2467,10 +2467,11 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev)
err = 0;
goto out;
}
+ mlx5_vdpa_add_debugfs(ndev);
err = setup_virtqueues(mvdev);
if (err) {
mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
- goto out;
+ goto err_setup;
}
err = create_rqt(ndev);
@@ -2500,6 +2501,8 @@ err_tir:
destroy_rqt(ndev);
err_rqt:
teardown_virtqueues(ndev);
+err_setup:
+ mlx5_vdpa_remove_debugfs(ndev->debugfs);
out:
return err;
}
@@ -2513,6 +2516,8 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev)
if (!ndev->setup)
return;
+ mlx5_vdpa_remove_debugfs(ndev->debugfs);
+ ndev->debugfs = NULL;
teardown_steering(ndev);
destroy_tir(ndev);
destroy_rqt(ndev);
@@ -3261,7 +3266,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
if (err)
goto err_reg;
- mlx5_vdpa_add_debugfs(ndev);
mgtdev->ndev = ndev;
return 0;
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
index 862f405362de..dfe2ce341803 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
@@ -466,16 +466,21 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
vdpasim_net_setup_config(simdev, config);
- ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM);
- if (ret)
- goto reg_err;
-
net = sim_to_net(simdev);
u64_stats_init(&net->tx_stats.syncp);
u64_stats_init(&net->rx_stats.syncp);
u64_stats_init(&net->cq_stats.syncp);
+ /*
+ * Initialization must be completed before this call, since it can
+ * connect the device to the vDPA bus, so requests can arrive after
+ * this call.
+ */
+ ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM);
+ if (ret)
+ goto reg_err;
+
return 0;
reg_err:
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index b244e7c0f514..32d0be968103 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -125,7 +125,6 @@ struct vhost_scsi_tpg {
struct se_portal_group se_tpg;
/* Pointer back to vhost_scsi, protected by tv_tpg_mutex */
struct vhost_scsi *vhost_scsi;
- struct list_head tmf_queue;
};
struct vhost_scsi_tport {
@@ -206,10 +205,8 @@ struct vhost_scsi {
struct vhost_scsi_tmf {
struct vhost_work vwork;
- struct vhost_scsi_tpg *tpg;
struct vhost_scsi *vhost;
struct vhost_scsi_virtqueue *svq;
- struct list_head queue_entry;
struct se_cmd se_cmd;
u8 scsi_resp;
@@ -352,12 +349,9 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd)
static void vhost_scsi_release_tmf_res(struct vhost_scsi_tmf *tmf)
{
- struct vhost_scsi_tpg *tpg = tmf->tpg;
struct vhost_scsi_inflight *inflight = tmf->inflight;
- mutex_lock(&tpg->tv_tpg_mutex);
- list_add_tail(&tpg->tmf_queue, &tmf->queue_entry);
- mutex_unlock(&tpg->tv_tpg_mutex);
+ kfree(tmf);
vhost_scsi_put_inflight(inflight);
}
@@ -1194,19 +1188,11 @@ vhost_scsi_handle_tmf(struct vhost_scsi *vs, struct vhost_scsi_tpg *tpg,
goto send_reject;
}
- mutex_lock(&tpg->tv_tpg_mutex);
- if (list_empty(&tpg->tmf_queue)) {
- pr_err("Missing reserve TMF. Could not handle LUN RESET.\n");
- mutex_unlock(&tpg->tv_tpg_mutex);
+ tmf = kzalloc(sizeof(*tmf), GFP_KERNEL);
+ if (!tmf)
goto send_reject;
- }
-
- tmf = list_first_entry(&tpg->tmf_queue, struct vhost_scsi_tmf,
- queue_entry);
- list_del_init(&tmf->queue_entry);
- mutex_unlock(&tpg->tv_tpg_mutex);
- tmf->tpg = tpg;
+ vhost_work_init(&tmf->vwork, vhost_scsi_tmf_resp_work);
tmf->vhost = vs;
tmf->svq = svq;
tmf->resp_iov = vq->iov[vc->out];
@@ -1658,7 +1644,10 @@ undepend:
for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) {
tpg = vs_tpg[i];
if (tpg) {
+ mutex_lock(&tpg->tv_tpg_mutex);
+ tpg->vhost_scsi = NULL;
tpg->tv_tpg_vhost_count--;
+ mutex_unlock(&tpg->tv_tpg_mutex);
target_undepend_item(&tpg->se_tpg.tpg_group.cg_item);
}
}
@@ -2032,19 +2021,11 @@ static int vhost_scsi_port_link(struct se_portal_group *se_tpg,
{
struct vhost_scsi_tpg *tpg = container_of(se_tpg,
struct vhost_scsi_tpg, se_tpg);
- struct vhost_scsi_tmf *tmf;
-
- tmf = kzalloc(sizeof(*tmf), GFP_KERNEL);
- if (!tmf)
- return -ENOMEM;
- INIT_LIST_HEAD(&tmf->queue_entry);
- vhost_work_init(&tmf->vwork, vhost_scsi_tmf_resp_work);
mutex_lock(&vhost_scsi_mutex);
mutex_lock(&tpg->tv_tpg_mutex);
tpg->tv_tpg_port_count++;
- list_add_tail(&tmf->queue_entry, &tpg->tmf_queue);
mutex_unlock(&tpg->tv_tpg_mutex);
vhost_scsi_hotplug(tpg, lun);
@@ -2059,16 +2040,11 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg,
{
struct vhost_scsi_tpg *tpg = container_of(se_tpg,
struct vhost_scsi_tpg, se_tpg);
- struct vhost_scsi_tmf *tmf;
mutex_lock(&vhost_scsi_mutex);
mutex_lock(&tpg->tv_tpg_mutex);
tpg->tv_tpg_port_count--;
- tmf = list_first_entry(&tpg->tmf_queue, struct vhost_scsi_tmf,
- queue_entry);
- list_del(&tmf->queue_entry);
- kfree(tmf);
mutex_unlock(&tpg->tv_tpg_mutex);
vhost_scsi_hotunplug(tpg, lun);
@@ -2329,7 +2305,6 @@ vhost_scsi_make_tpg(struct se_wwn *wwn, const char *name)
}
mutex_init(&tpg->tv_tpg_mutex);
INIT_LIST_HEAD(&tpg->tv_tpg_list);
- INIT_LIST_HEAD(&tpg->tmf_queue);
tpg->tport = tport;
tpg->tport_tpgt = tpgt;
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 0a2c47df01f4..eb565a10e5cd 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -823,7 +823,7 @@ static int set_con2fb_map(int unit, int newidx, int user)
int oldidx = con2fb_map[unit];
struct fb_info *info = fbcon_registered_fb[newidx];
struct fb_info *oldinfo = NULL;
- int found, err = 0, show_logo;
+ int err = 0, show_logo;
WARN_CONSOLE_UNLOCKED();
@@ -841,26 +841,26 @@ static int set_con2fb_map(int unit, int newidx, int user)
if (oldidx != -1)
oldinfo = fbcon_registered_fb[oldidx];
- found = search_fb_in_map(newidx);
-
- if (!err && !found) {
+ if (!search_fb_in_map(newidx)) {
err = con2fb_acquire_newinfo(vc, info, unit);
- if (!err)
- con2fb_map[unit] = newidx;
+ if (err)
+ return err;
+
+ fbcon_add_cursor_work(info);
}
+ con2fb_map[unit] = newidx;
+
/*
* If old fb is not mapped to any of the consoles,
* fbcon should release it.
*/
- if (!err && oldinfo && !search_fb_in_map(oldidx))
+ if (oldinfo && !search_fb_in_map(oldidx))
con2fb_release_oldinfo(vc, oldinfo, info);
show_logo = (fg_console == 0 && !user &&
logo_shown != FBCON_LOGO_DONTSHOW);
- if (!found)
- fbcon_add_cursor_work(info);
con2fb_map_boot[unit] = newidx;
con2fb_init_display(vc, info, unit, show_logo);
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 875541ff185b..3fd95a79e4c3 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1116,6 +1116,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd,
case FBIOPUT_VSCREENINFO:
if (copy_from_user(&var, argp, sizeof(var)))
return -EFAULT;
+ /* only for kernel-internal use */
+ var.activate &= ~FB_ACTIVATE_KD_TEXT;
console_lock();
lock_fb_info(info);
ret = fbcon_modechange_possible(info, &var);
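The fbmem hunk above clears a kernel-internal bit from a user-supplied ioctl argument before acting on it. A tiny sketch of that sanitizing step; the flag value is quoted from the fb.h uapi header from memory and should be double-checked:

    #include <stdint.h>
    #include <stdio.h>

    #define FB_ACTIVATE_KD_TEXT 512 /* assumed value of the internal flag */

    int main(void)
    {
            uint32_t activate = 0x280;          /* as copied from userspace */

            activate &= ~FB_ACTIVATE_KD_TEXT;   /* only the kernel may set it */
            printf("sanitized activate: 0x%x\n", activate); /* 0x80 */
            return 0;
    }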
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 50f7f3f6b55e..1974a38bce20 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -35,10 +35,12 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
return retval;
}
if (attr_size > buffer_size) {
- if (!buffer_size) /* request to get the attr_size */
- retval = attr_size;
- else
+ if (buffer_size)
retval = -ERANGE;
+ else if (attr_size > SSIZE_MAX)
+ retval = -EOVERFLOW;
+ else /* request to get the attr_size */
+ retval = attr_size;
} else {
iov_iter_truncate(&to, attr_size);
retval = p9_client_read(attr_fid, 0, &to, &err);
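The v9fs hunk above distinguishes three outcomes when the attribute is larger than the caller's buffer: a real buffer that is too small (-ERANGE), a size query whose answer cannot be represented in ssize_t (-EOVERFLOW), and an ordinary size query. A standalone sketch of that decision:

    #include <errno.h>
    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/types.h>

    static ssize_t xattr_get_size(uint64_t attr_size, size_t buffer_size)
    {
            if (attr_size > buffer_size) {
                    if (buffer_size)
                            return -ERANGE;     /* caller's buffer is too small */
                    else if (attr_size > SSIZE_MAX)
                            return -EOVERFLOW;  /* size not representable */
                    else
                            return (ssize_t)attr_size; /* size-query mode */
            }
            return (ssize_t)attr_size;          /* value would be read here */
    }

    int main(void)
    {
            printf("%zd\n", xattr_get_size(100, 0));   /* 100: size query */
            printf("%zd\n", xattr_get_size(100, 10));  /* -ERANGE */
            printf("%zd\n", xattr_get_size((uint64_t)SSIZE_MAX + 1, 0)); /* -EOVERFLOW */
            return 0;
    }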
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 90e40d5ceccd..e54f0884802a 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1921,8 +1921,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
level = -1;
ULIST_ITER_INIT(&uiter);
while (1) {
- bool is_shared;
- bool cached;
+ const unsigned long prev_ref_count = ctx->refs.nnodes;
walk_ctx.bytenr = bytenr;
ret = find_parent_nodes(&walk_ctx, &shared);
@@ -1940,21 +1939,36 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
ret = 0;
/*
- * If our data extent was not directly shared (without multiple
- * reference items), than it might have a single reference item
- * with a count > 1 for the same offset, which means there are 2
- * (or more) file extent items that point to the data extent -
- * this happens when a file extent item needs to be split and
- * then one item gets moved to another leaf due to a b+tree leaf
- * split when inserting some item. In this case the file extent
- * items may be located in different leaves and therefore some
- * of the leaves may be referenced through shared subtrees while
- * others are not. Since our extent buffer cache only works for
- * a single path (by far the most common case and simpler to
- * deal with), we can not use it if we have multiple leaves
- * (which implies multiple paths).
+ * More than one extent buffer (bytenr) may have been added to
+ * the ctx->refs ulist, in which case we have to check multiple
+ * tree paths in case the first one is not shared, so we can not
+ * use the path cache which is made for a single path. Multiple
+ * extent buffers at the current level happen when:
+ *
+ * 1) level -1, the data extent: If our data extent was not
+ * directly shared (without multiple reference items), then
+ * it might have a single reference item with a count > 1 for
+ * the same offset, which means there are 2 (or more) file
+ * extent items that point to the data extent - this happens
+ * when a file extent item needs to be split and then one
+ * item gets moved to another leaf due to a b+tree leaf split
+ * when inserting some item. In this case the file extent
+ * items may be located in different leaves and therefore
+ * some of the leaves may be referenced through shared
+ * subtrees while others are not. Since our extent buffer
+ * cache only works for a single path (by far the most common
+ * case and simpler to deal with), we can not use it if we
+ * have multiple leaves (which implies multiple paths).
+ *
+ * 2) level >= 0, a tree node/leaf: We can have a mix of direct
+ * and indirect references on a b+tree node/leaf, so we have
+ * to check multiple paths, and the extent buffer (the
+ * current bytenr) may be shared or not. One example is
+ * during relocation as we may get a shared tree block ref
+ * (direct ref) and a non-shared tree block ref (indirect
+ * ref) for the same node/leaf.
*/
- if (level == -1 && ctx->refs.nnodes > 1)
+ if ((ctx->refs.nnodes - prev_ref_count) > 1)
ctx->use_path_cache = false;
if (level >= 0)
@@ -1964,12 +1978,17 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
if (!node)
break;
bytenr = node->val;
- level++;
- cached = lookup_backref_shared_cache(ctx, root, bytenr, level,
- &is_shared);
- if (cached) {
- ret = (is_shared ? 1 : 0);
- break;
+ if (ctx->use_path_cache) {
+ bool is_shared;
+ bool cached;
+
+ level++;
+ cached = lookup_backref_shared_cache(ctx, root, bytenr,
+ level, &is_shared);
+ if (cached) {
+ ret = (is_shared ? 1 : 0);
+ break;
+ }
}
shared.share_count = 0;
shared.have_delayed_delete_refs = false;
@@ -1977,6 +1996,28 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
}
/*
+ * If the path cache is disabled, then it means at some tree level we
+ * got multiple parents due to a mix of direct and indirect backrefs or
+ * multiple leaves with file extent items pointing to the same data
+ * extent. We have to invalidate the cache and cache only the sharedness
+ * result for the levels where we got only one node/reference.
+ */
+ if (!ctx->use_path_cache) {
+ int i = 0;
+
+ level--;
+ if (ret >= 0 && level >= 0) {
+ bytenr = ctx->path_cache_entries[level].bytenr;
+ ctx->use_path_cache = true;
+ store_backref_shared_cache(ctx, root, bytenr, level, ret);
+ i = level + 1;
+ }
+
+ for ( ; i < BTRFS_MAX_LEVEL; i++)
+ ctx->path_cache_entries[i].bytenr = 0;
+ }
+
+ /*
* Cache the sharedness result for the data extent if we know our inode
* has more than 1 file extent item that refers to the data extent.
*/
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b53f0e30ce2b..9e1596bb208d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2250,6 +2250,20 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
fs_info->csum_shash = csum_shash;
+ /*
+ * Check if the checksum implementation is a fast accelerated one.
+ * As-is this is a bit of a hack and should be replaced once the csum
+ * implementations provide that information themselves.
+ */
+ switch (csum_type) {
+ case BTRFS_CSUM_TYPE_CRC32:
+ if (!strstr(crypto_shash_driver_name(csum_shash), "generic"))
+ set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
+ break;
+ default:
+ break;
+ }
+
btrfs_info(fs_info, "using %s (%s) checksum algorithm",
btrfs_super_csum_name(csum_type),
crypto_shash_driver_name(csum_shash));
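The disk-io hunk above decides whether the checksum implementation is fast by checking the crypto driver name, as the removed super.c code did for crc32c_impl(). A sketch of that heuristic; the driver names are examples of what the crypto layer typically reports:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* A driver name without "generic" is taken to be accelerated. */
    static bool csum_impl_is_fast(const char *driver_name)
    {
            return strstr(driver_name, "generic") == NULL;
    }

    int main(void)
    {
            printf("crc32c-intel:   %d\n", csum_impl_is_fast("crc32c-intel"));   /* 1 */
            printf("crc32c-generic: %d\n", csum_impl_is_fast("crc32c-generic")); /* 0 */
            return 0;
    }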
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a0ef1a1784c7..ba769a1eb87a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3732,7 +3732,9 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
}
/* update qgroup status and info */
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
err = btrfs_run_qgroups(trans);
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
if (err < 0)
btrfs_handle_fs_error(fs_info, err,
"failed to update qgroup status and info");
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 52a7d2fa2284..f41da7ac360d 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2828,13 +2828,22 @@ cleanup:
}
/*
- * called from commit_transaction. Writes all changed qgroups to disk.
+ * Writes all changed qgroups to disk.
+ * Called by the transaction commit path and the qgroup assign ioctl.
*/
int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret = 0;
+ /*
+ * In case we are called from the qgroup assign ioctl, assert that we
+ * are holding the qgroup_ioctl_lock, otherwise we can race with a quota
+ * disable operation (ioctl) and access a freed quota root.
+ */
+ if (trans->transaction->state != TRANS_STATE_COMMIT_DOING)
+ lockdep_assert_held(&fs_info->qgroup_ioctl_lock);
+
if (!fs_info->quota_root)
return ret;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 581845bc206a..366fb4cde145 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1516,8 +1516,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name,
s->s_id);
btrfs_sb(s)->bdev_holder = fs_type;
- if (!strstr(crc32c_impl(), "generic"))
- set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
error = btrfs_fill_super(s, fs_devices, data);
}
if (!error)
@@ -1631,6 +1629,8 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
+ workqueue_set_max_active(fs_info->endio_workers, new_pool_size);
+ workqueue_set_max_active(fs_info->endio_meta_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 18329ebcb1cb..b8d5b1fa9a03 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2035,7 +2035,20 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
if (current->journal_info == trans)
current->journal_info = NULL;
- btrfs_scrub_cancel(fs_info);
+
+ /*
+ * If relocation is running, we can't cancel scrub because that will
+ * result in a deadlock. Before relocating a block group, relocation
+ * pauses scrub, then starts and commits a transaction before unpausing
+ * scrub. If the transaction commit is being done by the relocation
+ * task or triggered by another task and the relocation task is waiting
+ * for the commit, and we end up here due to an error in the commit
+ * path, then calling btrfs_scrub_cancel() will deadlock, as we are
+ * asking for scrub to stop while having it asked to be paused higher
+ * above in relocation code.
+ */
+ if (!test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
+ btrfs_scrub_cancel(fs_info);
kmem_cache_free(btrfs_trans_handle_cachep, trans);
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6d0124b6e79e..c6d592870400 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1366,8 +1366,17 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
* So, we need to add a special mount option to scan for
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
*/
- flags |= FMODE_EXCL;
+ /*
+ * Avoid using flag |= FMODE_EXCL here, as the systemd-udev may
+ * initiate the device scan which may race with the user's mount
+ * or mkfs command, resulting in failure.
+ * Since the device scan is solely for reading purposes, there is
+ * no need for FMODE_EXCL. Additionally, the devices are read again
+ * during the mount process. It is ok to get some inconsistent
+ * values temporarily, as the device paths of the fsid are the only
+ * required information for assembling the volume.
+ */
bdev = blkdev_get_by_path(path, flags, holder);
if (IS_ERR(bdev))
return ERR_CAST(bdev);
@@ -3266,8 +3275,15 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
btrfs_scrub_pause(fs_info);
ret = btrfs_relocate_block_group(fs_info, chunk_offset);
btrfs_scrub_continue(fs_info);
- if (ret)
+ if (ret) {
+ /*
+ * If we had a transaction abort, stop all running scrubs.
+ * See transaction.c:cleanup_transaction() why we do it here.
+ */
+ if (BTRFS_FS_ERROR(fs_info))
+ btrfs_scrub_cancel(fs_info);
return ret;
+ }
block_group = btrfs_lookup_block_group(fs_info, chunk_offset);
if (!block_group)
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 71fe0a0a7992..415176b2cf32 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -124,7 +124,10 @@ extern const struct dentry_operations cifs_ci_dentry_ops;
#ifdef CONFIG_CIFS_DFS_UPCALL
extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
#else
-#define cifs_dfs_d_automount NULL
+static inline struct vfsmount *cifs_dfs_d_automount(struct path *path)
+{
+ return ERR_PTR(-EREMOTE);
+}
#endif
/* Functions related to symlinks */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 38a697eca305..9d963caec35c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -71,7 +71,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
int rc;
struct cifs_ses *ses;
struct TCP_Server_Info *server;
- struct nls_table *nls_codepage;
+ struct nls_table *nls_codepage = NULL;
/*
* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for
@@ -99,6 +99,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
}
spin_unlock(&tcon->tc_lock);
+again:
rc = cifs_wait_for_server_reconnect(server, tcon->retry);
if (rc)
return rc;
@@ -110,8 +111,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
}
spin_unlock(&ses->chan_lock);
- nls_codepage = load_nls_default();
-
+ mutex_lock(&ses->session_mutex);
/*
* Recheck after acquire mutex. If another thread is negotiating
* and the server never sends an answer the socket will be closed
@@ -120,29 +120,38 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
spin_lock(&server->srv_lock);
if (server->tcpStatus == CifsNeedReconnect) {
spin_unlock(&server->srv_lock);
+ mutex_unlock(&ses->session_mutex);
+
+ if (tcon->retry)
+ goto again;
rc = -EHOSTDOWN;
goto out;
}
spin_unlock(&server->srv_lock);
+ nls_codepage = load_nls_default();
+
/*
* need to prevent multiple threads trying to simultaneously
* reconnect the same SMB session
*/
+ spin_lock(&ses->ses_lock);
spin_lock(&ses->chan_lock);
- if (!cifs_chan_needs_reconnect(ses, server)) {
+ if (!cifs_chan_needs_reconnect(ses, server) &&
+ ses->ses_status == SES_GOOD) {
spin_unlock(&ses->chan_lock);
+ spin_unlock(&ses->ses_lock);
/* this means that we only need to tree connect */
if (tcon->need_reconnect)
goto skip_sess_setup;
- rc = -EHOSTDOWN;
+ mutex_unlock(&ses->session_mutex);
goto out;
}
spin_unlock(&ses->chan_lock);
+ spin_unlock(&ses->ses_lock);
- mutex_lock(&ses->session_mutex);
rc = cifs_negotiate_protocol(0, ses, server);
if (!rc)
rc = cifs_setup_session(0, ses, server, nls_codepage);
@@ -4373,8 +4382,13 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses,
return -ENODEV;
getDFSRetry:
- rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **) &pSMB,
- (void **) &pSMBr);
+ /*
+ * Use smb_init_no_reconnect() instead of smb_init() as
+	 * CIFSGetDFSRefer() may be called from cifs_reconnect_tcon(), which
+	 * would otherwise cause infinite recursion.
+ */
+ rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc,
+ (void **)&pSMB, (void **)&pSMBr);
if (rc)
return rc;
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 6d13f8207e96..ace11a1a7c8a 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -441,13 +441,14 @@ out:
* but there are some bugs that prevent rename from working if there are
* multiple delimiters.
*
- * Returns a sanitized duplicate of @path. The caller is responsible for
- * cleaning up the original.
+ * Returns a sanitized duplicate of @path. @gfp indicates the GFP_* flags
+ * for kstrdup.
+ * The caller is responsible for freeing the original.
*/
#define IS_DELIM(c) ((c) == '/' || (c) == '\\')
-static char *sanitize_path(char *path)
+char *cifs_sanitize_prepath(char *prepath, gfp_t gfp)
{
- char *cursor1 = path, *cursor2 = path;
+ char *cursor1 = prepath, *cursor2 = prepath;
/* skip all prepended delimiters */
while (IS_DELIM(*cursor1))
@@ -469,7 +470,7 @@ static char *sanitize_path(char *path)
cursor2--;
*(cursor2) = '\0';
- return kstrdup(path, GFP_KERNEL);
+ return kstrdup(prepath, gfp);
}
/*
@@ -531,7 +532,7 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx)
if (!*pos)
return 0;
- ctx->prepath = sanitize_path(pos);
+ ctx->prepath = cifs_sanitize_prepath(pos, GFP_KERNEL);
if (!ctx->prepath)
return -ENOMEM;
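The fs_context.c change above exports the prepath sanitizer so misc.c can reuse it: runs of '/' or '\' collapse to a single separator, and leading and trailing delimiters are stripped. A standalone userspace re-implementation of that algorithm (sanitizing in place rather than duplicating with kstrdup):

    #include <stdio.h>

    #define IS_DELIM(c) ((c) == '/' || (c) == '\\')

    static void sanitize_prepath(char *p)
    {
            char *src = p, *dst = p;

            while (IS_DELIM(*src))                /* skip prepended delimiters */
                    src++;

            while (*src) {
                    if (IS_DELIM(*src))
                            while (IS_DELIM(src[1]))  /* collapse repeats */
                                    src++;
                    *dst++ = *src++;
            }
            if (dst > p && IS_DELIM(dst[-1]))     /* drop one trailing delimiter */
                    dst--;
            *dst = '\0';
    }

    int main(void)
    {
            char path[] = "//dir1\\\\dir2///dir3/";

            sanitize_prepath(path);
            printf("'%s'\n", path); /* 'dir1\dir2/dir3' */
            return 0;
    }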
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
index 3de00e7127ec..f4eaf8558902 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/cifs/fs_context.h
@@ -287,4 +287,7 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
*/
#define SMB3_MAX_DCLOSETIMEO (1 << 30)
#define SMB3_DEF_DCLOSETIMEO (1 * HZ) /* even 1 sec enough to help eg open/write/close/open/read */
+
+extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp);
+
#endif
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index b44fb51968bf..7f085ed2d866 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1195,7 +1195,7 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix)
kfree(cifs_sb->prepath);
if (prefix && *prefix) {
- cifs_sb->prepath = kstrdup(prefix, GFP_ATOMIC);
+ cifs_sb->prepath = cifs_sanitize_prepath(prefix, GFP_ATOMIC);
if (!cifs_sb->prepath)
return -ENOMEM;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 6bd2aa6af18f..4245249dbba8 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -310,7 +310,6 @@ out:
case SMB2_READ:
case SMB2_WRITE:
case SMB2_LOCK:
- case SMB2_IOCTL:
case SMB2_QUERY_DIRECTORY:
case SMB2_CHANGE_NOTIFY:
case SMB2_QUERY_INFO:
@@ -588,11 +587,15 @@ assemble_neg_contexts(struct smb2_negotiate_req *req,
}
+/* If invalid preauth context warn but use what we requested, SHA-512 */
static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt)
{
unsigned int len = le16_to_cpu(ctxt->DataLength);
- /* If invalid preauth context warn but use what we requested, SHA-512 */
+ /*
+ * Caller checked that DataLength remains within SMB boundary. We still
+ * need to confirm that one HashAlgorithms member is accounted for.
+ */
if (len < MIN_PREAUTH_CTXT_DATA_LEN) {
pr_warn_once("server sent bad preauth context\n");
return;
@@ -611,7 +614,11 @@ static void decode_compress_ctx(struct TCP_Server_Info *server,
{
unsigned int len = le16_to_cpu(ctxt->DataLength);
- /* sizeof compress context is a one element compression capbility struct */
+ /*
+ * Caller checked that DataLength remains within SMB boundary. We still
+ * need to confirm that one CompressionAlgorithms member is accounted
+ * for.
+ */
if (len < 10) {
pr_warn_once("server sent bad compression cntxt\n");
return;
@@ -633,6 +640,11 @@ static int decode_encrypt_ctx(struct TCP_Server_Info *server,
unsigned int len = le16_to_cpu(ctxt->DataLength);
cifs_dbg(FYI, "decode SMB3.11 encryption neg context of len %d\n", len);
+ /*
+ * Caller checked that DataLength remains within SMB boundary. We still
+ * need to confirm that one Cipher flexible array member is accounted
+ * for.
+ */
if (len < MIN_ENCRYPT_CTXT_DATA_LEN) {
pr_warn_once("server sent bad crypto ctxt len\n");
return -EINVAL;
@@ -679,6 +691,11 @@ static void decode_signing_ctx(struct TCP_Server_Info *server,
{
unsigned int len = le16_to_cpu(pctxt->DataLength);
+ /*
+ * Caller checked that DataLength remains within SMB boundary. We still
+ * need to confirm that one SigningAlgorithms flexible array member is
+ * accounted for.
+ */
if ((len < 4) || (len > 16)) {
pr_warn_once("server sent bad signing negcontext\n");
return;
@@ -720,14 +737,19 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
for (i = 0; i < ctxt_cnt; i++) {
int clen;
/* check that offset is not beyond end of SMB */
- if (len_of_ctxts == 0)
- break;
-
if (len_of_ctxts < sizeof(struct smb2_neg_context))
break;
pctx = (struct smb2_neg_context *)(offset + (char *)rsp);
- clen = le16_to_cpu(pctx->DataLength);
+ clen = sizeof(struct smb2_neg_context)
+ + le16_to_cpu(pctx->DataLength);
+ /*
+ * 2.2.4 SMB2 NEGOTIATE Response
+ * Subsequent negotiate contexts MUST appear at the first 8-byte
+ * aligned offset following the previous negotiate context.
+ */
+ if (i + 1 != ctxt_cnt)
+ clen = ALIGN(clen, 8);
if (clen > len_of_ctxts)
break;
@@ -748,12 +770,10 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
else
cifs_server_dbg(VFS, "unknown negcontext of type %d ignored\n",
le16_to_cpu(pctx->ContextType));
-
if (rc)
break;
- /* offsets must be 8 byte aligned */
- clen = ALIGN(clen, 8);
- offset += clen + sizeof(struct smb2_neg_context);
+
+ offset += clen;
len_of_ctxts -= clen;
}
return rc;
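The smb2pdu rework above changes how the negotiate-context walk advances: each context occupies its header plus DataLength bytes, and every context except the last is padded out to the next 8-byte aligned offset. A sketch of that walk over a synthetic buffer (the header struct is simplified; a real SMB2 response carries more fields):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define ALIGN8(x) (((x) + 7) & ~7UL)

    struct neg_ctx_hdr {        /* simplified: type + length only */
            uint16_t type;
            uint16_t data_length;
    };

    int main(void)
    {
            /* Two contexts: type 1 with 5 data bytes, type 2 with 2. */
            uint8_t buf[32] = { 0 };
            struct neg_ctx_hdr h1 = { 1, 5 }, h2 = { 2, 2 };
            unsigned int offset = 0, i, count = 2;

            memcpy(buf, &h1, sizeof(h1));
            memcpy(buf + ALIGN8(sizeof(h1) + h1.data_length), &h2, sizeof(h2));

            for (i = 0; i < count; i++) {
                    struct neg_ctx_hdr h;
                    unsigned int clen;

                    memcpy(&h, buf + offset, sizeof(h));
                    clen = sizeof(h) + h.data_length;
                    if (i + 1 != count)     /* pad all but the last context */
                            clen = ALIGN8(clen);
                    printf("ctx %u: type %u at offset %u, consumes %u bytes\n",
                           i, (unsigned int)h.type, offset, clen);
                    offset += clen;
            }
            return 0;
    }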
diff --git a/fs/dax.c b/fs/dax.c
index 3e457a16c7d1..2ababb89918d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -781,6 +781,33 @@ out:
return ret;
}
+static int __dax_clear_dirty_range(struct address_space *mapping,
+ pgoff_t start, pgoff_t end)
+{
+ XA_STATE(xas, &mapping->i_pages, start);
+ unsigned int scanned = 0;
+ void *entry;
+
+ xas_lock_irq(&xas);
+ xas_for_each(&xas, entry, end) {
+ entry = get_unlocked_entry(&xas, 0);
+ xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
+ xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
+ put_unlocked_entry(&xas, entry, WAKE_NEXT);
+
+ if (++scanned % XA_CHECK_SCHED)
+ continue;
+
+ xas_pause(&xas);
+ xas_unlock_irq(&xas);
+ cond_resched();
+ xas_lock_irq(&xas);
+ }
+ xas_unlock_irq(&xas);
+
+ return 0;
+}
+
/*
* Delete DAX entry at @index from @mapping. Wait for it
* to be unlocked before deleting it.
@@ -1258,15 +1285,20 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
/* don't bother with blocks that are not shared to start with */
if (!(iomap->flags & IOMAP_F_SHARED))
return length;
- /* don't bother with holes or unwritten extents */
- if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
- return length;
id = dax_read_lock();
ret = dax_iomap_direct_access(iomap, pos, length, &daddr, NULL);
if (ret < 0)
goto out_unlock;
+	/* zero the destination if srcmap is HOLE or UNWRITTEN */
+ if (srcmap->flags & IOMAP_F_SHARED || srcmap->type == IOMAP_UNWRITTEN) {
+ memset(daddr, 0, length);
+ dax_flush(iomap->dax_dev, daddr, length);
+ ret = length;
+ goto out_unlock;
+ }
+
ret = dax_iomap_direct_access(srcmap, pos, length, &saddr, NULL);
if (ret < 0)
goto out_unlock;
@@ -1435,6 +1467,16 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
* written by write(2) is visible in mmap.
*/
if (iomap->flags & IOMAP_F_NEW || cow) {
+ /*
+ * Filesystem allows CoW on non-shared extents. The src extents
+ * may have been mmapped with dirty mark before. To be able to
+ * invalidate its dax entries, we need to clear the dirty mark
+ * in advance.
+ */
+ if (cow)
+ __dax_clear_dirty_range(iomi->inode->i_mapping,
+ pos >> PAGE_SHIFT,
+ (end - 1) >> PAGE_SHIFT);
invalidate_inode_pages2_range(iomi->inode->i_mapping,
pos >> PAGE_SHIFT,
(end - 1) >> PAGE_SHIFT);
@@ -2022,8 +2064,8 @@ int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
while ((ret = iomap_iter(&src_iter, ops)) > 0 &&
(ret = iomap_iter(&dst_iter, ops)) > 0) {
- compared = dax_range_compare_iter(&src_iter, &dst_iter, len,
- same);
+ compared = dax_range_compare_iter(&src_iter, &dst_iter,
+ min(src_iter.len, dst_iter.len), same);
if (compared < 0)
return ret;
src_iter.processed = dst_iter.processed = compared;
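Two details in the dax changes above are easy to gloss over: the compare loop now advances by the smaller of the two iterators' lengths, and the dirty-range clearing converts a byte range into page indices with an inclusive end. A sketch of that index arithmetic, assuming 4 KiB pages:

    #include <stdio.h>

    #define PAGE_SHIFT 12 /* 4 KiB pages assumed */

    int main(void)
    {
            unsigned long long pos = 4096, end = 12288; /* bytes [4096, 12288) */

            unsigned long long first = pos >> PAGE_SHIFT;
            unsigned long long last = (end - 1) >> PAGE_SHIFT;

            /* (end - 1) keeps a range ending on a page boundary from
             * spilling into the next page. */
            printf("pages %llu..%llu\n", first, last); /* pages 1..2 */
            return 0;
    }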
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 195dc23e0d83..1db3e3c24b43 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -978,6 +978,16 @@ restart:
continue;
}
+ /*
+ * If wb_tryget fails, the wb has been shutdown, skip it.
+ *
+ * Pin @wb so that it stays on @bdi->wb_list. This allows
+ * continuing iteration from @wb after dropping and
+ * regrabbing rcu read lock.
+ */
+ if (!wb_tryget(wb))
+ continue;
+
/* alloc failed, execute synchronously using on-stack fallback */
work = &fallback_work;
*work = *base_work;
@@ -986,13 +996,6 @@ restart:
work->done = &fallback_work_done;
wb_queue_work(wb, work);
-
- /*
- * Pin @wb so that it stays on @bdi->wb_list. This allows
- * continuing iteration from @wb after dropping and
- * regrabbing rcu read lock.
- */
- wb_get(wb);
last_wb = wb;
rcu_read_unlock();
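The fs-writeback fix above replaces an unconditional wb_get() with wb_tryget(), which only takes a reference while the count is still nonzero, so a writeback structure that has already been shut down is skipped rather than revived. A sketch of the tryget pattern with C11 atomics:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool tryget(atomic_int *refcnt)
    {
            int old = atomic_load(refcnt);

            while (old > 0) {
                    /* On failure the CAS reloads 'old' and we re-check it. */
                    if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
                            return true;    /* reference taken */
            }
            return false;                   /* object already dead: skip it */
    }

    int main(void)
    {
            atomic_int live = 1, dead = 0;

            printf("live: %d\n", tryget(&live)); /* 1 */
            printf("dead: %d\n", tryget(&dead)); /* 0 */
            return 0;
    }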
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
index 115a67d2cf78..365ac32af505 100644
--- a/fs/ksmbd/connection.c
+++ b/fs/ksmbd/connection.c
@@ -112,10 +112,8 @@ void ksmbd_conn_enqueue_request(struct ksmbd_work *work)
struct ksmbd_conn *conn = work->conn;
struct list_head *requests_queue = NULL;
- if (conn->ops->get_cmd_val(work) != SMB2_CANCEL_HE) {
+ if (conn->ops->get_cmd_val(work) != SMB2_CANCEL_HE)
requests_queue = &conn->requests;
- work->synchronous = true;
- }
if (requests_queue) {
atomic_inc(&conn->req_running);
@@ -136,14 +134,14 @@ int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work)
if (!work->multiRsp)
atomic_dec(&conn->req_running);
- spin_lock(&conn->request_lock);
if (!work->multiRsp) {
+ spin_lock(&conn->request_lock);
list_del_init(&work->request_entry);
- if (!work->synchronous)
- list_del_init(&work->async_request_entry);
+ spin_unlock(&conn->request_lock);
+ if (work->asynchronous)
+ release_async_work(work);
ret = 0;
}
- spin_unlock(&conn->request_lock);
wake_up_all(&conn->req_running_q);
return ret;
@@ -326,10 +324,7 @@ int ksmbd_conn_handler_loop(void *p)
/* 4 for rfc1002 length field */
size = pdu_size + 4;
- conn->request_buf = kvmalloc(size,
- GFP_KERNEL |
- __GFP_NOWARN |
- __GFP_NORETRY);
+ conn->request_buf = kvmalloc(size, GFP_KERNEL);
if (!conn->request_buf)
break;
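Two things happen in the connection.c hunks: the dequeue path now holds conn->request_lock only for the list unlink, deferring the heavier async teardown until after the unlock, and the receive buffer drops __GFP_NOWARN | __GFP_NORETRY so allocation behaves normally. A sketch of the lock-scope half, with hypothetical names:

#include <pthread.h>

struct node { struct node *prev, *next; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void heavy_teardown(struct node *n) { (void)n; /* slow cleanup */ }

static void dequeue(struct node *n, int async)
{
	pthread_mutex_lock(&list_lock);
	n->prev->next = n->next;	/* unlink: the only step that */
	n->next->prev = n->prev;	/* touches shared list state  */
	pthread_mutex_unlock(&list_lock);

	if (async)
		heavy_teardown(n);	/* runs safely outside the lock */
}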
diff --git a/fs/ksmbd/ksmbd_work.h b/fs/ksmbd/ksmbd_work.h
index 3234f2cf6327..f8ae6144c0ae 100644
--- a/fs/ksmbd/ksmbd_work.h
+++ b/fs/ksmbd/ksmbd_work.h
@@ -68,7 +68,7 @@ struct ksmbd_work {
/* Request is encrypted */
bool encrypted:1;
/* Is this SYNC or ASYNC ksmbd_work */
- bool synchronous:1;
+ bool asynchronous:1;
bool need_invalidate_rkey:1;
unsigned int remote_key;
diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c
index 394b6ceac431..0d8242789dc8 100644
--- a/fs/ksmbd/server.c
+++ b/fs/ksmbd/server.c
@@ -289,10 +289,7 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn)
work->request_buf = conn->request_buf;
conn->request_buf = NULL;
- if (ksmbd_init_smb_server(work)) {
- ksmbd_free_work_struct(work);
- return -EINVAL;
- }
+ ksmbd_init_smb_server(work);
ksmbd_conn_enqueue_request(work);
atomic_inc(&conn->r_count);
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 97c9d1b5bcc0..67b7e766a06b 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -229,9 +229,6 @@ int init_smb2_neg_rsp(struct ksmbd_work *work)
struct smb2_negotiate_rsp *rsp;
struct ksmbd_conn *conn = work->conn;
- if (conn->need_neg == false)
- return -EINVAL;
-
*(__be32 *)work->response_buf =
cpu_to_be32(conn->vals->header_size);
@@ -498,12 +495,6 @@ int init_smb2_rsp_hdr(struct ksmbd_work *work)
rsp_hdr->SessionId = rcv_hdr->SessionId;
memcpy(rsp_hdr->Signature, rcv_hdr->Signature, 16);
- work->synchronous = true;
- if (work->async_id) {
- ksmbd_release_id(&conn->async_ida, work->async_id);
- work->async_id = 0;
- }
-
return 0;
}
@@ -644,7 +635,7 @@ int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
pr_err("Failed to alloc async message id\n");
return id;
}
- work->synchronous = false;
+ work->asynchronous = true;
work->async_id = id;
rsp_hdr->Id.AsyncId = cpu_to_le64(id);
@@ -664,6 +655,24 @@ int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
return 0;
}
+void release_async_work(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+
+ spin_lock(&conn->request_lock);
+ list_del_init(&work->async_request_entry);
+ spin_unlock(&conn->request_lock);
+
+ work->asynchronous = 0;
+ work->cancel_fn = NULL;
+ kfree(work->cancel_argv);
+ work->cancel_argv = NULL;
+ if (work->async_id) {
+ ksmbd_release_id(&conn->async_ida, work->async_id);
+ work->async_id = 0;
+ }
+}
+
void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status)
{
struct smb2_hdr *rsp_hdr;
@@ -867,17 +876,21 @@ static void assemble_neg_contexts(struct ksmbd_conn *conn,
}
static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn,
- struct smb2_preauth_neg_context *pneg_ctxt)
+ struct smb2_preauth_neg_context *pneg_ctxt,
+ int len_of_ctxts)
{
- __le32 err = STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP;
+ /*
+ * sizeof(smb2_preauth_neg_context) assumes SMB311_SALT_SIZE Salt,
+ * which may not be present. Only check for used HashAlgorithms[1].
+ */
+ if (len_of_ctxts < MIN_PREAUTH_CTXT_DATA_LEN)
+ return STATUS_INVALID_PARAMETER;
- if (pneg_ctxt->HashAlgorithms == SMB2_PREAUTH_INTEGRITY_SHA512) {
- conn->preauth_info->Preauth_HashId =
- SMB2_PREAUTH_INTEGRITY_SHA512;
- err = STATUS_SUCCESS;
- }
+ if (pneg_ctxt->HashAlgorithms != SMB2_PREAUTH_INTEGRITY_SHA512)
+ return STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP;
- return err;
+ conn->preauth_info->Preauth_HashId = SMB2_PREAUTH_INTEGRITY_SHA512;
+ return STATUS_SUCCESS;
}
static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
@@ -1005,7 +1018,8 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
break;
status = decode_preauth_ctxt(conn,
- (struct smb2_preauth_neg_context *)pctx);
+ (struct smb2_preauth_neg_context *)pctx,
+ len_of_ctxts);
if (status != STATUS_SUCCESS)
break;
} else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES) {
@@ -7045,13 +7059,9 @@ skip:
ksmbd_vfs_posix_lock_wait(flock);
- spin_lock(&work->conn->request_lock);
spin_lock(&fp->f_lock);
list_del(&work->fp_entry);
- work->cancel_fn = NULL;
- kfree(argv);
spin_unlock(&fp->f_lock);
- spin_unlock(&work->conn->request_lock);
if (work->state != KSMBD_WORK_ACTIVE) {
list_del(&smb_lock->llist);
@@ -7069,6 +7079,7 @@ skip:
work->send_no_response = 1;
goto out;
}
+
init_smb2_rsp_hdr(work);
smb2_set_err_rsp(work);
rsp->hdr.Status =
@@ -7081,7 +7092,7 @@ skip:
spin_lock(&work->conn->llist_lock);
list_del(&smb_lock->clist);
spin_unlock(&work->conn->llist_lock);
-
+ release_async_work(work);
goto retry;
} else if (!rc) {
spin_lock(&work->conn->llist_lock);
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
index 0c8a770fe318..9420dd2813fb 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/ksmbd/smb2pdu.h
@@ -486,6 +486,7 @@ int find_matching_smb2_dialect(int start_index, __le16 *cli_dialects,
struct file_lock *smb_flock_init(struct file *f);
int setup_async_work(struct ksmbd_work *work, void (*fn)(void **),
void **arg);
+void release_async_work(struct ksmbd_work *work);
void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status);
struct channel *lookup_chann_list(struct ksmbd_session *sess,
struct ksmbd_conn *conn);
diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c
index 9c1ce6d199ce..af0c2a9b8529 100644
--- a/fs/ksmbd/smb_common.c
+++ b/fs/ksmbd/smb_common.c
@@ -283,20 +283,121 @@ err_out:
return BAD_PROT_ID;
}
-int ksmbd_init_smb_server(struct ksmbd_work *work)
+#define SMB_COM_NEGOTIATE_EX 0x0
+
+/**
+ * get_smb1_cmd_val() - get smb command value from smb header
+ * @work: smb work containing smb header
+ *
+ * Return: smb command value
+ */
+static u16 get_smb1_cmd_val(struct ksmbd_work *work)
{
- struct ksmbd_conn *conn = work->conn;
+ return SMB_COM_NEGOTIATE_EX;
+}
- if (conn->need_neg == false)
+/**
+ * init_smb1_rsp_hdr() - initialize smb negotiate response header
+ * @work: smb work containing smb request
+ *
+ * Return: 0 on success, otherwise -EINVAL
+ */
+static int init_smb1_rsp_hdr(struct ksmbd_work *work)
+{
+ struct smb_hdr *rsp_hdr = (struct smb_hdr *)work->response_buf;
+ struct smb_hdr *rcv_hdr = (struct smb_hdr *)work->request_buf;
+
+ /*
+ * Remove 4 byte direct TCP header.
+ */
+ *(__be32 *)work->response_buf =
+ cpu_to_be32(sizeof(struct smb_hdr) - 4);
+
+ rsp_hdr->Command = SMB_COM_NEGOTIATE;
+ *(__le32 *)rsp_hdr->Protocol = SMB1_PROTO_NUMBER;
+ rsp_hdr->Flags = SMBFLG_RESPONSE;
+ rsp_hdr->Flags2 = SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS |
+ SMBFLG2_EXT_SEC | SMBFLG2_IS_LONG_NAME;
+ rsp_hdr->Pid = rcv_hdr->Pid;
+ rsp_hdr->Mid = rcv_hdr->Mid;
+ return 0;
+}
+
+/**
+ * smb1_check_user_session() - check for valid session for a user
+ * @work: smb work containing smb request buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int smb1_check_user_session(struct ksmbd_work *work)
+{
+ unsigned int cmd = work->conn->ops->get_cmd_val(work);
+
+ if (cmd == SMB_COM_NEGOTIATE_EX)
return 0;
- init_smb3_11_server(conn);
+ return -EINVAL;
+}
+
+/**
+ * smb1_allocate_rsp_buf() - allocate response buffer for a command
+ * @work: smb work containing smb request
+ *
+ * Return: 0 on success, otherwise -ENOMEM
+ */
+static int smb1_allocate_rsp_buf(struct ksmbd_work *work)
+{
+ work->response_buf = kmalloc(MAX_CIFS_SMALL_BUFFER_SIZE,
+ GFP_KERNEL | __GFP_ZERO);
+ work->response_sz = MAX_CIFS_SMALL_BUFFER_SIZE;
+
+ if (!work->response_buf) {
+ pr_err("Failed to allocate %u bytes buffer\n",
+ MAX_CIFS_SMALL_BUFFER_SIZE);
+ return -ENOMEM;
+ }
- if (conn->ops->get_cmd_val(work) != SMB_COM_NEGOTIATE)
- conn->need_neg = false;
return 0;
}
+static struct smb_version_ops smb1_server_ops = {
+ .get_cmd_val = get_smb1_cmd_val,
+ .init_rsp_hdr = init_smb1_rsp_hdr,
+ .allocate_rsp_buf = smb1_allocate_rsp_buf,
+ .check_user_session = smb1_check_user_session,
+};
+
+static int smb1_negotiate(struct ksmbd_work *work)
+{
+ return ksmbd_smb_negotiate_common(work, SMB_COM_NEGOTIATE);
+}
+
+static struct smb_version_cmds smb1_server_cmds[1] = {
+ [SMB_COM_NEGOTIATE_EX] = { .proc = smb1_negotiate, },
+};
+
+static void init_smb1_server(struct ksmbd_conn *conn)
+{
+ conn->ops = &smb1_server_ops;
+ conn->cmds = smb1_server_cmds;
+ conn->max_cmds = ARRAY_SIZE(smb1_server_cmds);
+}
+
+void ksmbd_init_smb_server(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ __le32 proto;
+
+ if (conn->need_neg == false)
+ return;
+
+ proto = *(__le32 *)((struct smb_hdr *)work->request_buf)->Protocol;
+ if (proto == SMB1_PROTO_NUMBER)
+ init_smb1_server(conn);
+ else
+ init_smb3_11_server(conn);
+}
+
int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
struct ksmbd_file *dir,
struct ksmbd_dir_info *d_info,
@@ -444,20 +545,10 @@ static int smb_handle_negotiate(struct ksmbd_work *work)
ksmbd_debug(SMB, "Unsupported SMB1 protocol\n");
- /*
- * Remove 4 byte direct TCP header, add 2 byte bcc and
- * 2 byte DialectIndex.
- */
- *(__be32 *)work->response_buf =
- cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2 + 2);
+ /* Add 2 byte bcc and 2 byte DialectIndex. */
+ inc_rfc1001_len(work->response_buf, 4);
neg_rsp->hdr.Status.CifsError = STATUS_SUCCESS;
- neg_rsp->hdr.Command = SMB_COM_NEGOTIATE;
- *(__le32 *)neg_rsp->hdr.Protocol = SMB1_PROTO_NUMBER;
- neg_rsp->hdr.Flags = SMBFLG_RESPONSE;
- neg_rsp->hdr.Flags2 = SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS |
- SMBFLG2_EXT_SEC | SMBFLG2_IS_LONG_NAME;
-
neg_rsp->hdr.WordCount = 1;
neg_rsp->DialectIndex = cpu_to_le16(work->conn->dialect);
neg_rsp->ByteCount = 0;
@@ -474,23 +565,12 @@ int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command)
ksmbd_debug(SMB, "conn->dialect 0x%x\n", conn->dialect);
if (command == SMB2_NEGOTIATE_HE) {
- struct smb2_hdr *smb2_hdr = smb2_get_msg(work->request_buf);
-
- if (smb2_hdr->ProtocolId != SMB2_PROTO_NUMBER) {
- ksmbd_debug(SMB, "Downgrade to SMB1 negotiation\n");
- command = SMB_COM_NEGOTIATE;
- }
- }
-
- if (command == SMB2_NEGOTIATE_HE) {
ret = smb2_handle_negotiate(work);
- init_smb2_neg_rsp(work);
return ret;
}
if (command == SMB_COM_NEGOTIATE) {
if (__smb2_negotiate(conn)) {
- conn->need_neg = true;
init_smb3_11_server(conn);
init_smb2_neg_rsp(work);
ksmbd_debug(SMB, "Upgrade to SMB2 negotiation\n");
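The new SMB1 handling is deliberately a stub: a connection that negotiates SMB1 gets an ops table whose only reachable command is NEGOTIATE, and smb1_check_user_session() fails everything else. A compact model of that kind of per-dialect ops-table dispatch, with invented names:

#include <stdio.h>

struct conn;

struct ops {
	int (*get_cmd)(struct conn *c);
	int (*handle)(struct conn *c, int cmd);
};

struct conn { const struct ops *ops; };

static int stub_get_cmd(struct conn *c) { (void)c; return 0; }
static int stub_handle(struct conn *c, int cmd)
{
	(void)c;
	return cmd == 0 ? 0 : -1;	/* only NEGOTIATE succeeds */
}

static const struct ops smb1_stub_ops = { stub_get_cmd, stub_handle };

int main(void)
{
	struct conn c = { &smb1_stub_ops };

	/* prints 0: the lone supported command dispatches cleanly */
	printf("%d\n", c.ops->handle(&c, c.ops->get_cmd(&c)));
	return 0;
}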
diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h
index d30ce4c1a151..9130d2e3cd78 100644
--- a/fs/ksmbd/smb_common.h
+++ b/fs/ksmbd/smb_common.h
@@ -427,7 +427,7 @@ bool ksmbd_smb_request(struct ksmbd_conn *conn);
int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count);
-int ksmbd_init_smb_server(struct ksmbd_work *work);
+void ksmbd_init_smb_server(struct ksmbd_work *work);
struct ksmbd_kstat;
int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work,
diff --git a/fs/ksmbd/unicode.c b/fs/ksmbd/unicode.c
index a0db699ddafd..9ae676906ed3 100644
--- a/fs/ksmbd/unicode.c
+++ b/fs/ksmbd/unicode.c
@@ -114,24 +114,6 @@ cp_convert:
}
/*
- * is_char_allowed() - check for valid character
- * @ch: input character to be checked
- *
- * Return: 1 if char is allowed, otherwise 0
- */
-static inline int is_char_allowed(char *ch)
-{
- /* check for control chars, wildcards etc. */
- if (!(*ch & 0x80) &&
- (*ch <= 0x1f ||
- *ch == '?' || *ch == '"' || *ch == '<' ||
- *ch == '>' || *ch == '|'))
- return 0;
-
- return 1;
-}
-
-/*
* smb_from_utf16() - convert utf16le string to local charset
* @to: destination buffer
* @from: source buffer
diff --git a/fs/namespace.c b/fs/namespace.c
index bc0f15257b49..6836e937ee61 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4183,9 +4183,9 @@ out:
unlock_mount_hash();
if (kattr->propagation) {
- namespace_unlock();
if (err)
cleanup_group_ids(mnt, NULL);
+ namespace_unlock();
}
return err;
diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c
index e9a45dea748a..8a4c86687429 100644
--- a/fs/netfs/iterator.c
+++ b/fs/netfs/iterator.c
@@ -139,7 +139,7 @@ static ssize_t netfs_extract_user_to_sg(struct iov_iter *iter,
size_t seg = min_t(size_t, PAGE_SIZE - off, len);
*pages++ = NULL;
- sg_set_page(sg, page, len, off);
+ sg_set_page(sg, page, seg, off);
sgtable->nents++;
sg++;
len -= seg;
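The netfs change is a one-word fix with outsized effect: each scatterlist entry must carry the clamped per-page segment (seg), not the whole remaining length (len), or every entry overstates its size. The clamping loop on its own:

#include <stdio.h>

#define PAGE_SIZE 4096

int main(void)
{
	unsigned long len = 10000, off = 100;

	while (len) {
		unsigned long seg = PAGE_SIZE - off < len ? PAGE_SIZE - off : len;

		printf("entry: off=%lu len=%lu\n", off, seg);	/* seg, never len */
		len -= seg;
		off = 0;	/* pages after the first start at offset 0 */
	}
	return 0;
}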
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 450d6c3bc05e..c1c7ed2fd860 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -75,7 +75,6 @@ config NFS_V3_ACL
config NFS_V4
tristate "NFS client support for NFS version 4"
depends on NFS_FS
- select RPCSEC_GSS_KRB5
select KEYS
help
This option enables support for version 4 of the NFS protocol
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 22a93ae46cd7..5607b1e2b821 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1980,8 +1980,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
if (!data->rpc_done) {
if (data->rpc_status)
return ERR_PTR(data->rpc_status);
- /* cached opens have already been processed */
- goto update;
+ return nfs4_try_open_cached(data);
}
ret = nfs_refresh_inode(inode, &data->f_attr);
@@ -1990,7 +1989,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
-update:
+
if (!update_open_stateid(state, &data->o_res.stateid,
NULL, data->o_arg.fmode))
return ERR_PTR(-EAGAIN);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 04697f8dc37d..01d7fd108cf3 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -297,6 +297,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
out_free_dev:
kfree(dev);
+ gdp->gd_device = NULL;
return ret;
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 2a815f5a52c4..4039ffcf90ba 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -946,8 +946,8 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r
if (!kcred)
return NULL;
- kcred->uid = ses->se_cb_sec.uid;
- kcred->gid = ses->se_cb_sec.gid;
+ kcred->fsuid = ses->se_cb_sec.uid;
+ kcred->fsgid = ses->se_cb_sec.gid;
return kcred;
}
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e12e5a4ad502..e2e485167ac4 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2476,10 +2476,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
for (i = 0; i < argp->opcnt; i++) {
op = &argp->ops[i];
op->replay = NULL;
+ op->opdesc = NULL;
if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0)
return false;
if (nfsd4_opnum_in_range(argp, op)) {
+ op->opdesc = OPDESC(op);
op->status = nfsd4_dec_ops[op->opnum](argp, &op->u);
if (op->status != nfs_ok)
trace_nfsd_compound_decode_err(argp->rqstp,
@@ -2490,7 +2492,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
op->opnum = OP_ILLEGAL;
op->status = nfserr_op_illegal;
}
- op->opdesc = OPDESC(op);
+
/*
* We'll try to cache the result in the DRC if any one
* op in the compound wants to be cached:
@@ -5400,10 +5402,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
__be32 *p;
p = xdr_reserve_space(xdr, 8);
- if (!p) {
- WARN_ON_ONCE(1);
- return;
- }
+ if (!p)
+ goto release;
*p++ = cpu_to_be32(op->opnum);
post_err_offset = xdr->buf->len;
@@ -5418,8 +5418,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
op->status = encoder(resp, op->status, &op->u);
if (op->status)
trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
- if (opdesc && opdesc->op_release)
- opdesc->op_release(&op->u);
xdr_commit_encode(xdr);
/* nfsd4_check_resp_size guarantees enough room for error status */
@@ -5460,6 +5458,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
}
status:
*p = op->status;
+release:
+ if (opdesc && opdesc->op_release)
+ opdesc->op_release(&op->u);
}
/*
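In nfsd4_encode_operation(), op_release used to run only on the main path, so the early return on a failed xdr_reserve_space() leaked whatever the decoded op held; the new release label funnels both exits through the same cleanup. The classic single-exit shape, in a self-contained sketch:

#include <stdlib.h>
#include <string.h>

static int encode(char **out, char *op_data)
{
	int ret = 0;
	char *buf = malloc(64);

	if (!buf) {
		ret = -1;
		goto release;		/* failure path reaches cleanup too */
	}
	strcpy(buf, "encoded");		/* stand-in for the real encoding */
	*out = buf;

release:
	free(op_data);			/* runs on success and failure alike */
	return ret;
}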
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 2681a449edc1..13592e82eaf6 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -2219,6 +2219,7 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
/* on-disk format */
binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
binfo->bi_dat.bi_level = level;
+ memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
return 0;
}
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index a35f2795b242..4c85914f2abc 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -314,6 +314,7 @@ static int nilfs_direct_assign_p(struct nilfs_bmap *direct,
binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
binfo->bi_dat.bi_level = 0;
+ memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
return 0;
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 19446a8243d7..228659612c0d 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -430,6 +430,23 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
return 0;
}
+/**
+ * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area
+ * @sci: segment constructor object
+ *
+ * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of
+ * the current segment summary block.
+ */
+static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci)
+{
+ struct nilfs_segsum_pointer *ssp;
+
+ ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr;
+ if (ssp->offset < ssp->bh->b_size)
+ memset(ssp->bh->b_data + ssp->offset, 0,
+ ssp->bh->b_size - ssp->offset);
+}
+
static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
@@ -438,6 +455,7 @@ static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
* The current segment is filled up
* (internal code)
*/
+ nilfs_segctor_zeropad_segsum(sci);
sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
return nilfs_segctor_reset_segment_buffer(sci);
}
@@ -542,6 +560,7 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
goto retry;
}
if (unlikely(required)) {
+ nilfs_segctor_zeropad_segsum(sci);
err = nilfs_segbuf_extend_segsum(segbuf);
if (unlikely(err))
goto failed;
@@ -1533,6 +1552,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
sci->sc_stage = prev_stage;
}
+ nilfs_segctor_zeropad_segsum(sci);
nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
return 0;
@@ -2609,11 +2629,10 @@ static int nilfs_segctor_thread(void *arg)
goto loop;
end_thread:
- spin_unlock(&sci->sc_state_lock);
-
/* end sync. */
sci->sc_task = NULL;
wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
+ spin_unlock(&sci->sc_state_lock);
return 0;
}
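nilfs_segctor_zeropad_segsum() closes an information leak: the unused tail of the segment summary block previously went to disk holding whatever stale bytes the buffer contained. The guard in miniature; the kernel version first picks between two summary pointers, but the memset over [used, size) is the whole fix:

#include <stddef.h>
#include <string.h>

struct block {
	size_t used;		/* bytes actually filled in */
	char data[512];
};

static void zeropad(struct block *b)
{
	if (b->used < sizeof(b->data))	/* tail may hold stale memory */
		memset(b->data + b->used, 0, sizeof(b->data) - b->used);
}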
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1422b8ba24ed..77f1e5778d1c 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -482,6 +482,7 @@ static void nilfs_put_super(struct super_block *sb)
up_write(&nilfs->ns_sem);
}
+ nilfs_sysfs_delete_device_group(nilfs);
iput(nilfs->ns_sufile);
iput(nilfs->ns_cpfile);
iput(nilfs->ns_dat);
@@ -1105,6 +1106,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
nilfs_put_root(fsroot);
failed_unload:
+ nilfs_sysfs_delete_device_group(nilfs);
iput(nilfs->ns_sufile);
iput(nilfs->ns_cpfile);
iput(nilfs->ns_dat);
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 3a4c9c150cbf..2894152a6b25 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -87,7 +87,6 @@ void destroy_nilfs(struct the_nilfs *nilfs)
{
might_sleep();
if (nilfs_init(nilfs)) {
- nilfs_sysfs_delete_device_group(nilfs);
brelse(nilfs->ns_sbh[0]);
brelse(nilfs->ns_sbh[1]);
}
@@ -305,6 +304,10 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
goto failed;
}
+ err = nilfs_sysfs_create_device_group(sb);
+ if (unlikely(err))
+ goto sysfs_error;
+
if (valid_fs)
goto skip_recovery;
@@ -366,6 +369,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
goto failed;
failed_unload:
+ nilfs_sysfs_delete_device_group(nilfs);
+
+ sysfs_error:
iput(nilfs->ns_cpfile);
iput(nilfs->ns_sufile);
iput(nilfs->ns_dat);
@@ -697,10 +703,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto failed_sbh;
- err = nilfs_sysfs_create_device_group(sb);
- if (err)
- goto failed_sbh;
-
set_nilfs_init(nilfs);
err = 0;
out:
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 44d1ee429eb0..40f9e1a2ebdd 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1955,8 +1955,10 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
ret = -EFAULT;
if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api)))
goto out;
- /* Ignore unsupported features (userspace built against newer kernel) */
- features = uffdio_api.features & UFFD_API_FEATURES;
+ features = uffdio_api.features;
+ ret = -EINVAL;
+ if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES))
+ goto err_out;
ret = -EPERM;
if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))
goto err_out;
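The userfaultfd API change replaces silent masking of unknown feature bits with a hard -EINVAL, so userspace built against a newer kernel learns a feature is missing instead of limping along without it. The strict-validation idiom, with made-up flag names:

#include <errno.h>
#include <stdint.h>

#define FEAT_A	(UINT64_C(1) << 0)
#define FEAT_B	(UINT64_C(1) << 1)
#define SUPPORTED_FEATURES	(FEAT_A | FEAT_B)

static int api_handshake(uint64_t requested)
{
	if (requested & ~SUPPORTED_FEATURES)
		return -EINVAL;		/* reject unknown bits, don't mask */

	/* ... enable exactly the requested feature set ... */
	return 0;
}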
diff --git a/include/acpi/video.h b/include/acpi/video.h
index 8ed9bec03e53..ff5a8da5d883 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -59,8 +59,6 @@ extern void acpi_video_unregister(void);
extern void acpi_video_register_backlight(void);
extern int acpi_video_get_edid(struct acpi_device *device, int type,
int device_id, void **edid);
-extern enum acpi_backlight_type acpi_video_get_backlight_type(void);
-extern bool acpi_video_backlight_use_native(void);
/*
* Note: The value returned by acpi_video_handles_brightness_key_presses()
* may change over time and should not be cached.
@@ -69,6 +67,19 @@ extern bool acpi_video_handles_brightness_key_presses(void);
extern int acpi_video_get_levels(struct acpi_device *device,
struct acpi_video_device_brightness **dev_br,
int *pmax_level);
+
+extern enum acpi_backlight_type __acpi_video_get_backlight_type(bool native,
+ bool *auto_detect);
+
+static inline enum acpi_backlight_type acpi_video_get_backlight_type(void)
+{
+ return __acpi_video_get_backlight_type(false, NULL);
+}
+
+static inline bool acpi_video_backlight_use_native(void)
+{
+ return __acpi_video_get_backlight_type(true, NULL) == acpi_backlight_native;
+}
#else
static inline void acpi_video_report_nolcd(void) { return; };
static inline int acpi_video_register(void) { return -ENODEV; }
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 04b8be9f1a77..e271d6708c87 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -130,7 +130,7 @@ ATOMIC_OP(xor, ^)
#define arch_atomic_read(v) READ_ONCE((v)->counter)
#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
-#define arch_atomic_xchg(ptr, v) (arch_xchg(&(ptr)->counter, (v)))
-#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), (old), (new)))
+#define arch_atomic_xchg(ptr, v) (arch_xchg(&(ptr)->counter, (u32)(v)))
+#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), (u32)(old), (u32)(new)))
#endif /* __ASM_GENERIC_ATOMIC_H */
diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h
index c3e7315b7c1d..3df9f59a544e 100644
--- a/include/asm-generic/cmpxchg-local.h
+++ b/include/asm-generic/cmpxchg-local.h
@@ -26,16 +26,16 @@ static inline unsigned long __generic_cmpxchg_local(volatile void *ptr,
raw_local_irq_save(flags);
switch (size) {
case 1: prev = *(u8 *)ptr;
- if (prev == (u8)old)
- *(u8 *)ptr = (u8)new;
+ if (prev == (old & 0xffu))
+ *(u8 *)ptr = (new & 0xffu);
break;
case 2: prev = *(u16 *)ptr;
- if (prev == (u16)old)
- *(u16 *)ptr = (u16)new;
+ if (prev == (old & 0xffffu))
+ *(u16 *)ptr = (new & 0xffffu);
break;
case 4: prev = *(u32 *)ptr;
- if (prev == (u32)old)
- *(u32 *)ptr = (u32)new;
+ if (prev == (old & 0xffffffffu))
+ *(u32 *)ptr = (new & 0xffffffffu);
break;
case 8: prev = *(u64 *)ptr;
if (prev == old)
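These cmpxchg-local hunks trade casts like (u8)old for masks like (old & 0xffu): the truncation is identical, but the mask keeps the comparison in unsigned long arithmetic and sidesteps compiler warnings about implicit narrowing. A quick check of the equivalence:

#include <assert.h>

int main(void)
{
	unsigned long old = 0x1234;

	/* both select the low byte; only the spelling differs */
	assert((unsigned char)old == (old & 0xffu));
	return 0;
}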
diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h
index dca4419922a9..848de25fc4bf 100644
--- a/include/asm-generic/cmpxchg.h
+++ b/include/asm-generic/cmpxchg.h
@@ -32,7 +32,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size)
#else
local_irq_save(flags);
ret = *(volatile u8 *)ptr;
- *(volatile u8 *)ptr = x;
+ *(volatile u8 *)ptr = (x & 0xffu);
local_irq_restore(flags);
return ret;
#endif /* __xchg_u8 */
@@ -43,7 +43,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size)
#else
local_irq_save(flags);
ret = *(volatile u16 *)ptr;
- *(volatile u16 *)ptr = x;
+ *(volatile u16 *)ptr = (x & 0xffffu);
local_irq_restore(flags);
return ret;
#endif /* __xchg_u16 */
@@ -54,7 +54,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size)
#else
local_irq_save(flags);
ret = *(volatile u32 *)ptr;
- *(volatile u32 *)ptr = x;
+ *(volatile u32 *)ptr = (x & 0xffffffffu);
local_irq_restore(flags);
return ret;
#endif /* __xchg_u32 */
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 4c44a29b5e8e..587e7e9b9a37 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -236,7 +236,7 @@ static inline u64 readq(const volatile void __iomem *addr)
log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
__io_br();
- val = __le64_to_cpu(__raw_readq(addr));
+ val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
__io_ar(val);
log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
return val;
@@ -287,7 +287,7 @@ static inline void writeq(u64 value, volatile void __iomem *addr)
{
log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
__io_bw();
- __raw_writeq(__cpu_to_le64(value), addr);
+ __raw_writeq((u64 __force)__cpu_to_le64(value), addr);
__io_aw();
log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
}
@@ -319,7 +319,7 @@ static inline u16 readw_relaxed(const volatile void __iomem *addr)
u16 val;
log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
- val = __le16_to_cpu(__raw_readw(addr));
+ val = __le16_to_cpu((__le16 __force)__raw_readw(addr));
log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
return val;
}
@@ -332,7 +332,7 @@ static inline u32 readl_relaxed(const volatile void __iomem *addr)
u32 val;
log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
- val = __le32_to_cpu(__raw_readl(addr));
+ val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
return val;
}
@@ -345,7 +345,7 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr)
u64 val;
log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
- val = __le64_to_cpu(__raw_readq(addr));
+ val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
return val;
}
@@ -366,7 +366,7 @@ static inline void writeb_relaxed(u8 value, volatile void __iomem *addr)
static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
{
log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
- __raw_writew(cpu_to_le16(value), addr);
+ __raw_writew((u16 __force)cpu_to_le16(value), addr);
log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
}
#endif
@@ -376,7 +376,7 @@ static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
{
log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
- __raw_writel(__cpu_to_le32(value), addr);
+ __raw_writel((u32 __force)__cpu_to_le32(value), addr);
log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
}
#endif
@@ -386,7 +386,7 @@ static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
static inline void writeq_relaxed(u64 value, volatile void __iomem *addr)
{
log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
- __raw_writeq(__cpu_to_le64(value), addr);
+ __raw_writeq((u64 __force)__cpu_to_le64(value), addr);
log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
}
#endif
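The io.h hunks change no behavior; they add sparse annotations. __raw_readq() returns a plain u64 whose bits happen to be little-endian, and (__le64 __force) tells sparse that reinterpreting it as the bitwise __le64 type before swapping is intentional. Outside the kernel the annotations compile to nothing; a sketch of how they are defined and used:

#include <stdint.h>

#ifdef __CHECKER__			/* defined when sparse runs */
#define __bitwise	__attribute__((bitwise))
#define __force		__attribute__((force))
#else
#define __bitwise
#define __force
#endif

typedef uint64_t __bitwise le64;	/* distinct type under sparse */

/* assume a little-endian host, so the conversion is a no-op */
static inline uint64_t le64_to_cpu(le64 v)
{
	return (__force uint64_t)v;
}

static inline uint64_t read_reg(const volatile uint64_t *addr)
{
	uint64_t raw = *addr;		/* raw bits, no byte order attached */

	return le64_to_cpu((__force le64)raw);	/* the cast from the diff */
}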
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6792a7940e1e..2c6095bd7d69 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -96,11 +96,11 @@ struct bpf_map_ops {
/* funcs callable from userspace and from eBPF programs */
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
- int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
- int (*map_delete_elem)(struct bpf_map *map, void *key);
- int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
- int (*map_pop_elem)(struct bpf_map *map, void *value);
- int (*map_peek_elem)(struct bpf_map *map, void *value);
+ long (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
+ long (*map_delete_elem)(struct bpf_map *map, void *key);
+ long (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+ long (*map_pop_elem)(struct bpf_map *map, void *value);
+ long (*map_peek_elem)(struct bpf_map *map, void *value);
void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu);
/* funcs called by prog_array and perf_event_array map */
@@ -139,7 +139,7 @@ struct bpf_map_ops {
struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
/* Misc helpers.*/
- int (*map_redirect)(struct bpf_map *map, u64 key, u64 flags);
+ long (*map_redirect)(struct bpf_map *map, u64 key, u64 flags);
/* map_meta_equal must be implemented for maps that can be
* used as an inner map. It is a runtime check to ensure
@@ -157,7 +157,7 @@ struct bpf_map_ops {
int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
- int (*map_for_each_callback)(struct bpf_map *map,
+ long (*map_for_each_callback)(struct bpf_map *map,
bpf_callback_t callback_fn,
void *callback_ctx, u64 flags);
@@ -189,9 +189,14 @@ enum btf_field_type {
BPF_RB_NODE | BPF_RB_ROOT,
};
+typedef void (*btf_dtor_kfunc_t)(void *);
+
struct btf_field_kptr {
struct btf *btf;
struct module *module;
+ /* dtor used if btf_is_kernel(btf), otherwise the type is
+ * program-allocated, dtor is NULL, and __bpf_obj_drop_impl is used
+ */
btf_dtor_kfunc_t dtor;
u32 btf_id;
};
@@ -888,8 +893,7 @@ struct bpf_verifier_ops {
struct bpf_prog *prog, u32 *target_size);
int (*btf_struct_access)(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag);
+ int off, int size);
};
struct bpf_prog_offload_ops {
@@ -1098,6 +1102,7 @@ struct bpf_trampoline {
struct bpf_attach_target_info {
struct btf_func_model fmodel;
long tgt_addr;
+ struct module *tgt_mod;
const char *tgt_name;
const struct btf_type *tgt_type;
};
@@ -1401,6 +1406,7 @@ struct bpf_prog_aux {
* main prog always has linfo_idx == 0
*/
u32 linfo_idx;
+ struct module *mod;
u32 num_exentries;
struct exception_table_entry *extable;
union {
@@ -1469,6 +1475,8 @@ struct bpf_link_ops {
void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq);
int (*fill_link_info)(const struct bpf_link *link,
struct bpf_link_info *info);
+ int (*update_map)(struct bpf_link *link, struct bpf_map *new_map,
+ struct bpf_map *old_map);
};
struct bpf_tramp_link {
@@ -1511,6 +1519,8 @@ struct bpf_struct_ops {
void *kdata, const void *udata);
int (*reg)(void *kdata);
void (*unreg)(void *kdata);
+ int (*update)(void *kdata, void *old_kdata);
+ int (*validate)(void *kdata);
const struct btf_type *type;
const struct btf_type *value_type;
const char *name;
@@ -1545,6 +1555,7 @@ static inline void bpf_module_put(const void *data, struct module *owner)
else
module_put(owner);
}
+int bpf_struct_ops_link_create(union bpf_attr *attr);
#ifdef CONFIG_NET
/* Define it here to avoid the use of forward declaration */
@@ -1585,6 +1596,11 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
{
return -EINVAL;
}
+static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
+{
+ return -EOPNOTSUPP;
+}
+
#endif
#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
@@ -1617,8 +1633,12 @@ struct bpf_array {
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 33
-/* Maximum number of loops for bpf_loop */
-#define BPF_MAX_LOOPS BIT(23)
+/* Maximum number of loops for bpf_loop and bpf_iter_num.
+ * It's enum to expose it (and thus make it discoverable) through BTF.
+ */
+enum {
+ BPF_MAX_LOOPS = 8 * 1024 * 1024,
+};
#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \
BPF_F_RDONLY_PROG | \
@@ -1921,7 +1941,7 @@ void bpf_prog_free_id(struct bpf_prog *prog);
void bpf_map_free_id(struct bpf_map *map);
struct btf_field *btf_record_find(const struct btf_record *rec,
- u32 offset, enum btf_field_type type);
+ u32 offset, u32 field_mask);
void btf_record_free(struct btf_record *rec);
void bpf_map_free_record(struct bpf_map *map);
struct btf_record *btf_record_dup(const struct btf_record *rec);
@@ -1934,6 +1954,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
void bpf_map_inc(struct bpf_map *map);
void bpf_map_inc_with_uref(struct bpf_map *map);
+struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref);
struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
@@ -2154,7 +2175,7 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
size_t actual_size);
/* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr);
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size);
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
@@ -2242,7 +2263,7 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size,
int btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag);
+ u32 *next_btf_id, enum bpf_type_flag *flag, const char **field_name);
bool btf_struct_ids_match(struct bpf_verifier_log *log,
const struct btf *btf, u32 id, int off,
const struct btf *need_btf, u32 need_type_id,
@@ -2281,7 +2302,7 @@ struct bpf_core_ctx {
bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, const char *suffix);
+ const char *field_name, u32 btf_id, const char *suffix);
bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log,
const struct btf *reg_btf, u32 reg_id,
@@ -2496,7 +2517,8 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id)
static inline int btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag)
+ u32 *next_btf_id, enum bpf_type_flag *flag,
+ const char **field_name)
{
return -EACCES;
}
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index d934248b8e81..173ec7f43ed1 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -13,6 +13,7 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
+#include <linux/bpf_mem_alloc.h>
#include <uapi/linux/btf.h>
#define BPF_LOCAL_STORAGE_CACHE_SIZE 16
@@ -55,6 +56,9 @@ struct bpf_local_storage_map {
u32 bucket_log;
u16 elem_size;
u16 cache_idx;
+ struct bpf_mem_alloc selem_ma;
+ struct bpf_mem_alloc storage_ma;
+ bool bpf_ma;
};
struct bpf_local_storage_data {
@@ -83,6 +87,7 @@ struct bpf_local_storage_elem {
struct bpf_local_storage {
struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
+ struct bpf_local_storage_map __rcu *smap;
struct hlist_head list; /* List of bpf_local_storage_elem */
void *owner; /* The object that owns the above "list" of
* bpf_local_storage_elem.
@@ -121,14 +126,15 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
- struct bpf_local_storage_cache *cache);
+ struct bpf_local_storage_cache *cache,
+ bool bpf_ma);
struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
bool cacheit_lockit);
-bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage);
+void bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
void bpf_local_storage_map_free(struct bpf_map *map,
struct bpf_local_storage_cache *cache,
@@ -142,17 +148,19 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem);
-void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu);
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now);
void bpf_selem_link_map(struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem *selem);
-void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
-
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
bool charge_mem, gfp_t gfp_flags);
+void bpf_selem_free(struct bpf_local_storage_elem *selem,
+ struct bpf_local_storage_map *smap,
+ bool reuse_now);
+
int
bpf_local_storage_alloc(void *owner,
struct bpf_local_storage_map *smap,
@@ -163,7 +171,6 @@ struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
void *value, u64 map_flags, gfp_t gfp_flags);
-void bpf_local_storage_free_rcu(struct rcu_head *rcu);
u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map);
#endif /* _BPF_LOCAL_STORAGE_H */
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index a7104af61ab4..3929be5743f4 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -31,5 +31,7 @@ void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
/* kmem_cache_alloc/free equivalent: */
void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma);
void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr);
+void bpf_mem_cache_raw_free(void *ptr);
+void *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags);
#endif /* _BPF_MEM_ALLOC_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 18538bad2b8c..f03852b89d28 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -59,6 +59,14 @@ struct bpf_active_lock {
u32 id;
};
+#define ITER_PREFIX "bpf_iter_"
+
+enum bpf_iter_state {
+ BPF_ITER_STATE_INVALID, /* for non-first slot */
+ BPF_ITER_STATE_ACTIVE,
+ BPF_ITER_STATE_DRAINED,
+};
+
struct bpf_reg_state {
/* Ordering of fields matters. See states_equal() */
enum bpf_reg_type type;
@@ -103,6 +111,18 @@ struct bpf_reg_state {
bool first_slot;
} dynptr;
+ /* For bpf_iter stack slots */
+ struct {
+ /* BTF container and BTF type ID describing
+ * struct bpf_iter_<type> of an iterator state
+ */
+ struct btf *btf;
+ u32 btf_id;
+ /* packing following two fields to fit iter state into 16 bytes */
+ enum bpf_iter_state state:2;
+ int depth:30;
+ } iter;
+
/* Max size from any of the above. */
struct {
unsigned long raw1;
@@ -141,6 +161,8 @@ struct bpf_reg_state {
* same reference to the socket, to determine proper reference freeing.
* For stack slots that are dynptrs, this is used to track references to
* the dynptr to determine proper reference freeing.
+ * Similarly to dynptrs, we use ID to track "belonging" of a reference
+ * to a specific instance of bpf_iter.
*/
u32 id;
/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
@@ -211,9 +233,11 @@ enum bpf_stack_slot_type {
* is stored in bpf_stack_state->spilled_ptr.dynptr.type
*/
STACK_DYNPTR,
+ STACK_ITER,
};
#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
+
#define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern)
#define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE)
@@ -448,12 +472,17 @@ struct bpf_insn_aux_data {
bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
bool zext_dst; /* this insn zero extends dst reg */
bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
+ bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
u8 alu_state; /* used in combination with alu_limit */
/* below fields are initialized once */
unsigned int orig_idx; /* original instruction index */
- bool prune_point;
bool jmp_point;
+ bool prune_point;
+ /* ensure we check state equivalence and save state checkpoint and
+ * this instruction, regardless of any heuristics
+ */
+ bool force_checkpoint;
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -462,39 +491,36 @@ struct bpf_insn_aux_data {
#define BPF_VERIFIER_TMP_LOG_SIZE 1024
struct bpf_verifier_log {
- u32 level;
- char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
+ /* Logical start and end positions of a "log window" of the verifier log.
+ * start_pos == 0 means we haven't truncated anything.
+ * Once truncation starts to happen, start_pos + len_total == end_pos,
+ * except during log reset situations, in which (end_pos - start_pos)
+ * might get smaller than len_total (see bpf_vlog_reset()).
+ * Generally, (end_pos - start_pos) gives number of useful data in
+ * user log buffer.
+ */
+ u64 start_pos;
+ u64 end_pos;
char __user *ubuf;
- u32 len_used;
+ u32 level;
u32 len_total;
+ u32 len_max;
+ char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
};
-static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
-{
- return log->len_used >= log->len_total - 1;
-}
-
#define BPF_LOG_LEVEL1 1
#define BPF_LOG_LEVEL2 2
#define BPF_LOG_STATS 4
+#define BPF_LOG_FIXED 8
#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
-#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS)
+#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS | BPF_LOG_FIXED)
#define BPF_LOG_KERNEL (BPF_LOG_MASK + 1) /* kernel internal flag */
#define BPF_LOG_MIN_ALIGNMENT 8U
#define BPF_LOG_ALIGNMENT 40U
static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
- return log &&
- ((log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
- log->level == BPF_LOG_KERNEL);
-}
-
-static inline bool
-bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
-{
- return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 &&
- log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK);
+ return log && log->level;
}
#define BPF_MAX_SUBPROGS 256
@@ -574,7 +600,7 @@ struct bpf_verifier_env {
u32 scratched_regs;
/* Same as scratched_regs but for stack slots */
u64 scratched_stack_slots;
- u32 prev_log_len, prev_insn_print_len;
+ u64 prev_log_pos, prev_insn_print_pos;
/* buffer used in reg_type_str() to generate reg_type string */
char type_str_buf[TYPE_STR_BUF_LEN];
};
@@ -585,6 +611,10 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
const char *fmt, ...);
__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
const char *fmt, ...);
+int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level,
+ char __user *log_buf, u32 log_size);
+void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos);
+int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual);
static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 556b3e2e7471..495250162422 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -71,6 +71,10 @@
#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
#define KF_RCU (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */
+/* only one of KF_ITER_{NEW,NEXT,DESTROY} could be specified per kfunc */
+#define KF_ITER_NEW (1 << 8) /* kfunc implements BPF iter constructor */
+#define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */
+#define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */
/*
* Tag marking a kernel function as a kfunc. This is meant to minimize the
@@ -117,13 +121,11 @@ struct btf_struct_metas {
struct btf_struct_meta types[];
};
-typedef void (*btf_dtor_kfunc_t)(void *);
-
extern const struct file_operations btf_fops;
void btf_get(struct btf *btf);
void btf_put(struct btf *btf);
-int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr);
+int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz);
struct btf *btf_get_by_fd(int fd);
int btf_get_info_by_fd(const struct btf *btf,
const union bpf_attr *attr,
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index c6fab004104a..5b2f8147d1ae 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -218,7 +218,6 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_CQM_ONLINE,
CPUHP_AP_PERF_X86_CSTATE_ONLINE,
CPUHP_AP_PERF_X86_IDXD_ONLINE,
- CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 798d35890118..62b61527bcc4 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -711,6 +711,7 @@ struct ethtool_mm_stats {
* @get_dump_data: Get dump data.
* @set_dump: Set dump specific flags to the device.
* @get_ts_info: Get the time stamping and PTP hardware clock capabilities.
+ * It may be called with RCU, or rtnl or reference on the device.
* Drivers supporting transmit time stamps in software should set this to
* ethtool_op_get_ts_info().
* @get_module_info: Get the size and type of the eeprom contained within
diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h
index 17003b385756..fae0dfb9a9c8 100644
--- a/include/linux/ethtool_netlink.h
+++ b/include/linux/ethtool_netlink.h
@@ -39,6 +39,7 @@ void ethtool_aggregate_pause_stats(struct net_device *dev,
struct ethtool_pause_stats *pause_stats);
void ethtool_aggregate_rmon_stats(struct net_device *dev,
struct ethtool_rmon_stats *rmon_stats);
+bool ethtool_dev_mm_supported(struct net_device *dev);
#else
static inline int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd)
@@ -112,5 +113,10 @@ ethtool_aggregate_rmon_stats(struct net_device *dev,
{
}
+static inline bool ethtool_dev_mm_supported(struct net_device *dev)
+{
+ return false;
+}
+
#endif /* IS_ENABLED(CONFIG_ETHTOOL_NETLINK) */
#endif /* _LINUX_ETHTOOL_NETLINK_H_ */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index efa5d4a1677e..5364b0c52c1d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -571,8 +571,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
extern struct mutex nf_conn_btf_access_lock;
extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag);
+ int off, int size);
typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
const struct bpf_insn *insnsi,
@@ -1504,9 +1503,9 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
}
#endif /* IS_ENABLED(CONFIG_IPV6) */
-static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index,
- u64 flags, const u64 flag_mask,
- void *lookup_elem(struct bpf_map *map, u32 key))
+static __always_inline long __bpf_xdp_redirect_map(struct bpf_map *map, u64 index,
+ u64 flags, const u64 flag_mask,
+ void *lookup_elem(struct bpf_map *map, u32 key))
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX;
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 366c730beaa3..402fc061de75 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -980,7 +980,7 @@ static inline void __ftrace_enabled_restore(int enabled)
#define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
#define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
-static inline unsigned long get_lock_parent_ip(void)
+static __always_inline unsigned long get_lock_parent_ip(void)
{
unsigned long addr = CALLER_ADDR0;
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index c1b62384b6ee..492dd27a5dd8 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -430,7 +430,7 @@ struct hwmon_channel_info {
*/
struct hwmon_chip_info {
const struct hwmon_ops *ops;
- const struct hwmon_channel_info **info;
+ const struct hwmon_channel_info * const *info;
};
/* hwmon_device_register() is deprecated */
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 1668ac4d7adc..3ff96ae31bf6 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -60,6 +60,7 @@ struct br_ip_list {
#define BR_TX_FWD_OFFLOAD BIT(20)
#define BR_PORT_LOCKED BIT(21)
#define BR_PORT_MAB BIT(22)
+#define BR_NEIGH_VLAN_SUPPRESS BIT(23)
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h
index e38ae3c34618..30b17647ce3c 100644
--- a/include/linux/kmsan.h
+++ b/include/linux/kmsan.h
@@ -134,11 +134,12 @@ void kmsan_kfree_large(const void *ptr);
* @page_shift: page_shift passed to vmap_range_noflush().
*
* KMSAN maps shadow and origin pages of @pages into contiguous ranges in
- * vmalloc metadata address range.
+ * vmalloc metadata address range. Returns 0 on success, callers must check
+ * for non-zero return value.
*/
-void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
- pgprot_t prot, struct page **pages,
- unsigned int page_shift);
+int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
+ pgprot_t prot, struct page **pages,
+ unsigned int page_shift);
/**
* kmsan_vunmap_kernel_range_noflush() - Notify KMSAN about a vunmap.
@@ -159,11 +160,12 @@ void kmsan_vunmap_range_noflush(unsigned long start, unsigned long end);
* @page_shift: page_shift argument passed to vmap_range_noflush().
*
* KMSAN creates new metadata pages for the physical pages mapped into the
- * virtual memory.
+ * virtual memory. Returns 0 on success, callers must check for non-zero return
+ * value.
*/
-void kmsan_ioremap_page_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int page_shift);
+int kmsan_ioremap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int page_shift);
/**
* kmsan_iounmap_page_range() - Notify KMSAN about a iounmap_page_range() call.
@@ -281,12 +283,13 @@ static inline void kmsan_kfree_large(const void *ptr)
{
}
-static inline void kmsan_vmap_pages_range_noflush(unsigned long start,
- unsigned long end,
- pgprot_t prot,
- struct page **pages,
- unsigned int page_shift)
+static inline int kmsan_vmap_pages_range_noflush(unsigned long start,
+ unsigned long end,
+ pgprot_t prot,
+ struct page **pages,
+ unsigned int page_shift)
{
+ return 0;
}
static inline void kmsan_vunmap_range_noflush(unsigned long start,
@@ -294,12 +297,12 @@ static inline void kmsan_vunmap_range_noflush(unsigned long start,
{
}
-static inline void kmsan_ioremap_page_range(unsigned long start,
- unsigned long end,
- phys_addr_t phys_addr,
- pgprot_t prot,
- unsigned int page_shift)
+static inline int kmsan_ioremap_page_range(unsigned long start,
+ unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int page_shift)
{
+ return 0;
}
static inline void kmsan_iounmap_page_range(unsigned long start,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8ada23756b0e..a9adf75344be 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -755,6 +755,7 @@ struct kvm {
struct {
spinlock_t lock;
struct list_head items;
+ /* resampler_list update side is protected by resampler_lock. */
struct list_head resampler_list;
struct mutex resampler_lock;
} irqfds;
@@ -1986,6 +1987,9 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
#ifdef CONFIG_HAVE_KVM_IRQFD
int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
void kvm_irqfd_release(struct kvm *kvm);
+bool kvm_notify_irqfd_resampler(struct kvm *kvm,
+ unsigned int irqchip,
+ unsigned int pin);
void kvm_irq_routing_update(struct kvm *);
#else
static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
@@ -1994,6 +1998,13 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
}
static inline void kvm_irqfd_release(struct kvm *kvm) {}
+
+static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm,
+ unsigned int irqchip,
+ unsigned int pin)
+{
+ return false;
+}
#endif
#else
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index dac047abdba7..8ad43692e3bb 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -31,7 +31,7 @@ struct kvm_kernel_irqfd_resampler {
/*
* Entry in list of kvm->irqfd.resampler_list. Use for sharing
* resamplers among irqfds on the same gsi.
- * Accessed and modified under kvm->irqfds.resampler_lock
+ * RCU list modified under kvm->irqfds.resampler_lock
*/
struct list_head link;
};
diff --git a/include/linux/leds.h b/include/linux/leds.h
index d71201a968b6..aa48e643f655 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -82,7 +82,15 @@ struct led_init_data {
bool devname_mandatory;
};
+#if IS_ENABLED(CONFIG_NEW_LEDS)
enum led_default_state led_init_default_state_get(struct fwnode_handle *fwnode);
+#else
+static inline enum led_default_state
+led_init_default_state_get(struct fwnode_handle *fwnode)
+{
+ return LEDS_DEFSTATE_OFF;
+}
+#endif
struct led_hw_trigger_type {
int dummy;
@@ -217,9 +225,19 @@ static inline int led_classdev_register(struct device *parent,
return led_classdev_register_ext(parent, led_cdev, NULL);
}
+#if IS_ENABLED(CONFIG_LEDS_CLASS)
int devm_led_classdev_register_ext(struct device *parent,
struct led_classdev *led_cdev,
struct led_init_data *init_data);
+#else
+static inline int
+devm_led_classdev_register_ext(struct device *parent,
+ struct led_classdev *led_cdev,
+ struct led_init_data *init_data)
+{
+ return 0;
+}
+#endif
static inline int devm_led_classdev_register(struct device *parent,
struct led_classdev *led_cdev)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index af4dd536a52c..c0af74efd3cb 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -36,6 +36,7 @@
#include <linux/types.h>
#include <rdma/ib_verbs.h>
#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/bitfield.h>
#if defined(__LITTLE_ENDIAN)
#define MLX5_SET_HOST_ENDIANNESS 0
@@ -442,6 +443,8 @@ enum {
MLX5_OPCODE_UMR = 0x25,
+ MLX5_OPCODE_FLOW_TBL_ACCESS = 0x2c,
+
MLX5_OPCODE_ACCESS_ASO = 0x2d,
};
@@ -980,14 +983,23 @@ enum {
};
enum {
- CQE_RSS_HTYPE_IP = 0x3 << 2,
+ CQE_RSS_HTYPE_IP = GENMASK(3, 2),
/* cqe->rss_hash_type[3:2] - IP destination selected for hash
* (00 = none, 01 = IPv4, 10 = IPv6, 11 = Reserved)
*/
- CQE_RSS_HTYPE_L4 = 0x3 << 6,
+ CQE_RSS_IP_NONE = 0x0,
+ CQE_RSS_IPV4 = 0x1,
+ CQE_RSS_IPV6 = 0x2,
+ CQE_RSS_RESERVED = 0x3,
+
+ CQE_RSS_HTYPE_L4 = GENMASK(7, 6),
/* cqe->rss_hash_type[7:6] - L4 destination selected for hash
* (00 = none, 01 = TCP. 10 = UDP, 11 = IPSEC.SPI
*/
+ CQE_RSS_L4_NONE = 0x0,
+ CQE_RSS_L4_TCP = 0x1,
+ CQE_RSS_L4_UDP = 0x2,
+ CQE_RSS_L4_IPSEC = 0x3,
};
enum {
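The mlx5 hunk swaps open-coded masks like 0x3 << 2 for GENMASK() plus named field values, which is what makes the new <linux/bitfield.h> include useful: FIELD_GET() can then extract and compare the fields symbolically. A freestanding rendition of the two macros for 32-bit masks:

#include <assert.h>
#include <stdint.h>

#define GENMASK(h, l)	((~0u << (l)) & (~0u >> (31 - (h))))
/* divide by the mask's lowest set bit to shift the field down */
#define FIELD_GET(mask, val)	(((val) & (mask)) / ((mask) & -(mask)))

#define RSS_HTYPE_IP	GENMASK(3, 2)	/* cqe->rss_hash_type[3:2] */
#define RSS_IPV6	0x2u

int main(void)
{
	uint32_t hash_type = 0x2u << 2;	/* bits [3:2] = 10b: IPv6 */

	assert(FIELD_GET(RSS_HTYPE_IP, hash_type) == RSS_IPV6);
	return 0;
}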
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f243bd10a5e1..4c3636d2066b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -751,6 +751,7 @@ enum {
struct mlx5_profile {
u64 mask;
u8 log_max_qp;
+ u8 num_cmd_caches;
struct {
int size;
int limit;
@@ -1214,11 +1215,6 @@ static inline bool mlx5_core_is_vf(const struct mlx5_core_dev *dev)
return dev->coredev_type == MLX5_COREDEV_VF;
}
-static inline bool mlx5_core_is_management_pf(const struct mlx5_core_dev *dev)
-{
- return MLX5_CAP_GEN(dev, num_ports) == 1 && !MLX5_CAP_GEN(dev, native_port_num);
-}
-
static inline bool mlx5_core_is_ecpf(const struct mlx5_core_dev *dev)
{
return dev->caps.embedded_cpu;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index e47d6c58da35..20d00e09b168 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -78,12 +78,15 @@ enum {
enum {
MLX5_OBJ_TYPE_SW_ICM = 0x0008,
+ MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT = 0x23,
};
enum {
MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM),
MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11),
MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q = (1ULL << 13),
+ MLX5_GENERAL_OBJ_TYPES_CAP_HEADER_MODIFY_ARGUMENT =
+ (1ULL << MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT),
MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD = (1ULL << 39),
};
@@ -321,6 +324,10 @@ enum {
MLX5_FT_NIC_TX_RDMA_2_NIC_TX = BIT(1),
};
+enum {
+ MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT = 0x1,
+};
+
struct mlx5_ifc_flow_table_fields_supported_bits {
u8 outer_dmac[0x1];
u8 outer_smac[0x1];
@@ -456,9 +463,11 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 max_ft_level[0x8];
u8 reformat_add_esp_trasport[0x1];
- u8 reserved_at_41[0x2];
+ u8 reformat_l2_to_l3_esp_tunnel[0x1];
+ u8 reserved_at_42[0x1];
u8 reformat_del_esp_trasport[0x1];
- u8 reserved_at_44[0x2];
+ u8 reformat_l3_esp_tunnel_to_l2[0x1];
+ u8 reserved_at_45[0x1];
u8 execute_aso[0x1];
u8 reserved_at_47[0x19];
@@ -880,7 +889,12 @@ enum {
struct mlx5_ifc_flow_table_eswitch_cap_bits {
u8 fdb_to_vport_reg_c_id[0x8];
- u8 reserved_at_8[0xd];
+ u8 reserved_at_8[0x5];
+ u8 fdb_uplink_hairpin[0x1];
+ u8 fdb_multi_path_any_table_limit_regc[0x1];
+ u8 reserved_at_f[0x3];
+ u8 fdb_multi_path_any_table[0x1];
+ u8 reserved_at_13[0x2];
u8 fdb_modify_header_fwd_to_table[0x1];
u8 fdb_ipv4_ttl_modify[0x1];
u8 flow_source[0x1];
@@ -1922,7 +1936,14 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_750[0x4];
u8 max_dynamic_vf_msix_table_size[0xc];
- u8 reserved_at_760[0x20];
+ u8 reserved_at_760[0x3];
+ u8 log_max_num_header_modify_argument[0x5];
+ u8 reserved_at_768[0x4];
+ u8 log_header_modify_argument_granularity[0x4];
+ u8 reserved_at_770[0x3];
+ u8 log_header_modify_argument_max_alloc[0x5];
+ u8 reserved_at_778[0x8];
+
u8 vhca_tunnel_commands[0x40];
u8 match_definer_format_supported[0x40];
};
@@ -6356,6 +6377,18 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
u8 reserved_at_60[0x20];
};
+struct mlx5_ifc_modify_header_arg_bits {
+ u8 reserved_at_0[0x80];
+
+ u8 reserved_at_80[0x8];
+ u8 access_pd[0x18];
+};
+
+struct mlx5_ifc_create_modify_header_arg_in_bits {
+ struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+ struct mlx5_ifc_modify_header_arg_bits arg;
+};
+
struct mlx5_ifc_create_match_definer_in_bits {
struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
@@ -6599,7 +6632,9 @@ enum mlx5_reformat_ctx_type {
MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3,
MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5,
+ MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL = 0x6,
MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8,
+ MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2 = 0x9,
MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb,
MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf,
MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10,
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index df55fbb65717..bd53cf4be7bd 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -499,6 +499,16 @@ struct mlx5_stride_block_ctrl_seg {
__be16 num_entries;
};
+struct mlx5_wqe_flow_update_ctrl_seg {
+ __be32 flow_idx_update;
+ __be32 dest_handle;
+ u8 reserved0[40];
+};
+
+struct mlx5_wqe_header_modify_argument_update_seg {
+ u8 argument_list[64];
+};
+
struct mlx5_core_qp {
struct mlx5_core_rsc_common common; /* must be first */
void (*event) (struct mlx5_core_qp *, int);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0722859c3647..a57e6ae78e65 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -774,7 +774,8 @@ struct mm_struct {
unsigned long cpu_bitmap[];
};
-#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN)
+#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \
+ MT_FLAGS_USE_RCU)
extern struct mm_struct init_mm;
/* Pointer magic because the dynamic array size confuses some compilers. */
diff --git a/include/linux/module.h b/include/linux/module.h
index 4435ad9439ab..886d24877c7c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -608,14 +608,6 @@ static inline bool within_module(unsigned long addr, const struct module *mod)
/* Search for module by name: must be in a RCU-sched critical section. */
struct module *find_module(const char *name);
-/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
- symnum out of range. */
-int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
- char *name, char *module_name, int *exported);
-
-/* Look for this name: can be of form module:name. */
-unsigned long module_kallsyms_lookup_name(const char *name);
-
extern void __noreturn __module_put_and_kthread_exit(struct module *mod,
long code);
#define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code)
@@ -662,17 +654,6 @@ static inline void __module_get(struct module *module)
/* Dereference module function descriptor */
void *dereference_module_function_descriptor(struct module *mod, void *ptr);
-/* For kallsyms to ask for address resolution. namebuf should be at
- * least KSYM_NAME_LEN long: a pointer to namebuf is returned if
- * found, otherwise NULL. */
-const char *module_address_lookup(unsigned long addr,
- unsigned long *symbolsize,
- unsigned long *offset,
- char **modname, const unsigned char **modbuildid,
- char *namebuf);
-int lookup_module_symbol_name(unsigned long addr, char *symname);
-int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name);
-
int register_module_notifier(struct notifier_block *nb);
int unregister_module_notifier(struct notifier_block *nb);
@@ -763,39 +744,6 @@ static inline void module_put(struct module *module)
#define module_name(mod) "kernel"
-/* For kallsyms to ask for address resolution. NULL means not found. */
-static inline const char *module_address_lookup(unsigned long addr,
- unsigned long *symbolsize,
- unsigned long *offset,
- char **modname,
- const unsigned char **modbuildid,
- char *namebuf)
-{
- return NULL;
-}
-
-static inline int lookup_module_symbol_name(unsigned long addr, char *symname)
-{
- return -ERANGE;
-}
-
-static inline int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name)
-{
- return -ERANGE;
-}
-
-static inline int module_get_kallsym(unsigned int symnum, unsigned long *value,
- char *type, char *name,
- char *module_name, int *exported)
-{
- return -ERANGE;
-}
-
-static inline unsigned long module_kallsyms_lookup_name(const char *name)
-{
- return 0;
-}
-
static inline int register_module_notifier(struct notifier_block *nb)
{
/* no events will happen anyway, so this can always succeed */
@@ -891,7 +839,36 @@ int module_kallsyms_on_each_symbol(const char *modname,
int (*fn)(void *, const char *,
struct module *, unsigned long),
void *data);
-#else
+
+/* For kallsyms to ask for address resolution. namebuf should be at
+ * least KSYM_NAME_LEN long: a pointer to namebuf is returned if
+ * found, otherwise NULL.
+ */
+const char *module_address_lookup(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset,
+ char **modname, const unsigned char **modbuildid,
+ char *namebuf);
+int lookup_module_symbol_name(unsigned long addr, char *symname);
+int lookup_module_symbol_attrs(unsigned long addr,
+ unsigned long *size,
+ unsigned long *offset,
+ char *modname,
+ char *name);
+
+/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
+ * symnum out of range.
+ */
+int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
+ char *name, char *module_name, int *exported);
+
+/* Look for this name: can be of form module:name. */
+unsigned long module_kallsyms_lookup_name(const char *name);
+
+unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name);
+
+#else /* CONFIG_MODULES && CONFIG_KALLSYMS */
+
static inline int module_kallsyms_on_each_symbol(const char *modname,
int (*fn)(void *, const char *,
struct module *, unsigned long),
@@ -899,6 +876,50 @@ static inline int module_kallsyms_on_each_symbol(const char *modname,
{
return -EOPNOTSUPP;
}
+
+/* For kallsyms to ask for address resolution. NULL means not found. */
+static inline const char *module_address_lookup(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset,
+ char **modname,
+ const unsigned char **modbuildid,
+ char *namebuf)
+{
+ return NULL;
+}
+
+static inline int lookup_module_symbol_name(unsigned long addr, char *symname)
+{
+ return -ERANGE;
+}
+
+static inline int lookup_module_symbol_attrs(unsigned long addr,
+ unsigned long *size,
+ unsigned long *offset,
+ char *modname,
+ char *name)
+{
+ return -ERANGE;
+}
+
+static inline int module_get_kallsym(unsigned int symnum, unsigned long *value,
+ char *type, char *name,
+ char *module_name, int *exported)
+{
+ return -ERANGE;
+}
+
+static inline unsigned long module_kallsyms_lookup_name(const char *name)
+{
+ return 0;
+}
+
+static inline unsigned long find_kallsyms_symbol_value(struct module *mod,
+ const char *name)
+{
+ return 0;
+}
+
#endif /* CONFIG_MODULES && CONFIG_KALLSYMS */
#endif /* _LINUX_MODULE_H */
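
With the kallsyms declarations now grouped under CONFIG_MODULES && CONFIG_KALLSYMS, callers need no #ifdefs of their own: the stubs return NULL, 0, or -ERANGE. A hedged sketch; the looked-up symbol name is illustrative:

    /* Sketch only: resolve a module symbol by name. Returns 0 when the
     * symbol is absent or kallsyms support is not built in.
     */
    static unsigned long example_lookup_symbol(void)
    {
            return module_kallsyms_lookup_name("some_module:some_symbol");
    }
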
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a740be3bb911..a6a3e9457d6c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -52,7 +52,7 @@
#include <linux/rbtree.h>
#include <net/net_trackers.h>
#include <net/net_debug.h>
-#include <net/dropreason.h>
+#include <net/dropreason-core.h>
struct netpoll_info;
struct device;
@@ -360,8 +360,11 @@ struct napi_struct {
unsigned long gro_bitmask;
int (*poll)(struct napi_struct *, int);
#ifdef CONFIG_NETPOLL
+ /* CPU actively polling if netpoll is configured */
int poll_owner;
#endif
+ /* CPU on which NAPI has been scheduled for processing */
+ int list_owner;
struct net_device *dev;
struct gro_list gro_hash[GRO_HASH_BUCKETS];
struct sk_buff *skb;
@@ -1650,7 +1653,8 @@ struct net_device_ops {
struct xdp_metadata_ops {
int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
- int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash);
+ int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type);
};
/**
@@ -2878,7 +2882,6 @@ enum netdev_cmd {
NETDEV_OFFLOAD_XSTATS_REPORT_USED,
NETDEV_OFFLOAD_XSTATS_REPORT_DELTA,
NETDEV_XDP_FEAT_CHANGE,
- NETDEV_PRE_CHANGE_HWTSTAMP,
};
const char *netdev_cmd_to_name(enum netdev_cmd cmd);
@@ -2929,11 +2932,6 @@ struct netdev_notifier_pre_changeaddr_info {
const unsigned char *dev_addr;
};
-struct netdev_notifier_hwtstamp_info {
- struct netdev_notifier_info info; /* must be first */
- struct kernel_hwtstamp_config *config;
-};
-
enum netdev_offload_xstats_type {
NETDEV_OFFLOAD_XSTATS_TYPE_L3 = 1,
};
@@ -3341,6 +3339,7 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev)
static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
{
+ /* Must be an atomic op; see netif_txq_try_stop() */
set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
}
@@ -3537,7 +3536,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
* netdev_tx_sent_queue will miss the update and cause the queue to
* be stopped forever
*/
- smp_mb();
+ smp_mb(); /* NOTE: netdev_txq_completed_mb() assumes this exists */
if (unlikely(dql_avail(&dev_queue->dql) < 0))
return;
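
The widened xmo_rx_hash() signature lets drivers report the RSS hash type alongside the hash value. A hedged sketch of an adapted implementation; the driver context struct, its fields, and the particular xdp_rss_hash_type value are hypothetical:

    /* Hypothetical driver context; assumes the xdp_md ctx is embedded
     * first so the cast below is valid, as metadata drivers typically do.
     */
    struct example_xdp_ctx {
            struct xdp_md ctx;
            u32 hash;
            bool hash_valid;
    };

    static int example_xmo_rx_hash(const struct xdp_md *ctx, u32 *hash,
                                   enum xdp_rss_hash_type *rss_type)
    {
            const struct example_xdp_ctx *ectx = (const void *)ctx;

            if (!ectx->hash_valid)
                    return -ENODATA;

            *hash = ectx->hash;
            *rss_type = XDP_RSS_TYPE_L4_IPV4_TCP; /* as reported by HW */
            return 0;
    }
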
diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h
index ed9b4df792b8..43765eaf2342 100644
--- a/include/linux/pci-doe.h
+++ b/include/linux/pci-doe.h
@@ -34,6 +34,10 @@ struct pci_doe_mb;
* @work: Used internally by the mailbox
* @doe_mb: Used internally by the mailbox
*
+ * Payloads are treated as opaque byte streams which are transmitted verbatim,
+ * without byte-swapping. If payloads contain little-endian register values,
+ * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu().
+ *
* The payload sizes and rv are specified in bytes with the following
* restrictions concerning the protocol.
*
@@ -45,9 +49,9 @@ struct pci_doe_mb;
*/
struct pci_doe_task {
struct pci_doe_protocol prot;
- u32 *request_pl;
+ __le32 *request_pl;
size_t request_pl_sz;
- u32 *response_pl;
+ __le32 *response_pl;
size_t response_pl_sz;
int rv;
void (*complete)(struct pci_doe_task *task);
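
Since the payload pointers are now typed __le32 and transmitted verbatim, endianness conversion is the caller's job. A minimal sketch with protocol setup elided:

    /* Sketch: caller-side conversion for a one-dword DOE request. */
    static void example_fill_doe_request(struct pci_doe_task *task,
                                         __le32 *buf, u32 first_dw)
    {
            buf[0] = cpu_to_le32(first_dw); /* explicit, no implicit swap */
            task->request_pl = buf;
            task->request_pl_sz = sizeof(buf[0]);
    }
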
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b50e5c79f7e3..a5dda515fcd1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1624,6 +1624,8 @@ pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
flags, NULL);
}
+static inline bool pci_msix_can_alloc_dyn(struct pci_dev *dev)
+{ return false; }
static inline struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index,
const struct irq_affinity_desc *affdesc)
{
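
The new stub lets callers probe for dynamic MSI-X support without CONFIG_PCI_MSI guards. A hedged sketch, assuming MSI_ANY_INDEX from <linux/msi_api.h>:

    /* Sketch: allocate one dynamic MSI-X vector if supported; with
     * CONFIG_PCI_MSI disabled this cleanly returns -EOPNOTSUPP.
     */
    static int example_add_msix_vector(struct pci_dev *pdev)
    {
            struct msi_map map;

            if (!pci_msix_can_alloc_dyn(pdev))
                    return -EOPNOTSUPP;

            map = pci_msix_alloc_irq_at(pdev, MSI_ANY_INDEX, NULL);
            return map.index < 0 ? map.index : map.virq;
    }
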
diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h
new file mode 100644
index 000000000000..98a60ce87b92
--- /dev/null
+++ b/include/linux/pds/pds_adminq.h
@@ -0,0 +1,647 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#ifndef _PDS_CORE_ADMINQ_H_
+#define _PDS_CORE_ADMINQ_H_
+
+#define PDSC_ADMINQ_MAX_POLL_INTERVAL 256
+
+enum pds_core_adminq_flags {
+ PDS_AQ_FLAG_FASTPOLL = BIT(1), /* completion poll at 1ms */
+};
+
+/*
+ * enum pds_core_adminq_opcode - AdminQ command opcodes
+ * These commands are only processed on AdminQ, not available in devcmd
+ */
+enum pds_core_adminq_opcode {
+ PDS_AQ_CMD_NOP = 0,
+
+ /* Client control */
+ PDS_AQ_CMD_CLIENT_REG = 6,
+ PDS_AQ_CMD_CLIENT_UNREG = 7,
+ PDS_AQ_CMD_CLIENT_CMD = 8,
+
+ /* LIF commands */
+ PDS_AQ_CMD_LIF_IDENTIFY = 20,
+ PDS_AQ_CMD_LIF_INIT = 21,
+ PDS_AQ_CMD_LIF_RESET = 22,
+ PDS_AQ_CMD_LIF_GETATTR = 23,
+ PDS_AQ_CMD_LIF_SETATTR = 24,
+ PDS_AQ_CMD_LIF_SETPHC = 25,
+
+ PDS_AQ_CMD_RX_MODE_SET = 30,
+ PDS_AQ_CMD_RX_FILTER_ADD = 31,
+ PDS_AQ_CMD_RX_FILTER_DEL = 32,
+
+ /* Queue commands */
+ PDS_AQ_CMD_Q_IDENTIFY = 39,
+ PDS_AQ_CMD_Q_INIT = 40,
+ PDS_AQ_CMD_Q_CONTROL = 41,
+
+ /* SR/IOV commands */
+ PDS_AQ_CMD_VF_GETATTR = 60,
+ PDS_AQ_CMD_VF_SETATTR = 61,
+};
+
+/*
+ * enum pds_core_notifyq_opcode - NotifyQ event codes
+ */
+enum pds_core_notifyq_opcode {
+ PDS_EVENT_LINK_CHANGE = 1,
+ PDS_EVENT_RESET = 2,
+ PDS_EVENT_XCVR = 5,
+ PDS_EVENT_CLIENT = 6,
+};
+
+#define PDS_COMP_COLOR_MASK 0x80
+
+/**
+ * struct pds_core_notifyq_event - Generic event reporting structure
+ * @eid: event number
+ * @ecode: event code
+ *
+ * This is the generic event report struct from which the other
+ * actual events will be formed.
+ */
+struct pds_core_notifyq_event {
+ __le64 eid;
+ __le16 ecode;
+};
+
+/**
+ * struct pds_core_link_change_event - Link change event notification
+ * @eid: event number
+ * @ecode: event code = PDS_EVENT_LINK_CHANGE
+ * @link_status: link up/down, with error bits
+ * @link_speed: speed of the network link
+ *
+ * Sent when the network link state changes between UP and DOWN
+ */
+struct pds_core_link_change_event {
+ __le64 eid;
+ __le16 ecode;
+ __le16 link_status;
+ __le32 link_speed; /* units of 1Mbps: e.g. 10000 = 10Gbps */
+};
+
+/**
+ * struct pds_core_reset_event - Reset event notification
+ * @eid: event number
+ * @ecode: event code = PDS_EVENT_RESET
+ * @reset_code: reset type
+ * @state: 0=pending, 1=complete, 2=error
+ *
+ * Sent when the NIC or some subsystem is going to be or
+ * has been reset.
+ */
+struct pds_core_reset_event {
+ __le64 eid;
+ __le16 ecode;
+ u8 reset_code;
+ u8 state;
+};
+
+/**
+ * struct pds_core_client_event - Client event notification
+ * @eid: event number
+ * @ecode: event code = PDS_EVENT_CLIENT
+ * @client_id: client to send the event to
+ * @client_event: wrapped event struct for the client
+ *
+ * Sent when an event needs to be passed on to a client
+ */
+struct pds_core_client_event {
+ __le64 eid;
+ __le16 ecode;
+ __le16 client_id;
+ u8 client_event[54];
+};
+
+/**
+ * struct pds_core_notifyq_cmd - Placeholder for building qcq
+ * @data: anonymous field for building the qcq
+ */
+struct pds_core_notifyq_cmd {
+ __le32 data; /* Not used but needed for qcq structure */
+};
+
+/*
+ * union pds_core_notifyq_comp - Overlay of notifyq event structures
+ */
+union pds_core_notifyq_comp {
+ struct {
+ __le64 eid;
+ __le16 ecode;
+ };
+ struct pds_core_notifyq_event event;
+ struct pds_core_link_change_event link_change;
+ struct pds_core_reset_event reset;
+ u8 data[64];
+};
+
+#define PDS_DEVNAME_LEN 32
+/**
+ * struct pds_core_client_reg_cmd - Register a new client with DSC
+ * @opcode: opcode PDS_AQ_CMD_CLIENT_REG
+ * @rsvd: word boundary padding
+ * @devname: text name of client device
+ * @vif_type: what type of device (enum pds_core_vif_types)
+ *
+ * Tell the DSC of the new client, and receive a client_id from DSC.
+ */
+struct pds_core_client_reg_cmd {
+ u8 opcode;
+ u8 rsvd[3];
+ char devname[PDS_DEVNAME_LEN];
+ u8 vif_type;
+};
+
+/**
+ * struct pds_core_client_reg_comp - Client registration completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @rsvd: Word boundary padding
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @client_id: New id assigned by DSC
+ * @rsvd1: Word boundary padding
+ * @color: Color bit
+ */
+struct pds_core_client_reg_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ __le16 client_id;
+ u8 rsvd1[9];
+ u8 color;
+};
+
+/**
+ * struct pds_core_client_unreg_cmd - Unregister a client from DSC
+ * @opcode: opcode PDS_AQ_CMD_CLIENT_UNREG
+ * @rsvd: word boundary padding
+ * @client_id: id of client being removed
+ *
+ * Tell the DSC this client is going away and to remove its context.
+ * This uses the generic completion.
+ */
+struct pds_core_client_unreg_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 client_id;
+};
+
+/**
+ * struct pds_core_client_request_cmd - Pass along a wrapped client AdminQ cmd
+ * @opcode: opcode PDS_AQ_CMD_CLIENT_CMD
+ * @rsvd: word boundary padding
+ * @client_id: id of the client the command is being sent for
+ * @client_cmd: the wrapped client command
+ *
+ * Proxy post an adminq command for the client.
+ * This uses the generic completion.
+ */
+struct pds_core_client_request_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 client_id;
+ u8 client_cmd[60];
+};
+
+#define PDS_CORE_MAX_FRAGS 16
+
+#define PDS_CORE_QCQ_F_INITED BIT(0)
+#define PDS_CORE_QCQ_F_SG BIT(1)
+#define PDS_CORE_QCQ_F_INTR BIT(2)
+#define PDS_CORE_QCQ_F_TX_STATS BIT(3)
+#define PDS_CORE_QCQ_F_RX_STATS BIT(4)
+#define PDS_CORE_QCQ_F_NOTIFYQ BIT(5)
+#define PDS_CORE_QCQ_F_CMB_RINGS BIT(6)
+#define PDS_CORE_QCQ_F_CORE BIT(7)
+
+enum pds_core_lif_type {
+ PDS_CORE_LIF_TYPE_DEFAULT = 0,
+};
+
+/**
+ * union pds_core_lif_config - LIF configuration
+ * @state: LIF state (enum pds_core_lif_state)
+ * @rsvd: Word boundary padding
+ * @name: LIF name
+ * @rsvd2: Word boundary padding
+ * @features: LIF features active (enum pds_core_hw_features)
+ * @queue_count: Queue counts per queue-type
+ * @words: Full union buffer size
+ */
+union pds_core_lif_config {
+ struct {
+ u8 state;
+ u8 rsvd[3];
+ char name[PDS_CORE_IFNAMSIZ];
+ u8 rsvd2[12];
+ __le64 features;
+ __le32 queue_count[PDS_CORE_QTYPE_MAX];
+ } __packed;
+ __le32 words[64];
+};
+
+/**
+ * struct pds_core_lif_status - LIF status register
+ * @eid: most recent NotifyQ event id
+ * @rsvd: Padding to full struct size
+ */
+struct pds_core_lif_status {
+ __le64 eid;
+ u8 rsvd[56];
+};
+
+/**
+ * struct pds_core_lif_info - LIF info structure
+ * @config: LIF configuration structure
+ * @status: LIF status structure
+ */
+struct pds_core_lif_info {
+ union pds_core_lif_config config;
+ struct pds_core_lif_status status;
+};
+
+/**
+ * struct pds_core_lif_identity - LIF identity information (type-specific)
+ * @features: LIF features (see enum pds_core_hw_features)
+ * @version: Identify structure version
+ * @hw_index: LIF hardware index
+ * @rsvd: Word boundary padding
+ * @max_nb_sessions: Maximum number of sessions supported
+ * @rsvd2: buffer padding
+ * @config: LIF config struct with features, q counts
+ */
+struct pds_core_lif_identity {
+ __le64 features;
+ u8 version;
+ u8 hw_index;
+ u8 rsvd[2];
+ __le32 max_nb_sessions;
+ u8 rsvd2[120];
+ union pds_core_lif_config config;
+};
+
+/**
+ * struct pds_core_lif_identify_cmd - Get LIF identity info command
+ * @opcode: Opcode PDS_AQ_CMD_LIF_IDENTIFY
+ * @type: LIF type (enum pds_core_lif_type)
+ * @client_id: Client identifier
+ * @ver: Version of identify returned by device
+ * @rsvd: Word boundary padding
+ * @ident_pa: DMA address to receive identity info
+ *
+ * Firmware will copy LIF identity data (struct pds_core_lif_identity)
+ * into the buffer address given.
+ */
+struct pds_core_lif_identify_cmd {
+ u8 opcode;
+ u8 type;
+ __le16 client_id;
+ u8 ver;
+ u8 rsvd[3];
+ __le64 ident_pa;
+};
+
+/**
+ * struct pds_core_lif_identify_comp - LIF identify command completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @ver: Version of identify returned by device
+ * @bytes: Bytes copied into the buffer
+ * @rsvd: Word boundary padding
+ * @color: Color bit
+ */
+struct pds_core_lif_identify_comp {
+ u8 status;
+ u8 ver;
+ __le16 bytes;
+ u8 rsvd[11];
+ u8 color;
+};
+
+/**
+ * struct pds_core_lif_init_cmd - LIF init command
+ * @opcode: Opcode PDS_AQ_CMD_LIF_INIT
+ * @type: LIF type (enum pds_core_lif_type)
+ * @client_id: Client identifier
+ * @rsvd: Word boundary padding
+ * @info_pa: Destination address for LIF info (struct pds_core_lif_info)
+ */
+struct pds_core_lif_init_cmd {
+ u8 opcode;
+ u8 type;
+ __le16 client_id;
+ __le32 rsvd;
+ __le64 info_pa;
+};
+
+/**
+ * struct pds_core_lif_init_comp - LIF init command completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @rsvd: Word boundary padding
+ * @hw_index: Hardware index of the initialized LIF
+ * @rsvd1: Word boundary padding
+ * @color: Color bit
+ */
+struct pds_core_lif_init_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 hw_index;
+ u8 rsvd1[11];
+ u8 color;
+};
+
+/**
+ * struct pds_core_lif_reset_cmd - LIF reset command
+ * Will reset only the specified LIF.
+ * @opcode: Opcode PDS_AQ_CMD_LIF_RESET
+ * @rsvd: Word boundary padding
+ * @client_id: Client identifier
+ */
+struct pds_core_lif_reset_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 client_id;
+};
+
+/**
+ * enum pds_core_lif_attr - List of LIF attributes
+ * @PDS_CORE_LIF_ATTR_STATE: LIF state attribute
+ * @PDS_CORE_LIF_ATTR_NAME: LIF name attribute
+ * @PDS_CORE_LIF_ATTR_FEATURES: LIF features attribute
+ * @PDS_CORE_LIF_ATTR_STATS_CTRL: LIF statistics control attribute
+ */
+enum pds_core_lif_attr {
+ PDS_CORE_LIF_ATTR_STATE = 0,
+ PDS_CORE_LIF_ATTR_NAME = 1,
+ PDS_CORE_LIF_ATTR_FEATURES = 4,
+ PDS_CORE_LIF_ATTR_STATS_CTRL = 6,
+};
+
+/**
+ * struct pds_core_lif_setattr_cmd - Set LIF attributes on the NIC
+ * @opcode: Opcode PDS_AQ_CMD_LIF_SETATTR
+ * @attr: Attribute type (enum pds_core_lif_attr)
+ * @client_id: Client identifier
+ * @state: LIF state (enum pds_core_lif_state)
+ * @name: The name string, 0 terminated
+ * @features: Features (enum pds_core_hw_features)
+ * @stats_ctl: Stats control commands (enum pds_core_stats_ctl_cmd)
+ * @rsvd: Command Buffer padding
+ */
+struct pds_core_lif_setattr_cmd {
+ u8 opcode;
+ u8 attr;
+ __le16 client_id;
+ union {
+ u8 state;
+ char name[PDS_CORE_IFNAMSIZ];
+ __le64 features;
+ u8 stats_ctl;
+ u8 rsvd[60];
+ } __packed;
+};
+
+/**
+ * struct pds_core_lif_setattr_comp - LIF set attr command completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @rsvd: Word boundary padding
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @features: Features (enum pds_core_hw_features)
+ * @rsvd2: Word boundary padding
+ * @color: Color bit
+ */
+struct pds_core_lif_setattr_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ union {
+ __le64 features;
+ u8 rsvd2[11];
+ } __packed;
+ u8 color;
+};
+
+/**
+ * struct pds_core_lif_getattr_cmd - Get LIF attributes from the NIC
+ * @opcode: Opcode PDS_AQ_CMD_LIF_GETATTR
+ * @attr: Attribute type (enum pds_core_lif_attr)
+ * @client_id: Client identifier
+ */
+struct pds_core_lif_getattr_cmd {
+ u8 opcode;
+ u8 attr;
+ __le16 client_id;
+};
+
+/**
+ * struct pds_core_lif_getattr_comp - LIF get attr command completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @rsvd: Word boundary padding
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @state: LIF state (enum pds_core_lif_state)
+ * @name: LIF name string, 0 terminated
+ * @features: Features (enum pds_core_hw_features)
+ * @rsvd2: Word boundary padding
+ * @color: Color bit
+ */
+struct pds_core_lif_getattr_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ union {
+ u8 state;
+ __le64 features;
+ u8 rsvd2[11];
+ } __packed;
+ u8 color;
+};
+
+/**
+ * struct pds_core_q_identity - Queue identity information
+ * @version: Queue type version that can be used with FW
+ * @supported: Bitfield of queue versions, first bit = ver 0
+ * @rsvd: Word boundary padding
+ * @features: Queue features
+ * @desc_sz: Descriptor size
+ * @comp_sz: Completion descriptor size
+ * @rsvd2: Word boundary padding
+ */
+struct pds_core_q_identity {
+ u8 version;
+ u8 supported;
+ u8 rsvd[6];
+#define PDS_CORE_QIDENT_F_CQ 0x01 /* queue has completion ring */
+ __le64 features;
+ __le16 desc_sz;
+ __le16 comp_sz;
+ u8 rsvd2[6];
+};
+
+/**
+ * struct pds_core_q_identify_cmd - queue identify command
+ * @opcode: Opcode PDS_AQ_CMD_Q_IDENTIFY
+ * @type: Logical queue type (enum pds_core_logical_qtype)
+ * @client_id: Client identifier
+ * @ver: Highest queue type version that the driver supports
+ * @rsvd: Word boundary padding
+ * @ident_pa: DMA address to receive the data (struct pds_core_q_identity)
+ */
+struct pds_core_q_identify_cmd {
+ u8 opcode;
+ u8 type;
+ __le16 client_id;
+ u8 ver;
+ u8 rsvd[3];
+ __le64 ident_pa;
+};
+
+/**
+ * struct pds_core_q_identify_comp - queue identify command completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @rsvd: Word boundary padding
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @ver: Queue type version that can be used with FW
+ * @rsvd1: Word boundary padding
+ * @color: Color bit
+ */
+struct pds_core_q_identify_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ u8 ver;
+ u8 rsvd1[10];
+ u8 color;
+};
+
+/**
+ * struct pds_core_q_init_cmd - Queue init command
+ * @opcode: Opcode PDS_AQ_CMD_Q_INIT
+ * @type: Logical queue type
+ * @client_id: Client identifier
+ * @ver: Queue type version
+ * @rsvd: Word boundary padding
+ * @index: (LIF, qtype) relative admin queue index
+ * @intr_index: Interrupt control register index, or Event queue index
+ * @pid: Process ID
+ * @flags:
+ * IRQ: Interrupt requested on completion
+ * ENA: Enable the queue. If ENA=0 the queue is initialized
+ * but remains disabled, to be later enabled with the
+ * Queue Enable command. If ENA=1, the queue is
+ * initialized and then enabled.
+ * @cos: Class of service for this queue
+ * @ring_size: Queue ring size, encoded as a log2(size), in
+ * number of descriptors. The actual ring size is
+ * (1 << ring_size). For example, to select a ring size
+ * of 64 descriptors write ring_size = 6. The minimum
+ * ring_size value is 2 for a ring of 4 descriptors.
+ * The maximum ring_size value is 12 for a ring of 4k
+ * descriptors. Values of ring_size <2 and >12 are
+ * reserved.
+ * @ring_base: Queue ring base address
+ * @cq_ring_base: Completion queue ring base address
+ */
+struct pds_core_q_init_cmd {
+ u8 opcode;
+ u8 type;
+ __le16 client_id;
+ u8 ver;
+ u8 rsvd[3];
+ __le32 index;
+ __le16 pid;
+ __le16 intr_index;
+ __le16 flags;
+#define PDS_CORE_QINIT_F_IRQ 0x01 /* Request interrupt on completion */
+#define PDS_CORE_QINIT_F_ENA 0x02 /* Enable the queue */
+ u8 cos;
+#define PDS_CORE_QSIZE_MIN_LG2 2
+#define PDS_CORE_QSIZE_MAX_LG2 12
+ u8 ring_size;
+ __le64 ring_base;
+ __le64 cq_ring_base;
+} __packed;
+
+/**
+ * struct pds_core_q_init_comp - Queue init command completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @rsvd: Word boundary padding
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @hw_index: Hardware Queue ID
+ * @hw_type: Hardware Queue type
+ * @rsvd2: Word boundary padding
+ * @color: Color
+ */
+struct pds_core_q_init_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ __le32 hw_index;
+ u8 hw_type;
+ u8 rsvd2[6];
+ u8 color;
+};
+
+union pds_core_adminq_cmd {
+ u8 opcode;
+ u8 bytes[64];
+
+ struct pds_core_client_reg_cmd client_reg;
+ struct pds_core_client_unreg_cmd client_unreg;
+ struct pds_core_client_request_cmd client_request;
+
+ struct pds_core_lif_identify_cmd lif_ident;
+ struct pds_core_lif_init_cmd lif_init;
+ struct pds_core_lif_reset_cmd lif_reset;
+ struct pds_core_lif_setattr_cmd lif_setattr;
+ struct pds_core_lif_getattr_cmd lif_getattr;
+
+ struct pds_core_q_identify_cmd q_ident;
+ struct pds_core_q_init_cmd q_init;
+};
+
+union pds_core_adminq_comp {
+ struct {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ u8 rsvd2[11];
+ u8 color;
+ };
+ u32 words[4];
+
+ struct pds_core_client_reg_comp client_reg;
+
+ struct pds_core_lif_identify_comp lif_ident;
+ struct pds_core_lif_init_comp lif_init;
+ struct pds_core_lif_setattr_comp lif_setattr;
+ struct pds_core_lif_getattr_comp lif_getattr;
+
+ struct pds_core_q_identify_comp q_ident;
+ struct pds_core_q_init_comp q_init;
+};
+
+#ifndef __CHECKER__
+static_assert(sizeof(union pds_core_adminq_cmd) == 64);
+static_assert(sizeof(union pds_core_adminq_comp) == 16);
+static_assert(sizeof(union pds_core_notifyq_comp) == 64);
+#endif /* __CHECKER__ */
+
+/* The color bit is a 'done' bit for the completion descriptors
+ * where the meaning alternates between '1' and '0' for alternating
+ * passes through the completion descriptor ring.
+ */
+static inline bool pdsc_color_match(u8 color, bool done_color)
+{
+ return (!!(color & PDS_COMP_COLOR_MASK)) == done_color;
+}
+
+struct pdsc;
+int pdsc_adminq_post(struct pdsc *pdsc,
+ union pds_core_adminq_cmd *cmd,
+ union pds_core_adminq_comp *comp,
+ bool fast_poll);
+
+#endif /* _PDS_CORE_ADMINQ_H_ */
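
A hedged sketch of the color-bit protocol pdsc_color_match() implements: the consumer tracks an expected color that flips each time the completion ring wraps. The ring-state names here are hypothetical; the real driver keeps its own bookkeeping:

    static void example_consume_completions(union pds_core_adminq_comp *ring,
                                            u16 nentries, u16 *tail,
                                            bool *done_color)
    {
            while (pdsc_color_match(ring[*tail].color, *done_color)) {
                    /* ... process ring[*tail] here ... */
                    if (++(*tail) == nentries) {
                            *tail = 0;
                            *done_color = !*done_color; /* flip on wrap */
                    }
            }
    }
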
diff --git a/include/linux/pds/pds_auxbus.h b/include/linux/pds/pds_auxbus.h
new file mode 100644
index 000000000000..214ef12302d0
--- /dev/null
+++ b/include/linux/pds/pds_auxbus.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#ifndef _PDSC_AUXBUS_H_
+#define _PDSC_AUXBUS_H_
+
+#include <linux/auxiliary_bus.h>
+
+struct pds_auxiliary_dev {
+ struct auxiliary_device aux_dev;
+ struct pci_dev *vf_pdev;
+ u16 client_id;
+};
+
+int pds_client_adminq_cmd(struct pds_auxiliary_dev *padev,
+ union pds_core_adminq_cmd *req,
+ size_t req_len,
+ union pds_core_adminq_comp *resp,
+ u64 flags);
+#endif /* _PDSC_AUXBUS_H_ */
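
A hedged sketch of a client posting a wrapped AdminQ command through its parent PF via pds_client_adminq_cmd(); the command contents are illustrative:

    static int example_client_cmd(struct pds_auxiliary_dev *padev)
    {
            union pds_core_adminq_cmd cmd = {
                    .client_request = {
                            .opcode = PDS_AQ_CMD_CLIENT_CMD,
                            .client_id = cpu_to_le16(padev->client_id),
                    },
            };
            union pds_core_adminq_comp comp = {};

            return pds_client_adminq_cmd(padev, &cmd, sizeof(cmd), &comp, 0);
    }
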
diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h
new file mode 100644
index 000000000000..060331486d50
--- /dev/null
+++ b/include/linux/pds/pds_common.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc. */
+
+#ifndef _PDS_COMMON_H_
+#define _PDS_COMMON_H_
+
+#define PDS_CORE_DRV_NAME "pds_core"
+
+/* the device's internal addressing uses up to 52 bits */
+#define PDS_CORE_ADDR_LEN 52
+#define PDS_CORE_ADDR_MASK (BIT_ULL(PDS_CORE_ADDR_LEN) - 1)
+#define PDS_PAGE_SIZE 4096
+
+enum pds_core_driver_type {
+ PDS_DRIVER_LINUX = 1,
+ PDS_DRIVER_WIN = 2,
+ PDS_DRIVER_DPDK = 3,
+ PDS_DRIVER_FREEBSD = 4,
+ PDS_DRIVER_IPXE = 5,
+ PDS_DRIVER_ESXI = 6,
+};
+
+enum pds_core_vif_types {
+ PDS_DEV_TYPE_CORE = 0,
+ PDS_DEV_TYPE_VDPA = 1,
+ PDS_DEV_TYPE_VFIO = 2,
+ PDS_DEV_TYPE_ETH = 3,
+ PDS_DEV_TYPE_RDMA = 4,
+ PDS_DEV_TYPE_LM = 5,
+
+ /* new ones added before this line */
+ PDS_DEV_TYPE_MAX = 16 /* don't change - used in struct size */
+};
+
+#define PDS_DEV_TYPE_CORE_STR "Core"
+#define PDS_DEV_TYPE_VDPA_STR "vDPA"
+#define PDS_DEV_TYPE_VFIO_STR "VFio"
+#define PDS_DEV_TYPE_ETH_STR "Eth"
+#define PDS_DEV_TYPE_RDMA_STR "RDMA"
+#define PDS_DEV_TYPE_LM_STR "LM"
+
+#define PDS_CORE_IFNAMSIZ 16
+
+/**
+ * enum pds_core_logical_qtype - Logical Queue Types
+ * @PDS_CORE_QTYPE_ADMINQ: Administrative Queue
+ * @PDS_CORE_QTYPE_NOTIFYQ: Notify Queue
+ * @PDS_CORE_QTYPE_RXQ: Receive Queue
+ * @PDS_CORE_QTYPE_TXQ: Transmit Queue
+ * @PDS_CORE_QTYPE_EQ: Event Queue
+ * @PDS_CORE_QTYPE_MAX: Max queue type supported
+ */
+enum pds_core_logical_qtype {
+ PDS_CORE_QTYPE_ADMINQ = 0,
+ PDS_CORE_QTYPE_NOTIFYQ = 1,
+ PDS_CORE_QTYPE_RXQ = 2,
+ PDS_CORE_QTYPE_TXQ = 3,
+ PDS_CORE_QTYPE_EQ = 4,
+
+ PDS_CORE_QTYPE_MAX = 16 /* don't change - used in struct size */
+};
+
+int pdsc_register_notify(struct notifier_block *nb);
+void pdsc_unregister_notify(struct notifier_block *nb);
+void *pdsc_get_pf_struct(struct pci_dev *vf_pdev);
+int pds_client_register(struct pci_dev *pf_pdev, char *devname);
+int pds_client_unregister(struct pci_dev *pf_pdev, u16 client_id);
+#endif /* _PDS_COMMON_H_ */
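
A minimal sketch of the notifier interface declared above; the callback body is a placeholder:

    static int example_pds_notify(struct notifier_block *nb,
                                  unsigned long ecode, void *data)
    {
            /* ecode is a pds_core_notifyq_opcode, data the event struct */
            return NOTIFY_DONE;
    }

    static struct notifier_block example_nb = {
            .notifier_call = example_pds_notify,
    };

    static int example_register(void)
    {
            return pdsc_register_notify(&example_nb);
    }
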
diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h
new file mode 100644
index 000000000000..e838a2b90440
--- /dev/null
+++ b/include/linux/pds/pds_core_if.h
@@ -0,0 +1,571 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc. */
+
+#ifndef _PDS_CORE_IF_H_
+#define _PDS_CORE_IF_H_
+
+#define PCI_VENDOR_ID_PENSANDO 0x1dd8
+#define PCI_DEVICE_ID_PENSANDO_CORE_PF 0x100c
+#define PCI_DEVICE_ID_VIRTIO_NET_TRANS 0x1000
+#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003
+#define PCI_DEVICE_ID_PENSANDO_VDPA_VF 0x100b
+#define PDS_CORE_BARS_MAX 4
+#define PDS_CORE_PCI_BAR_DBELL 1
+
+/* Bar0 */
+#define PDS_CORE_DEV_INFO_SIGNATURE 0x44455649 /* 'DEVI' */
+#define PDS_CORE_BAR0_SIZE 0x8000
+#define PDS_CORE_BAR0_DEV_INFO_REGS_OFFSET 0x0000
+#define PDS_CORE_BAR0_DEV_CMD_REGS_OFFSET 0x0800
+#define PDS_CORE_BAR0_DEV_CMD_DATA_REGS_OFFSET 0x0c00
+#define PDS_CORE_BAR0_INTR_STATUS_OFFSET 0x1000
+#define PDS_CORE_BAR0_INTR_CTRL_OFFSET 0x2000
+#define PDS_CORE_DEV_CMD_DONE 0x00000001
+
+#define PDS_CORE_DEVCMD_TIMEOUT 5
+
+#define PDS_CORE_CLIENT_ID 0
+#define PDS_CORE_ASIC_TYPE_CAPRI 0
+
+/*
+ * enum pds_core_cmd_opcode - Device commands
+ */
+enum pds_core_cmd_opcode {
+ /* Core init */
+ PDS_CORE_CMD_NOP = 0,
+ PDS_CORE_CMD_IDENTIFY = 1,
+ PDS_CORE_CMD_RESET = 2,
+ PDS_CORE_CMD_INIT = 3,
+
+ PDS_CORE_CMD_FW_DOWNLOAD = 4,
+ PDS_CORE_CMD_FW_CONTROL = 5,
+
+ /* SR/IOV commands */
+ PDS_CORE_CMD_VF_GETATTR = 60,
+ PDS_CORE_CMD_VF_SETATTR = 61,
+ PDS_CORE_CMD_VF_CTRL = 62,
+
+ /* Add commands before this line */
+ PDS_CORE_CMD_MAX,
+ PDS_CORE_CMD_COUNT
+};
+
+/*
+ * enum pds_core_status_code - Device command return codes
+ */
+enum pds_core_status_code {
+ PDS_RC_SUCCESS = 0, /* Success */
+ PDS_RC_EVERSION = 1, /* Incorrect version for request */
+ PDS_RC_EOPCODE = 2, /* Invalid cmd opcode */
+ PDS_RC_EIO = 3, /* I/O error */
+ PDS_RC_EPERM = 4, /* Permission denied */
+ PDS_RC_EQID = 5, /* Bad qid */
+ PDS_RC_EQTYPE = 6, /* Bad qtype */
+ PDS_RC_ENOENT = 7, /* No such element */
+ PDS_RC_EINTR = 8, /* operation interrupted */
+ PDS_RC_EAGAIN = 9, /* Try again */
+ PDS_RC_ENOMEM = 10, /* Out of memory */
+ PDS_RC_EFAULT = 11, /* Bad address */
+ PDS_RC_EBUSY = 12, /* Device or resource busy */
+ PDS_RC_EEXIST = 13, /* object already exists */
+ PDS_RC_EINVAL = 14, /* Invalid argument */
+ PDS_RC_ENOSPC = 15, /* No space left or alloc failure */
+ PDS_RC_ERANGE = 16, /* Parameter out of range */
+ PDS_RC_BAD_ADDR = 17, /* Descriptor contains a bad ptr */
+ PDS_RC_DEV_CMD = 18, /* Device cmd attempted on AdminQ */
+ PDS_RC_ENOSUPP = 19, /* Operation not supported */
+ PDS_RC_ERROR = 29, /* Generic error */
+ PDS_RC_ERDMA = 30, /* Generic RDMA error */
+ PDS_RC_EVFID = 31, /* VF ID does not exist */
+ PDS_RC_BAD_FW = 32, /* FW file is invalid or corrupted */
+ PDS_RC_ECLIENT = 33, /* No such client id */
+};
+
+/**
+ * struct pds_core_drv_identity - Driver identity information
+ * @drv_type: Driver type (enum pds_core_driver_type)
+ * @os_dist: OS distribution, numeric format
+ * @os_dist_str: OS distribution, string format
+ * @kernel_ver: Kernel version, numeric format
+ * @kernel_ver_str: Kernel version, string format
+ * @driver_ver_str: Driver version, string format
+ */
+struct pds_core_drv_identity {
+ __le32 drv_type;
+ __le32 os_dist;
+ char os_dist_str[128];
+ __le32 kernel_ver;
+ char kernel_ver_str[32];
+ char driver_ver_str[32];
+};
+
+#define PDS_DEV_TYPE_MAX 16
+/**
+ * struct pds_core_dev_identity - Device identity information
+ * @version: Version of device identify
+ * @type: Identify type (0 for now)
+ * @state: Device state
+ * @rsvd: Word boundary padding
+ * @nlifs: Number of LIFs provisioned
+ * @nintrs: Number of interrupts provisioned
+ * @ndbpgs_per_lif: Number of doorbell pages per LIF
+ * @intr_coal_mult: Interrupt coalescing multiplication factor
+ * Scale user-supplied interrupt coalescing
+ * value in usecs to device units using:
+ * device units = usecs * mult / div
+ * @intr_coal_div: Interrupt coalescing division factor
+ * Scale user-supplied interrupt coalescing
+ * value in usecs to device units using:
+ * device units = usecs * mult / div
+ * @vif_types: How many of each VIF device type is supported
+ */
+struct pds_core_dev_identity {
+ u8 version;
+ u8 type;
+ u8 state;
+ u8 rsvd;
+ __le32 nlifs;
+ __le32 nintrs;
+ __le32 ndbpgs_per_lif;
+ __le32 intr_coal_mult;
+ __le32 intr_coal_div;
+ __le16 vif_types[PDS_DEV_TYPE_MAX];
+};
+
+#define PDS_CORE_IDENTITY_VERSION_1 1
+
+/**
+ * struct pds_core_dev_identify_cmd - Driver/device identify command
+ * @opcode: Opcode PDS_CORE_CMD_IDENTIFY
+ * @ver: Highest version of identify supported by driver
+ *
+ * Expects to find driver identification info (struct pds_core_drv_identity)
+ * in cmd_regs->data. Driver should keep the devcmd interface locked
+ * while preparing the driver info.
+ */
+struct pds_core_dev_identify_cmd {
+ u8 opcode;
+ u8 ver;
+};
+
+/**
+ * struct pds_core_dev_identify_comp - Device identify command completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @ver: Version of identify returned by device
+ *
+ * Device identification info (struct pds_core_dev_identity) can be found
+ * in cmd_regs->data. Driver should keep the devcmd interface locked
+ * while reading the results.
+ */
+struct pds_core_dev_identify_comp {
+ u8 status;
+ u8 ver;
+};
+
+/**
+ * struct pds_core_dev_reset_cmd - Device reset command
+ * @opcode: Opcode PDS_CORE_CMD_RESET
+ *
+ * Resets and clears all LIFs, VDevs, and VIFs on the device.
+ */
+struct pds_core_dev_reset_cmd {
+ u8 opcode;
+};
+
+/**
+ * struct pds_core_dev_reset_comp - Reset command completion
+ * @status: Status of the command (enum pds_core_status_code)
+ */
+struct pds_core_dev_reset_comp {
+ u8 status;
+};
+
+/*
+ * struct pds_core_dev_init_data - Pointers and info needed for the Core
+ * initialization PDS_CORE_CMD_INIT command. The in and out structs are
+ * overlays on the pds_core_dev_cmd_regs.data space for passing data down
+ * to the firmware on init, and then returning initialization results.
+ */
+struct pds_core_dev_init_data_in {
+ __le64 adminq_q_base;
+ __le64 adminq_cq_base;
+ __le64 notifyq_cq_base;
+ __le32 flags;
+ __le16 intr_index;
+ u8 adminq_ring_size;
+ u8 notifyq_ring_size;
+};
+
+struct pds_core_dev_init_data_out {
+ __le32 core_hw_index;
+ __le32 adminq_hw_index;
+ __le32 notifyq_hw_index;
+ u8 adminq_hw_type;
+ u8 notifyq_hw_type;
+};
+
+/**
+ * struct pds_core_dev_init_cmd - Core device initialize
+ * @opcode: opcode PDS_CORE_CMD_INIT
+ *
+ * Initializes the core device and sets up the AdminQ and NotifyQ.
+ * Expects to find initialization data (struct pds_core_dev_init_data_in)
+ * in cmd_regs->data. Driver should keep the devcmd interface locked
+ * while preparing the driver info.
+ */
+struct pds_core_dev_init_cmd {
+ u8 opcode;
+};
+
+/**
+ * struct pds_core_dev_init_comp - Core init completion
+ * @status: Status of the command (enum pds_core_status_code)
+ *
+ * Initialization result data (struct pds_core_dev_init_data_out)
+ * is found in cmd_regs->data.
+ */
+struct pds_core_dev_init_comp {
+ u8 status;
+};
+
+/**
+ * struct pds_core_fw_download_cmd - Firmware download command
+ * @opcode: opcode
+ * @rsvd: Word boundary padding
+ * @addr: DMA address of the firmware buffer
+ * @offset: offset of the firmware buffer within the full image
+ * @length: number of valid bytes in the firmware buffer
+ */
+struct pds_core_fw_download_cmd {
+ u8 opcode;
+ u8 rsvd[3];
+ __le32 offset;
+ __le64 addr;
+ __le32 length;
+};
+
+/**
+ * struct pds_core_fw_download_comp - Firmware download completion
+ * @status: Status of the command (enum pds_core_status_code)
+ */
+struct pds_core_fw_download_comp {
+ u8 status;
+};
+
+/**
+ * enum pds_core_fw_control_oper - FW control operations
+ * @PDS_CORE_FW_INSTALL_ASYNC: Install firmware asynchronously
+ * @PDS_CORE_FW_INSTALL_STATUS: Firmware installation status
+ * @PDS_CORE_FW_ACTIVATE_ASYNC: Activate firmware asynchronously
+ * @PDS_CORE_FW_ACTIVATE_STATUS: Firmware activate status
+ * @PDS_CORE_FW_UPDATE_CLEANUP: Cleanup any firmware update leftovers
+ * @PDS_CORE_FW_GET_BOOT: Return current active firmware slot
+ * @PDS_CORE_FW_SET_BOOT: Set active firmware slot for next boot
+ * @PDS_CORE_FW_GET_LIST: Return list of installed firmware images
+ */
+enum pds_core_fw_control_oper {
+ PDS_CORE_FW_INSTALL_ASYNC = 0,
+ PDS_CORE_FW_INSTALL_STATUS = 1,
+ PDS_CORE_FW_ACTIVATE_ASYNC = 2,
+ PDS_CORE_FW_ACTIVATE_STATUS = 3,
+ PDS_CORE_FW_UPDATE_CLEANUP = 4,
+ PDS_CORE_FW_GET_BOOT = 5,
+ PDS_CORE_FW_SET_BOOT = 6,
+ PDS_CORE_FW_GET_LIST = 7,
+};
+
+enum pds_core_fw_slot {
+ PDS_CORE_FW_SLOT_INVALID = 0,
+ PDS_CORE_FW_SLOT_A = 1,
+ PDS_CORE_FW_SLOT_B = 2,
+ PDS_CORE_FW_SLOT_GOLD = 3,
+};
+
+/**
+ * struct pds_core_fw_control_cmd - Firmware control command
+ * @opcode: opcode
+ * @rsvd: Word boundary padding
+ * @oper: firmware control operation (enum pds_core_fw_control_oper)
+ * @slot: slot to operate on (enum pds_core_fw_slot)
+ */
+struct pds_core_fw_control_cmd {
+ u8 opcode;
+ u8 rsvd[3];
+ u8 oper;
+ u8 slot;
+};
+
+/**
+ * struct pds_core_fw_control_comp - Firmware control completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @rsvd: Word alignment space
+ * @slot: Slot number (enum pds_core_fw_slot)
+ * @rsvd1: Struct padding
+ * @color: Color bit
+ */
+struct pds_core_fw_control_comp {
+ u8 status;
+ u8 rsvd[3];
+ u8 slot;
+ u8 rsvd1[10];
+ u8 color;
+};
+
+struct pds_core_fw_name_info {
+#define PDS_CORE_FWSLOT_BUFLEN 8
+#define PDS_CORE_FWVERS_BUFLEN 32
+ char slotname[PDS_CORE_FWSLOT_BUFLEN];
+ char fw_version[PDS_CORE_FWVERS_BUFLEN];
+};
+
+struct pds_core_fw_list_info {
+#define PDS_CORE_FWVERS_LIST_LEN 16
+ u8 num_fw_slots;
+ struct pds_core_fw_name_info fw_names[PDS_CORE_FWVERS_LIST_LEN];
+} __packed;
+
+enum pds_core_vf_attr {
+ PDS_CORE_VF_ATTR_SPOOFCHK = 1,
+ PDS_CORE_VF_ATTR_TRUST = 2,
+ PDS_CORE_VF_ATTR_MAC = 3,
+ PDS_CORE_VF_ATTR_LINKSTATE = 4,
+ PDS_CORE_VF_ATTR_VLAN = 5,
+ PDS_CORE_VF_ATTR_RATE = 6,
+ PDS_CORE_VF_ATTR_STATSADDR = 7,
+};
+
+/**
+ * enum pds_core_vf_link_status - Virtual Function link status
+ * @PDS_CORE_VF_LINK_STATUS_AUTO: Use link state of the uplink
+ * @PDS_CORE_VF_LINK_STATUS_UP: Link always up
+ * @PDS_CORE_VF_LINK_STATUS_DOWN: Link always down
+ */
+enum pds_core_vf_link_status {
+ PDS_CORE_VF_LINK_STATUS_AUTO = 0,
+ PDS_CORE_VF_LINK_STATUS_UP = 1,
+ PDS_CORE_VF_LINK_STATUS_DOWN = 2,
+};
+
+/**
+ * struct pds_core_vf_setattr_cmd - Set VF attributes on the NIC
+ * @opcode: Opcode
+ * @attr: Attribute type (enum pds_core_vf_attr)
+ * @vf_index: VF index
+ * @macaddr: mac address
+ * @vlanid: vlan ID
+ * @maxrate: max Tx rate in Mbps
+ * @spoofchk: enable address spoof checking
+ * @trust: enable VF trust
+ * @linkstate: set link up or down
+ * @stats: stats addr struct
+ * @stats.pa: set DMA address for VF stats
+ * @stats.len: length of VF stats space
+ * @pad: force union to specific size
+ */
+struct pds_core_vf_setattr_cmd {
+ u8 opcode;
+ u8 attr;
+ __le16 vf_index;
+ union {
+ u8 macaddr[6];
+ __le16 vlanid;
+ __le32 maxrate;
+ u8 spoofchk;
+ u8 trust;
+ u8 linkstate;
+ struct {
+ __le64 pa;
+ __le32 len;
+ } stats;
+ u8 pad[60];
+ } __packed;
+};
+
+struct pds_core_vf_setattr_comp {
+ u8 status;
+ u8 attr;
+ __le16 vf_index;
+ __le16 comp_index;
+ u8 rsvd[9];
+ u8 color;
+};
+
+/**
+ * struct pds_core_vf_getattr_cmd - Get VF attributes from the NIC
+ * @opcode: Opcode
+ * @attr: Attribute type (enum pds_core_vf_attr)
+ * @vf_index: VF index
+ */
+struct pds_core_vf_getattr_cmd {
+ u8 opcode;
+ u8 attr;
+ __le16 vf_index;
+};
+
+struct pds_core_vf_getattr_comp {
+ u8 status;
+ u8 attr;
+ __le16 vf_index;
+ union {
+ u8 macaddr[6];
+ __le16 vlanid;
+ __le32 maxrate;
+ u8 spoofchk;
+ u8 trust;
+ u8 linkstate;
+ __le64 stats_pa;
+ u8 pad[11];
+ } __packed;
+ u8 color;
+};
+
+enum pds_core_vf_ctrl_opcode {
+ PDS_CORE_VF_CTRL_START_ALL = 0,
+ PDS_CORE_VF_CTRL_START = 1,
+};
+
+/**
+ * struct pds_core_vf_ctrl_cmd - VF control command
+ * @opcode: Opcode for the command
+ * @ctrl_opcode: VF control operation type
+ * @vf_index: VF Index. It is unused if op START_ALL is used.
+ */
+struct pds_core_vf_ctrl_cmd {
+ u8 opcode;
+ u8 ctrl_opcode;
+ __le16 vf_index;
+};
+
+/**
+ * struct pds_core_vf_ctrl_comp - VF_CTRL command completion.
+ * @status: Status of the command (enum pds_core_status_code)
+ */
+struct pds_core_vf_ctrl_comp {
+ u8 status;
+};
+
+/*
+ * union pds_core_dev_cmd - Overlay of core device command structures
+ */
+union pds_core_dev_cmd {
+ u8 opcode;
+ u32 words[16];
+
+ struct pds_core_dev_identify_cmd identify;
+ struct pds_core_dev_init_cmd init;
+ struct pds_core_dev_reset_cmd reset;
+ struct pds_core_fw_download_cmd fw_download;
+ struct pds_core_fw_control_cmd fw_control;
+
+ struct pds_core_vf_setattr_cmd vf_setattr;
+ struct pds_core_vf_getattr_cmd vf_getattr;
+ struct pds_core_vf_ctrl_cmd vf_ctrl;
+};
+
+/*
+ * union pds_core_dev_comp - Overlay of core device completion structures
+ */
+union pds_core_dev_comp {
+ u8 status;
+ u8 bytes[16];
+
+ struct pds_core_dev_identify_comp identify;
+ struct pds_core_dev_reset_comp reset;
+ struct pds_core_dev_init_comp init;
+ struct pds_core_fw_download_comp fw_download;
+ struct pds_core_fw_control_comp fw_control;
+
+ struct pds_core_vf_setattr_comp vf_setattr;
+ struct pds_core_vf_getattr_comp vf_getattr;
+ struct pds_core_vf_ctrl_comp vf_ctrl;
+};
+
+/**
+ * struct pds_core_dev_hwstamp_regs - Hardware current timestamp registers
+ * @tick_low: Low 32 bits of hardware timestamp
+ * @tick_high: High 32 bits of hardware timestamp
+ */
+struct pds_core_dev_hwstamp_regs {
+ u32 tick_low;
+ u32 tick_high;
+};
+
+/**
+ * struct pds_core_dev_info_regs - Device info register format (read-only)
+ * @signature: Signature value of 0x44455649 ('DEVI')
+ * @version: Current version of info
+ * @asic_type: Asic type
+ * @asic_rev: Asic revision
+ * @fw_status: Firmware status
+ * bit 0 - 1 = fw running
+ * bit 4-7 - 4 bit generation number, changes on fw restart
+ * @fw_heartbeat: Firmware heartbeat counter
+ * @serial_num: Serial number
+ * @fw_version: Firmware version
+ * @oprom_regs: Option ROM registers storing oprom debug enable/disable and bmp
+ * @rsvd_pad1024: Struct padding
+ * @hwstamp: Hardware current timestamp registers
+ * @rsvd_pad2048: Struct padding
+ */
+struct pds_core_dev_info_regs {
+#define PDS_CORE_DEVINFO_FWVERS_BUFLEN 32
+#define PDS_CORE_DEVINFO_SERIAL_BUFLEN 32
+ u32 signature;
+ u8 version;
+ u8 asic_type;
+ u8 asic_rev;
+#define PDS_CORE_FW_STS_F_STOPPED 0x00
+#define PDS_CORE_FW_STS_F_RUNNING 0x01
+#define PDS_CORE_FW_STS_F_GENERATION 0xF0
+ u8 fw_status;
+ __le32 fw_heartbeat;
+ char fw_version[PDS_CORE_DEVINFO_FWVERS_BUFLEN];
+ char serial_num[PDS_CORE_DEVINFO_SERIAL_BUFLEN];
+ u8 oprom_regs[32]; /* reserved */
+ u8 rsvd_pad1024[916];
+ struct pds_core_dev_hwstamp_regs hwstamp; /* on 1k boundary */
+ u8 rsvd_pad2048[1016];
+} __packed;
+
+/**
+ * struct pds_core_dev_cmd_regs - Device command register format (read-write)
+ * @doorbell: Device Cmd Doorbell, write-only
+ * Write a 1 to signal device to process cmd
+ * @done: Command completed indicator, poll for completion
+ * bit 0 == 1 when command is complete
+ * @cmd: Opcode-specific command bytes
+ * @comp: Opcode-specific response bytes
+ * @rsvd: Struct padding
+ * @data: Opcode-specific side-data
+ */
+struct pds_core_dev_cmd_regs {
+ u32 doorbell;
+ u32 done;
+ union pds_core_dev_cmd cmd;
+ union pds_core_dev_comp comp;
+ u8 rsvd[48];
+ u32 data[478];
+} __packed;
+
+/**
+ * struct pds_core_dev_regs - Device register format for bar 0 page 0
+ * @info: Device info registers
+ * @devcmd: Device command registers
+ */
+struct pds_core_dev_regs {
+ struct pds_core_dev_info_regs info;
+ struct pds_core_dev_cmd_regs devcmd;
+} __packed;
+
+#ifndef __CHECKER__
+static_assert(sizeof(struct pds_core_drv_identity) <= 1912);
+static_assert(sizeof(struct pds_core_dev_identity) <= 1912);
+static_assert(sizeof(union pds_core_dev_cmd) == 64);
+static_assert(sizeof(union pds_core_dev_comp) == 16);
+static_assert(sizeof(struct pds_core_dev_info_regs) == 2048);
+static_assert(sizeof(struct pds_core_dev_cmd_regs) == 2048);
+static_assert(sizeof(struct pds_core_dev_regs) == 4096);
+#endif /* __CHECKER__ */
+
+#endif /* _PDS_CORE_IF_H_ */
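
A hedged sketch of the devcmd handshake these registers describe: write the command, ring the doorbell with 1, then poll bit 0 of 'done'. Locking and status decoding are elided; 'regs' is hypothetical mapped BAR0 space:

    static int example_devcmd(struct pds_core_dev_cmd_regs __iomem *regs,
                              union pds_core_dev_cmd *cmd)
    {
            unsigned long timeout = jiffies + PDS_CORE_DEVCMD_TIMEOUT * HZ;

            memcpy_toio(&regs->cmd, cmd, sizeof(*cmd));
            iowrite32(1, &regs->doorbell);

            while (!(ioread32(&regs->done) & PDS_CORE_DEV_CMD_DONE)) {
                    if (time_after(jiffies, timeout))
                            return -ETIMEDOUT;
                    usleep_range(100, 200);
            }

            return 0;
    }
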
diff --git a/include/linux/pds/pds_intr.h b/include/linux/pds/pds_intr.h
new file mode 100644
index 000000000000..56277c37248c
--- /dev/null
+++ b/include/linux/pds/pds_intr.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc. */
+
+#ifndef _PDS_INTR_H_
+#define _PDS_INTR_H_
+
+/*
+ * Interrupt control register
+ * @coal_init: Coalescing timer initial value, in
+ * device units. Use @identity->intr_coal_mult
+ * and @identity->intr_coal_div to convert from
+ * usecs to device units:
+ *
+ * coal_init = coal_usecs * coal_mult / coal_div
+ *
+ * When an interrupt is sent the interrupt
+ * coalescing timer current value
+ * (@coalescing_curr) is initialized with this
+ * value and begins counting down. No more
+ * interrupts are sent until the coalescing
+ * timer reaches 0. When @coal_init=0
+ * interrupt coalescing is effectively disabled
+ * and every interrupt assert results in an
+ * interrupt. Reset value: 0
+ * @mask: Interrupt mask. When @mask=1 the interrupt
+ * resource will not send an interrupt. When
+ * @mask=0 the interrupt resource will send an
+ * interrupt if an interrupt event is pending
+ * or on the next interrupt assertion event.
+ * Reset value: 1
+ * @credits: Interrupt credits. This register indicates
+ * how many interrupt events the hardware has
+ * sent. When written by software this
+ * register atomically decrements @int_credits
+ * by the value written. When @int_credits
+ * becomes 0 then the "pending interrupt" bit
+ * in the Interrupt Status register is cleared
+ * by the hardware and any pending but unsent
+ * interrupts are cleared.
+ * !!!IMPORTANT!!! This is a signed register.
+ * @flags: Interrupt control flags
+ * @unmask -- When this bit is written with a 1
+ * the interrupt resource will set mask=0.
+ * @coal_timer_reset -- When this
+ * bit is written with a 1 the
+ * @coalescing_curr will be reloaded with
+ * @coalescing_init to reset the coalescing
+ * timer.
+ * @mask_on_assert: Automatically mask on assertion. When
+ * @mask_on_assert=1 the interrupt resource
+ * will set @mask=1 whenever an interrupt is
+ * sent. When using interrupts in Legacy
+ * Interrupt mode the driver must select
+ * @mask_on_assert=0 for proper interrupt
+ * operation.
+ * @coalescing_curr: Coalescing timer current value, in
+ * microseconds. When this value reaches 0
+ * the interrupt resource is again eligible to
+ * send an interrupt. If an interrupt event
+ * is already pending when @coalescing_curr
+ * reaches 0 the pending interrupt will be
+ * sent, otherwise an interrupt will be sent
+ * on the next interrupt assertion event.
+ */
+struct pds_core_intr {
+ u32 coal_init;
+ u32 mask;
+ u16 credits;
+ u16 flags;
+#define PDS_CORE_INTR_F_UNMASK 0x0001
+#define PDS_CORE_INTR_F_TIMER_RESET 0x0002
+ u32 mask_on_assert;
+ u32 coalescing_curr;
+ u32 rsvd6[3];
+};
+
+#ifndef __CHECKER__
+static_assert(sizeof(struct pds_core_intr) == 32);
+#endif /* __CHECKER__ */
+
+#define PDS_CORE_INTR_CTRL_REGS_MAX 2048
+#define PDS_CORE_INTR_CTRL_COAL_MAX 0x3F
+#define PDS_CORE_INTR_INDEX_NOT_ASSIGNED -1
+
+struct pds_core_intr_status {
+ u32 status[2];
+};
+
+/**
+ * enum pds_core_intr_mask_vals - valid values for mask and mask_assert.
+ * @PDS_CORE_INTR_MASK_CLEAR: unmask interrupt.
+ * @PDS_CORE_INTR_MASK_SET: mask interrupt.
+ */
+enum pds_core_intr_mask_vals {
+ PDS_CORE_INTR_MASK_CLEAR = 0,
+ PDS_CORE_INTR_MASK_SET = 1,
+};
+
+/**
+ * enum pds_core_intr_credits_bits - Bitwise composition of credits values.
+ * @PDS_CORE_INTR_CRED_COUNT: bit mask of credit count, no shift needed.
+ * @PDS_CORE_INTR_CRED_COUNT_SIGNED: bit mask of credit count, including sign bit.
+ * @PDS_CORE_INTR_CRED_UNMASK: unmask the interrupt.
+ * @PDS_CORE_INTR_CRED_RESET_COALESCE: reset the coalesce timer.
+ * @PDS_CORE_INTR_CRED_REARM: unmask and reset the coalesce timer.
+ */
+enum pds_core_intr_credits_bits {
+ PDS_CORE_INTR_CRED_COUNT = 0x7fffu,
+ PDS_CORE_INTR_CRED_COUNT_SIGNED = 0xffffu,
+ PDS_CORE_INTR_CRED_UNMASK = 0x10000u,
+ PDS_CORE_INTR_CRED_RESET_COALESCE = 0x20000u,
+ PDS_CORE_INTR_CRED_REARM = (PDS_CORE_INTR_CRED_UNMASK |
+ PDS_CORE_INTR_CRED_RESET_COALESCE),
+};
+
+static inline void
+pds_core_intr_coal_init(struct pds_core_intr __iomem *intr_ctrl, u32 coal)
+{
+ iowrite32(coal, &intr_ctrl->coal_init);
+}
+
+static inline void
+pds_core_intr_mask(struct pds_core_intr __iomem *intr_ctrl, u32 mask)
+{
+ iowrite32(mask, &intr_ctrl->mask);
+}
+
+static inline void
+pds_core_intr_credits(struct pds_core_intr __iomem *intr_ctrl,
+ u32 cred, u32 flags)
+{
+ if (WARN_ON_ONCE(cred > PDS_CORE_INTR_CRED_COUNT)) {
+ cred = ioread32(&intr_ctrl->credits);
+ cred &= PDS_CORE_INTR_CRED_COUNT_SIGNED;
+ }
+
+ iowrite32(cred | flags, &intr_ctrl->credits);
+}
+
+static inline void
+pds_core_intr_clean_flags(struct pds_core_intr __iomem *intr_ctrl, u32 flags)
+{
+ u32 cred;
+
+ cred = ioread32(&intr_ctrl->credits);
+ cred &= PDS_CORE_INTR_CRED_COUNT_SIGNED;
+ cred |= flags;
+ iowrite32(cred, &intr_ctrl->credits);
+}
+
+static inline void
+pds_core_intr_clean(struct pds_core_intr __iomem *intr_ctrl)
+{
+ pds_core_intr_clean_flags(intr_ctrl, PDS_CORE_INTR_CRED_RESET_COALESCE);
+}
+
+static inline void
+pds_core_intr_mask_assert(struct pds_core_intr __iomem *intr_ctrl, u32 mask)
+{
+ iowrite32(mask, &intr_ctrl->mask_on_assert);
+}
+
+#endif /* _PDS_INTR_H_ */
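
A hedged sketch of a typical end-of-poll rearm using the helpers above: return the processed credits and apply the unmask and coalesce-reset flags in one write. 'nwork' is the hypothetical number of completions handled:

    static void example_intr_rearm(struct pds_core_intr __iomem *intr_ctrl,
                                   u32 nwork)
    {
            pds_core_intr_credits(intr_ctrl, nwork, PDS_CORE_INTR_CRED_REARM);
    }
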
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 2f83cfc206e5..c5a0dc829714 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -14,6 +14,7 @@
#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/ethtool.h>
+#include <linux/leds.h>
#include <linux/linkmode.h>
#include <linux/netlink.h>
#include <linux/mdio.h>
@@ -600,6 +601,7 @@ struct macsec_ops;
* @phy_num_led_triggers: Number of triggers in @phy_led_triggers
* @led_link_trigger: LED trigger for link up/down
* @last_triggered: last LED trigger for link speed
+ * @leds: list of PHY LED structures
* @master_slave_set: User requested master/slave configuration
* @master_slave_get: Current master/slave advertisement
* @master_slave_state: Current master/slave configuration
@@ -699,6 +701,7 @@ struct phy_device {
struct phy_led_trigger *led_link_trigger;
#endif
+ struct list_head leds;
/*
* Interrupt number for this PHY
@@ -835,6 +838,23 @@ struct phy_plca_status {
};
/**
+ * struct phy_led: An LED driven by the PHY
+ *
+ * @list: List of LEDs
+ * @phydev: PHY this LED is attached to
+ * @led_cdev: Standard LED class structure
+ * @index: Number of the LED
+ */
+struct phy_led {
+ struct list_head list;
+ struct phy_device *phydev;
+ struct led_classdev led_cdev;
+ u8 index;
+};
+
+#define to_phy_led(d) container_of(d, struct phy_led, led_cdev)
+
+/**
* struct phy_driver - Driver structure for a particular PHY type
*
* @mdiodrv: Data common to all MDIO devices
@@ -1056,6 +1076,27 @@ struct phy_driver {
/** @get_plca_status: Return the current PLCA status info */
int (*get_plca_status)(struct phy_device *dev,
struct phy_plca_status *plca_st);
+
+ /**
+ * @led_brightness_set: Set a PHY LED brightness. Index
+ * indicates which of the PHY's LEDs should be set. Value
+ * follows the standard LED class meaning, e.g. LED_OFF,
+ * LED_HALF, LED_FULL.
+ */
+ int (*led_brightness_set)(struct phy_device *dev,
+ u8 index, enum led_brightness value);
+
+ /**
+ * @led_blink_set: Set a PHY LED to blink. Index indicates
+ * which of the PHY's LEDs should be configured to blink. Delays
+ * are in milliseconds; if both are zero then a sensible default
+ * should be chosen. If the requested timings cannot be matched
+ * exactly, the call should adjust them and report the actual
+ * values used via the delay pointers.
+ */
+ int (*led_blink_set)(struct phy_device *dev, u8 index,
+ unsigned long *delay_on,
+ unsigned long *delay_off);
};
#define to_phy_driver(d) container_of(to_mdio_common_driver(d), \
struct phy_driver, mdiodrv)
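
To illustrate the new callbacks, a hedged sketch of a PHY driver wiring up
brightness control; the EXAMPLE_LED_CTRL register layout and example_* names
are invented, the hook signature and phy_write() are real:

    #define EXAMPLE_LED_CTRL(index)     (0x1c + (index))    /* hypothetical */

    static int example_led_brightness_set(struct phy_device *phydev,
                                          u8 index, enum led_brightness value)
    {
            /* any non-zero brightness turns the LED fully on */
            return phy_write(phydev, EXAMPLE_LED_CTRL(index),
                             value == LED_OFF ? 0 : 1);
    }

    static struct phy_driver example_phy_driver = {
            .name                   = "example-phy",
            .led_brightness_set     = example_led_brightness_set,
            /* .led_blink_set would be wired up the same way */
    };
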
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 9ff56b050584..71755c66c162 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -572,6 +572,7 @@ struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *,
phy_interface_t iface,
const struct phylink_mac_ops *mac_ops);
void phylink_destroy(struct phylink *);
+bool phylink_expects_phy(struct phylink *pl);
int phylink_connect_phy(struct phylink *, struct phy_device *);
int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags);
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index f0c87baaf6c0..3d6cf306cd55 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -25,7 +25,8 @@ void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned change, u32 event,
gfp_t flags, int *new_nsid,
- int new_ifindex, u32 portid, u32 seq);
+ int new_ifindex, u32 portid,
+ const struct nlmsghdr *nlh);
void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev,
gfp_t flags, u32 portid, const struct nlmsghdr *nlh);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 63d242164b1a..b11b4517760f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1318,11 +1318,6 @@ struct task_struct {
struct tlbflush_unmap_batch tlb_ubc;
- union {
- refcount_t rcu_users;
- struct rcu_head rcu;
- };
-
/* Cache last used pipe for splice(): */
struct pipe_inode_info *splice_pipe;
@@ -1459,6 +1454,8 @@ struct task_struct {
unsigned long saved_state_change;
# endif
#endif
+ struct rcu_head rcu;
+ refcount_t rcu_users;
int pagefault_disabled;
#ifdef CONFIG_MMU
struct task_struct *oom_reaper_list;
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 358dc08e0831..836a7e200f39 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -222,7 +222,7 @@ struct sctp_datahdr {
__be16 stream;
__be16 ssn;
__u32 ppid;
- __u8 payload[];
+ /* __u8 payload[]; */
};
struct sctp_data_chunk {
@@ -270,7 +270,7 @@ struct sctp_inithdr {
__be16 num_outbound_streams;
__be16 num_inbound_streams;
__be32 initial_tsn;
- __u8 params[];
+ /* __u8 params[]; */
};
struct sctp_init_chunk {
@@ -385,7 +385,7 @@ struct sctp_sackhdr {
__be32 a_rwnd;
__be16 num_gap_ack_blocks;
__be16 num_dup_tsns;
- union sctp_sack_variable variable[];
+ /* union sctp_sack_variable variable[]; */
};
struct sctp_sack_chunk {
@@ -443,7 +443,7 @@ struct sctp_shutdown_chunk {
struct sctp_errhdr {
__be16 cause;
__be16 length;
- __u8 variable[];
+ /* __u8 variable[]; */
};
struct sctp_operr_chunk {
@@ -603,7 +603,7 @@ struct sctp_fwdtsn_skip {
struct sctp_fwdtsn_hdr {
__be32 new_cum_tsn;
- struct sctp_fwdtsn_skip skip[];
+ /* struct sctp_fwdtsn_skip skip[]; */
};
struct sctp_fwdtsn_chunk {
@@ -620,7 +620,7 @@ struct sctp_ifwdtsn_skip {
struct sctp_ifwdtsn_hdr {
__be32 new_cum_tsn;
- struct sctp_ifwdtsn_skip skip[];
+ /* struct sctp_ifwdtsn_skip skip[]; */
};
struct sctp_ifwdtsn_chunk {
@@ -667,7 +667,7 @@ struct sctp_addip_param {
struct sctp_addiphdr {
__be32 serial;
- __u8 params[];
+ /* __u8 params[]; */
};
struct sctp_addip_chunk {
@@ -727,7 +727,7 @@ struct sctp_addip_chunk {
struct sctp_authhdr {
__be16 shkey_id;
__be16 hmac_id;
- __u8 hmac[];
+ /* __u8 hmac[]; */
};
struct sctp_auth_chunk {
@@ -742,7 +742,7 @@ struct sctp_infox {
struct sctp_reconf_chunk {
struct sctp_chunkhdr chunk_hdr;
- __u8 params[];
+ /* __u8 params[]; */
};
struct sctp_strreset_outreq {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 82511b2f61ea..eb9e7bb76fa6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -37,7 +37,7 @@
#include <linux/netfilter/nf_conntrack_common.h>
#endif
#include <net/net_debug.h>
-#include <net/dropreason.h>
+#include <net/dropreason-core.h>
/**
* DOC: skb checksums
@@ -294,6 +294,7 @@ struct nf_bridge_info {
u8 pkt_otherhost:1;
u8 in_prerouting:1;
u8 bridged_dnat:1;
+ u8 sabotage_in_done:1;
__u16 frag_max_size;
struct net_device *physindev;
@@ -934,38 +935,44 @@ struct sk_buff {
/* public: */
__u8 pkt_type:3; /* see PKT_TYPE_MAX */
__u8 ignore_df:1;
- __u8 nf_trace:1;
+ __u8 dst_pending_confirm:1;
__u8 ip_summed:2;
__u8 ooo_okay:1;
+ /* private: */
+ __u8 __mono_tc_offset[0];
+ /* public: */
+ __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
+#ifdef CONFIG_NET_CLS_ACT
+ __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
+ __u8 tc_skip_classify:1;
+#endif
+ __u8 remcsum_offload:1;
+ __u8 csum_complete_sw:1;
+ __u8 csum_level:2;
+ __u8 inner_protocol_type:1;
+
__u8 l4_hash:1;
__u8 sw_hash:1;
+#ifdef CONFIG_WIRELESS
__u8 wifi_acked_valid:1;
__u8 wifi_acked:1;
+#endif
__u8 no_fcs:1;
/* Indicates the inner headers are valid in the skbuff. */
__u8 encapsulation:1;
__u8 encap_hdr_csum:1;
__u8 csum_valid:1;
-
- /* private: */
- __u8 __pkt_vlan_present_offset[0];
- /* public: */
- __u8 remcsum_offload:1;
- __u8 csum_complete_sw:1;
- __u8 csum_level:2;
- __u8 dst_pending_confirm:1;
- __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
-#ifdef CONFIG_NET_CLS_ACT
- __u8 tc_skip_classify:1;
- __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
-#endif
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
+#if IS_ENABLED(CONFIG_IP_VS)
__u8 ipvs_property:1;
- __u8 inner_protocol_type:1;
+#endif
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES)
+ __u8 nf_trace:1;
+#endif
#ifdef CONFIG_NET_SWITCHDEV
__u8 offload_fwd_mark:1;
__u8 offload_l3_fwd_mark:1;
@@ -981,12 +988,16 @@ struct sk_buff {
__u8 decrypted:1;
#endif
__u8 slow_gro:1;
+#if IS_ENABLED(CONFIG_IP_SCTP)
__u8 csum_not_inet:1;
+#endif
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
#endif
+ u16 alloc_cpu;
+
union {
__wsum csum;
struct {
@@ -1010,7 +1021,6 @@ struct sk_buff {
unsigned int sender_cpu;
};
#endif
- u16 alloc_cpu;
#ifdef CONFIG_NETWORK_SECMARK
__u32 secmark;
#endif
@@ -1066,13 +1076,13 @@ struct sk_buff {
* around, you also must adapt these constants.
*/
#ifdef __BIG_ENDIAN_BITFIELD
-#define TC_AT_INGRESS_MASK (1 << 0)
-#define SKB_MONO_DELIVERY_TIME_MASK (1 << 2)
+#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
+#define TC_AT_INGRESS_MASK (1 << 6)
#else
-#define TC_AT_INGRESS_MASK (1 << 7)
-#define SKB_MONO_DELIVERY_TIME_MASK (1 << 5)
+#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
+#define TC_AT_INGRESS_MASK (1 << 1)
#endif
-#define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset)
+#define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)
#ifdef __KERNEL__
/*
@@ -1187,6 +1197,15 @@ static inline unsigned int skb_napi_id(const struct sk_buff *skb)
#endif
}
+static inline bool skb_wifi_acked_valid(const struct sk_buff *skb)
+{
+#ifdef CONFIG_WIRELESS
+ return skb->wifi_acked_valid;
+#else
+ return 0;
+#endif
+}
+
/**
* skb_unref - decrement the skb's reference count
* @skb: buffer
@@ -3234,7 +3253,7 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
void napi_consume_skb(struct sk_buff *skb, int budget);
void napi_skb_free_stolen_head(struct sk_buff *skb);
-void __kfree_skb_defer(struct sk_buff *skb);
+void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason);
/**
* __dev_alloc_pages - allocate page for network Rx
@@ -3386,6 +3405,18 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
}
+static inline void
+napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
+{
+ struct page *page = skb_frag_page(frag);
+
+#ifdef CONFIG_PAGE_POOL
+ if (recycle && page_pool_return_skb_page(page, napi_safe))
+ return;
+#endif
+ put_page(page);
+}
+
/**
* __skb_frag_unref - release a reference on a paged fragment.
* @frag: the paged fragment
@@ -3396,13 +3427,7 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
*/
static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
{
- struct page *page = skb_frag_page(frag);
-
-#ifdef CONFIG_PAGE_POOL
- if (recycle && page_pool_return_skb_page(page))
- return;
-#endif
- put_page(page);
+ napi_frag_unref(frag, recycle, false);
}
/**
@@ -4704,7 +4729,7 @@ static inline void nf_reset_ct(struct sk_buff *skb)
static inline void nf_reset_trace(struct sk_buff *skb)
{
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES)
skb->nf_trace = 0;
#endif
}
@@ -4724,7 +4749,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
dst->_nfct = src->_nfct;
nf_conntrack_get(skb_nfct(src));
#endif
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES)
if (copy)
dst->nf_trace = src->nf_trace;
#endif
@@ -5043,7 +5068,19 @@ static inline void skb_reset_redirect(struct sk_buff *skb)
static inline bool skb_csum_is_sctp(struct sk_buff *skb)
{
+#if IS_ENABLED(CONFIG_IP_SCTP)
return skb->csum_not_inet;
+#else
+ return 0;
+#endif
+}
+
+static inline void skb_reset_csum_not_inet(struct sk_buff *skb)
+{
+ skb->ip_summed = CHECKSUM_NONE;
+#if IS_ENABLED(CONFIG_IP_SCTP)
+ skb->csum_not_inet = 0;
+#endif
}
static inline void skb_set_kcov_handle(struct sk_buff *skb,
@@ -5063,12 +5100,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
#endif
}
-#ifdef CONFIG_PAGE_POOL
static inline void skb_mark_for_recycle(struct sk_buff *skb)
{
+#ifdef CONFIG_PAGE_POOL
skb->pp_recycle = 1;
-}
#endif
+}
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */
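
With skb_mark_for_recycle() now defined for all configs, a page-pool-backed
Rx path can call it unconditionally; a sketch, assuming va/truesize describe
a buffer that came from a page_pool:

    skb = napi_build_skb(va, truesize);
    if (unlikely(!skb))
            return NULL;

    /* frags are returned to the page_pool on free; with
     * CONFIG_PAGE_POOL=n this is a safe no-op rather than a build error
     */
    skb_mark_for_recycle(skb);
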
diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
index fd0b0605cf90..b2b28180dff7 100644
--- a/include/linux/soc/mediatek/mtk_wed.h
+++ b/include/linux/soc/mediatek/mtk_wed.h
@@ -6,6 +6,7 @@
#include <linux/regmap.h>
#include <linux/pci.h>
#include <linux/skbuff.h>
+#include <linux/netdevice.h>
#define MTK_WED_TX_QUEUES 2
#define MTK_WED_RX_QUEUES 2
@@ -179,6 +180,8 @@ struct mtk_wed_ops {
u32 (*irq_get)(struct mtk_wed_device *dev, u32 mask);
void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask);
+ int (*setup_tc)(struct mtk_wed_device *wed, struct net_device *dev,
+ enum tc_setup_type type, void *type_data);
};
extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops;
@@ -237,6 +240,8 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev)
(_dev)->ops->msg_update(_dev, _id, _msg, _len)
#define mtk_wed_device_stop(_dev) (_dev)->ops->stop(_dev)
#define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev)
+#define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) \
+ (_dev)->ops->setup_tc(_dev, _netdev, _type, _type_data)
#else
static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
{
@@ -255,6 +260,7 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) -ENODEV
#define mtk_wed_device_stop(_dev) do {} while (0)
#define mtk_wed_device_dma_reset(_dev) do {} while (0)
+#define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) -EOPNOTSUPP
#endif
#endif
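
A hedged sketch of how an Ethernet driver's ndo_setup_tc() might forward
offload requests through the new hook; the example_priv layout is assumed,
the mtk_wed_device_* macros are from the header above:

    static int example_ndo_setup_tc(struct net_device *dev,
                                    enum tc_setup_type type, void *type_data)
    {
            struct example_priv *priv = netdev_priv(dev);   /* hypothetical */

            if (!mtk_wed_device_active(&priv->wed))
                    return -EOPNOTSUPP;

            /* resolves to ->ops->setup_tc(), or -EOPNOTSUPP when WED
             * support is compiled out
             */
            return mtk_wed_device_setup_tc(&priv->wed, dev, type, type_data);
    }
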
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index dafa001e9e7a..225751a8fd8e 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -186,6 +186,24 @@ struct stmmac_safety_feature_cfg {
u32 tmouten;
};
+/* Addresses that may be customized by a platform */
+struct dwmac4_addrs {
+ u32 dma_chan;
+ u32 dma_chan_offset;
+ u32 mtl_chan;
+ u32 mtl_chan_offset;
+ u32 mtl_ets_ctrl;
+ u32 mtl_ets_ctrl_offset;
+ u32 mtl_txq_weight;
+ u32 mtl_txq_weight_offset;
+ u32 mtl_send_slp_cred;
+ u32 mtl_send_slp_cred_offset;
+ u32 mtl_high_cred;
+ u32 mtl_high_cred_offset;
+ u32 mtl_low_cred;
+ u32 mtl_low_cred_offset;
+};
+
struct plat_stmmacenet_data {
int bus_id;
int phy_addr;
@@ -274,5 +292,6 @@ struct plat_stmmacenet_data {
bool use_phy_wol;
bool sph_disable;
bool serdes_up_after_phy_linkup;
+ const struct dwmac4_addrs *dwmac4_addrs;
};
#endif
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 6ed9b4d546a7..d5311ceb21c6 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -954,6 +954,7 @@ enum {
HCI_CONN_STK_ENCRYPT,
HCI_CONN_AUTH_INITIATOR,
HCI_CONN_DROP,
+ HCI_CONN_CANCEL,
HCI_CONN_PARAM_REMOVAL_PEND,
HCI_CONN_NEW_LINK_KEY,
HCI_CONN_SCANNING,
diff --git a/include/net/bonding.h b/include/net/bonding.h
index ea36ab7f9e72..c3843239517d 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -761,13 +761,17 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip)
#if IS_ENABLED(CONFIG_IPV6)
static inline int bond_get_targets_ip6(struct in6_addr *targets, struct in6_addr *ip)
{
+ struct in6_addr mcaddr;
int i;
- for (i = 0; i < BOND_MAX_NS_TARGETS; i++)
- if (ipv6_addr_equal(&targets[i], ip))
+ for (i = 0; i < BOND_MAX_NS_TARGETS; i++) {
+ addrconf_addr_solict_mult(&targets[i], &mcaddr);
+ if ((ipv6_addr_equal(&targets[i], ip)) ||
+ (ipv6_addr_equal(&mcaddr, ip)))
return i;
else if (ipv6_addr_any(&targets[i]))
break;
+ }
return -1;
}
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
new file mode 100644
index 000000000000..a2b953b57689
--- /dev/null
+++ b/include/net/dropreason-core.h
@@ -0,0 +1,370 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _LINUX_DROPREASON_CORE_H
+#define _LINUX_DROPREASON_CORE_H
+
+#define DEFINE_DROP_REASON(FN, FNe) \
+ FN(NOT_SPECIFIED) \
+ FN(NO_SOCKET) \
+ FN(PKT_TOO_SMALL) \
+ FN(TCP_CSUM) \
+ FN(SOCKET_FILTER) \
+ FN(UDP_CSUM) \
+ FN(NETFILTER_DROP) \
+ FN(OTHERHOST) \
+ FN(IP_CSUM) \
+ FN(IP_INHDR) \
+ FN(IP_RPFILTER) \
+ FN(UNICAST_IN_L2_MULTICAST) \
+ FN(XFRM_POLICY) \
+ FN(IP_NOPROTO) \
+ FN(SOCKET_RCVBUFF) \
+ FN(PROTO_MEM) \
+ FN(TCP_MD5NOTFOUND) \
+ FN(TCP_MD5UNEXPECTED) \
+ FN(TCP_MD5FAILURE) \
+ FN(SOCKET_BACKLOG) \
+ FN(TCP_FLAGS) \
+ FN(TCP_ZEROWINDOW) \
+ FN(TCP_OLD_DATA) \
+ FN(TCP_OVERWINDOW) \
+ FN(TCP_OFOMERGE) \
+ FN(TCP_RFC7323_PAWS) \
+ FN(TCP_INVALID_SEQUENCE) \
+ FN(TCP_RESET) \
+ FN(TCP_INVALID_SYN) \
+ FN(TCP_CLOSE) \
+ FN(TCP_FASTOPEN) \
+ FN(TCP_OLD_ACK) \
+ FN(TCP_TOO_OLD_ACK) \
+ FN(TCP_ACK_UNSENT_DATA) \
+ FN(TCP_OFO_QUEUE_PRUNE) \
+ FN(TCP_OFO_DROP) \
+ FN(IP_OUTNOROUTES) \
+ FN(BPF_CGROUP_EGRESS) \
+ FN(IPV6DISABLED) \
+ FN(NEIGH_CREATEFAIL) \
+ FN(NEIGH_FAILED) \
+ FN(NEIGH_QUEUEFULL) \
+ FN(NEIGH_DEAD) \
+ FN(TC_EGRESS) \
+ FN(QDISC_DROP) \
+ FN(CPU_BACKLOG) \
+ FN(XDP) \
+ FN(TC_INGRESS) \
+ FN(UNHANDLED_PROTO) \
+ FN(SKB_CSUM) \
+ FN(SKB_GSO_SEG) \
+ FN(SKB_UCOPY_FAULT) \
+ FN(DEV_HDR) \
+ FN(DEV_READY) \
+ FN(FULL_RING) \
+ FN(NOMEM) \
+ FN(HDR_TRUNC) \
+ FN(TAP_FILTER) \
+ FN(TAP_TXFILTER) \
+ FN(ICMP_CSUM) \
+ FN(INVALID_PROTO) \
+ FN(IP_INADDRERRORS) \
+ FN(IP_INNOROUTES) \
+ FN(PKT_TOO_BIG) \
+ FN(DUP_FRAG) \
+ FN(FRAG_REASM_TIMEOUT) \
+ FN(FRAG_TOO_FAR) \
+ FN(TCP_MINTTL) \
+ FN(IPV6_BAD_EXTHDR) \
+ FN(IPV6_NDISC_FRAG) \
+ FN(IPV6_NDISC_HOP_LIMIT) \
+ FN(IPV6_NDISC_BAD_CODE) \
+ FN(IPV6_NDISC_BAD_OPTIONS) \
+ FN(IPV6_NDISC_NS_OTHERHOST) \
+ FNe(MAX)
+
+/**
+ * enum skb_drop_reason - the reasons for skb drops
+ *
+ * The reason an skb was dropped, as used by kfree_skb_reason().
+ */
+enum skb_drop_reason {
+ /**
+ * @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case)
+ */
+ SKB_NOT_DROPPED_YET = 0,
+ /** @SKB_CONSUMED: packet has been consumed */
+ SKB_CONSUMED,
+ /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */
+ SKB_DROP_REASON_NOT_SPECIFIED,
+ /** @SKB_DROP_REASON_NO_SOCKET: socket not found */
+ SKB_DROP_REASON_NO_SOCKET,
+ /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */
+ SKB_DROP_REASON_PKT_TOO_SMALL,
+ /** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */
+ SKB_DROP_REASON_TCP_CSUM,
+ /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */
+ SKB_DROP_REASON_SOCKET_FILTER,
+ /** @SKB_DROP_REASON_UDP_CSUM: UDP checksum error */
+ SKB_DROP_REASON_UDP_CSUM,
+ /** @SKB_DROP_REASON_NETFILTER_DROP: dropped by netfilter */
+ SKB_DROP_REASON_NETFILTER_DROP,
+ /**
+ * @SKB_DROP_REASON_OTHERHOST: packet doesn't belong to the current host
+ * (interface is in promisc mode)
+ */
+ SKB_DROP_REASON_OTHERHOST,
+ /** @SKB_DROP_REASON_IP_CSUM: IP checksum error */
+ SKB_DROP_REASON_IP_CSUM,
+ /**
+ * @SKB_DROP_REASON_IP_INHDR: there is something wrong with the IP header (see
+ * IPSTATS_MIB_INHDRERRORS)
+ */
+ SKB_DROP_REASON_IP_INHDR,
+ /**
+ * @SKB_DROP_REASON_IP_RPFILTER: IP rpfilter validation failed. See the
+ * documentation for rp_filter in ip-sysctl.rst for more information
+ */
+ SKB_DROP_REASON_IP_RPFILTER,
+ /**
+ * @SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST: the L2 destination address is
+ * multicast, but the L3 address is unicast.
+ */
+ SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST,
+ /** @SKB_DROP_REASON_XFRM_POLICY: xfrm policy check failed */
+ SKB_DROP_REASON_XFRM_POLICY,
+ /** @SKB_DROP_REASON_IP_NOPROTO: no support for IP protocol */
+ SKB_DROP_REASON_IP_NOPROTO,
+ /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buffer is full */
+ SKB_DROP_REASON_SOCKET_RCVBUFF,
+ /**
+ * @SKB_DROP_REASON_PROTO_MEM: protocol memory limit reached, e.g. a UDP
+ * packet dropped because udp_memory_allocated was exceeded.
+ */
+ SKB_DROP_REASON_PROTO_MEM,
+ /**
+ * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash where one was expected,
+ * corresponding to LINUX_MIB_TCPMD5NOTFOUND
+ */
+ SKB_DROP_REASON_TCP_MD5NOTFOUND,
+ /**
+ * @SKB_DROP_REASON_TCP_MD5UNEXPECTED: MD5 hash and we're not expecting
+ * one, corresponding to LINUX_MIB_TCPMD5UNEXPECTED
+ */
+ SKB_DROP_REASON_TCP_MD5UNEXPECTED,
+ /**
+ * @SKB_DROP_REASON_TCP_MD5FAILURE: MD5 hash is present but wrong, corresponding
+ * to LINUX_MIB_TCPMD5FAILURE
+ */
+ SKB_DROP_REASON_TCP_MD5FAILURE,
+ /**
+ * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog
+ * (see LINUX_MIB_TCPBACKLOGDROP)
+ */
+ SKB_DROP_REASON_SOCKET_BACKLOG,
+ /** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */
+ SKB_DROP_REASON_TCP_FLAGS,
+ /**
+ * @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero,
+ * see LINUX_MIB_TCPZEROWINDOWDROP
+ */
+ SKB_DROP_REASON_TCP_ZEROWINDOW,
+ /**
+ * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data received has already
+ * been received before (a spurious retransmission may have happened), see
+ * LINUX_MIB_DELAYEDACKLOST
+ */
+ SKB_DROP_REASON_TCP_OLD_DATA,
+ /**
+ * @SKB_DROP_REASON_TCP_OVERWINDOW: the TCP data is out of window,
+ * the sequence number of the first byte exceeds the right edge of
+ * the receive window
+ */
+ SKB_DROP_REASON_TCP_OVERWINDOW,
+ /**
+ * @SKB_DROP_REASON_TCP_OFOMERGE: the data of skb is already in the ofo
+ * queue, corresponding to LINUX_MIB_TCPOFOMERGE
+ */
+ SKB_DROP_REASON_TCP_OFOMERGE,
+ /**
+ * @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to
+ * LINUX_MIB_PAWSESTABREJECTED
+ */
+ SKB_DROP_REASON_TCP_RFC7323_PAWS,
+ /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
+ SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
+ /** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */
+ SKB_DROP_REASON_TCP_RESET,
+ /**
+ * @SKB_DROP_REASON_TCP_INVALID_SYN: Incoming packet has unexpected
+ * SYN flag
+ */
+ SKB_DROP_REASON_TCP_INVALID_SYN,
+ /** @SKB_DROP_REASON_TCP_CLOSE: TCP socket in CLOSE state */
+ SKB_DROP_REASON_TCP_CLOSE,
+ /** @SKB_DROP_REASON_TCP_FASTOPEN: dropped by FASTOPEN request socket */
+ SKB_DROP_REASON_TCP_FASTOPEN,
+ /** @SKB_DROP_REASON_TCP_OLD_ACK: TCP ACK is old, but in window */
+ SKB_DROP_REASON_TCP_OLD_ACK,
+ /** @SKB_DROP_REASON_TCP_TOO_OLD_ACK: TCP ACK is too old */
+ SKB_DROP_REASON_TCP_TOO_OLD_ACK,
+ /**
+ * @SKB_DROP_REASON_TCP_ACK_UNSENT_DATA: TCP ACK for data we haven't
+ * sent yet
+ */
+ SKB_DROP_REASON_TCP_ACK_UNSENT_DATA,
+ /** @SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE: pruned from TCP OFO queue */
+ SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE,
+ /** @SKB_DROP_REASON_TCP_OFO_DROP: data already in receive queue */
+ SKB_DROP_REASON_TCP_OFO_DROP,
+ /** @SKB_DROP_REASON_IP_OUTNOROUTES: route lookup failed */
+ SKB_DROP_REASON_IP_OUTNOROUTES,
+ /**
+ * @SKB_DROP_REASON_BPF_CGROUP_EGRESS: dropped by BPF_PROG_TYPE_CGROUP_SKB
+ * eBPF program
+ */
+ SKB_DROP_REASON_BPF_CGROUP_EGRESS,
+ /** @SKB_DROP_REASON_IPV6DISABLED: IPv6 is disabled on the device */
+ SKB_DROP_REASON_IPV6DISABLED,
+ /** @SKB_DROP_REASON_NEIGH_CREATEFAIL: failed to create neigh entry */
+ SKB_DROP_REASON_NEIGH_CREATEFAIL,
+ /** @SKB_DROP_REASON_NEIGH_FAILED: neigh entry in failed state */
+ SKB_DROP_REASON_NEIGH_FAILED,
+ /** @SKB_DROP_REASON_NEIGH_QUEUEFULL: arp_queue for neigh entry is full */
+ SKB_DROP_REASON_NEIGH_QUEUEFULL,
+ /** @SKB_DROP_REASON_NEIGH_DEAD: neigh entry is dead */
+ SKB_DROP_REASON_NEIGH_DEAD,
+ /** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */
+ SKB_DROP_REASON_TC_EGRESS,
+ /**
+ * @SKB_DROP_REASON_QDISC_DROP: dropped by the qdisc on packet output
+ * (failed to enqueue to the current qdisc)
+ */
+ SKB_DROP_REASON_QDISC_DROP,
+ /**
+ * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per-CPU
+ * backlog queue. This can be caused by the backlog queue being full (see
+ * netdev_max_backlog in net.rst) or by the RPS flow limit
+ */
+ SKB_DROP_REASON_CPU_BACKLOG,
+ /** @SKB_DROP_REASON_XDP: dropped by XDP in input path */
+ SKB_DROP_REASON_XDP,
+ /** @SKB_DROP_REASON_TC_INGRESS: dropped in TC ingress HOOK */
+ SKB_DROP_REASON_TC_INGRESS,
+ /** @SKB_DROP_REASON_UNHANDLED_PROTO: protocol not implemented or not supported */
+ SKB_DROP_REASON_UNHANDLED_PROTO,
+ /** @SKB_DROP_REASON_SKB_CSUM: sk_buff checksum computation error */
+ SKB_DROP_REASON_SKB_CSUM,
+ /** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */
+ SKB_DROP_REASON_SKB_GSO_SEG,
+ /**
+ * @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space,
+ * e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx()
+ */
+ SKB_DROP_REASON_SKB_UCOPY_FAULT,
+ /** @SKB_DROP_REASON_DEV_HDR: device driver specific header/metadata is invalid */
+ SKB_DROP_REASON_DEV_HDR,
+ /**
+ * @SKB_DROP_REASON_DEV_READY: the device is not ready to xmit/recv because
+ * one of its data structures is not up/ready/initialized, e.g.,
+ * IFF_UP is not set, or the driver-specific tun->tfiles[txq] is
+ * not initialized
+ */
+ SKB_DROP_REASON_DEV_READY,
+ /** @SKB_DROP_REASON_FULL_RING: ring buffer is full */
+ SKB_DROP_REASON_FULL_RING,
+ /** @SKB_DROP_REASON_NOMEM: error due to OOM */
+ SKB_DROP_REASON_NOMEM,
+ /**
+ * @SKB_DROP_REASON_HDR_TRUNC: failed to trunc/extract the header from
+ * networking data, e.g., failed to pull the protocol header from
+ * frags via pskb_may_pull()
+ */
+ SKB_DROP_REASON_HDR_TRUNC,
+ /**
+ * @SKB_DROP_REASON_TAP_FILTER: dropped by (ebpf) filter directly attached
+ * to tun/tap, e.g., via TUNSETFILTEREBPF
+ */
+ SKB_DROP_REASON_TAP_FILTER,
+ /**
+ * @SKB_DROP_REASON_TAP_TXFILTER: dropped by tx filter implemented at
+ * tun/tap, e.g., check_filter()
+ */
+ SKB_DROP_REASON_TAP_TXFILTER,
+ /** @SKB_DROP_REASON_ICMP_CSUM: ICMP checksum error */
+ SKB_DROP_REASON_ICMP_CSUM,
+ /**
+ * @SKB_DROP_REASON_INVALID_PROTO: the packet doesn't follow RFC 2211,
+ * such as a broadcast ICMP_TIMESTAMP
+ */
+ SKB_DROP_REASON_INVALID_PROTO,
+ /**
+ * @SKB_DROP_REASON_IP_INADDRERRORS: host unreachable, corresponding to
+ * IPSTATS_MIB_INADDRERRORS
+ */
+ SKB_DROP_REASON_IP_INADDRERRORS,
+ /**
+ * @SKB_DROP_REASON_IP_INNOROUTES: network unreachable, corresponding to
+ * IPSTATS_MIB_INADDRERRORS
+ */
+ SKB_DROP_REASON_IP_INNOROUTES,
+ /**
+ * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (may exceed the
+ * MTU)
+ */
+ SKB_DROP_REASON_PKT_TOO_BIG,
+ /** @SKB_DROP_REASON_DUP_FRAG: duplicate fragment */
+ SKB_DROP_REASON_DUP_FRAG,
+ /** @SKB_DROP_REASON_FRAG_REASM_TIMEOUT: fragment reassembly timeout */
+ SKB_DROP_REASON_FRAG_REASM_TIMEOUT,
+ /**
+ * @SKB_DROP_REASON_FRAG_TOO_FAR: ipv4 fragment too far.
+ * (/proc/sys/net/ipv4/ipfrag_max_dist)
+ */
+ SKB_DROP_REASON_FRAG_TOO_FAR,
+ /**
+ * @SKB_DROP_REASON_TCP_MINTTL: ipv4 ttl or ipv6 hoplimit below
+ * the threshold (IP_MINTTL or IPV6_MINHOPCOUNT).
+ */
+ SKB_DROP_REASON_TCP_MINTTL,
+ /** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. */
+ SKB_DROP_REASON_IPV6_BAD_EXTHDR,
+ /** @SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc). */
+ SKB_DROP_REASON_IPV6_NDISC_FRAG,
+ /** @SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit. */
+ SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT,
+ /** @SKB_DROP_REASON_IPV6_NDISC_BAD_CODE: invalid NDISC icmp6 code. */
+ SKB_DROP_REASON_IPV6_NDISC_BAD_CODE,
+ /** @SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS: invalid NDISC options. */
+ SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS,
+ /**
+ * @SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST: NEIGHBOUR SOLICITATION
+ * for another host.
+ */
+ SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
+ /**
+ * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
+ * shouldn't be used as a real 'reason' - only for tracing code gen
+ */
+ SKB_DROP_REASON_MAX,
+
+ /**
+ * @SKB_DROP_REASON_SUBSYS_MASK: subsystem mask in drop reasons,
+ * see &enum skb_drop_reason_subsys
+ */
+ SKB_DROP_REASON_SUBSYS_MASK = 0xffff0000,
+};
+
+#define SKB_DROP_REASON_SUBSYS_SHIFT 16
+
+#define SKB_DR_INIT(name, reason) \
+ enum skb_drop_reason name = SKB_DROP_REASON_##reason
+#define SKB_DR(name) \
+ SKB_DR_INIT(name, NOT_SPECIFIED)
+#define SKB_DR_SET(name, reason) \
+ (name = SKB_DROP_REASON_##reason)
+#define SKB_DR_OR(name, reason) \
+ do { \
+ if (name == SKB_DROP_REASON_NOT_SPECIFIED || \
+ name == SKB_NOT_DROPPED_YET) \
+ SKB_DR_SET(name, reason); \
+ } while (0)
+
+#endif
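
The SKB_DR_* helpers keep drop-reason bookkeeping out of the main flow; a
small sketch (the validation step example_csum_ok() is a stand-in, the
macros and kfree_skb_reason() are real):

    static void example_rx_handler(struct sk_buff *skb)
    {
            SKB_DR(reason);         /* == SKB_DROP_REASON_NOT_SPECIFIED */

            if (!pskb_may_pull(skb, sizeof(struct iphdr))) {
                    SKB_DR_SET(reason, PKT_TOO_SMALL);
                    goto drop;
            }
            if (!example_csum_ok(skb)) {
                    SKB_DR_OR(reason, IP_CSUM); /* only if still unset */
                    goto drop;
            }
            /* ... deliver ... */
            return;
    drop:
            kfree_skb_reason(skb, reason);
    }
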
diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index c0a3ea806cd5..685fb37df8e8 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -2,362 +2,42 @@
#ifndef _LINUX_DROPREASON_H
#define _LINUX_DROPREASON_H
-
-#define DEFINE_DROP_REASON(FN, FNe) \
- FN(NOT_SPECIFIED) \
- FN(NO_SOCKET) \
- FN(PKT_TOO_SMALL) \
- FN(TCP_CSUM) \
- FN(SOCKET_FILTER) \
- FN(UDP_CSUM) \
- FN(NETFILTER_DROP) \
- FN(OTHERHOST) \
- FN(IP_CSUM) \
- FN(IP_INHDR) \
- FN(IP_RPFILTER) \
- FN(UNICAST_IN_L2_MULTICAST) \
- FN(XFRM_POLICY) \
- FN(IP_NOPROTO) \
- FN(SOCKET_RCVBUFF) \
- FN(PROTO_MEM) \
- FN(TCP_MD5NOTFOUND) \
- FN(TCP_MD5UNEXPECTED) \
- FN(TCP_MD5FAILURE) \
- FN(SOCKET_BACKLOG) \
- FN(TCP_FLAGS) \
- FN(TCP_ZEROWINDOW) \
- FN(TCP_OLD_DATA) \
- FN(TCP_OVERWINDOW) \
- FN(TCP_OFOMERGE) \
- FN(TCP_RFC7323_PAWS) \
- FN(TCP_INVALID_SEQUENCE) \
- FN(TCP_RESET) \
- FN(TCP_INVALID_SYN) \
- FN(TCP_CLOSE) \
- FN(TCP_FASTOPEN) \
- FN(TCP_OLD_ACK) \
- FN(TCP_TOO_OLD_ACK) \
- FN(TCP_ACK_UNSENT_DATA) \
- FN(TCP_OFO_QUEUE_PRUNE) \
- FN(TCP_OFO_DROP) \
- FN(IP_OUTNOROUTES) \
- FN(BPF_CGROUP_EGRESS) \
- FN(IPV6DISABLED) \
- FN(NEIGH_CREATEFAIL) \
- FN(NEIGH_FAILED) \
- FN(NEIGH_QUEUEFULL) \
- FN(NEIGH_DEAD) \
- FN(TC_EGRESS) \
- FN(QDISC_DROP) \
- FN(CPU_BACKLOG) \
- FN(XDP) \
- FN(TC_INGRESS) \
- FN(UNHANDLED_PROTO) \
- FN(SKB_CSUM) \
- FN(SKB_GSO_SEG) \
- FN(SKB_UCOPY_FAULT) \
- FN(DEV_HDR) \
- FN(DEV_READY) \
- FN(FULL_RING) \
- FN(NOMEM) \
- FN(HDR_TRUNC) \
- FN(TAP_FILTER) \
- FN(TAP_TXFILTER) \
- FN(ICMP_CSUM) \
- FN(INVALID_PROTO) \
- FN(IP_INADDRERRORS) \
- FN(IP_INNOROUTES) \
- FN(PKT_TOO_BIG) \
- FN(DUP_FRAG) \
- FN(FRAG_REASM_TIMEOUT) \
- FN(FRAG_TOO_FAR) \
- FN(TCP_MINTTL) \
- FN(IPV6_BAD_EXTHDR) \
- FN(IPV6_NDISC_FRAG) \
- FN(IPV6_NDISC_HOP_LIMIT) \
- FN(IPV6_NDISC_BAD_CODE) \
- FN(IPV6_NDISC_BAD_OPTIONS) \
- FN(IPV6_NDISC_NS_OTHERHOST) \
- FNe(MAX)
+#include <net/dropreason-core.h>
/**
- * enum skb_drop_reason - the reasons of skb drops
- *
- * The reason of skb drop, which is used in kfree_skb_reason().
+ * enum skb_drop_reason_subsys - subsystem tag for (extended) drop reasons
*/
-enum skb_drop_reason {
- /**
- * @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case)
- */
- SKB_NOT_DROPPED_YET = 0,
- /** @SKB_CONSUMED: packet has been consumed */
- SKB_CONSUMED,
- /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */
- SKB_DROP_REASON_NOT_SPECIFIED,
- /** @SKB_DROP_REASON_NO_SOCKET: socket not found */
- SKB_DROP_REASON_NO_SOCKET,
- /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */
- SKB_DROP_REASON_PKT_TOO_SMALL,
- /** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */
- SKB_DROP_REASON_TCP_CSUM,
- /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */
- SKB_DROP_REASON_SOCKET_FILTER,
- /** @SKB_DROP_REASON_UDP_CSUM: UDP checksum error */
- SKB_DROP_REASON_UDP_CSUM,
- /** @SKB_DROP_REASON_NETFILTER_DROP: dropped by netfilter */
- SKB_DROP_REASON_NETFILTER_DROP,
- /**
- * @SKB_DROP_REASON_OTHERHOST: packet don't belong to current host
- * (interface is in promisc mode)
- */
- SKB_DROP_REASON_OTHERHOST,
- /** @SKB_DROP_REASON_IP_CSUM: IP checksum error */
- SKB_DROP_REASON_IP_CSUM,
- /**
- * @SKB_DROP_REASON_IP_INHDR: there is something wrong with IP header (see
- * IPSTATS_MIB_INHDRERRORS)
- */
- SKB_DROP_REASON_IP_INHDR,
- /**
- * @SKB_DROP_REASON_IP_RPFILTER: IP rpfilter validate failed. see the
- * document for rp_filter in ip-sysctl.rst for more information
- */
- SKB_DROP_REASON_IP_RPFILTER,
- /**
- * @SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST: destination address of L2 is
- * multicast, but L3 is unicast.
- */
- SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST,
- /** @SKB_DROP_REASON_XFRM_POLICY: xfrm policy check failed */
- SKB_DROP_REASON_XFRM_POLICY,
- /** @SKB_DROP_REASON_IP_NOPROTO: no support for IP protocol */
- SKB_DROP_REASON_IP_NOPROTO,
- /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */
- SKB_DROP_REASON_SOCKET_RCVBUFF,
- /**
- * @SKB_DROP_REASON_PROTO_MEM: proto memory limition, such as udp packet
- * drop out of udp_memory_allocated.
- */
- SKB_DROP_REASON_PROTO_MEM,
- /**
- * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected,
- * corresponding to LINUX_MIB_TCPMD5NOTFOUND
- */
- SKB_DROP_REASON_TCP_MD5NOTFOUND,
- /**
- * @SKB_DROP_REASON_TCP_MD5UNEXPECTED: MD5 hash and we're not expecting
- * one, corresponding to LINUX_MIB_TCPMD5UNEXPECTED
- */
- SKB_DROP_REASON_TCP_MD5UNEXPECTED,
- /**
- * @SKB_DROP_REASON_TCP_MD5FAILURE: MD5 hash and its wrong, corresponding
- * to LINUX_MIB_TCPMD5FAILURE
- */
- SKB_DROP_REASON_TCP_MD5FAILURE,
- /**
- * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog (
- * see LINUX_MIB_TCPBACKLOGDROP)
- */
- SKB_DROP_REASON_SOCKET_BACKLOG,
- /** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */
- SKB_DROP_REASON_TCP_FLAGS,
- /**
- * @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero,
- * see LINUX_MIB_TCPZEROWINDOWDROP
- */
- SKB_DROP_REASON_TCP_ZEROWINDOW,
- /**
- * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data reveived is already
- * received before (spurious retrans may happened), see
- * LINUX_MIB_DELAYEDACKLOST
- */
- SKB_DROP_REASON_TCP_OLD_DATA,
- /**
- * @SKB_DROP_REASON_TCP_OVERWINDOW: the TCP data is out of window,
- * the seq of the first byte exceed the right edges of receive
- * window
- */
- SKB_DROP_REASON_TCP_OVERWINDOW,
- /**
- * @SKB_DROP_REASON_TCP_OFOMERGE: the data of skb is already in the ofo
- * queue, corresponding to LINUX_MIB_TCPOFOMERGE
- */
- SKB_DROP_REASON_TCP_OFOMERGE,
- /**
- * @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to
- * LINUX_MIB_PAWSESTABREJECTED
- */
- SKB_DROP_REASON_TCP_RFC7323_PAWS,
- /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
- SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
- /** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */
- SKB_DROP_REASON_TCP_RESET,
- /**
- * @SKB_DROP_REASON_TCP_INVALID_SYN: Incoming packet has unexpected
- * SYN flag
- */
- SKB_DROP_REASON_TCP_INVALID_SYN,
- /** @SKB_DROP_REASON_TCP_CLOSE: TCP socket in CLOSE state */
- SKB_DROP_REASON_TCP_CLOSE,
- /** @SKB_DROP_REASON_TCP_FASTOPEN: dropped by FASTOPEN request socket */
- SKB_DROP_REASON_TCP_FASTOPEN,
- /** @SKB_DROP_REASON_TCP_OLD_ACK: TCP ACK is old, but in window */
- SKB_DROP_REASON_TCP_OLD_ACK,
- /** @SKB_DROP_REASON_TCP_TOO_OLD_ACK: TCP ACK is too old */
- SKB_DROP_REASON_TCP_TOO_OLD_ACK,
- /**
- * @SKB_DROP_REASON_TCP_ACK_UNSENT_DATA: TCP ACK for data we haven't
- * sent yet
- */
- SKB_DROP_REASON_TCP_ACK_UNSENT_DATA,
- /** @SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE: pruned from TCP OFO queue */
- SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE,
- /** @SKB_DROP_REASON_TCP_OFO_DROP: data already in receive queue */
- SKB_DROP_REASON_TCP_OFO_DROP,
- /** @SKB_DROP_REASON_IP_OUTNOROUTES: route lookup failed */
- SKB_DROP_REASON_IP_OUTNOROUTES,
- /**
- * @SKB_DROP_REASON_BPF_CGROUP_EGRESS: dropped by BPF_PROG_TYPE_CGROUP_SKB
- * eBPF program
- */
- SKB_DROP_REASON_BPF_CGROUP_EGRESS,
- /** @SKB_DROP_REASON_IPV6DISABLED: IPv6 is disabled on the device */
- SKB_DROP_REASON_IPV6DISABLED,
- /** @SKB_DROP_REASON_NEIGH_CREATEFAIL: failed to create neigh entry */
- SKB_DROP_REASON_NEIGH_CREATEFAIL,
- /** @SKB_DROP_REASON_NEIGH_FAILED: neigh entry in failed state */
- SKB_DROP_REASON_NEIGH_FAILED,
- /** @SKB_DROP_REASON_NEIGH_QUEUEFULL: arp_queue for neigh entry is full */
- SKB_DROP_REASON_NEIGH_QUEUEFULL,
- /** @SKB_DROP_REASON_NEIGH_DEAD: neigh entry is dead */
- SKB_DROP_REASON_NEIGH_DEAD,
- /** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */
- SKB_DROP_REASON_TC_EGRESS,
- /**
- * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting (
- * failed to enqueue to current qdisc)
- */
- SKB_DROP_REASON_QDISC_DROP,
- /**
- * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU
- * backlog queue. This can be caused by backlog queue full (see
- * netdev_max_backlog in net.rst) or RPS flow limit
- */
- SKB_DROP_REASON_CPU_BACKLOG,
- /** @SKB_DROP_REASON_XDP: dropped by XDP in input path */
- SKB_DROP_REASON_XDP,
- /** @SKB_DROP_REASON_TC_INGRESS: dropped in TC ingress HOOK */
- SKB_DROP_REASON_TC_INGRESS,
- /** @SKB_DROP_REASON_UNHANDLED_PROTO: protocol not implemented or not supported */
- SKB_DROP_REASON_UNHANDLED_PROTO,
- /** @SKB_DROP_REASON_SKB_CSUM: sk_buff checksum computation error */
- SKB_DROP_REASON_SKB_CSUM,
- /** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */
- SKB_DROP_REASON_SKB_GSO_SEG,
- /**
- * @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space,
- * e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx()
- */
- SKB_DROP_REASON_SKB_UCOPY_FAULT,
- /** @SKB_DROP_REASON_DEV_HDR: device driver specific header/metadata is invalid */
- SKB_DROP_REASON_DEV_HDR,
- /**
- * @SKB_DROP_REASON_DEV_READY: the device is not ready to xmit/recv due to
- * any of its data structure that is not up/ready/initialized,
- * e.g., the IFF_UP is not set, or driver specific tun->tfiles[txq]
- * is not initialized
- */
- SKB_DROP_REASON_DEV_READY,
- /** @SKB_DROP_REASON_FULL_RING: ring buffer is full */
- SKB_DROP_REASON_FULL_RING,
- /** @SKB_DROP_REASON_NOMEM: error due to OOM */
- SKB_DROP_REASON_NOMEM,
- /**
- * @SKB_DROP_REASON_HDR_TRUNC: failed to trunc/extract the header from
- * networking data, e.g., failed to pull the protocol header from
- * frags via pskb_may_pull()
- */
- SKB_DROP_REASON_HDR_TRUNC,
- /**
- * @SKB_DROP_REASON_TAP_FILTER: dropped by (ebpf) filter directly attached
- * to tun/tap, e.g., via TUNSETFILTEREBPF
- */
- SKB_DROP_REASON_TAP_FILTER,
- /**
- * @SKB_DROP_REASON_TAP_TXFILTER: dropped by tx filter implemented at
- * tun/tap, e.g., check_filter()
- */
- SKB_DROP_REASON_TAP_TXFILTER,
- /** @SKB_DROP_REASON_ICMP_CSUM: ICMP checksum error */
- SKB_DROP_REASON_ICMP_CSUM,
- /**
- * @SKB_DROP_REASON_INVALID_PROTO: the packet doesn't follow RFC 2211,
- * such as a broadcasts ICMP_TIMESTAMP
- */
- SKB_DROP_REASON_INVALID_PROTO,
- /**
- * @SKB_DROP_REASON_IP_INADDRERRORS: host unreachable, corresponding to
- * IPSTATS_MIB_INADDRERRORS
- */
- SKB_DROP_REASON_IP_INADDRERRORS,
- /**
- * @SKB_DROP_REASON_IP_INNOROUTES: network unreachable, corresponding to
- * IPSTATS_MIB_INADDRERRORS
- */
- SKB_DROP_REASON_IP_INNOROUTES,
- /**
- * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the
- * MTU)
- */
- SKB_DROP_REASON_PKT_TOO_BIG,
- /** @SKB_DROP_REASON_DUP_FRAG: duplicate fragment */
- SKB_DROP_REASON_DUP_FRAG,
- /** @SKB_DROP_REASON_FRAG_REASM_TIMEOUT: fragment reassembly timeout */
- SKB_DROP_REASON_FRAG_REASM_TIMEOUT,
- /**
- * @SKB_DROP_REASON_FRAG_TOO_FAR: ipv4 fragment too far.
- * (/proc/sys/net/ipv4/ipfrag_max_dist)
- */
- SKB_DROP_REASON_FRAG_TOO_FAR,
+enum skb_drop_reason_subsys {
+ /** @SKB_DROP_REASON_SUBSYS_CORE: core drop reasons defined above */
+ SKB_DROP_REASON_SUBSYS_CORE,
+
/**
- * @SKB_DROP_REASON_TCP_MINTTL: ipv4 ttl or ipv6 hoplimit below
- * the threshold (IP_MINTTL or IPV6_MINHOPCOUNT).
- */
- SKB_DROP_REASON_TCP_MINTTL,
- /** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. */
- SKB_DROP_REASON_IPV6_BAD_EXTHDR,
- /** @SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc). */
- SKB_DROP_REASON_IPV6_NDISC_FRAG,
- /** @SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit. */
- SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT,
- /** @SKB_DROP_REASON_IPV6_NDISC_BAD_CODE: invalid NDISC icmp6 code. */
- SKB_DROP_REASON_IPV6_NDISC_BAD_CODE,
- /** @SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS: invalid NDISC options. */
- SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS,
- /** @SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST: NEIGHBOUR SOLICITATION
- * for another host.
+ * @SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE: mac80211 drop reasons
+ * for unusable frames, see net/mac80211/drop.h
*/
- SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
+ SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE,
+
/**
- * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be
- * used as a real 'reason'
+ * @SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR: mac80211 drop reasons
+ * for frames still going to monitor, see net/mac80211/drop.h
*/
- SKB_DROP_REASON_MAX,
+ SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR,
+
+ /** @SKB_DROP_REASON_SUBSYS_NUM: number of subsystems defined */
+ SKB_DROP_REASON_SUBSYS_NUM
+};
+
+struct drop_reason_list {
+ const char * const *reasons;
+ size_t n_reasons;
};
-#define SKB_DR_INIT(name, reason) \
- enum skb_drop_reason name = SKB_DROP_REASON_##reason
-#define SKB_DR(name) \
- SKB_DR_INIT(name, NOT_SPECIFIED)
-#define SKB_DR_SET(name, reason) \
- (name = SKB_DROP_REASON_##reason)
-#define SKB_DR_OR(name, reason) \
- do { \
- if (name == SKB_DROP_REASON_NOT_SPECIFIED || \
- name == SKB_NOT_DROPPED_YET) \
- SKB_DR_SET(name, reason); \
- } while (0)
+/* Note: due to dynamic registrations, access must be under RCU */
+extern const struct drop_reason_list __rcu *
+drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_NUM];
-extern const char * const drop_reasons[];
+void drop_reasons_register_subsys(enum skb_drop_reason_subsys subsys,
+ const struct drop_reason_list *list);
+void drop_reasons_unregister_subsys(enum skb_drop_reason_subsys subsys);
#endif
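
Registration is expected to look roughly like the following; the reason
strings are invented, and the subsystem constant must be one of the slots
in enum skb_drop_reason_subsys:

    static const char * const example_reasons[] = {
            "EXAMPLE_BAD_FCS",      /* hypothetical subsystem reasons */
            "EXAMPLE_BAD_MIC",
    };

    static const struct drop_reason_list example_reason_list = {
            .reasons   = example_reasons,
            .n_reasons = ARRAY_SIZE(example_reasons),
    };

    /* module init */
    drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE,
                                 &example_reason_list);

    /* module exit */
    drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE);
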
diff --git a/include/net/dsa_stubs.h b/include/net/dsa_stubs.h
new file mode 100644
index 000000000000..361811750a54
--- /dev/null
+++ b/include/net/dsa_stubs.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * include/net/dsa_stubs.h - Stubs for the Distributed Switch Architecture framework
+ */
+
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/net_tstamp.h>
+#include <net/dsa.h>
+
+#if IS_ENABLED(CONFIG_NET_DSA)
+
+extern const struct dsa_stubs *dsa_stubs;
+
+struct dsa_stubs {
+ int (*master_hwtstamp_validate)(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack);
+};
+
+static inline int dsa_master_hwtstamp_validate(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
+{
+ if (!netdev_uses_dsa(dev))
+ return 0;
+
+ /* rtnl_lock() is a sufficient guarantee, because as long as
+ * netdev_uses_dsa() returns true, the dsa_core module is still
+ * registered, and so, dsa_unregister_stubs() couldn't have run.
+ * For netdev_uses_dsa() to start returning false, it would imply that
+ * dsa_master_teardown() has executed, which requires rtnl_lock().
+ */
+ ASSERT_RTNL();
+
+ return dsa_stubs->master_hwtstamp_validate(dev, config, extack);
+}
+
+#else
+
+static inline int dsa_master_hwtstamp_validate(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
+{
+ return 0;
+}
+
+#endif
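
The stub is meant to be called from the master's hwtstamp configuration
path while rtnl_lock() is held; roughly (err, cfg and extack are assumed
locals):

    /* e.g. from the SIOCSHWTSTAMP path, under rtnl_lock() */
    err = dsa_master_hwtstamp_validate(dev, &cfg, extack);
    if (err)
            return err;
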
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 5ccf52ef8809..85b2281576ed 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -14,7 +14,9 @@ struct sk_buff;
/**
* struct flow_dissector_key_control:
- * @thoff: Transport header offset
+ * @thoff: Transport header offset
+ * @addr_type: Type of key. One of FLOW_DISSECTOR_KEY_*
+ * @flags: Key flags. Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAG|ENCAPSULATION)
*/
struct flow_dissector_key_control {
u16 thoff;
@@ -36,8 +38,9 @@ enum flow_dissect_ret {
/**
* struct flow_dissector_key_basic:
- * @n_proto: Network header protocol (eg. IPv4/IPv6)
+ * @n_proto: Network header protocol (eg. IPv4/IPv6)
* @ip_proto: Transport header protocol (eg. TCP/UDP)
+ * @padding: Unused
*/
struct flow_dissector_key_basic {
__be16 n_proto;
@@ -135,6 +138,7 @@ struct flow_dissector_key_tipc {
* struct flow_dissector_key_addrs:
* @v4addrs: IPv4 addresses
* @v6addrs: IPv6 addresses
+ * @tipckey: TIPC key
*/
struct flow_dissector_key_addrs {
union {
@@ -145,14 +149,12 @@ struct flow_dissector_key_addrs {
};
/**
- * flow_dissector_key_arp:
- * @ports: Operation, source and target addresses for an ARP header
- * for Ethernet hardware addresses and IPv4 protocol addresses
- * sip: Sender IP address
- * tip: Target IP address
- * op: Operation
- * sha: Sender hardware address
- * tpa: Target hardware address
+ * struct flow_dissector_key_arp:
+ * @sip: Sender IP address
+ * @tip: Target IP address
+ * @op: Operation
+ * @sha: Sender hardware address
+ * @tha: Target hardware address
*/
struct flow_dissector_key_arp {
__u32 sip;
@@ -163,10 +165,10 @@ struct flow_dissector_key_arp {
};
/**
- * flow_dissector_key_tp_ports:
- * @ports: port numbers of Transport header
- * src: source port number
- * dst: destination port number
+ * struct flow_dissector_key_ports:
+ * @ports: port numbers of Transport header
+ * @src: source port number
+ * @dst: destination port number
*/
struct flow_dissector_key_ports {
union {
@@ -195,10 +197,10 @@ struct flow_dissector_key_ports_range {
};
/**
- * flow_dissector_key_icmp:
- * type: ICMP type
- * code: ICMP code
- * id: session identifier
+ * struct flow_dissector_key_icmp:
+ * @type: ICMP type
+ * @code: ICMP code
+ * @id: Session identifier
*/
struct flow_dissector_key_icmp {
struct {
diff --git a/include/net/fou.h b/include/net/fou.h
index 80f56e275b08..824eb4b231fd 100644
--- a/include/net/fou.h
+++ b/include/net/fou.h
@@ -17,4 +17,6 @@ int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, __be16 *sport, int type);
+int register_fou_bpf(void);
+
#endif
diff --git a/include/net/handshake.h b/include/net/handshake.h
new file mode 100644
index 000000000000..3352b1ab43b3
--- /dev/null
+++ b/include/net/handshake.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Generic netlink HANDSHAKE service.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#ifndef _NET_HANDSHAKE_H
+#define _NET_HANDSHAKE_H
+
+enum {
+ TLS_NO_KEYRING = 0,
+ TLS_NO_PEERID = 0,
+ TLS_NO_CERT = 0,
+ TLS_NO_PRIVKEY = 0,
+};
+
+typedef void (*tls_done_func_t)(void *data, int status,
+ key_serial_t peerid);
+
+struct tls_handshake_args {
+ struct socket *ta_sock;
+ tls_done_func_t ta_done;
+ void *ta_data;
+ unsigned int ta_timeout_ms;
+ key_serial_t ta_keyring;
+ key_serial_t ta_my_cert;
+ key_serial_t ta_my_privkey;
+ unsigned int ta_num_peerids;
+ key_serial_t ta_my_peerids[5];
+};
+
+int tls_client_hello_anon(const struct tls_handshake_args *args, gfp_t flags);
+int tls_client_hello_x509(const struct tls_handshake_args *args, gfp_t flags);
+int tls_client_hello_psk(const struct tls_handshake_args *args, gfp_t flags);
+int tls_server_hello_x509(const struct tls_handshake_args *args, gfp_t flags);
+int tls_server_hello_psk(const struct tls_handshake_args *args, gfp_t flags);
+
+bool tls_handshake_cancel(struct sock *sk);
+
+#endif /* _NET_HANDSHAKE_H */
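
A consumer fills struct tls_handshake_args and picks one of the hello
functions; a hedged sketch of an x509 client request (the timeout value and
the done-callback body are arbitrary):

    static void example_handshake_done(void *data, int status,
                                       key_serial_t peerid)
    {
            /* called when the user-space handshake agent finishes;
             * status is 0 or a negative errno
             */
    }

    static int example_start_tls(struct socket *sock)
    {
            struct tls_handshake_args args = {
                    .ta_sock        = sock,
                    .ta_done        = example_handshake_done,
                    .ta_data        = sock,
                    .ta_timeout_ms  = 5000,         /* arbitrary */
                    .ta_keyring     = TLS_NO_KEYRING,
                    .ta_my_cert     = TLS_NO_CERT,
                    .ta_my_privkey  = TLS_NO_PRIVKEY,
            };

            return tls_client_hello_x509(&args, GFP_KERNEL);
    }
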
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index b23ddec3cd5c..325ad893f624 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -7,7 +7,7 @@
#include <linux/in6.h>
#include <linux/rbtree_types.h>
#include <linux/refcount.h>
-#include <net/dropreason.h>
+#include <net/dropreason-core.h>
/* Per netns frag queues directory */
struct fqdir {
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 255b32a90850..ed4b6ad3fcac 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -57,6 +57,13 @@ struct ip_tunnel_key {
__u8 flow_flags;
};
+struct ip_tunnel_encap {
+ u16 type;
+ u16 flags;
+ __be16 sport;
+ __be16 dport;
+};
+
/* Flags for ip_tunnel_info mode. */
#define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */
#define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */
@@ -75,6 +82,7 @@ struct ip_tunnel_key {
struct ip_tunnel_info {
struct ip_tunnel_key key;
+ struct ip_tunnel_encap encap;
#ifdef CONFIG_DST_CACHE
struct dst_cache dst_cache;
#endif
@@ -92,13 +100,6 @@ struct ip_tunnel_6rd_parm {
};
#endif
-struct ip_tunnel_encap {
- u16 type;
- u16 flags;
- __be16 sport;
- __be16 dport;
-};
-
struct ip_tunnel_prl_entry {
struct ip_tunnel_prl_entry __rcu *next;
__be32 addr;
@@ -299,6 +300,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
__be32 remote, __be32 local,
__be32 key);
+void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info);
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
bool log_ecn_error);
@@ -377,22 +379,23 @@ static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
return hlen;
}
-static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+static inline int ip_tunnel_encap(struct sk_buff *skb,
+ struct ip_tunnel_encap *e,
u8 *protocol, struct flowi4 *fl4)
{
const struct ip_tunnel_encap_ops *ops;
int ret = -EINVAL;
- if (t->encap.type == TUNNEL_ENCAP_NONE)
+ if (e->type == TUNNEL_ENCAP_NONE)
return 0;
- if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
+ if (e->type >= MAX_IPTUN_ENCAP_OPS)
return -EINVAL;
rcu_read_lock();
- ops = rcu_dereference(iptun_encaps[t->encap.type]);
+ ops = rcu_dereference(iptun_encaps[e->type]);
if (likely(ops && ops->build_header))
- ret = ops->build_header(skb, &t->encap, protocol, fl4);
+ ret = ops->build_header(skb, e, protocol, fl4);
rcu_read_unlock();
return ret;
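
With the encap parameters now passed explicitly, a metadata-based tunnel
can hand in the per-packet encap from struct ip_tunnel_info while classic
tunnels keep passing their own; schematically (tun_info and tunnel are
assumed locals):

    struct ip_tunnel_encap *e = tun_info ? &tun_info->encap
                                         : &tunnel->encap;

    err = ip_tunnel_encap(skb, e, &protocol, &fl4);
    if (err < 0)
            goto tx_error;
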
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 56189e4252da..96c120160f15 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -145,6 +145,7 @@ struct gdma_general_req {
}; /* HW DATA */
#define GDMA_MESSAGE_V1 1
+#define GDMA_MESSAGE_V2 2
struct gdma_general_resp {
struct gdma_resp_hdr hdr;
@@ -354,6 +355,9 @@ struct gdma_context {
struct gdma_resource msix_resource;
struct gdma_irq_context *irq_contexts;
+ /* L2 MTU */
+ u16 adapter_mtu;
+
/* This maps a CQ index to the queue structure. */
unsigned int max_num_cqs;
struct gdma_queue **cq_table;
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index bb11a6535d80..cd386aa7c7cc 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -36,10 +36,8 @@ enum TRI_STATE {
#define COMP_ENTRY_SIZE 64
-#define ADAPTER_MTU_SIZE 1500
-#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14)
-
#define RX_BUFFERS_PER_QUEUE 512
+#define MANA_RX_DATA_ALIGN 64
#define MAX_SEND_BUFFERS_PER_QUEUE 256
@@ -282,7 +280,6 @@ struct mana_recv_buf_oob {
struct gdma_wqe_request wqe_req;
void *buf_va;
- dma_addr_t buf_dma_addr;
/* SGL of the buffer going to be sent has part of the work request. */
u32 num_sge;
@@ -295,6 +292,11 @@ struct mana_recv_buf_oob {
struct gdma_posted_wqe_info wqe_inf;
};
+#define MANA_RXBUF_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \
+ + ETH_HLEN)
+
+#define MANA_XDP_MTU_MAX (PAGE_SIZE - MANA_RXBUF_PAD - XDP_PACKET_HEADROOM)
+
struct mana_rxq {
struct gdma_queue *gdma_rq;
/* Cache the gdma receive queue id */
@@ -304,6 +306,8 @@ struct mana_rxq {
u32 rxq_idx;
u32 datasize;
+ u32 alloc_size;
+ u32 headroom;
mana_handle_t rxobj;
@@ -322,7 +326,7 @@ struct mana_rxq {
struct bpf_prog __rcu *bpf_prog;
struct xdp_rxq_info xdp_rxq;
- struct page *xdp_save_page;
+ void *xdp_save_va; /* for reusing */
bool xdp_flush;
int xdp_rc; /* XDP redirect return code */
@@ -387,6 +391,14 @@ struct mana_port_context {
/* This points to an array of num_queues of RQ pointers. */
struct mana_rxq **rxqs;
+ /* pre-allocated rx buffer array */
+ void **rxbufs_pre;
+ dma_addr_t *das_pre;
+ int rxbpre_total;
+ u32 rxbpre_datasize;
+ u32 rxbpre_alloc_size;
+ u32 rxbpre_headroom;
+
struct bpf_prog *bpf_prog;
/* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */
@@ -486,6 +498,11 @@ struct mana_query_device_cfg_resp {
u16 max_num_vports;
u16 reserved;
u32 max_num_eqs;
+
+ /* response v2: */
+ u16 adapter_mtu;
+ u16 reserved2;
+ u32 reserved3;
}; /* HW DATA */
/* Query vPort Configuration */
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
new file mode 100644
index 000000000000..d68b0a483431
--- /dev/null
+++ b/include/net/netdev_queues.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NET_QUEUES_H
+#define _LINUX_NET_QUEUES_H
+
+#include <linux/netdevice.h>
+
+/**
+ * DOC: Lockless queue stopping / waking helpers.
+ *
+ * The netif_txq_maybe_stop() and __netif_txq_completed_wake()
+ * macros are designed to safely implement stopping
+ * and waking netdev queues without full lock protection.
+ *
+ * We assume that there can be no concurrent stop attempts and no concurrent
+ * wake attempts. The try-stop should happen from the xmit handler,
+ * while wake up should be triggered from NAPI poll context.
+ * The two may run concurrently (single producer, single consumer).
+ *
+ * The try-stop side is expected to run from the xmit handler and therefore
+ * it does not reschedule Tx (netif_tx_start_queue() instead of
+ * netif_tx_wake_queue()). Uses of the ``stop`` macros outside of the xmit
+ * handler may lead to the xmit queue being enabled but not run.
+ * The waking side does not have similar context restrictions.
+ *
+ * The macros guarantee that rings will not remain stopped if there's
+ * space available, but they do *not* prevent false wake ups when
+ * the ring is full! Drivers should check for ring full at the start
+ * of the xmit handler.
+ *
+ * All descriptor ring indexes (and other relevant shared state) must
+ * be updated before invoking the macros.
+ */
+
+#define netif_txq_try_stop(txq, get_desc, start_thrs) \
+ ({ \
+ int _res; \
+ \
+ netif_tx_stop_queue(txq); \
+ /* Producer index and stop bit must be visible \
+ * to consumer before we recheck. \
+ * Pairs with a barrier in __netif_txq_completed_wake(). \
+ */ \
+ smp_mb__after_atomic(); \
+ \
+ /* We need to check again in case another \
+ * CPU has just made room available. \
+ */ \
+ _res = 0; \
+ if (unlikely(get_desc >= start_thrs)) { \
+ netif_tx_start_queue(txq); \
+ _res = -1; \
+ } \
+ _res; \
+ }) \
+
+/**
+ * netif_txq_maybe_stop() - locklessly stop a Tx queue, if needed
+ * @txq: struct netdev_queue to stop/start
+ * @get_desc: get current number of free descriptors (see requirements below!)
+ * @stop_thrs: minimal number of available descriptors for queue to be left
+ * enabled
+ * @start_thrs: minimal number of descriptors to re-enable the queue, can be
+ * equal to @stop_thrs or higher to avoid frequent waking
+ *
+ * All arguments may be evaluated multiple times, beware of side effects.
+ * @get_desc must be a formula or a function call, it must always
+ * return up-to-date information when evaluated!
+ * Expected to be used from ndo_start_xmit, see the comment on top of the file.
+ *
+ * Returns:
+ * 0 if the queue was stopped
+ * 1 if the queue was left enabled
+ * -1 if the queue was re-enabled (raced with waking)
+ */
+#define netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs) \
+ ({ \
+ int _res; \
+ \
+ _res = 1; \
+ if (unlikely(get_desc < stop_thrs)) \
+ _res = netif_txq_try_stop(txq, get_desc, start_thrs); \
+ _res; \
+ }) \
+
+/* Variant of netdev_tx_completed_queue() which guarantees smp_mb() if
+ * @bytes != 0, regardless of kernel config.
+ */
+static inline void
+netdev_txq_completed_mb(struct netdev_queue *dev_queue,
+ unsigned int pkts, unsigned int bytes)
+{
+ if (IS_ENABLED(CONFIG_BQL))
+ netdev_tx_completed_queue(dev_queue, pkts, bytes);
+ else if (bytes)
+ smp_mb();
+}
+
+/**
+ * __netif_txq_completed_wake() - locklessly wake a Tx queue, if needed
+ * @txq: struct netdev_queue to stop/start
+ * @pkts: number of packets completed
+ * @bytes: number of bytes completed
+ * @get_desc: get current number of free descriptors (see requirements below!)
+ * @start_thrs: minimal number of descriptors to re-enable the queue
+ * @down_cond: down condition, predicate indicating that the queue should
+ * not be woken up even if descriptors are available
+ *
+ * All arguments may be evaluated multiple times.
+ * @get_desc must be a formula or a function call, it must always
+ * return up-to-date information when evaluated!
+ * Reports completed pkts/bytes to BQL.
+ *
+ * Returns:
+ * 0 if the queue was woken up
+ * 1 if the queue was already enabled (or disabled but @down_cond is true)
+ * -1 if the queue was left unchanged (@start_thrs not reached)
+ */
+#define __netif_txq_completed_wake(txq, pkts, bytes, \
+ get_desc, start_thrs, down_cond) \
+ ({ \
+ int _res; \
+ \
+ /* Report to BQL and piggy back on its barrier. \
+ * Barrier makes sure that anybody stopping the queue \
+ * after this point sees the new consumer index. \
+ * Pairs with barrier in netif_txq_try_stop(). \
+ */ \
+ netdev_txq_completed_mb(txq, pkts, bytes); \
+ \
+ _res = -1; \
+ if (pkts && likely(get_desc > start_thrs)) { \
+ _res = 1; \
+ if (unlikely(netif_tx_queue_stopped(txq)) && \
+ !(down_cond)) { \
+ netif_tx_wake_queue(txq); \
+ _res = 0; \
+ } \
+ } \
+ _res; \
+ })
+
+#define netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs) \
+ __netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs, false)
+
+/* subqueue variants follow */
+
+#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs) \
+ ({ \
+ struct netdev_queue *txq; \
+ \
+ txq = netdev_get_tx_queue(dev, idx); \
+ netif_txq_try_stop(txq, get_desc, start_thrs); \
+ })
+
+#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
+ ({ \
+ struct netdev_queue *txq; \
+ \
+ txq = netdev_get_tx_queue(dev, idx); \
+ netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs); \
+ })
+
+#define netif_subqueue_completed_wake(dev, idx, pkts, bytes, \
+ get_desc, start_thrs) \
+ ({ \
+ struct netdev_queue *txq; \
+ \
+ txq = netdev_get_tx_queue(dev, idx); \
+ netif_txq_completed_wake(txq, pkts, bytes, \
+ get_desc, start_thrs); \
+ })
+
+#endif
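
Put together, a driver pairs the two sides like this; example_free_descs()
is a stand-in for the driver's own free-descriptor count and the thresholds
are illustrative:

    /* ndo_start_xmit(), after publishing the new producer index */
    netif_txq_maybe_stop(txq, example_free_descs(ring),
                         MAX_SKB_FRAGS + 1,         /* stop below this */
                         MAX_SKB_FRAGS + 1);        /* restart at this */

    /* NAPI poll, after reclaiming completed descriptors */
    netif_txq_completed_wake(txq, pkts, bytes,
                             example_free_descs(ring),
                             MAX_SKB_FRAGS + 1);
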
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 9430128aae99..1b8e305bb54a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1085,6 +1085,10 @@ struct nft_chain {
};
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
+int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem);
+int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
enum nft_chain_types {
NFT_CHAIN_T_DEFAULT = 0,
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index b4af4837d80b..3cceb3e9320b 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -55,6 +55,7 @@ struct netns_sysctl_ipv6 {
u64 ioam6_id_wide;
bool skip_notify_on_dev_down;
u8 fib_notify_on_flag_change;
+ u8 icmpv6_error_anycast_as_unicast;
};
struct netns_ipv6 {
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index ddfa0b328677..c8ec2f34722b 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -77,6 +77,7 @@ struct page_pool_params {
unsigned int pool_size;
int nid; /* Numa node id to allocate from pages from */
struct device *dev; /* device, for DMA pre-mapping purposes */
+ struct napi_struct *napi; /* Sole consumer of pages, otherwise NULL */
enum dma_data_direction dma_dir; /* DMA mapping direction */
unsigned int max_len; /* max DMA sync memory size */
unsigned int offset; /* DMA addr offset */
@@ -239,13 +240,14 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
return pool->p.dma_dir;
}
-bool page_pool_return_skb_page(struct page *page);
+bool page_pool_return_skb_page(struct page *page, bool napi_safe);
struct page_pool *page_pool_create(const struct page_pool_params *params);
struct xdp_mem_info;
#ifdef CONFIG_PAGE_POOL
+void page_pool_unlink_napi(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
struct xdp_mem_info *mem);
@@ -253,6 +255,10 @@ void page_pool_release_page(struct page_pool *pool, struct page *page);
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count);
#else
+static inline void page_pool_unlink_napi(struct page_pool *pool)
+{
+}
+
static inline void page_pool_destroy(struct page_pool *pool)
{
}
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index bb0bd69fb655..f436688b6efc 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -166,11 +166,13 @@ struct tc_mqprio_caps {
struct tc_mqprio_qopt_offload {
/* struct tc_mqprio_qopt must always be the first element */
struct tc_mqprio_qopt qopt;
+ struct netlink_ext_ack *extack;
u16 mode;
u16 shaper;
u32 flags;
u64 min_rate[TC_QOPT_MAX_QUEUE];
u64 max_rate[TC_QOPT_MAX_QUEUE];
+ unsigned long preemptible_tcs;
};
struct tc_taprio_caps {
@@ -193,6 +195,7 @@ struct tc_taprio_sched_entry {
struct tc_taprio_qopt_offload {
struct tc_mqprio_qopt_offload mqprio;
+ struct netlink_ext_ack *extack;
u8 enable;
ktime_t base_time;
u64 cycle_time;
diff --git a/include/net/raw.h b/include/net/raw.h
index c215af02f758..32a61481a253 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -37,7 +37,7 @@ int raw_rcv(struct sock *, struct sk_buff *);
struct raw_hashinfo {
spinlock_t lock;
- struct hlist_nulls_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned;
+ struct hlist_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned;
};
static inline u32 raw_hashfunc(const struct net *net, u32 proto)
@@ -51,7 +51,7 @@ static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo)
spin_lock_init(&hashinfo->lock);
for (i = 0; i < RAW_HTABLE_SIZE; i++)
- INIT_HLIST_NULLS_HEAD(&hashinfo->ht[i], i);
+ INIT_HLIST_HEAD(&hashinfo->ht[i]);
}
#ifdef CONFIG_PROC_FS
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index c335dd01a597..2a67100b2a17 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -425,11 +425,11 @@ static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk)
* the chunk length to indicate when to stop. Make sure
* there is room for a param header too.
*/
-#define sctp_walk_params(pos, chunk, member)\
-_sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
+#define sctp_walk_params(pos, chunk)\
+_sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length))
-#define _sctp_walk_params(pos, chunk, end, member)\
-for (pos.v = chunk->member;\
+#define _sctp_walk_params(pos, chunk, end)\
+for (pos.v = (u8 *)(chunk + 1);\
(pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\
(void *)chunk + end) &&\
pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
@@ -452,8 +452,8 @@ for (err = (struct sctp_errhdr *)((void *)chunk_hdr + \
_sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk))
#define _sctp_walk_fwdtsn(pos, chunk, end)\
-for (pos = chunk->subh.fwdtsn_hdr->skip;\
- (void *)pos <= (void *)chunk->subh.fwdtsn_hdr->skip + end - sizeof(struct sctp_fwdtsn_skip);\
+for (pos = (void *)(chunk->subh.fwdtsn_hdr + 1);\
+ (void *)pos <= (void *)(chunk->subh.fwdtsn_hdr + 1) + end - sizeof(struct sctp_fwdtsn_skip);\
pos++)
/* External references. */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a0933efd93c3..5c72d1864dd6 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -332,7 +332,7 @@ struct sctp_cookie {
* the association TCB is re-constructed from the cookie.
*/
__u32 raw_addr_list_len;
- struct sctp_init_chunk peer_init[];
+ /* struct sctp_init_chunk peer_init[]; */
};
@@ -1711,7 +1711,6 @@ struct sctp_association {
__u16 ecn_capable:1, /* Can peer do ECN? */
ipv4_address:1, /* Peer understands IPv4 addresses? */
ipv6_address:1, /* Peer understands IPv6 addresses? */
- hostname_address:1, /* Peer understands DNS addresses? */
asconf_capable:1, /* Does peer support ADDIP? */
prsctp_capable:1, /* Can peer do PR-SCTP? */
reconf_capable:1, /* Can peer do RE-CONFIG? */
diff --git a/include/net/sock.h b/include/net/sock.h
index 5edf0038867c..8b7ed7167243 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2697,7 +2697,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
else
sock_write_timestamp(sk, kt);
- if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid)
+ if (sock_flag(sk, SOCK_WIFI_STATUS) && skb_wifi_acked_valid(skb))
__sock_recv_wifi_status(msg, sk, skb);
}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a0a91a988272..04a31643cda3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1117,6 +1117,9 @@ struct tcp_congestion_ops {
int tcp_register_congestion_control(struct tcp_congestion_ops *type);
void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
+int tcp_update_congestion_control(struct tcp_congestion_ops *type,
+ struct tcp_congestion_ops *old_type);
+int tcp_validate_congestion_control(struct tcp_congestion_ops *ca);
void tcp_assign_congestion_control(struct sock *sk);
void tcp_init_congestion_control(struct sock *sk);
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 41c57b8b1671..d1c5381fc95f 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -8,6 +8,7 @@
#include <linux/skbuff.h> /* skb_shared_info */
#include <uapi/linux/netdev.h>
+#include <linux/bitfield.h>
/**
* DOC: XDP RX-queue information
@@ -317,35 +318,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_frame_bulk *bq);
-/* When sending xdp_frame into the network stack, then there is no
- * return point callback, which is needed to release e.g. DMA-mapping
- * resources with page_pool. Thus, have explicit function to release
- * frame resources.
- */
-void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
-static inline void xdp_release_frame(struct xdp_frame *xdpf)
-{
- struct xdp_mem_info *mem = &xdpf->mem;
- struct skb_shared_info *sinfo;
- int i;
-
- /* Curr only page_pool needs this */
- if (mem->type != MEM_TYPE_PAGE_POOL)
- return;
-
- if (likely(!xdp_frame_has_frags(xdpf)))
- goto out;
-
- sinfo = xdp_get_shared_info_from_frame(xdpf);
- for (i = 0; i < sinfo->nr_frags; i++) {
- struct page *page = skb_frag_page(&sinfo->frags[i]);
-
- __xdp_release_frame(page_address(page), mem);
- }
-out:
- __xdp_release_frame(xdpf->data, mem);
-}
-
static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
{
struct skb_shared_info *sinfo;
@@ -425,6 +397,52 @@ XDP_METADATA_KFUNC_xxx
MAX_XDP_METADATA_KFUNC,
};
+enum xdp_rss_hash_type {
+ /* First part: Individual bits for L3/L4 types */
+ XDP_RSS_L3_IPV4 = BIT(0),
+ XDP_RSS_L3_IPV6 = BIT(1),
+
+ /* The fixed (L3) IPv4 and IPv6 headers can both be followed by
+ * variable/dynamic headers: Options in IPv4 and Extension Headers
+ * in IPv6. The HW RSS type can carry this info.
+ */
+ XDP_RSS_L3_DYNHDR = BIT(2),
+
+ /* When the RSS hash covers L4, drivers MUST set the XDP_RSS_L4 bit in
+ * addition to the protocol-specific bit. This eases interaction with
+ * SKBs and avoids reserving a fixed mask for future L4 protocol bits.
+ */
+ XDP_RSS_L4 = BIT(3), /* L4 based hash, proto can be unknown */
+ XDP_RSS_L4_TCP = BIT(4),
+ XDP_RSS_L4_UDP = BIT(5),
+ XDP_RSS_L4_SCTP = BIT(6),
+ XDP_RSS_L4_IPSEC = BIT(7), /* L4-based hash includes IPsec SPI */
+
+ /* Second part: RSS hash type combinations used for driver HW mapping */
+ XDP_RSS_TYPE_NONE = 0,
+ XDP_RSS_TYPE_L2 = XDP_RSS_TYPE_NONE,
+
+ XDP_RSS_TYPE_L3_IPV4 = XDP_RSS_L3_IPV4,
+ XDP_RSS_TYPE_L3_IPV6 = XDP_RSS_L3_IPV6,
+ XDP_RSS_TYPE_L3_IPV4_OPT = XDP_RSS_L3_IPV4 | XDP_RSS_L3_DYNHDR,
+ XDP_RSS_TYPE_L3_IPV6_EX = XDP_RSS_L3_IPV6 | XDP_RSS_L3_DYNHDR,
+
+ XDP_RSS_TYPE_L4_ANY = XDP_RSS_L4,
+ XDP_RSS_TYPE_L4_IPV4_TCP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
+ XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
+ XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
+ XDP_RSS_TYPE_L4_IPV4_IPSEC = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,
+
+ XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
+ XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
+ XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
+ XDP_RSS_TYPE_L4_IPV6_IPSEC = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,
+
+ XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR,
+ XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR,
+ XDP_RSS_TYPE_L4_IPV6_SCTP_EX = XDP_RSS_TYPE_L4_IPV6_SCTP | XDP_RSS_L3_DYNHDR,
+};
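For illustration, a sketch of how a driver's XDP RX-hash metadata kfunc might report one of these combined types; the example_rx_desc layout and example_* helpers are hypothetical:

	static int example_xmo_rx_hash(const struct xdp_md *ctx, u32 *hash,
				       enum xdp_rss_hash_type *rss_type)
	{
		/* hypothetical HW descriptor; not part of this patch */
		const struct example_rx_desc *desc = example_ctx_to_desc(ctx);

		if (!desc->rss_valid)
			return -ENODATA;

		*hash = le32_to_cpu(desc->rss_hash);
		/* HW reports TCP over IPv4: L3 bit + L4 bit + proto bit */
		*rss_type = XDP_RSS_TYPE_L4_IPV4_TCP;
		return 0;
	}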
+
#ifdef CONFIG_NET
u32 bpf_xdp_metadata_kfunc_id(int id);
bool bpf_dev_bound_kfunc_id(u32 btf_id);
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 3e952e569418..d318c769b445 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -180,13 +180,8 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
if (likely(!cross_pg))
return false;
- if (pool->dma_pages_cnt) {
- return !(pool->dma_pages[addr >> PAGE_SHIFT] &
- XSK_NEXT_PG_CONTIG_MASK);
- }
-
- /* skb path */
- return addr + len > pool->addrs_cnt;
+ return pool->dma_pages_cnt &&
+ !(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK);
}
static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr)
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index d757b5e26d26..cb8fbb241879 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -11,6 +11,8 @@
#include <linux/regmap.h>
#include <net/dsa.h>
+struct tc_mqprio_qopt_offload;
+
/* Port Group IDs (PGID) are masks of destination ports.
*
* For L2 forwarding, the switch performs 3 lookups in the PGID table for each
@@ -744,9 +746,11 @@ struct ocelot_mirror {
};
struct ocelot_mm_state {
- struct mutex lock;
enum ethtool_mm_verify_status verify_status;
+ bool tx_enabled;
bool tx_active;
+ u8 preemptible_tcs;
+ u8 active_preemptible_tcs;
};
struct ocelot_port;
@@ -940,15 +944,17 @@ struct ocelot_policer {
__ocelot_target_write_ix(ocelot, target, val, reg, 0)
/* I/O */
-u32 ocelot_port_readl(struct ocelot_port *port, u32 reg);
-void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg);
-void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg);
-int __ocelot_bulk_read_ix(struct ocelot *ocelot, u32 reg, u32 offset, void *buf,
- int count);
-u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
-void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset);
-void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
- u32 offset);
+u32 ocelot_port_readl(struct ocelot_port *port, enum ocelot_reg reg);
+void ocelot_port_writel(struct ocelot_port *port, u32 val, enum ocelot_reg reg);
+void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask,
+ enum ocelot_reg reg);
+int __ocelot_bulk_read_ix(struct ocelot *ocelot, enum ocelot_reg reg,
+ u32 offset, void *buf, int count);
+u32 __ocelot_read_ix(struct ocelot *ocelot, enum ocelot_reg reg, u32 offset);
+void __ocelot_write_ix(struct ocelot *ocelot, u32 val, enum ocelot_reg reg,
+ u32 offset);
+void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask,
+ enum ocelot_reg reg, u32 offset);
u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target,
u32 reg, u32 offset);
void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target,
@@ -1146,12 +1152,15 @@ int ocelot_vcap_policer_add(struct ocelot *ocelot, u32 pol_ix,
struct ocelot_policer *pol);
int ocelot_vcap_policer_del(struct ocelot *ocelot, u32 pol_ix);
-void ocelot_port_mm_irq(struct ocelot *ocelot, int port);
+void ocelot_mm_irq(struct ocelot *ocelot);
int ocelot_port_set_mm(struct ocelot *ocelot, int port,
struct ethtool_mm_cfg *cfg,
struct netlink_ext_ack *extack);
int ocelot_port_get_mm(struct ocelot *ocelot, int port,
struct ethtool_mm_state *state);
+int ocelot_port_mqprio(struct ocelot *ocelot, int port,
+ struct tc_mqprio_qopt_offload *mqprio);
+void ocelot_port_update_preemptible_tcs(struct ocelot *ocelot, int port);
#if IS_ENABLED(CONFIG_BRIDGE_MRP)
int ocelot_mrp_add(struct ocelot *ocelot, int port,
diff --git a/include/trace/events/handshake.h b/include/trace/events/handshake.h
new file mode 100644
index 000000000000..8dadcab5f12a
--- /dev/null
+++ b/include/trace/events/handshake.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM handshake
+
+#if !defined(_TRACE_HANDSHAKE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HANDSHAKE_H
+
+#include <linux/net.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(handshake_event_class,
+ TP_PROTO(
+ const struct net *net,
+ const struct handshake_req *req,
+ const struct sock *sk
+ ),
+ TP_ARGS(net, req, sk),
+ TP_STRUCT__entry(
+ __field(const void *, req)
+ __field(const void *, sk)
+ __field(unsigned int, netns_ino)
+ ),
+ TP_fast_assign(
+ __entry->req = req;
+ __entry->sk = sk;
+ __entry->netns_ino = net->ns.inum;
+ ),
+ TP_printk("req=%p sk=%p",
+ __entry->req, __entry->sk
+ )
+);
+#define DEFINE_HANDSHAKE_EVENT(name) \
+ DEFINE_EVENT(handshake_event_class, name, \
+ TP_PROTO( \
+ const struct net *net, \
+ const struct handshake_req *req, \
+ const struct sock *sk \
+ ), \
+ TP_ARGS(net, req, sk))
+
+DECLARE_EVENT_CLASS(handshake_fd_class,
+ TP_PROTO(
+ const struct net *net,
+ const struct handshake_req *req,
+ const struct sock *sk,
+ int fd
+ ),
+ TP_ARGS(net, req, sk, fd),
+ TP_STRUCT__entry(
+ __field(const void *, req)
+ __field(const void *, sk)
+ __field(int, fd)
+ __field(unsigned int, netns_ino)
+ ),
+ TP_fast_assign(
+ __entry->req = req;
+ __entry->sk = req->hr_sk;
+ __entry->fd = fd;
+ __entry->netns_ino = net->ns.inum;
+ ),
+ TP_printk("req=%p sk=%p fd=%d",
+ __entry->req, __entry->sk, __entry->fd
+ )
+);
+#define DEFINE_HANDSHAKE_FD_EVENT(name) \
+ DEFINE_EVENT(handshake_fd_class, name, \
+ TP_PROTO( \
+ const struct net *net, \
+ const struct handshake_req *req, \
+ const struct sock *sk, \
+ int fd \
+ ), \
+ TP_ARGS(net, req, sk, fd))
+
+DECLARE_EVENT_CLASS(handshake_error_class,
+ TP_PROTO(
+ const struct net *net,
+ const struct handshake_req *req,
+ const struct sock *sk,
+ int err
+ ),
+ TP_ARGS(net, req, sk, err),
+ TP_STRUCT__entry(
+ __field(const void *, req)
+ __field(const void *, sk)
+ __field(int, err)
+ __field(unsigned int, netns_ino)
+ ),
+ TP_fast_assign(
+ __entry->req = req;
+ __entry->sk = sk;
+ __entry->err = err;
+ __entry->netns_ino = net->ns.inum;
+ ),
+ TP_printk("req=%p sk=%p err=%d",
+ __entry->req, __entry->sk, __entry->err
+ )
+);
+#define DEFINE_HANDSHAKE_ERROR(name) \
+ DEFINE_EVENT(handshake_error_class, name, \
+ TP_PROTO( \
+ const struct net *net, \
+ const struct handshake_req *req, \
+ const struct sock *sk, \
+ int err \
+ ), \
+ TP_ARGS(net, req, sk, err))
+
+
+/*
+ * Request lifetime events
+ */
+
+DEFINE_HANDSHAKE_EVENT(handshake_submit);
+DEFINE_HANDSHAKE_ERROR(handshake_submit_err);
+DEFINE_HANDSHAKE_EVENT(handshake_cancel);
+DEFINE_HANDSHAKE_EVENT(handshake_cancel_none);
+DEFINE_HANDSHAKE_EVENT(handshake_cancel_busy);
+DEFINE_HANDSHAKE_EVENT(handshake_destruct);
+
+
+TRACE_EVENT(handshake_complete,
+ TP_PROTO(
+ const struct net *net,
+ const struct handshake_req *req,
+ const struct sock *sk,
+ int status
+ ),
+ TP_ARGS(net, req, sk, status),
+ TP_STRUCT__entry(
+ __field(const void *, req)
+ __field(const void *, sk)
+ __field(int, status)
+ __field(unsigned int, netns_ino)
+ ),
+ TP_fast_assign(
+ __entry->req = req;
+ __entry->sk = sk;
+ __entry->status = status;
+ __entry->netns_ino = net->ns.inum;
+ ),
+ TP_printk("req=%p sk=%p status=%d",
+ __entry->req, __entry->sk, __entry->status
+ )
+);
+
+/*
+ * Netlink events
+ */
+
+DEFINE_HANDSHAKE_ERROR(handshake_notify_err);
+DEFINE_HANDSHAKE_FD_EVENT(handshake_cmd_accept);
+DEFINE_HANDSHAKE_ERROR(handshake_cmd_accept_err);
+DEFINE_HANDSHAKE_FD_EVENT(handshake_cmd_done);
+DEFINE_HANDSHAKE_ERROR(handshake_cmd_done_err);
+
+#endif /* _TRACE_HANDSHAKE_H */
+
+#include <trace/define_trace.h>
diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h
index ac5c24d3beeb..e30a13be46ba 100644
--- a/include/trace/stages/stage5_get_offsets.h
+++ b/include/trace/stages/stage5_get_offsets.h
@@ -9,17 +9,30 @@
#undef __entry
#define __entry entry
+/*
+ * Fields should never declare an array: e.g. __field(int, arr[5])
+ * If they do, it will cause issues in parsing and possibly corrupt the
+ * events. To prevent that from happening, test the sizeof() of a fictitious
+ * type called "struct _test_no_array_##item" which will fail if "item"
+ * contains array elements (like "arr[5]").
+ *
+ * If you hit this, use __array(int, arr, 5) instead.
+ */
#undef __field
-#define __field(type, item)
+#define __field(type, item) \
+ { (void)sizeof(struct _test_no_array_##item *); }
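Concretely, the token pasting is what catches arrays (illustrative expansion only):

	/* __field(int, arr[5]) expands to
	 *	{ (void)sizeof(struct _test_no_array_arr[5] *); }
	 * which is a syntax error, while __field(int, arr) expands to
	 *	{ (void)sizeof(struct _test_no_array_arr *); }
	 * a valid sizeof of a pointer to an (undeclared) struct type.
	 */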
#undef __field_ext
-#define __field_ext(type, item, filter_type)
+#define __field_ext(type, item, filter_type) \
+ { (void)sizeof(struct _test_no_array_##item *); }
#undef __field_struct
-#define __field_struct(type, item)
+#define __field_struct(type, item) \
+ { (void)sizeof(struct _test_no_array_##item *); }
#undef __field_struct_ext
-#define __field_struct_ext(type, item, filter_type)
+#define __field_struct_ext(type, item, filter_type) \
+ { (void)sizeof(struct _test_no_array_##item *); }
#undef __array
#define __array(type, item, len)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 976b194eb775..3823100b7934 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1033,6 +1033,7 @@ enum bpf_attach_type {
BPF_PERF_EVENT,
BPF_TRACE_KPROBE_MULTI,
BPF_LSM_CGROUP,
+ BPF_STRUCT_OPS,
__MAX_BPF_ATTACH_TYPE
};
@@ -1108,7 +1109,7 @@ enum bpf_link_type {
*/
#define BPF_F_STRICT_ALIGNMENT (1U << 0)
-/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the
+/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will allow any alignment whatsoever. On platforms
* with strict alignment requirements for loads ands stores (such
* as sparc and mips) the verifier validates that all loads and
@@ -1266,6 +1267,9 @@ enum {
/* Create a map that is suitable to be an inner map with dynamic max entries */
BPF_F_INNER_MAP = (1U << 12),
+
+/* Create a map that will be registered/unregistered by the backing bpf_link */
+ BPF_F_LINK = (1U << 13),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1403,6 +1407,11 @@ union bpf_attr {
__aligned_u64 fd_array; /* array of FDs */
__aligned_u64 core_relos;
__u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */
+ /* output: actual total log contents size (including terminating zero).
+ * It can be larger than the original log_size (if the log was
+ * truncated) or smaller (if the log buffer wasn't filled completely).
+ */
+ __u32 log_true_size;
};
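A hedged userspace sketch of how log_true_size can size the buffer on a retry; ptr_to_u64() is a stand-in, and -ENOSPC on truncation follows the existing BPF_PROG_LOAD convention:

	fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
	if (fd < 0 && errno == ENOSPC) {
		/* log was truncated; attr.log_true_size (terminating
		 * zero included) says how big the buffer must be
		 */
		log_buf = realloc(log_buf, attr.log_true_size);
		attr.log_buf = ptr_to_u64(log_buf);
		attr.log_size = attr.log_true_size;
		fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
	}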
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1488,6 +1497,11 @@ union bpf_attr {
__u32 btf_size;
__u32 btf_log_size;
__u32 btf_log_level;
+ /* output: actual total log contents size (including terminating zero).
+ * It can be larger than the original log_size (if the log was
+ * truncated) or smaller (if the log buffer wasn't filled completely).
+ */
+ __u32 btf_log_true_size;
};
struct {
@@ -1507,7 +1521,10 @@ union bpf_attr {
} task_fd_query;
struct { /* struct used by BPF_LINK_CREATE command */
- __u32 prog_fd; /* eBPF program to attach */
+ union {
+ __u32 prog_fd; /* eBPF program to attach */
+ __u32 map_fd; /* struct_ops to attach */
+ };
union {
__u32 target_fd; /* object to attach to */
__u32 target_ifindex; /* target ifindex */
@@ -1548,12 +1565,23 @@ union bpf_attr {
struct { /* struct used by BPF_LINK_UPDATE command */
__u32 link_fd; /* link fd */
- /* new program fd to update link with */
- __u32 new_prog_fd;
+ union {
+ /* new program fd to update link with */
+ __u32 new_prog_fd;
+ /* new struct_ops map fd to update link with */
+ __u32 new_map_fd;
+ };
__u32 flags; /* extra flags */
- /* expected link's program fd; is specified only if
- * BPF_F_REPLACE flag is set in flags */
- __u32 old_prog_fd;
+ union {
+ /* expected link's program fd; is specified only if
+ * BPF_F_REPLACE flag is set in flags.
+ */
+ __u32 old_prog_fd;
+ /* expected link's map fd; is specified only
+ * if BPF_F_REPLACE flag is set.
+ */
+ __u32 old_map_fd;
+ };
} link_update;
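Taken together with the link_create change above, a sketch of the new struct_ops-by-link flow in raw syscall form (map creation and error handling elided; the fd variables are hypothetical):

	/* attach a struct_ops map that was created with BPF_F_LINK */
	memset(&attr, 0, sizeof(attr));
	attr.link_create.map_fd = struct_ops_map_fd;
	attr.link_create.attach_type = BPF_STRUCT_OPS;
	link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));

	/* later, atomically switch the link over to another map */
	memset(&attr, 0, sizeof(attr));
	attr.link_update.link_fd = link_fd;
	attr.link_update.new_map_fd = new_struct_ops_map_fd;
	err = syscall(__NR_bpf, BPF_LINK_UPDATE, &attr, sizeof(attr));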
struct {
@@ -1647,17 +1675,17 @@ union bpf_attr {
* Description
* This helper is a "printk()-like" facility for debugging. It
* prints a message defined by format *fmt* (of size *fmt_size*)
- * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * to file *\/sys/kernel/tracing/trace* from TraceFS, if
* available. It can take up to three additional **u64**
* arguments (as an eBPF helpers, the total number of arguments is
* limited to five).
*
* Each time the helper is called, it appends a line to the trace.
- * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
- * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
+ * Lines are discarded while *\/sys/kernel/tracing/trace* is
+ * open, use *\/sys/kernel/tracing/trace_pipe* to avoid this.
* The format of the trace is customizable, and the exact output
* one will get depends on the options set in
- * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *\/sys/kernel/tracing/trace_options* (see also the
* *README* file under the same directory). However, it usually
* defaults to something like:
*
@@ -6379,6 +6407,9 @@ struct bpf_link_info {
struct {
__u32 ifindex;
} xdp;
+ struct {
+ __u32 map_id;
+ } struct_ops;
};
} __attribute__((aligned(8)));
@@ -7112,4 +7143,12 @@ enum {
BPF_F_TIMER_ABS = (1ULL << 0),
};
+/* BPF numbers iterator state */
+struct bpf_iter_num {
+ /* opaque iterator state; having __u64 here makes it possible to preserve
+ * correct alignment requirements in vmlinux.h, generated from BTF
+ */
+ __u64 __opaque[1];
+} __attribute__((aligned(8)));
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h
new file mode 100644
index 000000000000..1de4d0b95325
--- /dev/null
+++ b/include/uapi/linux/handshake.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/handshake.yaml */
+/* YNL-GEN uapi header */
+
+#ifndef _UAPI_LINUX_HANDSHAKE_H
+#define _UAPI_LINUX_HANDSHAKE_H
+
+#define HANDSHAKE_FAMILY_NAME "handshake"
+#define HANDSHAKE_FAMILY_VERSION 1
+
+enum handshake_handler_class {
+ HANDSHAKE_HANDLER_CLASS_NONE,
+ HANDSHAKE_HANDLER_CLASS_TLSHD,
+ HANDSHAKE_HANDLER_CLASS_MAX,
+};
+
+enum handshake_msg_type {
+ HANDSHAKE_MSG_TYPE_UNSPEC,
+ HANDSHAKE_MSG_TYPE_CLIENTHELLO,
+ HANDSHAKE_MSG_TYPE_SERVERHELLO,
+};
+
+enum handshake_auth {
+ HANDSHAKE_AUTH_UNSPEC,
+ HANDSHAKE_AUTH_UNAUTH,
+ HANDSHAKE_AUTH_PSK,
+ HANDSHAKE_AUTH_X509,
+};
+
+enum {
+ HANDSHAKE_A_X509_CERT = 1,
+ HANDSHAKE_A_X509_PRIVKEY,
+
+ __HANDSHAKE_A_X509_MAX,
+ HANDSHAKE_A_X509_MAX = (__HANDSHAKE_A_X509_MAX - 1)
+};
+
+enum {
+ HANDSHAKE_A_ACCEPT_SOCKFD = 1,
+ HANDSHAKE_A_ACCEPT_HANDLER_CLASS,
+ HANDSHAKE_A_ACCEPT_MESSAGE_TYPE,
+ HANDSHAKE_A_ACCEPT_TIMEOUT,
+ HANDSHAKE_A_ACCEPT_AUTH_MODE,
+ HANDSHAKE_A_ACCEPT_PEER_IDENTITY,
+ HANDSHAKE_A_ACCEPT_CERTIFICATE,
+
+ __HANDSHAKE_A_ACCEPT_MAX,
+ HANDSHAKE_A_ACCEPT_MAX = (__HANDSHAKE_A_ACCEPT_MAX - 1)
+};
+
+enum {
+ HANDSHAKE_A_DONE_STATUS = 1,
+ HANDSHAKE_A_DONE_SOCKFD,
+ HANDSHAKE_A_DONE_REMOTE_AUTH,
+
+ __HANDSHAKE_A_DONE_MAX,
+ HANDSHAKE_A_DONE_MAX = (__HANDSHAKE_A_DONE_MAX - 1)
+};
+
+enum {
+ HANDSHAKE_CMD_READY = 1,
+ HANDSHAKE_CMD_ACCEPT,
+ HANDSHAKE_CMD_DONE,
+
+ __HANDSHAKE_CMD_MAX,
+ HANDSHAKE_CMD_MAX = (__HANDSHAKE_CMD_MAX - 1)
+};
+
+#define HANDSHAKE_MCGRP_NONE "none"
+#define HANDSHAKE_MCGRP_TLSHD "tlshd"
+
+#endif /* _UAPI_LINUX_HANDSHAKE_H */
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index c9d624f528c5..f95326fce6bb 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -525,6 +525,7 @@ enum {
BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS,
BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS,
+ BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS,
__BRIDGE_VLANDB_ENTRY_MAX,
};
#define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8d679688efe0..4ac1000b0ef2 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -569,6 +569,7 @@ enum {
IFLA_BRPORT_MAB,
IFLA_BRPORT_MCAST_N_GROUPS,
IFLA_BRPORT_MCAST_MAX_GROUPS,
+ IFLA_BRPORT_NEIGH_VLAN_SUPPRESS,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index 78c981d6a9d4..9efc42382fdb 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -59,6 +59,7 @@ struct sockaddr_ll {
#define PACKET_ROLLOVER_STATS 21
#define PACKET_FANOUT_DATA 22
#define PACKET_IGNORE_OUTGOING 23
+#define PACKET_VNET_HDR_SZ 24
#define PACKET_FANOUT_HASH 0
#define PACKET_FANOUT_LB 1
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 000eec106856..51a7addc56c6 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -719,6 +719,11 @@ enum {
#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1)
+enum {
+ TC_FP_EXPRESS = 1,
+ TC_FP_PREEMPTIBLE = 2,
+};
+
struct tc_mqprio_qopt {
__u8 num_tc;
__u8 prio_tc_map[TC_QOPT_BITMASK + 1];
@@ -733,11 +738,22 @@ struct tc_mqprio_qopt {
#define TC_MQPRIO_F_MAX_RATE 0x8
enum {
+ TCA_MQPRIO_TC_ENTRY_UNSPEC,
+ TCA_MQPRIO_TC_ENTRY_INDEX, /* u32 */
+ TCA_MQPRIO_TC_ENTRY_FP, /* u32 */
+
+ /* add new constants above here */
+ __TCA_MQPRIO_TC_ENTRY_CNT,
+ TCA_MQPRIO_TC_ENTRY_MAX = (__TCA_MQPRIO_TC_ENTRY_CNT - 1)
+};
+
+enum {
TCA_MQPRIO_UNSPEC,
TCA_MQPRIO_MODE,
TCA_MQPRIO_SHAPER,
TCA_MQPRIO_MIN_RATE64,
TCA_MQPRIO_MAX_RATE64,
+ TCA_MQPRIO_TC_ENTRY,
__TCA_MQPRIO_MAX,
};
@@ -1236,6 +1252,7 @@ enum {
TCA_TAPRIO_TC_ENTRY_UNSPEC,
TCA_TAPRIO_TC_ENTRY_INDEX, /* u32 */
TCA_TAPRIO_TC_ENTRY_MAX_SDU, /* u32 */
+ TCA_TAPRIO_TC_ENTRY_FP, /* u32 */
/* add new constants above here */
__TCA_TAPRIO_TC_ENTRY_CNT,
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 5af2a0300bb9..3744e4da1b2a 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -140,11 +140,11 @@ struct virtio_blk_config {
/* Zoned block device characteristics (if VIRTIO_BLK_F_ZONED) */
struct virtio_blk_zoned_characteristics {
- __le32 zone_sectors;
- __le32 max_open_zones;
- __le32 max_active_zones;
- __le32 max_append_sectors;
- __le32 write_granularity;
+ __virtio32 zone_sectors;
+ __virtio32 max_open_zones;
+ __virtio32 max_active_zones;
+ __virtio32 max_append_sectors;
+ __virtio32 write_granularity;
__u8 model;
__u8 unused2[3];
} zoned;
@@ -241,11 +241,11 @@ struct virtio_blk_outhdr {
*/
struct virtio_blk_zone_descriptor {
/* Zone capacity */
- __le64 z_cap;
+ __virtio64 z_cap;
/* The starting sector of the zone */
- __le64 z_start;
+ __virtio64 z_start;
/* Zone write pointer position in sectors */
- __le64 z_wp;
+ __virtio64 z_wp;
/* Zone type */
__u8 z_type;
/* Zone state */
@@ -254,7 +254,7 @@ struct virtio_blk_zone_descriptor {
};
struct virtio_blk_zone_report {
- __le64 nr_zones;
+ __virtio64 nr_zones;
__u8 reserved[56];
struct virtio_blk_zone_descriptor zones[];
};
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 25aab8ec4f86..431c3afb2ce0 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -979,7 +979,6 @@ struct ufs_hba {
struct completion *uic_async_done;
enum ufshcd_state ufshcd_state;
- bool logical_unit_scan_finished;
u32 eh_flags;
u32 intr_mask;
u16 ee_ctrl_mask;
diff --git a/init/initramfs.c b/init/initramfs.c
index f6c112e30bd4..e7a01c2ccd1b 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -60,15 +60,8 @@ static void __init error(char *x)
message = x;
}
-static void panic_show_mem(const char *fmt, ...)
-{
- va_list args;
-
- show_mem(0, NULL);
- va_start(args, fmt);
- panic(fmt, args);
- va_end(args);
-}
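+/* Forward the caller's arguments straight to panic(): the removed helper
+ * handed panic() a va_list, but panic() takes variadic arguments.
+ */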
+#define panic_show_mem(fmt, ...) \
+ ({ show_mem(0, NULL); panic(fmt, ##__VA_ARGS__); })
/* link hash */
diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index 729793ae9712..c2cde88aeed5 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -27,6 +27,7 @@ static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *c
struct hlist_node *node = cache->list.first;
hlist_del(node);
+ cache->nr_cached--;
return container_of(node, struct io_cache_entry, node);
}
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 722624b6d0dc..4a865f0e85d0 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -998,7 +998,7 @@ static void __io_req_complete_post(struct io_kiocb *req)
void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
{
- if (req->ctx->task_complete && (issue_flags & IO_URING_F_IOWQ)) {
+ if (req->ctx->task_complete && req->ctx->submitter_task != current) {
req->io_task_work.func = io_req_task_complete;
io_req_task_work_add(req);
} else if (!(issue_flags & IO_URING_F_UNLOCKED) ||
@@ -2789,8 +2789,8 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_eventfd_unregister(ctx);
io_alloc_cache_free(&ctx->apoll_cache, io_apoll_cache_free);
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
- mutex_unlock(&ctx->uring_lock);
io_destroy_buffers(ctx);
+ mutex_unlock(&ctx->uring_lock);
if (ctx->sq_creds)
put_cred(ctx->sq_creds);
if (ctx->submitter_task)
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 3002dc827195..a90c820ce99e 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -228,17 +228,18 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
return i;
}
- /* the head kbuf is the list itself */
+ /* protects io_buffers_cache */
+ lockdep_assert_held(&ctx->uring_lock);
+
while (!list_empty(&bl->buf_list)) {
struct io_buffer *nxt;
nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
- list_del(&nxt->list);
+ list_move(&nxt->list, &ctx->io_buffers_cache);
if (++i == nbufs)
return i;
cond_resched();
}
- i++;
return i;
}
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 795facbd0e9f..55306e801081 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -726,6 +726,7 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
apoll = io_req_alloc_apoll(req, issue_flags);
if (!apoll)
return IO_APOLL_ABORTED;
+ req->flags &= ~(REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL);
req->flags |= REQ_F_POLLED;
ipt.pt._qproc = io_async_queue_proc;
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 2b8743645efc..f27f4975217d 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -144,15 +144,13 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req,
unsigned int issue_flags)
{
if (!req->rsrc_node) {
- req->rsrc_node = ctx->rsrc_node;
+ io_ring_submit_lock(ctx, issue_flags);
- if (!(issue_flags & IO_URING_F_UNLOCKED)) {
- lockdep_assert_held(&ctx->uring_lock);
+ lockdep_assert_held(&ctx->uring_lock);
- io_charge_rsrc_node(ctx);
- } else {
- percpu_ref_get(&req->rsrc_node->refs);
- }
+ req->rsrc_node = ctx->rsrc_node;
+ io_charge_rsrc_node(ctx);
+ io_ring_submit_unlock(ctx, issue_flags);
}
}
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 02242614dcc7..1d3892168d32 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,7 +6,8 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o
+obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1588c793a715..2058e89b5ddd 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -307,8 +307,8 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key
}
/* Called from syscall or from eBPF program */
-static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
- u64 map_flags)
+static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
@@ -386,7 +386,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
}
/* Called from syscall or from eBPF program */
-static int array_map_delete_elem(struct bpf_map *map, void *key)
+static long array_map_delete_elem(struct bpf_map *map, void *key)
{
return -EINVAL;
}
@@ -686,8 +686,8 @@ static const struct bpf_iter_seq_info iter_seq_info = {
.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};
-static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn,
- void *callback_ctx, u64 flags)
+static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn,
+ void *callback_ctx, u64 flags)
{
u32 i, key, num_elems = 0;
struct bpf_array *array;
@@ -871,7 +871,7 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
return 0;
}
-static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
+static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
void *old_ptr;
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
index 6350c5d35a9b..540331b610a9 100644
--- a/kernel/bpf/bloom_filter.c
+++ b/kernel/bpf/bloom_filter.c
@@ -16,13 +16,6 @@ struct bpf_bloom_filter {
struct bpf_map map;
u32 bitset_mask;
u32 hash_seed;
- /* If the size of the values in the bloom filter is u32 aligned,
- * then it is more performant to use jhash2 as the underlying hash
- * function, else we use jhash. This tracks the number of u32s
- * in an u32-aligned value size. If the value size is not u32 aligned,
- * this will be 0.
- */
- u32 aligned_u32_count;
u32 nr_hash_funcs;
unsigned long bitset[];
};
@@ -32,16 +25,15 @@ static u32 hash(struct bpf_bloom_filter *bloom, void *value,
{
u32 h;
- if (bloom->aligned_u32_count)
- h = jhash2(value, bloom->aligned_u32_count,
- bloom->hash_seed + index);
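+ /* u32-aligned value sizes can use the faster jhash2 */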
+ if (likely(value_size % 4 == 0))
+ h = jhash2(value, value_size / 4, bloom->hash_seed + index);
else
h = jhash(value, value_size, bloom->hash_seed + index);
return h & bloom->bitset_mask;
}
-static int bloom_map_peek_elem(struct bpf_map *map, void *value)
+static long bloom_map_peek_elem(struct bpf_map *map, void *value)
{
struct bpf_bloom_filter *bloom =
container_of(map, struct bpf_bloom_filter, map);
@@ -56,7 +48,7 @@ static int bloom_map_peek_elem(struct bpf_map *map, void *value)
return 0;
}
-static int bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags)
+static long bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags)
{
struct bpf_bloom_filter *bloom =
container_of(map, struct bpf_bloom_filter, map);
@@ -73,12 +65,12 @@ static int bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags)
return 0;
}
-static int bloom_map_pop_elem(struct bpf_map *map, void *value)
+static long bloom_map_pop_elem(struct bpf_map *map, void *value)
{
return -EOPNOTSUPP;
}
-static int bloom_map_delete_elem(struct bpf_map *map, void *value)
+static long bloom_map_delete_elem(struct bpf_map *map, void *value)
{
return -EOPNOTSUPP;
}
@@ -152,11 +144,6 @@ static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
bloom->nr_hash_funcs = nr_hash_funcs;
bloom->bitset_mask = bitset_mask;
- /* Check whether the value size is u32-aligned */
- if ((attr->value_size & (sizeof(u32) - 1)) == 0)
- bloom->aligned_u32_count =
- attr->value_size / sizeof(u32);
-
if (!(attr->map_flags & BPF_F_ZERO_SEED))
bloom->hash_seed = get_random_u32();
@@ -177,8 +164,8 @@ static void *bloom_map_lookup_elem(struct bpf_map *map, void *key)
return ERR_PTR(-EINVAL);
}
-static int bloom_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 flags)
+static long bloom_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
{
/* The eBPF program should use map_push_elem instead */
return -EINVAL;
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index 9ae07aedaf23..d44fe8dd9732 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -46,8 +46,6 @@ static struct bpf_local_storage __rcu **cgroup_storage_ptr(void *owner)
void bpf_cgrp_storage_free(struct cgroup *cgroup)
{
struct bpf_local_storage *local_storage;
- bool free_cgroup_storage = false;
- unsigned long flags;
rcu_read_lock();
local_storage = rcu_dereference(cgroup->bpf_cgrp_storage);
@@ -57,14 +55,9 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup)
}
bpf_cgrp_storage_lock();
- raw_spin_lock_irqsave(&local_storage->lock, flags);
- free_cgroup_storage = bpf_local_storage_unlink_nolock(local_storage);
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+ bpf_local_storage_destroy(local_storage);
bpf_cgrp_storage_unlock();
rcu_read_unlock();
-
- if (free_cgroup_storage)
- kfree_rcu(local_storage, rcu);
}
static struct bpf_local_storage_data *
@@ -100,8 +93,8 @@ static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key)
return sdata ? sdata->data : NULL;
}
-static int bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags)
+static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
{
struct bpf_local_storage_data *sdata;
struct cgroup *cgroup;
@@ -128,11 +121,11 @@ static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata), true);
+ bpf_selem_unlink(SELEM(sdata), false);
return 0;
}
-static int bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
+static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
{
struct cgroup *cgroup;
int err, fd;
@@ -156,7 +149,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
- return bpf_local_storage_map_alloc(attr, &cgroup_cache);
+ return bpf_local_storage_map_alloc(attr, &cgroup_cache, true);
}
static void cgroup_storage_map_free(struct bpf_map *map)
@@ -231,7 +224,7 @@ const struct bpf_func_proto bpf_cgrp_storage_get_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
.arg2_btf_id = &bpf_cgroup_btf_id[0],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
@@ -242,6 +235,6 @@ const struct bpf_func_proto bpf_cgrp_storage_delete_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
.arg2_btf_id = &bpf_cgroup_btf_id[0],
};
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 43e2619c8167..a4d93df78c75 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -57,7 +57,6 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
void bpf_inode_storage_free(struct inode *inode)
{
struct bpf_local_storage *local_storage;
- bool free_inode_storage = false;
struct bpf_storage_blob *bsb;
bsb = bpf_inode(inode);
@@ -72,13 +71,8 @@ void bpf_inode_storage_free(struct inode *inode)
return;
}
- raw_spin_lock_bh(&local_storage->lock);
- free_inode_storage = bpf_local_storage_unlink_nolock(local_storage);
- raw_spin_unlock_bh(&local_storage->lock);
+ bpf_local_storage_destroy(local_storage);
rcu_read_unlock();
-
- if (free_inode_storage)
- kfree_rcu(local_storage, rcu);
}
static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
@@ -97,8 +91,8 @@ static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
return sdata ? sdata->data : NULL;
}
-static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags)
+static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
{
struct bpf_local_storage_data *sdata;
struct file *f;
@@ -128,12 +122,12 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata), true);
+ bpf_selem_unlink(SELEM(sdata), false);
return 0;
}
-static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
+static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
{
struct file *f;
int fd, err;
@@ -205,7 +199,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
{
- return bpf_local_storage_map_alloc(attr, &inode_cache);
+ return bpf_local_storage_map_alloc(attr, &inode_cache, false);
}
static void inode_storage_map_free(struct bpf_map *map)
@@ -235,7 +229,7 @@ const struct bpf_func_proto bpf_inode_storage_get_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
.arg2_btf_id = &bpf_inode_storage_btf_ids[0],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
@@ -246,6 +240,6 @@ const struct bpf_func_proto bpf_inode_storage_delete_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
.arg2_btf_id = &bpf_inode_storage_btf_ids[0],
};
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 5dc307bdeaeb..96856f130cbf 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -776,3 +776,73 @@ const struct bpf_func_proto bpf_loop_proto = {
.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
+
+struct bpf_iter_num_kern {
+ int cur; /* current value, inclusive */
+ int end; /* final value, exclusive */
+} __aligned(8);
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
+
+__bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end)
+{
+ struct bpf_iter_num_kern *s = (void *)it;
+
+ BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num));
+ BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num));
+
+ BTF_TYPE_EMIT(struct btf_iter_num);
+
+ /* start == end is legit: it's an empty range, and we'll just get NULL
+ * on first (and any subsequent) bpf_iter_num_next() call
+ */
+ if (start > end) {
+ s->cur = s->end = 0;
+ return -EINVAL;
+ }
+
+ /* avoid overflows, e.g., if start == INT_MIN and end == INT_MAX */
+ if ((s64)end - (s64)start > BPF_MAX_LOOPS) {
+ s->cur = s->end = 0;
+ return -E2BIG;
+ }
+
+ /* user will call bpf_iter_num_next() first,
+ * which will set s->cur to exactly the start value;
+ * underflow shouldn't matter
+ */
+ s->cur = start - 1;
+ s->end = end;
+
+ return 0;
+}
+
+__bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num *it)
+{
+ struct bpf_iter_num_kern *s = (void *)it;
+
+ /* check failed initialization or if we are done (same behavior);
+ * need to be careful about overflow, so convert to s64 for checks,
+ * e.g., if s->cur == s->end == INT_MAX, we can't just do
+ * s->cur + 1 >= s->end
+ */
+ if ((s64)(s->cur + 1) >= s->end) {
+ s->cur = s->end = 0;
+ return NULL;
+ }
+
+ s->cur++;
+
+ return &s->cur;
+}
+
+__bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it)
+{
+ struct bpf_iter_num_kern *s = (void *)it;
+
+ s->cur = s->end = 0;
+}
+
+__diag_pop();
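For illustration, a sketch of the BPF-program side of this open-coded iterator contract, matching the kfuncs above (program context arbitrary):

	struct bpf_iter_num it;
	int *v, sum = 0;

	bpf_iter_num_new(&it, 0, 10);	/* half-open range [0, 10) */
	while ((v = bpf_iter_num_next(&it)))
		sum += *v;		/* visits 0, 1, ..., 9 */
	bpf_iter_num_destroy(&it);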
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index d3ba3f2db640..47d9948d768f 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -80,8 +80,24 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
if (charge_mem && mem_charge(smap, owner, smap->elem_size))
return NULL;
- selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
- gfp_flags | __GFP_NOWARN);
+ if (smap->bpf_ma) {
+ migrate_disable();
+ selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags);
+ migrate_enable();
+ if (selem)
+ /* Keep the original bpf_map_kzalloc behavior
+ * from before bpf_mem_cache_alloc was used.
+ *
+ * No need to use zero_map_value. bpf_selem_free()
+ * only does bpf_mem_cache_free when no other
+ * bpf prog is using the selem.
+ */
+ memset(SDATA(selem)->data, 0, smap->map.value_size);
+ } else {
+ selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
+ gfp_flags | __GFP_NOWARN);
+ }
+
if (selem) {
if (value)
copy_map_value(&smap->map, SDATA(selem)->data, value);
@@ -95,7 +111,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
return NULL;
}
-void bpf_local_storage_free_rcu(struct rcu_head *rcu)
+/* rcu tasks trace callback for bpf_ma == false */
+static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage *local_storage;
@@ -109,28 +126,66 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu)
kfree_rcu(local_storage, rcu);
}
-static void bpf_selem_free_fields_rcu(struct rcu_head *rcu)
+static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
{
- struct bpf_local_storage_elem *selem;
- struct bpf_local_storage_map *smap;
+ struct bpf_local_storage *local_storage;
- selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
- /* protected by the rcu_barrier*() */
- smap = rcu_dereference_protected(SDATA(selem)->smap, true);
- bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
- kfree(selem);
+ local_storage = container_of(rcu, struct bpf_local_storage, rcu);
+ bpf_mem_cache_raw_free(local_storage);
}
-static void bpf_selem_free_fields_trace_rcu(struct rcu_head *rcu)
+static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
- /* Free directly if Tasks Trace RCU GP also implies RCU GP */
if (rcu_trace_implies_rcu_gp())
- bpf_selem_free_fields_rcu(rcu);
+ bpf_local_storage_free_rcu(rcu);
else
- call_rcu(rcu, bpf_selem_free_fields_rcu);
+ call_rcu(rcu, bpf_local_storage_free_rcu);
}
-static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
+/* Handle bpf_ma == false */
+static void __bpf_local_storage_free(struct bpf_local_storage *local_storage,
+ bool vanilla_rcu)
+{
+ if (vanilla_rcu)
+ kfree_rcu(local_storage, rcu);
+ else
+ call_rcu_tasks_trace(&local_storage->rcu,
+ __bpf_local_storage_free_trace_rcu);
+}
+
+static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_map *smap,
+ bool bpf_ma, bool reuse_now)
+{
+ if (!local_storage)
+ return;
+
+ if (!bpf_ma) {
+ __bpf_local_storage_free(local_storage, reuse_now);
+ return;
+ }
+
+ if (!reuse_now) {
+ call_rcu_tasks_trace(&local_storage->rcu,
+ bpf_local_storage_free_trace_rcu);
+ return;
+ }
+
+ if (smap) {
+ migrate_disable();
+ bpf_mem_cache_free(&smap->storage_ma, local_storage);
+ migrate_enable();
+ } else {
+ /* smap could be NULL if the selem that triggered
+ * this 'local_storage' creation is long gone.
+ * In this case, directly do call_rcu().
+ */
+ call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
+ }
+}
+
+/* rcu tasks trace callback for bpf_ma == false */
+static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage_elem *selem;
@@ -141,17 +196,66 @@ static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
kfree_rcu(selem, rcu);
}
+/* Handle bpf_ma == false */
+static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
+ bool vanilla_rcu)
+{
+ if (vanilla_rcu)
+ kfree_rcu(selem, rcu);
+ else
+ call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu);
+}
+
+static void bpf_selem_free_rcu(struct rcu_head *rcu)
+{
+ struct bpf_local_storage_elem *selem;
+
+ selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+ bpf_mem_cache_raw_free(selem);
+}
+
+static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
+{
+ if (rcu_trace_implies_rcu_gp())
+ bpf_selem_free_rcu(rcu);
+ else
+ call_rcu(rcu, bpf_selem_free_rcu);
+}
+
+void bpf_selem_free(struct bpf_local_storage_elem *selem,
+ struct bpf_local_storage_map *smap,
+ bool reuse_now)
+{
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+
+ if (!smap->bpf_ma) {
+ __bpf_selem_free(selem, reuse_now);
+ return;
+ }
+
+ if (!reuse_now) {
+ call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
+ } else {
+ /* Instead of using the vanilla call_rcu(),
+ * bpf_mem_cache_free will be able to reuse selem
+ * immediately.
+ */
+ migrate_disable();
+ bpf_mem_cache_free(&smap->selem_ma, selem);
+ migrate_enable();
+ }
+}
+
/* local_storage->lock must be held and selem->local_storage == local_storage.
* The caller must ensure selem->smap is still valid to be
* dereferenced for its smap->elem_size and smap->cache_idx.
*/
static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem,
- bool uncharge_mem, bool use_trace_rcu)
+ bool uncharge_mem, bool reuse_now)
{
struct bpf_local_storage_map *smap;
bool free_local_storage;
- struct btf_record *rec;
void *owner;
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
@@ -192,35 +296,55 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
SDATA(selem))
RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
- /* A different RCU callback is chosen whenever we need to free
- * additional fields in selem data before freeing selem.
- * bpf_local_storage_map_free only executes rcu_barrier to wait for RCU
- * callbacks when it has special fields, hence we can only conditionally
- * dereference smap, as by this time the map might have already been
- * freed without waiting for our call_rcu callback if it did not have
- * any special fields.
+ bpf_selem_free(selem, smap, reuse_now);
+
+ if (rcu_access_pointer(local_storage->smap) == smap)
+ RCU_INIT_POINTER(local_storage->smap, NULL);
+
+ return free_local_storage;
+}
+
+static bool check_storage_bpf_ma(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_map *storage_smap,
+ struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage_map *selem_smap;
+
+ /* local_storage->smap may be NULL. If it is, get the bpf_ma
+ * from any selem in the local_storage->list. All local_storages and
+ * selems of the same map type should have the same bpf_ma value.
+ *
+ * If the local_storage->list is already empty, the caller will not
+ * care about the bpf_ma value because the caller is not
+ * responsible for freeing the local_storage.
*/
- rec = smap->map.record;
- if (use_trace_rcu) {
- if (!IS_ERR_OR_NULL(rec))
- call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_fields_trace_rcu);
- else
- call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
- } else {
- if (!IS_ERR_OR_NULL(rec))
- call_rcu(&selem->rcu, bpf_selem_free_fields_rcu);
- else
- kfree_rcu(selem, rcu);
+
+ if (storage_smap)
+ return storage_smap->bpf_ma;
+
+ if (!selem) {
+ struct hlist_node *n;
+
+ n = rcu_dereference_check(hlist_first_rcu(&local_storage->list),
+ bpf_rcu_lock_held());
+ if (!n)
+ return false;
+
+ selem = hlist_entry(n, struct bpf_local_storage_elem, snode);
}
+ selem_smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
- return free_local_storage;
+ return selem_smap->bpf_ma;
}
-static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
- bool use_trace_rcu)
+static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
+ bool reuse_now)
{
+ struct bpf_local_storage_map *storage_smap;
struct bpf_local_storage *local_storage;
- bool free_local_storage = false;
+ bool bpf_ma, free_local_storage = false;
unsigned long flags;
if (unlikely(!selem_linked_to_storage_lockless(selem)))
@@ -229,19 +353,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
local_storage = rcu_dereference_check(selem->local_storage,
bpf_rcu_lock_held());
+ storage_smap = rcu_dereference_check(local_storage->smap,
+ bpf_rcu_lock_held());
+ bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, selem);
+
raw_spin_lock_irqsave(&local_storage->lock, flags);
if (likely(selem_linked_to_storage(selem)))
free_local_storage = bpf_selem_unlink_storage_nolock(
- local_storage, selem, true, use_trace_rcu);
+ local_storage, selem, true, reuse_now);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
- if (free_local_storage) {
- if (use_trace_rcu)
- call_rcu_tasks_trace(&local_storage->rcu,
- bpf_local_storage_free_rcu);
- else
- kfree_rcu(local_storage, rcu);
- }
+ if (free_local_storage)
+ bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now);
}
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
@@ -251,7 +374,7 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
hlist_add_head_rcu(&selem->snode, &local_storage->list);
}
-void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map *smap;
struct bpf_local_storage_map_bucket *b;
@@ -281,14 +404,14 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
raw_spin_unlock_irqrestore(&b->lock, flags);
}
-void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu)
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
{
/* Always unlink from map before unlinking from local_storage
* because selem will be freed after successfully unlinked from
* the local_storage.
*/
bpf_selem_unlink_map(selem);
- __bpf_selem_unlink_storage(selem, use_trace_rcu);
+ bpf_selem_unlink_storage(selem, reuse_now);
}
/* If cacheit_lockit is false, this lookup function is lockless */
@@ -361,13 +484,21 @@ int bpf_local_storage_alloc(void *owner,
if (err)
return err;
- storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
- gfp_flags | __GFP_NOWARN);
+ if (smap->bpf_ma) {
+ migrate_disable();
+ storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags);
+ migrate_enable();
+ } else {
+ storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
+ gfp_flags | __GFP_NOWARN);
+ }
+
if (!storage) {
err = -ENOMEM;
goto uncharge;
}
+ RCU_INIT_POINTER(storage->smap, smap);
INIT_HLIST_HEAD(&storage->list);
raw_spin_lock_init(&storage->lock);
storage->owner = owner;
@@ -407,7 +538,7 @@ int bpf_local_storage_alloc(void *owner,
return 0;
uncharge:
- kfree(storage);
+ bpf_local_storage_free(storage, smap, smap->bpf_ma, true);
mem_uncharge(smap, owner, sizeof(*storage));
return err;
}
@@ -451,7 +582,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
if (err) {
- kfree(selem);
+ bpf_selem_free(selem, smap, true);
mem_uncharge(smap, owner, smap->elem_size);
return ERR_PTR(err);
}
@@ -534,7 +665,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
if (old_sdata) {
bpf_selem_unlink_map(SELEM(old_sdata));
bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
- false, true);
+ false, false);
}
unlock:
@@ -545,7 +676,7 @@ unlock_err:
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
if (selem) {
mem_uncharge(smap, owner, smap->elem_size);
- kfree(selem);
+ bpf_selem_free(selem, smap, true);
}
return ERR_PTR(err);
}
@@ -601,40 +732,6 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
return 0;
}
-static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_attr *attr)
-{
- struct bpf_local_storage_map *smap;
- unsigned int i;
- u32 nbuckets;
-
- smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
- if (!smap)
- return ERR_PTR(-ENOMEM);
- bpf_map_init_from_attr(&smap->map, attr);
-
- nbuckets = roundup_pow_of_two(num_possible_cpus());
- /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
- nbuckets = max_t(u32, 2, nbuckets);
- smap->bucket_log = ilog2(nbuckets);
-
- smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
- nbuckets, GFP_USER | __GFP_NOWARN);
- if (!smap->buckets) {
- bpf_map_area_free(smap);
- return ERR_PTR(-ENOMEM);
- }
-
- for (i = 0; i < nbuckets; i++) {
- INIT_HLIST_HEAD(&smap->buckets[i].list);
- raw_spin_lock_init(&smap->buckets[i].lock);
- }
-
- smap->elem_size = offsetof(struct bpf_local_storage_elem,
- sdata.data[attr->value_size]);
-
- return smap;
-}
-
int bpf_local_storage_map_check_btf(const struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
@@ -652,11 +749,16 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
return 0;
}
-bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage)
+void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
{
+ struct bpf_local_storage_map *storage_smap;
struct bpf_local_storage_elem *selem;
- bool free_storage = false;
+ bool bpf_ma, free_storage = false;
struct hlist_node *n;
+ unsigned long flags;
+
+ storage_smap = rcu_dereference_check(local_storage->smap, bpf_rcu_lock_held());
+ bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, NULL);
/* Neither the bpf_prog nor the bpf_map's syscall
* could be modifying the local_storage->list now.
@@ -667,6 +769,7 @@ bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage)
* when unlinking elem from the local_storage->list and
* the map's bucket->list.
*/
+ raw_spin_lock_irqsave(&local_storage->lock, flags);
hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
/* Always unlink from map before unlinking from
* local_storage.
@@ -679,10 +782,12 @@ bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage)
* of the loop will set the free_cgroup_storage to true.
*/
free_storage = bpf_selem_unlink_storage_nolock(
- local_storage, selem, false, false);
+ local_storage, selem, false, true);
}
+ raw_spin_unlock_irqrestore(&local_storage->lock, flags);
- return free_storage;
+ if (free_storage)
+ bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true);
}
u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
@@ -695,18 +800,71 @@ u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
return usage;
}
+/* When bpf_ma == true, the bpf_mem_alloc is used to allocate and free memory.
+ * A deadlock-free allocator is useful for storage whose owner PTR_TO_BTF_ID
+ * the bpf prog can easily get hold of in any context, e.g. through
+ * bpf_get_current_task_btf(). The task and cgroup storages fall into this
+ * case. The bpf_mem_alloc reuses memory immediately. To be reuse-immediate
+ * safe, the owner destruction code path needs to go through an rcu grace
+ * period before calling bpf_local_storage_destroy().
+ *
+ * When bpf_ma == false, kmalloc and kfree are used.
+ */
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
- struct bpf_local_storage_cache *cache)
+ struct bpf_local_storage_cache *cache,
+ bool bpf_ma)
{
struct bpf_local_storage_map *smap;
+ unsigned int i;
+ u32 nbuckets;
+ int err;
+
+ smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
+ if (!smap)
+ return ERR_PTR(-ENOMEM);
+ bpf_map_init_from_attr(&smap->map, attr);
+
+ nbuckets = roundup_pow_of_two(num_possible_cpus());
+ /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
+ nbuckets = max_t(u32, 2, nbuckets);
+ smap->bucket_log = ilog2(nbuckets);
- smap = __bpf_local_storage_map_alloc(attr);
- if (IS_ERR(smap))
- return ERR_CAST(smap);
+ smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
+ nbuckets, GFP_USER | __GFP_NOWARN);
+ if (!smap->buckets) {
+ err = -ENOMEM;
+ goto free_smap;
+ }
+
+ for (i = 0; i < nbuckets; i++) {
+ INIT_HLIST_HEAD(&smap->buckets[i].list);
+ raw_spin_lock_init(&smap->buckets[i].lock);
+ }
+
+ smap->elem_size = offsetof(struct bpf_local_storage_elem,
+ sdata.data[attr->value_size]);
+
+ smap->bpf_ma = bpf_ma;
+ if (bpf_ma) {
+ err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false);
+ if (err)
+ goto free_smap;
+
+ err = bpf_mem_alloc_init(&smap->storage_ma, sizeof(struct bpf_local_storage), false);
+ if (err) {
+ bpf_mem_alloc_destroy(&smap->selem_ma);
+ goto free_smap;
+ }
+ }
smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
return &smap->map;
+
+free_smap:
+ kvfree(smap->buckets);
+ bpf_map_area_free(smap);
+ return ERR_PTR(err);
}
void bpf_local_storage_map_free(struct bpf_map *map,
@@ -748,7 +906,7 @@ void bpf_local_storage_map_free(struct bpf_map *map,
migrate_disable();
this_cpu_inc(*busy_counter);
}
- bpf_selem_unlink(selem, false);
+ bpf_selem_unlink(selem, true);
if (busy_counter) {
this_cpu_dec(*busy_counter);
migrate_enable();
@@ -772,26 +930,10 @@ void bpf_local_storage_map_free(struct bpf_map *map,
*/
synchronize_rcu();
- /* Only delay freeing of smap, buckets are not needed anymore */
- kvfree(smap->buckets);
-
- /* When local storage has special fields, callbacks for
- * bpf_selem_free_fields_rcu and bpf_selem_free_fields_trace_rcu will
- * keep using the map BTF record, we need to execute an RCU barrier to
- * wait for them as the record will be freed right after our map_free
- * callback.
- */
- if (!IS_ERR_OR_NULL(smap->map.record)) {
- rcu_barrier_tasks_trace();
- /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp()
- * is true, because while call_rcu invocation is skipped in that
- * case in bpf_selem_free_fields_trace_rcu (and all local
- * storage maps pass use_trace_rcu = true), there can be
- * call_rcu callbacks based on use_trace_rcu = false in the
- * while ((selem = ...)) loop above or when owner's free path
- * calls bpf_local_storage_unlink_nolock.
- */
- rcu_barrier();
+ if (smap->bpf_ma) {
+ bpf_mem_alloc_destroy(&smap->selem_ma);
+ bpf_mem_alloc_destroy(&smap->storage_ma);
}
+ kvfree(smap->buckets);
bpf_map_area_free(smap);
}
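
A quick sketch of how a storage flavor opts into the new bpf_mem_alloc path:
it simply passes bpf_ma = true when creating its map. The cache name below is
illustrative; DEFINE_BPF_STORAGE_CACHE() is the existing helper, and task
storage does the same thing further down in this patch.

    /* Hypothetical storage flavor opting into bpf_mem_alloc-backed
     * allocation (a sketch, not part of this patch).
     */
    DEFINE_BPF_STORAGE_CACHE(example_cache);

    static struct bpf_map *example_storage_map_alloc(union bpf_attr *attr)
    {
            /* bpf_ma = true: selems and bpf_local_storage come from
             * smap->selem_ma / smap->storage_ma and may be reused
             * immediately after being freed.
             */
            return bpf_local_storage_map_alloc(attr, &example_cache, true);
    }
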
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 38903fb52f98..d3f0a4825fa6 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -11,11 +11,13 @@
#include <linux/refcount.h>
#include <linux/mutex.h>
#include <linux/btf_ids.h>
+#include <linux/rcupdate_wait.h>
enum bpf_struct_ops_state {
BPF_STRUCT_OPS_STATE_INIT,
BPF_STRUCT_OPS_STATE_INUSE,
BPF_STRUCT_OPS_STATE_TOBEFREE,
+ BPF_STRUCT_OPS_STATE_READY,
};
#define BPF_STRUCT_OPS_COMMON_VALUE \
@@ -58,6 +60,13 @@ struct bpf_struct_ops_map {
struct bpf_struct_ops_value kvalue;
};
+struct bpf_struct_ops_link {
+ struct bpf_link link;
+ struct bpf_map __rcu *map;
+};
+
+static DEFINE_MUTEX(update_mutex);
+
#define VALUE_PREFIX "bpf_struct_ops_"
#define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)
@@ -249,6 +258,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
struct bpf_struct_ops_value *uvalue, *kvalue;
enum bpf_struct_ops_state state;
+ s64 refcnt;
if (unlikely(*(u32 *)key != 0))
return -ENOENT;
@@ -267,7 +277,14 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
uvalue = value;
memcpy(uvalue, st_map->uvalue, map->value_size);
uvalue->state = state;
- refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt));
+
+ /* This value gives user space a rough estimate of how many
+ * sockets are still using this struct_ops for TCP congestion
+ * control. The number may not be exact, but it is good enough
+ * for its present purpose.
+ */
+ refcnt = atomic64_read(&map->refcnt) - atomic64_read(&map->usercnt);
+ refcount_set(&uvalue->refcnt, max_t(s64, refcnt, 0));
return 0;
}
@@ -349,8 +366,8 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
model, flags, tlinks, NULL);
}
-static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 flags)
+static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
const struct bpf_struct_ops *st_ops = st_map->st_ops;
@@ -491,12 +508,29 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
*(unsigned long *)(udata + moff) = prog->aux->id;
}
- refcount_set(&kvalue->refcnt, 1);
- bpf_map_inc(map);
+ if (st_map->map.map_flags & BPF_F_LINK) {
+ err = st_ops->validate(kdata);
+ if (err)
+ goto reset_unlock;
+ set_memory_rox((long)st_map->image, 1);
+ /* Let bpf_link handle registration & unregistration.
+ *
+ * Pair with smp_load_acquire() during lookup_elem().
+ */
+ smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_READY);
+ goto unlock;
+ }
set_memory_rox((long)st_map->image, 1);
err = st_ops->reg(kdata);
if (likely(!err)) {
+ /* Incrementing the map's refcnt here, after st_ops->reg(),
+ * is safe because the map's state must still be INIT at this
+ * point, so bpf_struct_ops_map_delete_elem() cannot
+ * unregister it or transition it to TOBEFREE concurrently.
+ */
+ bpf_map_inc(map);
/* Pair with smp_load_acquire() during lookup_elem().
* It ensures the above udata updates (e.g. prog->aux->id)
* can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
@@ -512,7 +546,6 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
*/
set_memory_nx((long)st_map->image, 1);
set_memory_rw((long)st_map->image, 1);
- bpf_map_put(map);
reset_unlock:
bpf_struct_ops_map_put_progs(st_map);
@@ -524,20 +557,22 @@ unlock:
return err;
}
-static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
+static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
{
enum bpf_struct_ops_state prev_state;
struct bpf_struct_ops_map *st_map;
st_map = (struct bpf_struct_ops_map *)map;
+ if (st_map->map.map_flags & BPF_F_LINK)
+ return -EOPNOTSUPP;
+
prev_state = cmpxchg(&st_map->kvalue.state,
BPF_STRUCT_OPS_STATE_INUSE,
BPF_STRUCT_OPS_STATE_TOBEFREE);
switch (prev_state) {
case BPF_STRUCT_OPS_STATE_INUSE:
st_map->st_ops->unreg(&st_map->kvalue.data);
- if (refcount_dec_and_test(&st_map->kvalue.refcnt))
- bpf_map_put(map);
+ bpf_map_put(map);
return 0;
case BPF_STRUCT_OPS_STATE_TOBEFREE:
return -EINPROGRESS;
@@ -570,7 +605,7 @@ static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
kfree(value);
}
-static void bpf_struct_ops_map_free(struct bpf_map *map)
+static void __bpf_struct_ops_map_free(struct bpf_map *map)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
@@ -582,10 +617,32 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
bpf_map_area_free(st_map);
}
+static void bpf_struct_ops_map_free(struct bpf_map *map)
+{
+ /* A struct_ops function may switch to another struct_ops.
+ *
+ * For example, bpf_tcp_cc_x->init() may switch to
+ * another tcp_cc_y by calling
+ * setsockopt(TCP_CONGESTION, "tcp_cc_y").
+ * During the switch, bpf_struct_ops_put(tcp_cc_x) is called
+ * and its refcount may reach 0, which would then free its
+ * trampoline image while tcp_cc_x is still running.
+ *
+ * A vanilla rcu gp waits for all bpf-tcp-cc progs to finish;
+ * bpf-tcp-cc progs are non-sleepable.
+ * A rcu_tasks gp waits for the last few insns in the
+ * trampoline image to finish before the image is released.
+ */
+ synchronize_rcu_mult(call_rcu, call_rcu_tasks);
+
+ __bpf_struct_ops_map_free(map);
+}
+
static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
{
if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
- attr->map_flags || !attr->btf_vmlinux_value_type_id)
+ (attr->map_flags & ~BPF_F_LINK) || !attr->btf_vmlinux_value_type_id)
return -EINVAL;
return 0;
}
@@ -609,6 +666,9 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
if (attr->value_size != vt->size)
return ERR_PTR(-EINVAL);
+ if (attr->map_flags & BPF_F_LINK && (!st_ops->validate || !st_ops->update))
+ return ERR_PTR(-EOPNOTSUPP);
+
t = st_ops->type;
st_map_size = sizeof(*st_map) +
@@ -630,7 +690,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
NUMA_NO_NODE);
st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
if (!st_map->uvalue || !st_map->links || !st_map->image) {
- bpf_struct_ops_map_free(map);
+ __bpf_struct_ops_map_free(map);
return ERR_PTR(-ENOMEM);
}
@@ -676,41 +736,175 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = {
bool bpf_struct_ops_get(const void *kdata)
{
struct bpf_struct_ops_value *kvalue;
+ struct bpf_struct_ops_map *st_map;
+ struct bpf_map *map;
kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
+ st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue);
- return refcount_inc_not_zero(&kvalue->refcnt);
+ map = __bpf_map_inc_not_zero(&st_map->map, false);
+ return !IS_ERR(map);
}
-static void bpf_struct_ops_put_rcu(struct rcu_head *head)
+void bpf_struct_ops_put(const void *kdata)
{
+ struct bpf_struct_ops_value *kvalue;
struct bpf_struct_ops_map *st_map;
- st_map = container_of(head, struct bpf_struct_ops_map, rcu);
+ kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
+ st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue);
+
bpf_map_put(&st_map->map);
}
-void bpf_struct_ops_put(const void *kdata)
+static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
{
- struct bpf_struct_ops_value *kvalue;
+ struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
- kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
- if (refcount_dec_and_test(&kvalue->refcnt)) {
- struct bpf_struct_ops_map *st_map;
+ return map->map_type == BPF_MAP_TYPE_STRUCT_OPS &&
+ map->map_flags & BPF_F_LINK &&
+ /* Pair with smp_store_release() during map_update */
+ smp_load_acquire(&st_map->kvalue.state) == BPF_STRUCT_OPS_STATE_READY;
+}
- st_map = container_of(kvalue, struct bpf_struct_ops_map,
- kvalue);
- /* The struct_ops's function may switch to another struct_ops.
- *
- * For example, bpf_tcp_cc_x->init() may switch to
- * another tcp_cc_y by calling
- * setsockopt(TCP_CONGESTION, "tcp_cc_y").
- * During the switch, bpf_struct_ops_put(tcp_cc_x) is called
- * and its map->refcnt may reach 0 which then free its
- * trampoline image while tcp_cc_x is still running.
- *
- * Thus, a rcu grace period is needed here.
+static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_struct_ops_link *st_link;
+ struct bpf_struct_ops_map *st_map;
+
+ st_link = container_of(link, struct bpf_struct_ops_link, link);
+ st_map = (struct bpf_struct_ops_map *)
+ rcu_dereference_protected(st_link->map, true);
+ if (st_map) {
+ /* st_link->map can be NULL if
+ * bpf_struct_ops_link_create() fails to register.
*/
- call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu);
+ st_map->st_ops->unreg(&st_map->kvalue.data);
+ bpf_map_put(&st_map->map);
+ }
+ kfree(st_link);
+}
+
+static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ struct bpf_struct_ops_link *st_link;
+ struct bpf_map *map;
+
+ st_link = container_of(link, struct bpf_struct_ops_link, link);
+ rcu_read_lock();
+ map = rcu_dereference(st_link->map);
+ seq_printf(seq, "map_id:\t%d\n", map->id);
+ rcu_read_unlock();
+}
+
+static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ struct bpf_struct_ops_link *st_link;
+ struct bpf_map *map;
+
+ st_link = container_of(link, struct bpf_struct_ops_link, link);
+ rcu_read_lock();
+ map = rcu_dereference(st_link->map);
+ info->struct_ops.map_id = map->id;
+ rcu_read_unlock();
+ return 0;
+}
+
+static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map *new_map,
+ struct bpf_map *expected_old_map)
+{
+ struct bpf_struct_ops_map *st_map, *old_st_map;
+ struct bpf_map *old_map;
+ struct bpf_struct_ops_link *st_link;
+ int err = 0;
+
+ st_link = container_of(link, struct bpf_struct_ops_link, link);
+ st_map = container_of(new_map, struct bpf_struct_ops_map, map);
+
+ if (!bpf_struct_ops_valid_to_reg(new_map))
+ return -EINVAL;
+
+ mutex_lock(&update_mutex);
+
+ old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
+ if (expected_old_map && old_map != expected_old_map) {
+ err = -EPERM;
+ goto err_out;
+ }
+
+ old_st_map = container_of(old_map, struct bpf_struct_ops_map, map);
+ /* The new and old struct_ops must be the same type. */
+ if (st_map->st_ops != old_st_map->st_ops) {
+ err = -EINVAL;
+ goto err_out;
}
+
+ err = st_map->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data);
+ if (err)
+ goto err_out;
+
+ bpf_map_inc(new_map);
+ rcu_assign_pointer(st_link->map, new_map);
+ bpf_map_put(old_map);
+
+err_out:
+ mutex_unlock(&update_mutex);
+
+ return err;
}
+
+static const struct bpf_link_ops bpf_struct_ops_map_lops = {
+ .dealloc = bpf_struct_ops_map_link_dealloc,
+ .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
+ .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
+ .update_map = bpf_struct_ops_map_link_update,
+};
+
+int bpf_struct_ops_link_create(union bpf_attr *attr)
+{
+ struct bpf_struct_ops_link *link = NULL;
+ struct bpf_link_primer link_primer;
+ struct bpf_struct_ops_map *st_map;
+ struct bpf_map *map;
+ int err;
+
+ map = bpf_map_get(attr->link_create.map_fd);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ st_map = (struct bpf_struct_ops_map *)map;
+
+ if (!bpf_struct_ops_valid_to_reg(map)) {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL);
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err)
+ goto err_out;
+
+ err = st_map->st_ops->reg(st_map->kvalue.data);
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ link = NULL;
+ goto err_out;
+ }
+ RCU_INIT_POINTER(link->map, map);
+
+ return bpf_link_settle(&link_primer);
+
+err_out:
+ bpf_map_put(map);
+ kfree(link);
+ return err;
+}
+
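
From user space, the intended BPF_F_LINK flow looks roughly like the sketch
below, assuming a libbpf recent enough to expose bpf_map__attach_struct_ops()
and bpf_link__update_map(); the skeleton and map names are illustrative:

    /* Sketch: register a BPF_F_LINK struct_ops map through a link and
     * later atomically replace it with another map of the same type.
     */
    struct bpf_link *link;
    int err;

    link = bpf_map__attach_struct_ops(skel->maps.ca_ops);   /* BPF_LINK_CREATE */
    if (!link)
            return -errno;

    err = bpf_link__update_map(link, skel->maps.ca_ops_v2); /* BPF_LINK_UPDATE */
    if (err)
            /* e.g. -EINVAL if ca_ops_v2 is not READY or is a different type */
            return err;
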
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 20f942229f3c..adf6dfe0ba68 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -72,8 +72,6 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
void bpf_task_storage_free(struct task_struct *task)
{
struct bpf_local_storage *local_storage;
- bool free_task_storage = false;
- unsigned long flags;
rcu_read_lock();
@@ -84,14 +82,9 @@ void bpf_task_storage_free(struct task_struct *task)
}
bpf_task_storage_lock();
- raw_spin_lock_irqsave(&local_storage->lock, flags);
- free_task_storage = bpf_local_storage_unlink_nolock(local_storage);
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+ bpf_local_storage_destroy(local_storage);
bpf_task_storage_unlock();
rcu_read_unlock();
-
- if (free_task_storage)
- kfree_rcu(local_storage, rcu);
}
static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
@@ -127,8 +120,8 @@ out:
return ERR_PTR(err);
}
-static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags)
+static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
{
struct bpf_local_storage_data *sdata;
struct task_struct *task;
@@ -175,12 +168,12 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map,
if (!nobusy)
return -EBUSY;
- bpf_selem_unlink(SELEM(sdata), true);
+ bpf_selem_unlink(SELEM(sdata), false);
return 0;
}
-static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
+static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
{
struct task_struct *task;
unsigned int f_flags;
@@ -316,7 +309,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
{
- return bpf_local_storage_map_alloc(attr, &task_cache);
+ return bpf_local_storage_map_alloc(attr, &task_cache, true);
}
static void task_storage_map_free(struct bpf_map *map)
@@ -345,7 +338,7 @@ const struct bpf_func_proto bpf_task_storage_get_recur_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
@@ -356,7 +349,7 @@ const struct bpf_func_proto bpf_task_storage_get_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
@@ -367,7 +360,7 @@ const struct bpf_func_proto bpf_task_storage_delete_recur_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};
@@ -376,6 +369,6 @@ const struct bpf_func_proto bpf_task_storage_delete_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};
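
With arg2 relaxed to ARG_PTR_TO_BTF_ID_OR_NULL, the helpers now tolerate a
NULL task and simply fail the lookup instead of the verifier rejecting the
program. A minimal usage sketch; the map and value type are assumptions of
the example:

    struct local_data {
            __u64 counter;
    };

    struct {
            __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
            __uint(map_flags, BPF_F_NO_PREALLOC);
            __type(key, int);
            __type(value, struct local_data);
    } task_map SEC(".maps");

    SEC("tp_btf/task_newtask")
    int BPF_PROG(on_newtask, struct task_struct *task, u64 clone_flags)
    {
            struct local_data *data;

            data = bpf_task_storage_get(&task_map, task, 0,
                                        BPF_LOCAL_STORAGE_GET_F_CREATE);
            if (!data)
                    return 0;
            data->counter++;
            return 0;
    }
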
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 1853beaed4be..2c2d1fb9f410 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3231,12 +3231,6 @@ static void btf_struct_log(struct btf_verifier_env *env,
btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
}
-enum btf_field_info_type {
- BTF_FIELD_SPIN_LOCK,
- BTF_FIELD_TIMER,
- BTF_FIELD_KPTR,
-};
-
enum {
BTF_FIELD_IGNORE = 0,
BTF_FIELD_FOUND = 1,
@@ -3562,7 +3556,10 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
{
struct module *mod = NULL;
const struct btf_type *t;
- struct btf *kernel_btf;
+ /* If a matching btf type is found in kernel or module BTFs, kptr_btf
+ * points at that BTF; otherwise it is the program BTF.
+ */
+ struct btf *kptr_btf;
int ret;
s32 id;
@@ -3571,7 +3568,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
*/
t = btf_type_by_id(btf, info->kptr.type_id);
id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info),
- &kernel_btf);
+ &kptr_btf);
+ if (id == -ENOENT) {
+ /* btf_parse_kptr should only be called w/ btf = program BTF */
+ WARN_ON_ONCE(btf_is_kernel(btf));
+
+ /* Type exists only in program BTF. Assume that it's a MEM_ALLOC
+ * kptr allocated via bpf_obj_new
+ */
+ field->kptr.dtor = NULL;
+ id = info->kptr.type_id;
+ kptr_btf = (struct btf *)btf;
+ btf_get(kptr_btf);
+ goto found_dtor;
+ }
if (id < 0)
return id;
@@ -3588,20 +3598,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
* can be used as a referenced pointer and be stored in a map at
* the same time.
*/
- dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id);
+ dtor_btf_id = btf_find_dtor_kfunc(kptr_btf, id);
if (dtor_btf_id < 0) {
ret = dtor_btf_id;
goto end_btf;
}
- dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id);
+ dtor_func = btf_type_by_id(kptr_btf, dtor_btf_id);
if (!dtor_func) {
ret = -ENOENT;
goto end_btf;
}
- if (btf_is_module(kernel_btf)) {
- mod = btf_try_get_module(kernel_btf);
+ if (btf_is_module(kptr_btf)) {
+ mod = btf_try_get_module(kptr_btf);
if (!mod) {
ret = -ENXIO;
goto end_btf;
@@ -3611,7 +3621,7 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
/* We already verified dtor_func to be btf_type_is_func
* in register_btf_id_dtor_kfuncs.
*/
- dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off);
+ dtor_func_name = __btf_name_by_offset(kptr_btf, dtor_func->name_off);
addr = kallsyms_lookup_name(dtor_func_name);
if (!addr) {
ret = -EINVAL;
@@ -3620,14 +3630,15 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
field->kptr.dtor = (void *)addr;
}
+found_dtor:
field->kptr.btf_id = id;
- field->kptr.btf = kernel_btf;
+ field->kptr.btf = kptr_btf;
field->kptr.module = mod;
return 0;
end_mod:
module_put(mod);
end_btf:
- btf_put(kernel_btf);
+ btf_put(kptr_btf);
return ret;
}
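
For reference, the new id == -ENOENT branch above corresponds to "local"
kptrs whose type lives only in the program's BTF. A sketch of such a program;
bpf_obj_new()/bpf_obj_drop() come from the selftests' bpf_experimental.h, and
the struct and field names are illustrative:

    struct node_data {
            long data;
    };

    struct map_value {
            struct node_data __kptr *node;
    };

    /* inside a BPF program, with v pointing at a map value: */
    struct node_data *n = bpf_obj_new(typeof(*n));

    if (n) {
            n = bpf_kptr_xchg(&v->node, n);
            if (n)
                    bpf_obj_drop(n);  /* slot was already occupied */
    }
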
@@ -5494,38 +5505,45 @@ static int btf_check_type_tags(struct btf_verifier_env *env,
return 0;
}
-static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
- u32 log_level, char __user *log_ubuf, u32 log_size)
+static int finalize_log(struct bpf_verifier_log *log, bpfptr_t uattr, u32 uattr_size)
{
+ u32 log_true_size;
+ int err;
+
+ err = bpf_vlog_finalize(log, &log_true_size);
+
+ if (uattr_size >= offsetofend(union bpf_attr, btf_log_true_size) &&
+ copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size),
+ &log_true_size, sizeof(log_true_size)))
+ err = -EFAULT;
+
+ return err;
+}
+
+static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
+{
+ bpfptr_t btf_data = make_bpfptr(attr->btf, uattr.is_kernel);
+ char __user *log_ubuf = u64_to_user_ptr(attr->btf_log_buf);
struct btf_struct_metas *struct_meta_tab;
struct btf_verifier_env *env = NULL;
- struct bpf_verifier_log *log;
struct btf *btf = NULL;
u8 *data;
- int err;
+ int err, ret;
- if (btf_data_size > BTF_MAX_SIZE)
+ if (attr->btf_size > BTF_MAX_SIZE)
return ERR_PTR(-E2BIG);
env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
if (!env)
return ERR_PTR(-ENOMEM);
- log = &env->log;
- if (log_level || log_ubuf || log_size) {
- /* user requested verbose verifier output
- * and supplied buffer to store the verification trace
- */
- log->level = log_level;
- log->ubuf = log_ubuf;
- log->len_total = log_size;
-
- /* log attributes have to be sane */
- if (!bpf_verifier_log_attr_valid(log)) {
- err = -EINVAL;
- goto errout;
- }
- }
+ /* user could have requested verbose verifier output
+ * and supplied buffer to store the verification trace
+ */
+ err = bpf_vlog_init(&env->log, attr->btf_log_level,
+ log_ubuf, attr->btf_log_size);
+ if (err)
+ goto errout_free;
btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
if (!btf) {
@@ -5534,16 +5552,16 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
}
env->btf = btf;
- data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
+ data = kvmalloc(attr->btf_size, GFP_KERNEL | __GFP_NOWARN);
if (!data) {
err = -ENOMEM;
goto errout;
}
btf->data = data;
- btf->data_size = btf_data_size;
+ btf->data_size = attr->btf_size;
- if (copy_from_bpfptr(data, btf_data, btf_data_size)) {
+ if (copy_from_bpfptr(data, btf_data, attr->btf_size)) {
err = -EFAULT;
goto errout;
}
@@ -5566,7 +5584,7 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
if (err)
goto errout;
- struct_meta_tab = btf_parse_struct_metas(log, btf);
+ struct_meta_tab = btf_parse_struct_metas(&env->log, btf);
if (IS_ERR(struct_meta_tab)) {
err = PTR_ERR(struct_meta_tab);
goto errout;
@@ -5583,10 +5601,9 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
}
}
- if (log->level && bpf_verifier_log_full(log)) {
- err = -ENOSPC;
- goto errout_meta;
- }
+ err = finalize_log(&env->log, uattr, uattr_size);
+ if (err)
+ goto errout_free;
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
@@ -5595,6 +5612,11 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
errout_meta:
btf_free_struct_meta_tab(btf);
errout:
+ /* overwrite err with -ENOSPC or -EFAULT */
+ ret = finalize_log(&env->log, uattr, uattr_size);
+ if (ret)
+ err = ret;
+errout_free:
btf_verifier_env_free(env);
if (btf)
btf_free(btf);
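
The btf_log_true_size feedback enables a retry-with-bigger-buffer pattern in
user space; a rough sketch using the raw syscall (ptr_to_u64() is an assumed
helper, error handling trimmed):

    union bpf_attr attr = {};
    int fd;

    attr.btf = ptr_to_u64(btf_data);
    attr.btf_size = btf_data_sz;
    attr.btf_log_level = 1;
    attr.btf_log_buf = ptr_to_u64(log_buf);
    attr.btf_log_size = log_sz;

    fd = syscall(__NR_bpf, BPF_BTF_LOAD, &attr, sizeof(attr));
    if (fd < 0 && errno == ENOSPC) {
            /* the kernel reported how much space the full log needs */
            log_buf = realloc(log_buf, attr.btf_log_true_size);
            attr.btf_log_buf = ptr_to_u64(log_buf);
            attr.btf_log_size = attr.btf_log_true_size;
            fd = syscall(__NR_bpf, BPF_BTF_LOAD, &attr, sizeof(attr));
    }
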
@@ -5900,12 +5922,8 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
static bool is_int_ptr(struct btf *btf, const struct btf_type *t)
{
- /* t comes in already as a pointer */
- t = btf_type_by_id(btf, t->type);
-
- /* allow const */
- if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST)
- t = btf_type_by_id(btf, t->type);
+ /* skip modifiers */
+ t = btf_type_skip_modifiers(btf, t->type, NULL);
return btf_type_is_int(t);
}
@@ -6156,7 +6174,8 @@ enum bpf_struct_walk_result {
static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, int off, int size,
- u32 *next_btf_id, enum bpf_type_flag *flag)
+ u32 *next_btf_id, enum bpf_type_flag *flag,
+ const char **field_name)
{
u32 i, moff, mtrue_end, msize = 0, total_nelems = 0;
const struct btf_type *mtype, *elem_type = NULL;
@@ -6385,6 +6404,8 @@ error:
if (btf_type_is_struct(stype)) {
*next_btf_id = id;
*flag |= tmp_flag;
+ if (field_name)
+ *field_name = mname;
return WALK_PTR;
}
}
@@ -6411,7 +6432,8 @@ error:
int btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
int off, int size, enum bpf_access_type atype __maybe_unused,
- u32 *next_btf_id, enum bpf_type_flag *flag)
+ u32 *next_btf_id, enum bpf_type_flag *flag,
+ const char **field_name)
{
const struct btf *btf = reg->btf;
enum bpf_type_flag tmp_flag = 0;
@@ -6443,7 +6465,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
t = btf_type_by_id(btf, id);
do {
- err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag);
+ err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag, field_name);
switch (err) {
case WALK_PTR:
@@ -6518,7 +6540,7 @@ again:
type = btf_type_by_id(btf, id);
if (!type)
return false;
- err = btf_struct_walk(log, btf, type, off, 1, &id, &flag);
+ err = btf_struct_walk(log, btf, type, off, 1, &id, &flag, NULL);
if (err != WALK_STRUCT)
return false;
@@ -7199,15 +7221,12 @@ static int __btf_new_fd(struct btf *btf)
return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC);
}
-int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr)
+int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
{
struct btf *btf;
int ret;
- btf = btf_parse(make_bpfptr(attr->btf, uattr.is_kernel),
- attr->btf_size, attr->btf_log_level,
- u64_to_user_ptr(attr->btf_log_buf),
- attr->btf_log_size);
+ btf = btf_parse(attr, uattr, uattr_size);
if (IS_ERR(btf))
return PTR_ERR(btf);
@@ -7597,6 +7616,108 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
BTF_TRACING_TYPE_xxx
#undef BTF_TRACING_TYPE
+static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name,
+ const struct btf_type *func, u32 func_flags)
+{
+ u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
+ const char *name, *sfx, *iter_name;
+ const struct btf_param *arg;
+ const struct btf_type *t;
+ char exp_name[128];
+ u32 nr_args;
+
+ /* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */
+ if (!flags || (flags & (flags - 1)))
+ return -EINVAL;
+
+ /* any BPF iter kfunc should have `struct bpf_iter_<type> *` first arg */
+ nr_args = btf_type_vlen(func);
+ if (nr_args < 1)
+ return -EINVAL;
+
+ arg = &btf_params(func)[0];
+ t = btf_type_skip_modifiers(btf, arg->type, NULL);
+ if (!t || !btf_type_is_ptr(t))
+ return -EINVAL;
+ t = btf_type_skip_modifiers(btf, t->type, NULL);
+ if (!t || !__btf_type_is_struct(t))
+ return -EINVAL;
+
+ name = btf_name_by_offset(btf, t->name_off);
+ if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
+ return -EINVAL;
+
+ /* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to
+ * fit nicely in stack slots
+ */
+ if (t->size == 0 || (t->size % 8))
+ return -EINVAL;
+
+ /* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *)
+ * naming pattern
+ */
+ iter_name = name + sizeof(ITER_PREFIX) - 1;
+ if (flags & KF_ITER_NEW)
+ sfx = "new";
+ else if (flags & KF_ITER_NEXT)
+ sfx = "next";
+ else /* (flags & KF_ITER_DESTROY) */
+ sfx = "destroy";
+
+ snprintf(exp_name, sizeof(exp_name), "bpf_iter_%s_%s", iter_name, sfx);
+ if (strcmp(func_name, exp_name))
+ return -EINVAL;
+
+ /* only iter constructor should have extra arguments */
+ if (!(flags & KF_ITER_NEW) && nr_args != 1)
+ return -EINVAL;
+
+ if (flags & KF_ITER_NEXT) {
+ /* bpf_iter_<type>_next() should return pointer */
+ t = btf_type_skip_modifiers(btf, func->type, NULL);
+ if (!t || !btf_type_is_ptr(t))
+ return -EINVAL;
+ }
+
+ if (flags & KF_ITER_DESTROY) {
+ /* bpf_iter_<type>_destroy() should return void */
+ t = btf_type_by_id(btf, func->type);
+ if (!t || !btf_type_is_void(t))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int btf_check_kfunc_protos(struct btf *btf, u32 func_id, u32 func_flags)
+{
+ const struct btf_type *func;
+ const char *func_name;
+ int err;
+
+ /* any kfunc should be FUNC -> FUNC_PROTO */
+ func = btf_type_by_id(btf, func_id);
+ if (!func || !btf_type_is_func(func))
+ return -EINVAL;
+
+ /* sanity check kfunc name */
+ func_name = btf_name_by_offset(btf, func->name_off);
+ if (!func_name || !func_name[0])
+ return -EINVAL;
+
+ func = btf_type_by_id(btf, func->type);
+ if (!func || !btf_type_is_func_proto(func))
+ return -EINVAL;
+
+ if (func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY)) {
+ err = btf_check_iter_kfuncs(btf, func_name, func, func_flags);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
/* Kernel Function (kfunc) BTF ID set registration API */
static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
@@ -7773,7 +7894,7 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
const struct btf_kfunc_id_set *kset)
{
struct btf *btf;
- int ret;
+ int ret, i;
btf = btf_get_module_btf(kset->owner);
if (!btf) {
@@ -7790,7 +7911,15 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
if (IS_ERR(btf))
return PTR_ERR(btf);
+ for (i = 0; i < kset->set->cnt; i++) {
+ ret = btf_check_kfunc_protos(btf, kset->set->pairs[i].id,
+ kset->set->pairs[i].flags);
+ if (ret)
+ goto err_out;
+ }
+
ret = btf_populate_kfunc_set(btf, hook, kset->set);
+err_out:
btf_put(btf);
return ret;
}
@@ -8368,16 +8497,15 @@ out:
bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, const char *suffix)
+ const char *field_name, u32 btf_id, const char *suffix)
{
struct btf *btf = reg->btf;
const struct btf_type *walk_type, *safe_type;
const char *tname;
char safe_tname[64];
long ret, safe_id;
- const struct btf_member *member, *m_walk = NULL;
+ const struct btf_member *member;
u32 i;
- const char *walk_name;
walk_type = btf_type_by_id(btf, reg->btf_id);
if (!walk_type)
@@ -8397,30 +8525,17 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
if (!safe_type)
return false;
- for_each_member(i, walk_type, member) {
- u32 moff;
-
- /* We're looking for the PTR_TO_BTF_ID member in the struct
- * type we're walking which matches the specified offset.
- * Below, we'll iterate over the fields in the safe variant of
- * the struct and see if any of them has a matching type /
- * name.
- */
- moff = __btf_member_bit_offset(walk_type, member) / 8;
- if (off == moff) {
- m_walk = member;
- break;
- }
- }
- if (m_walk == NULL)
- return false;
-
- walk_name = __btf_name_by_offset(btf, m_walk->name_off);
for_each_member(i, safe_type, member) {
const char *m_name = __btf_name_by_offset(btf, member->name_off);
+ const struct btf_type *mtype = btf_type_by_id(btf, member->type);
+ u32 id;
+
+ if (!btf_type_is_ptr(mtype))
+ continue;
+ btf_type_skip_modifiers(btf, mtype->type, &id);
/* If we match on both type and name, the field is considered trusted. */
- if (m_walk->type == member->type && !strcmp(walk_name, m_name))
+ if (btf_id == id && !strcmp(field_name, m_name))
return true;
}
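
The naming and typing contract enforced by btf_check_iter_kfuncs() is what
open-coded iterator usage relies on; the bpf_iter_num_* kfuncs registered
later in this patch follow it. A usage sketch from a BPF program:

    struct bpf_iter_num it;
    int *v, sum = 0;

    bpf_iter_num_new(&it, 0, 10);           /* KF_ITER_NEW; extra args allowed */
    while ((v = bpf_iter_num_next(&it)))    /* KF_ITER_NEXT | KF_RET_NULL */
            sum += *v;
    bpf_iter_num_destroy(&it);              /* KF_ITER_DESTROY; returns void */
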
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 871809e71b4e..8ec18faa74ac 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -540,7 +540,7 @@ static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
}
}
-static int cpu_map_delete_elem(struct bpf_map *map, void *key)
+static long cpu_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
u32 key_cpu = *(u32 *)key;
@@ -553,8 +553,8 @@ static int cpu_map_delete_elem(struct bpf_map *map, void *key)
return 0;
}
-static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
- u64 map_flags)
+static long cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
struct bpf_cpumap_val cpumap_value = {};
@@ -667,7 +667,7 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
return 0;
}
-static int cpu_map_redirect(struct bpf_map *map, u64 index, u64 flags)
+static long cpu_map_redirect(struct bpf_map *map, u64 index, u64 flags)
{
return __bpf_xdp_redirect_map(map, index, flags, 0,
__cpu_map_lookup_elem);
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index b6587ec40f1b..7efdf5d770ca 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -9,6 +9,7 @@
/**
* struct bpf_cpumask - refcounted BPF cpumask wrapper structure
* @cpumask: The actual cpumask embedded in the struct.
+ * @rcu: The RCU head used to free the cpumask with RCU safety.
* @usage: Object reference counter. When the refcount goes to 0, the
* memory is released back to the BPF allocator, which provides
* RCU safety.
@@ -24,6 +25,7 @@
*/
struct bpf_cpumask {
cpumask_t cpumask;
+ struct rcu_head rcu;
refcount_t usage;
};
@@ -80,32 +82,14 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask)
return cpumask;
}
-/**
- * bpf_cpumask_kptr_get() - Attempt to acquire a reference to a BPF cpumask
- * stored in a map.
- * @cpumaskp: A pointer to a BPF cpumask map value.
- *
- * Attempts to acquire a reference to a BPF cpumask stored in a map value. The
- * cpumask returned by this function must either be embedded in a map as a
- * kptr, or freed with bpf_cpumask_release(). This function may return NULL if
- * no BPF cpumask was found in the specified map value.
- */
-__bpf_kfunc struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp)
+static void cpumask_free_cb(struct rcu_head *head)
{
struct bpf_cpumask *cpumask;
- /* The BPF memory allocator frees memory backing its caches in an RCU
- * callback. Thus, we can safely use RCU to ensure that the cpumask is
- * safe to read.
- */
- rcu_read_lock();
-
- cpumask = READ_ONCE(*cpumaskp);
- if (cpumask && !refcount_inc_not_zero(&cpumask->usage))
- cpumask = NULL;
-
- rcu_read_unlock();
- return cpumask;
+ cpumask = container_of(head, struct bpf_cpumask, rcu);
+ migrate_disable();
+ bpf_mem_cache_free(&bpf_cpumask_ma, cpumask);
+ migrate_enable();
}
/**
@@ -118,14 +102,8 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumas
*/
__bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask)
{
- if (!cpumask)
- return;
-
- if (refcount_dec_and_test(&cpumask->usage)) {
- migrate_disable();
- bpf_mem_cache_free(&bpf_cpumask_ma, cpumask);
- migrate_enable();
- }
+ if (refcount_dec_and_test(&cpumask->usage))
+ call_rcu(&cpumask->rcu, cpumask_free_cb);
}
/**
@@ -424,9 +402,8 @@ __diag_pop();
BTF_SET8_START(cpumask_kfunc_btf_ids)
BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU)
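
With the free deferred through call_rcu(), KF_RCU readers can keep
dereferencing a cpumask whose last reference was just dropped until their RCU
read section ends. A minimal lifecycle sketch from a BPF program:

    struct bpf_cpumask *mask;

    mask = bpf_cpumask_create();
    if (!mask)
            return 0;
    bpf_cpumask_set_cpu(0, mask);
    /* ... */
    bpf_cpumask_release(mask);  /* memory returned via call_rcu() */
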
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 19b036a228f7..802692fa3905 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -809,7 +809,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
kfree(dev);
}
-static int dev_map_delete_elem(struct bpf_map *map, void *key)
+static long dev_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *old_dev;
@@ -826,7 +826,7 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
return 0;
}
-static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
+static long dev_map_hash_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *old_dev;
@@ -897,8 +897,8 @@ err_out:
return ERR_PTR(-EINVAL);
}
-static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
- void *key, void *value, u64 map_flags)
+static long __dev_map_update_elem(struct net *net, struct bpf_map *map,
+ void *key, void *value, u64 map_flags)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *dev, *old_dev;
@@ -939,15 +939,15 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
return 0;
}
-static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
- u64 map_flags)
+static long dev_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
{
return __dev_map_update_elem(current->nsproxy->net_ns,
map, key, value, map_flags);
}
-static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
- void *key, void *value, u64 map_flags)
+static long __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
+ void *key, void *value, u64 map_flags)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *dev, *old_dev;
@@ -999,21 +999,21 @@ out_err:
return err;
}
-static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
- u64 map_flags)
+static long dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
{
return __dev_map_hash_update_elem(current->nsproxy->net_ns,
map, key, value, map_flags);
}
-static int dev_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags)
+static long dev_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags)
{
return __bpf_xdp_redirect_map(map, ifindex, flags,
BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
__dev_map_lookup_elem);
}
-static int dev_hash_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags)
+static long dev_hash_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags)
{
return __bpf_xdp_redirect_map(map, ifindex, flags,
BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 0df4b0c10f59..00c253b84bf5 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -607,6 +607,8 @@ free_htab:
static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
{
+ if (likely(key_len % 4 == 0))
+ return jhash2(key, key_len / 4, hashrnd);
return jhash(key, key_len, hashrnd);
}
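
jhash2() consumes the key as u32 words, which is cheaper than byte-wise
jhash(); any key whose size is a multiple of 4 now takes the fast path. The
two functions hash the same bytes to different values, which is harmless
because the hash is purely internal to the map. An illustrative key layout:

    /* key_len == 8, so htab_map_hash() uses jhash2(key, 2, hashrnd) */
    struct flow_key {
            __u32 saddr;
            __u32 daddr;
    };
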
@@ -1073,8 +1075,8 @@ static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
}
/* Called from syscall or from eBPF program */
-static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
- u64 map_flags)
+static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
@@ -1175,8 +1177,8 @@ static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
bpf_lru_push_free(&htab->lru, &elem->lru_node);
}
-static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
- u64 map_flags)
+static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new, *l_old = NULL;
@@ -1242,9 +1244,9 @@ err:
return ret;
}
-static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags,
- bool onallcpus)
+static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags,
+ bool onallcpus)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
@@ -1297,9 +1299,9 @@ err:
return ret;
}
-static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags,
- bool onallcpus)
+static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags,
+ bool onallcpus)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
@@ -1364,21 +1366,21 @@ err:
return ret;
}
-static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags)
+static long htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
{
return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
}
-static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags)
+static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
{
return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
false);
}
/* Called from syscall or from eBPF program */
-static int htab_map_delete_elem(struct bpf_map *map, void *key)
+static long htab_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct hlist_nulls_head *head;
@@ -1414,7 +1416,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
return ret;
}
-static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
+static long htab_lru_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct hlist_nulls_head *head;
@@ -2134,8 +2136,8 @@ static const struct bpf_iter_seq_info iter_seq_info = {
.seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
};
-static int bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn,
- void *callback_ctx, u64 flags)
+static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn,
+ void *callback_ctx, u64 flags)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct hlist_nulls_head *head;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 637ac4e92e75..f04e60a4847f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,7 @@
#include <linux/pid_namespace.h>
#include <linux/poison.h>
#include <linux/proc_ns.h>
+#include <linux/sched/task.h>
#include <linux/security.h>
#include <linux/btf_ids.h>
#include <linux/bpf_mem_alloc.h>
@@ -257,7 +258,7 @@ BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
goto err_clear;
/* Verifier guarantees that size > 0 */
- strscpy(buf, task->comm, size);
+ strscpy_pad(buf, task->comm, size);
return 0;
err_clear:
memset(buf, 0, size);
@@ -571,7 +572,7 @@ static const struct bpf_func_proto bpf_strncmp_proto = {
.func = bpf_strncmp,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_MEM,
+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_PTR_TO_CONST_STR,
};
@@ -1896,14 +1897,19 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
return p;
}
+void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
+{
+ if (rec)
+ bpf_obj_free_fields(rec, p);
+ bpf_mem_free(&bpf_global_ma, p);
+}
+
__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
{
struct btf_struct_meta *meta = meta__ign;
void *p = p__alloc;
- if (meta)
- bpf_obj_free_fields(meta->record, p);
- bpf_mem_free(&bpf_global_ma, p);
+ __bpf_obj_drop_impl(p, meta ? meta->record : NULL);
}
static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail)
@@ -2008,73 +2014,8 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
*/
__bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
{
- return get_task_struct(p);
-}
-
-/**
- * bpf_task_acquire_not_zero - Acquire a reference to a rcu task object. A task
- * acquired by this kfunc which is not stored in a map as a kptr, must be
- * released by calling bpf_task_release().
- * @p: The task on which a reference is being acquired.
- */
-__bpf_kfunc struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
-{
- /* For the time being this function returns NULL, as it's not currently
- * possible to safely acquire a reference to a task with RCU protection
- * using get_task_struct() and put_task_struct(). This is due to the
- * slightly odd mechanics of p->rcu_users, and how task RCU protection
- * works.
- *
- * A struct task_struct is refcounted by two different refcount_t
- * fields:
- *
- * 1. p->usage: The "true" refcount field which tracks a task's
- * lifetime. The task is freed as soon as this
- * refcount drops to 0.
- *
- * 2. p->rcu_users: An "RCU users" refcount field which is statically
- * initialized to 2, and is co-located in a union with
- * a struct rcu_head field (p->rcu). p->rcu_users
- * essentially encapsulates a single p->usage
- * refcount, and when p->rcu_users goes to 0, an RCU
- * callback is scheduled on the struct rcu_head which
- * decrements the p->usage refcount.
- *
- * There are two important implications to this task refcounting logic
- * described above. The first is that
- * refcount_inc_not_zero(&p->rcu_users) cannot be used anywhere, as
- * after the refcount goes to 0, the RCU callback being scheduled will
- * cause the memory backing the refcount to again be nonzero due to the
- * fields sharing a union. The other is that we can't rely on RCU to
- * guarantee that a task is valid in a BPF program. This is because a
- * task could have already transitioned to being in the TASK_DEAD
- * state, had its rcu_users refcount go to 0, and its rcu callback
- * invoked in which it drops its single p->usage reference. At this
- * point the task will be freed as soon as the last p->usage reference
- * goes to 0, without waiting for another RCU gp to elapse. The only
- * way that a BPF program can guarantee that a task is valid is in this
- * scenario is to hold a p->usage refcount itself.
- *
- * Until we're able to resolve this issue, either by pulling
- * p->rcu_users and p->rcu out of the union, or by getting rid of
- * p->usage and just using p->rcu_users for refcounting, we'll just
- * return NULL here.
- */
- return NULL;
-}
-
-/**
- * bpf_task_kptr_get - Acquire a reference on a struct task_struct kptr. A task
- * kptr acquired by this kfunc which is not subsequently stored in a map, must
- * be released by calling bpf_task_release().
- * @pp: A pointer to a task kptr on which a reference is being acquired.
- */
-__bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
-{
- /* We must return NULL here until we have clarity on how to properly
- * leverage RCU for ensuring a task's lifetime. See the comment above
- * in bpf_task_acquire_not_zero() for more details.
- */
+ if (refcount_inc_not_zero(&p->rcu_users))
+ return p;
return NULL;
}
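
Since bpf_task_acquire() is now KF_RCU | KF_RET_NULL, callers run under RCU
protection and must NULL-check the result; a caller sketch:

    SEC("tp_btf/task_newtask")
    int BPF_PROG(on_newtask, struct task_struct *task, u64 clone_flags)
    {
            struct task_struct *acquired;

            acquired = bpf_task_acquire(task);
            if (!acquired)
                    return 0;       /* rcu_users already hit zero */
            /* ... acquired may be used and even stashed in a map ... */
            bpf_task_release(acquired);
            return 0;
    }
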
@@ -2084,10 +2025,7 @@ __bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
*/
__bpf_kfunc void bpf_task_release(struct task_struct *p)
{
- if (!p)
- return;
-
- put_task_struct(p);
+ put_task_struct_rcu_user(p);
}
#ifdef CONFIG_CGROUPS
@@ -2099,39 +2037,7 @@ __bpf_kfunc void bpf_task_release(struct task_struct *p)
*/
__bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
{
- cgroup_get(cgrp);
- return cgrp;
-}
-
-/**
- * bpf_cgroup_kptr_get - Acquire a reference on a struct cgroup kptr. A cgroup
- * kptr acquired by this kfunc which is not subsequently stored in a map, must
- * be released by calling bpf_cgroup_release().
- * @cgrpp: A pointer to a cgroup kptr on which a reference is being acquired.
- */
-__bpf_kfunc struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
-{
- struct cgroup *cgrp;
-
- rcu_read_lock();
- /* Another context could remove the cgroup from the map and release it
- * at any time, including after we've done the lookup above. This is
- * safe because we're in an RCU read region, so the cgroup is
- * guaranteed to remain valid until at least the rcu_read_unlock()
- * below.
- */
- cgrp = READ_ONCE(*cgrpp);
-
- if (cgrp && !cgroup_tryget(cgrp))
- /* If the cgroup had been removed from the map and freed as
- * described above, cgroup_tryget() will return false. The
- * cgroup will be freed at some point after the current RCU gp
- * has ended, so just return NULL to the user.
- */
- cgrp = NULL;
- rcu_read_unlock();
-
- return cgrp;
+ return cgroup_tryget(cgrp) ? cgrp : NULL;
}
/**
@@ -2143,9 +2049,6 @@ __bpf_kfunc struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
*/
__bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp)
{
- if (!cgrp)
- return;
-
cgroup_put(cgrp);
}
@@ -2200,7 +2103,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
rcu_read_lock();
p = find_task_by_pid_ns(pid, &init_pid_ns);
if (p)
- bpf_task_acquire(p);
+ p = bpf_task_acquire(p);
rcu_read_unlock();
return p;
@@ -2372,17 +2275,14 @@ BTF_ID_FLAGS(func, bpf_list_push_front)
BTF_ID_FLAGS(func, bpf_list_push_back)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_rbtree_add)
BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
#ifdef CONFIG_CGROUPS
-BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
@@ -2411,6 +2311,9 @@ BTF_ID_FLAGS(func, bpf_rcu_read_lock)
BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
+BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
BTF_SET8_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index a993560f200a..4c7bbec4a9e4 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -141,8 +141,8 @@ static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *key)
return &READ_ONCE(storage->buf)->data[0];
}
-static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
- void *value, u64 flags)
+static long cgroup_storage_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
{
struct bpf_cgroup_storage *storage;
struct bpf_storage_buffer *new;
@@ -348,7 +348,7 @@ static void cgroup_storage_map_free(struct bpf_map *_map)
bpf_map_area_free(map);
}
-static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
+static long cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
return -EINVAL;
}
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
new file mode 100644
index 000000000000..046ddff37a76
--- /dev/null
+++ b/kernel/bpf/log.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
+ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+ */
+#include <uapi/linux/btf.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/math64.h>
+
+static bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
+{
+ /* ubuf and len_total should both be specified (or not) together */
+ if (!!log->ubuf != !!log->len_total)
+ return false;
+ /* log buf without log_level is meaningless */
+ if (log->ubuf && log->level == 0)
+ return false;
+ if (log->level & ~BPF_LOG_MASK)
+ return false;
+ if (log->len_total > UINT_MAX >> 2)
+ return false;
+ return true;
+}
+
+int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level,
+ char __user *log_buf, u32 log_size)
+{
+ log->level = log_level;
+ log->ubuf = log_buf;
+ log->len_total = log_size;
+
+ /* log attributes have to be sane */
+ if (!bpf_verifier_log_attr_valid(log))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void bpf_vlog_update_len_max(struct bpf_verifier_log *log, u32 add_len)
+{
+ /* add_len includes terminal \0, so no need for +1. */
+ u64 len = log->end_pos + add_len;
+
+ /* log->len_max could be larger than our current len due to
+ * bpf_vlog_reset() calls, so we maintain the maximum length
+ * seen at any previous point.
+ */
+ if (len > UINT_MAX)
+ log->len_max = UINT_MAX;
+ else if (len > log->len_max)
+ log->len_max = len;
+}
+
+void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
+ va_list args)
+{
+ u64 cur_pos;
+ u32 new_n, n;
+
+ n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
+
+ WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
+ "verifier log line truncated - local buffer too short\n");
+
+ if (log->level == BPF_LOG_KERNEL) {
+ bool newline = n > 0 && log->kbuf[n - 1] == '\n';
+
+ pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
+ return;
+ }
+
+ n += 1; /* include terminating zero */
+ bpf_vlog_update_len_max(log, n);
+
+ if (log->level & BPF_LOG_FIXED) {
+ /* check if we have at least something to put into user buf */
+ new_n = 0;
+ if (log->end_pos < log->len_total) {
+ new_n = min_t(u32, log->len_total - log->end_pos, n);
+ log->kbuf[new_n - 1] = '\0';
+ }
+
+ cur_pos = log->end_pos;
+ log->end_pos += n - 1; /* don't count terminating '\0' */
+
+ if (log->ubuf && new_n &&
+ copy_to_user(log->ubuf + cur_pos, log->kbuf, new_n))
+ goto fail;
+ } else {
+ u64 new_end, new_start;
+ u32 buf_start, buf_end, new_n;
+
+ new_end = log->end_pos + n;
+ if (new_end - log->start_pos >= log->len_total)
+ new_start = new_end - log->len_total;
+ else
+ new_start = log->start_pos;
+
+ log->start_pos = new_start;
+ log->end_pos = new_end - 1; /* don't count terminating '\0' */
+
+ if (!log->ubuf)
+ return;
+
+ new_n = min(n, log->len_total);
+ cur_pos = new_end - new_n;
+ div_u64_rem(cur_pos, log->len_total, &buf_start);
+ div_u64_rem(new_end, log->len_total, &buf_end);
+ /* new_end and buf_end are exclusive indices, so if buf_end is
+ * exactly zero, then it actually points right to the end of
+ * ubuf and there is no wrap around
+ */
+ if (buf_end == 0)
+ buf_end = log->len_total;
+
+ /* if buf_start > buf_end, we wrapped around;
+ * if buf_start == buf_end, then we fill ubuf completely; we
+ * can't have buf_start == buf_end to mean that there is
+ * nothing to write, because we always write at least
+ * something, even if it's just the terminating '\0'
+ */
+ if (buf_start < buf_end) {
+ /* message fits within contiguous chunk of ubuf */
+ if (copy_to_user(log->ubuf + buf_start,
+ log->kbuf + n - new_n,
+ buf_end - buf_start))
+ goto fail;
+ } else {
+ /* message wraps around the end of ubuf, copy in two chunks */
+ if (copy_to_user(log->ubuf + buf_start,
+ log->kbuf + n - new_n,
+ log->len_total - buf_start))
+ goto fail;
+ if (copy_to_user(log->ubuf,
+ log->kbuf + n - buf_end,
+ buf_end))
+ goto fail;
+ }
+ }
+
+ return;
+fail:
+ log->ubuf = NULL;
+}
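+
A standalone model of the rotating (non-BPF_LOG_FIXED) branch above may help. This sketch tracks the same logical start_pos/end_pos positions and keeps only the newest len_total bytes; it copies byte-by-byte for clarity, where the kernel uses at most two copy_to_user() chunks and excludes the terminating '\0' from end_pos:

#include <stdint.h>

struct ring {
	char *buf;			/* ring of len_total bytes */
	uint32_t len_total;
	uint64_t start_pos, end_pos;	/* logical, ever-growing positions */
};

static void ring_append(struct ring *r, const char *data, uint32_t n)
{
	uint64_t new_end = r->end_pos + n;
	uint32_t keep = n < r->len_total ? n : r->len_total;
	uint32_t pos = (uint32_t)((new_end - keep) % r->len_total);
	uint32_t i;

	if (new_end - r->start_pos >= r->len_total)
		r->start_pos = new_end - r->len_total;	/* oldest bytes are overwritten */
	r->end_pos = new_end;

	/* only the last 'keep' bytes can survive; earlier ones would be overwritten */
	for (i = 0; i < keep; i++, pos = (pos + 1) % r->len_total)
		r->buf[pos] = data[n - keep + i];
}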
+
+void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos)
+{
+ char zero = 0;
+ u32 pos;
+
+ if (WARN_ON_ONCE(new_pos > log->end_pos))
+ return;
+
+ if (!bpf_verifier_log_needed(log) || log->level == BPF_LOG_KERNEL)
+ return;
+
+ /* if the position to which we reset is beyond the current log window,
+ * then we didn't preserve any useful content and should adjust
+ * start_pos to end up with an empty log (start_pos == end_pos)
+ */
+ log->end_pos = new_pos;
+ if (log->end_pos < log->start_pos)
+ log->start_pos = log->end_pos;
+
+ if (!log->ubuf)
+ return;
+
+ if (log->level & BPF_LOG_FIXED)
+ pos = log->end_pos + 1;
+ else
+ div_u64_rem(new_pos, log->len_total, &pos);
+
+ if (pos < log->len_total && put_user(zero, log->ubuf + pos))
+ log->ubuf = NULL;
+}
+
+static void bpf_vlog_reverse_kbuf(char *buf, int len)
+{
+ int i, j;
+
+ for (i = 0, j = len - 1; i < j; i++, j--)
+ swap(buf[i], buf[j]);
+}
+
+static int bpf_vlog_reverse_ubuf(struct bpf_verifier_log *log, int start, int end)
+{
+ /* we split log->kbuf into two equal parts for both ends of the array */
+ int n = sizeof(log->kbuf) / 2, nn;
+ char *lbuf = log->kbuf, *rbuf = log->kbuf + n;
+
+ /* Read ubuf's section [start, end) two chunks at a time, from the left
+ * and right sides; within each chunk, reverse the bytes; after that,
+ * swap the positions of lbuf and rbuf and write the result back to ubuf.
+ * This way we'll end up with swapped contents of specified
+ * [start, end) ubuf segment.
+ */
+ while (end - start > 1) {
+ nn = min(n, (end - start) / 2);
+
+ if (copy_from_user(lbuf, log->ubuf + start, nn))
+ return -EFAULT;
+ if (copy_from_user(rbuf, log->ubuf + end - nn, nn))
+ return -EFAULT;
+
+ bpf_vlog_reverse_kbuf(lbuf, nn);
+ bpf_vlog_reverse_kbuf(rbuf, nn);
+
+ /* we write lbuf to the right end of ubuf and rbuf to the left
+ * end, to end up with a properly reversed ubuf overall
+ */
+ if (copy_to_user(log->ubuf + start, rbuf, nn))
+ return -EFAULT;
+ if (copy_to_user(log->ubuf + end - nn, lbuf, nn))
+ return -EFAULT;
+
+ start += nn;
+ end -= nn;
+ }
+
+ return 0;
+}
+
+int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual)
+{
+ u32 sublen;
+ int err;
+
+ *log_size_actual = 0;
+ if (!log || log->level == 0 || log->level == BPF_LOG_KERNEL)
+ return 0;
+
+ if (!log->ubuf)
+ goto skip_log_rotate;
+ /* If we never truncated the log, there is nothing to move around. */
+ if (log->start_pos == 0)
+ goto skip_log_rotate;
+
+ /* Otherwise we need to rotate log contents to make it start from the
+ * buffer beginning and be a continuous zero-terminated string. Note
+ * that if log->start_pos != 0 then we definitely filled up the entire
+ * log buffer with no gaps, and we just need to shift buffer contents
+ * to the left by (log->start_pos % log->len_total) bytes.
+ *
+ * Unfortunately, the user buffer could be huge and we don't want to
+ * allocate temporary kernel memory of the same size just to shift
+ * contents in a straightforward fashion. Instead, we'll be clever and
+ * do in-place array rotation. This is a leetcode-style problem, which
+ * can be solved with three reversals.
+ *
+ * Let's say we have a log buffer that has to be shifted left by 7 bytes
+ * (spaces and the vertical bar are just for demonstration purposes):
+ * E F G H I J K | A B C D
+ *
+ * First, we reverse the entire array:
+ * D C B A | K J I H G F E
+ *
+ * Then we reverse the first 4 bytes (DCBA) and, separately, the last
+ * 7 bytes (KJIHGFE), resulting in a properly rotated array:
+ * A B C D | E F G H I J K
+ *
+ * We'll utilize log->kbuf to read user memory chunk by chunk, reverse
+ * the bytes, and write them back. Doing it byte-by-byte would be
+ * unnecessarily inefficient. Altogether we are going to read and
+ * write each byte twice, for a total of 4 memory copies between kernel
+ * and user space.
+ */
+
+ /* length of the chopped off part that will be the beginning;
+ * len(ABCD) in the example above
+ */
+ div_u64_rem(log->start_pos, log->len_total, &sublen);
+ sublen = log->len_total - sublen;
+
+ err = bpf_vlog_reverse_ubuf(log, 0, log->len_total);
+ err = err ?: bpf_vlog_reverse_ubuf(log, 0, sublen);
+ err = err ?: bpf_vlog_reverse_ubuf(log, sublen, log->len_total);
+ if (err)
+ log->ubuf = NULL;
+
+skip_log_rotate:
+ *log_size_actual = log->len_max;
+
+ /* a properly initialized log has either both ubuf!=NULL and len_total>0
+ * or ubuf==NULL and len_total==0; if this condition doesn't hold, we
+ * got a fault somewhere along the way, so report it back
+ */
+ if (!!log->ubuf != !!log->len_total)
+ return -EFAULT;
+
+ /* did truncation actually happen? */
+ if (log->ubuf && log->len_max > log->len_total)
+ return -ENOSPC;
+
+ return 0;
+}
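+
The three-reversal rotation described in the comment above, as a self-contained userspace sketch (shifting the example buffer left by 7 bytes):

#include <stdio.h>
#include <string.h>

static void reverse(char *s, int start, int end)	/* reverses [start, end) */
{
	for (end--; start < end; start++, end--) {
		char tmp = s[start];

		s[start] = s[end];
		s[end] = tmp;
	}
}

int main(void)
{
	char buf[] = "EFGHIJKABCD";
	int len = strlen(buf), sublen = 4;	/* len("ABCD"), the chopped-off part */

	reverse(buf, 0, len);		/* DCBAKJIHGFE */
	reverse(buf, 0, sublen);	/* ABCDKJIHGFE */
	reverse(buf, sublen, len);	/* ABCDEFGHIJK */
	printf("%s\n", buf);		/* prints ABCDEFGHIJK */
	return 0;
}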
+
+/* log_level controls the verbosity level of the eBPF verifier.
+ * bpf_verifier_log_write() is used to dump the verification trace to the log,
+ * so the user can figure out what's wrong with the program.
+ */
+__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
+ const char *fmt, ...)
+{
+ va_list args;
+
+ if (!bpf_verifier_log_needed(&env->log))
+ return;
+
+ va_start(args, fmt);
+ bpf_verifier_vlog(&env->log, fmt, args);
+ va_end(args);
+}
+EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
+
+__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
+ const char *fmt, ...)
+{
+ va_list args;
+
+ if (!bpf_verifier_log_needed(log))
+ return;
+
+ va_start(args, fmt);
+ bpf_verifier_vlog(log, fmt, args);
+ va_end(args);
+}
+EXPORT_SYMBOL_GPL(bpf_log);
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index dc23f2ac9cde..e0d3ddf2037a 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -300,8 +300,8 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
}
/* Called from syscall or from eBPF program */
-static int trie_update_elem(struct bpf_map *map,
- void *_key, void *value, u64 flags)
+static long trie_update_elem(struct bpf_map *map,
+ void *_key, void *value, u64 flags)
{
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
@@ -431,7 +431,7 @@ out:
}
/* Called from syscall or from eBPF program */
-static int trie_delete_elem(struct bpf_map *map, void *_key)
+static long trie_delete_elem(struct bpf_map *map, void *_key)
{
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct bpf_lpm_trie_key *key = _key;
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 5fcdacbb8439..410637c225fb 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -121,15 +121,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head)
return entry;
}
-static void *__alloc(struct bpf_mem_cache *c, int node)
+static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
{
- /* Allocate, but don't deplete atomic reserves that typical
- * GFP_ATOMIC would do. irq_work runs on this cpu and kmalloc
- * will allocate from the current numa node which is what we
- * want here.
- */
- gfp_t flags = GFP_NOWAIT | __GFP_NOWARN | __GFP_ACCOUNT;
-
if (c->percpu_size) {
void **obj = kmalloc_node(c->percpu_size, flags, node);
void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
@@ -185,7 +178,12 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node)
*/
obj = __llist_del_first(&c->free_by_rcu);
if (!obj) {
- obj = __alloc(c, node);
+ /* Allocate, but don't deplete atomic reserves that typical
+ * GFP_ATOMIC would do. irq_work runs on this cpu and kmalloc
+ * will allocate from the current numa node which is what we
+ * want here.
+ */
+ obj = __alloc(c, node, GFP_NOWAIT | __GFP_NOWARN | __GFP_ACCOUNT);
if (!obj)
break;
}
@@ -676,3 +674,46 @@ void notrace bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr)
unit_free(this_cpu_ptr(ma->cache), ptr);
}
+
+/* Directly does a kfree() without putting 'ptr' back to the free_llist
+ * for reuse and without waiting for an rcu_tasks_trace grace period (gp).
+ * The caller must first go through an rcu_tasks_trace gp for 'ptr'
+ * before calling bpf_mem_cache_raw_free().
+ * It could be used when the rcu_tasks_trace callback does not have
+ * a hold on the original bpf_mem_alloc object that allocated the
+ * 'ptr'. This should only be used in the uncommon code path;
+ * otherwise, the bpf_mem_alloc's free_llist cannot be refilled,
+ * which may affect performance.
+ */
+void bpf_mem_cache_raw_free(void *ptr)
+{
+ if (!ptr)
+ return;
+
+ kfree(ptr - LLIST_NODE_SZ);
+}
+
+/* When flags == GFP_KERNEL, it signals that the caller will not cause a
+ * deadlock when using kmalloc. bpf_mem_cache_alloc_flags() will use
+ * kmalloc if the free_llist is empty.
+ */
+void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
+{
+ struct bpf_mem_cache *c;
+ void *ret;
+
+ c = this_cpu_ptr(ma->cache);
+
+ ret = unit_alloc(c);
+ if (!ret && flags == GFP_KERNEL) {
+ struct mem_cgroup *memcg, *old_memcg;
+
+ memcg = get_memcg(c);
+ old_memcg = set_active_memcg(memcg);
+ ret = __alloc(c, NUMA_NO_NODE, GFP_KERNEL | __GFP_NOWARN | __GFP_ACCOUNT);
+ set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
+ }
+
+ return !ret ? NULL : ret + LLIST_NODE_SZ;
+}
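+
A hypothetical kernel-side caller sketch (the helper name and the idea of passing the allocator in are illustrative assumptions, not from this hunk): in sleepable context, passing exactly GFP_KERNEL lets an empty free_llist fall back to kmalloc, while any other flags value keeps the cache-only fast path.

static int alloc_scratch_obj(struct bpf_mem_alloc *ma, void **out)
{
	void *obj;

	/* GFP_KERNEL here means "may sleep": on an empty free_llist,
	 * bpf_mem_cache_alloc_flags() falls back to __alloc()/kmalloc
	 * instead of returning NULL
	 */
	obj = bpf_mem_cache_alloc_flags(ma, GFP_KERNEL);
	if (!obj)
		return -ENOMEM;

	*out = obj;
	return 0;
}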
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 63ecbbcb349d..601609164ef3 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -95,7 +95,7 @@ static void queue_stack_map_free(struct bpf_map *map)
bpf_map_area_free(qs);
}
-static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
+static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
{
struct bpf_queue_stack *qs = bpf_queue_stack(map);
unsigned long flags;
@@ -124,7 +124,7 @@ out:
}
-static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
+static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
{
struct bpf_queue_stack *qs = bpf_queue_stack(map);
unsigned long flags;
@@ -156,32 +156,32 @@ out:
}
/* Called from syscall or from eBPF program */
-static int queue_map_peek_elem(struct bpf_map *map, void *value)
+static long queue_map_peek_elem(struct bpf_map *map, void *value)
{
return __queue_map_get(map, value, false);
}
/* Called from syscall or from eBPF program */
-static int stack_map_peek_elem(struct bpf_map *map, void *value)
+static long stack_map_peek_elem(struct bpf_map *map, void *value)
{
return __stack_map_get(map, value, false);
}
/* Called from syscall or from eBPF program */
-static int queue_map_pop_elem(struct bpf_map *map, void *value)
+static long queue_map_pop_elem(struct bpf_map *map, void *value)
{
return __queue_map_get(map, value, true);
}
/* Called from syscall or from eBPF program */
-static int stack_map_pop_elem(struct bpf_map *map, void *value)
+static long stack_map_pop_elem(struct bpf_map *map, void *value)
{
return __stack_map_get(map, value, true);
}
/* Called from syscall or from eBPF program */
-static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
- u64 flags)
+static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
+ u64 flags)
{
struct bpf_queue_stack *qs = bpf_queue_stack(map);
unsigned long irq_flags;
@@ -227,14 +227,14 @@ static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
}
/* Called from syscall or from eBPF program */
-static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 flags)
+static long queue_stack_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
{
return -EINVAL;
}
/* Called from syscall or from eBPF program */
-static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
+static long queue_stack_map_delete_elem(struct bpf_map *map, void *key)
{
return -EINVAL;
}
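The int-to-long conversions of the map-ops callbacks in this and the surrounding files plausibly matter because BPF programs observe these return values in a full 64-bit register (this rationale is inferred from the signature change, not stated in the hunks). A userspace sketch of the sign-extension difference for -EINVAL (-22):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int ret32 = -22;	/* -EINVAL as an int */
	long ret64 = -22;	/* -EINVAL as a long */

	/* raw 64-bit register contents, as a BPF program would compare them */
	printf("%#lx\n", (unsigned long)(uint32_t)ret32);	/* 0xffffffea: not sign-extended */
	printf("%#lx\n", (unsigned long)ret64);			/* 0xffffffffffffffea */
	return 0;
}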
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 71cb72f5b733..cbf2d8d784b8 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -59,7 +59,7 @@ static void *reuseport_array_lookup_elem(struct bpf_map *map, void *key)
}
/* Called from syscall only */
-static int reuseport_array_delete_elem(struct bpf_map *map, void *key)
+static long reuseport_array_delete_elem(struct bpf_map *map, void *key)
{
struct reuseport_array *array = reuseport_array(map);
u32 index = *(u32 *)key;
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 0d2a45ff83f1..875ac9b698d9 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -242,13 +242,13 @@ static void *ringbuf_map_lookup_elem(struct bpf_map *map, void *key)
return ERR_PTR(-ENOTSUPP);
}
-static int ringbuf_map_update_elem(struct bpf_map *map, void *key, void *value,
- u64 flags)
+static long ringbuf_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 flags)
{
return -ENOTSUPP;
}
-static int ringbuf_map_delete_elem(struct bpf_map *map, void *key)
+static long ringbuf_map_delete_elem(struct bpf_map *map, void *key)
{
return -ENOTSUPP;
}
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 0f1d8dced933..b25fce425b2c 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -618,14 +618,14 @@ static int stack_map_get_next_key(struct bpf_map *map, void *key,
return 0;
}
-static int stack_map_update_elem(struct bpf_map *map, void *key, void *value,
- u64 map_flags)
+static long stack_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
{
return -EINVAL;
}
/* Called from syscall or from eBPF program */
-static int stack_map_delete_elem(struct bpf_map *map, void *key)
+static long stack_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct stack_map_bucket *old_bucket;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f406dfa13792..6d575505f89c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -520,14 +520,14 @@ static int btf_field_cmp(const void *a, const void *b)
}
struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset,
- enum btf_field_type type)
+ u32 field_mask)
{
struct btf_field *field;
- if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & type))
+ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & field_mask))
return NULL;
field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp);
- if (!field || !(field->type & type))
+ if (!field || !(field->type & field_mask))
return NULL;
return field;
}
@@ -650,6 +650,8 @@ void bpf_obj_free_timer(const struct btf_record *rec, void *obj)
bpf_timer_cancel_and_free(obj + rec->timer_off);
}
+extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);
+
void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
{
const struct btf_field *fields;
@@ -659,8 +661,10 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
return;
fields = rec->fields;
for (i = 0; i < rec->cnt; i++) {
+ struct btf_struct_meta *pointee_struct_meta;
const struct btf_field *field = &fields[i];
void *field_ptr = obj + field->offset;
+ void *xchgd_field;
switch (fields[i].type) {
case BPF_SPIN_LOCK:
@@ -672,7 +676,22 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
WRITE_ONCE(*(u64 *)field_ptr, 0);
break;
case BPF_KPTR_REF:
- field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0));
+ xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
+ if (!xchgd_field)
+ break;
+
+ if (!btf_is_kernel(field->kptr.btf)) {
+ pointee_struct_meta = btf_find_struct_meta(field->kptr.btf,
+ field->kptr.btf_id);
+ WARN_ON_ONCE(!pointee_struct_meta);
+ migrate_disable();
+ __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ?
+ pointee_struct_meta->record :
+ NULL);
+ migrate_enable();
+ } else {
+ field->kptr.dtor(xchgd_field);
+ }
break;
case BPF_LIST_HEAD:
if (WARN_ON_ONCE(rec->spin_lock_off < 0))
@@ -1287,8 +1306,10 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
return map;
}
-/* map_idr_lock should have been held */
-static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
+/* map_idr_lock should have been held, or the map should have been
+ * protected by the RCU read lock.
+ */
+struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
{
int refold;
@@ -2051,6 +2072,7 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
{
bpf_prog_kallsyms_del_all(prog);
btf_put(prog->aux->btf);
+ module_put(prog->aux->mod);
kvfree(prog->aux->jited_linfo);
kvfree(prog->aux->linfo);
kfree(prog->aux->kfunc_tab);
@@ -2479,9 +2501,9 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
}
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size
+#define BPF_PROG_LOAD_LAST_FIELD log_true_size
-static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
+static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
{
enum bpf_prog_type type = attr->prog_type;
struct bpf_prog *prog, *dst_prog = NULL;
@@ -2631,7 +2653,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
goto free_prog_sec;
/* run eBPF verifier */
- err = bpf_check(&prog, attr, uattr);
+ err = bpf_check(&prog, attr, uattr, uattr_size);
if (err < 0)
goto free_used_maps;
@@ -2806,16 +2828,19 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
const struct bpf_prog *prog = link->prog;
char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
- bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
seq_printf(m,
"link_type:\t%s\n"
- "link_id:\t%u\n"
- "prog_tag:\t%s\n"
- "prog_id:\t%u\n",
+ "link_id:\t%u\n",
bpf_link_type_strs[link->type],
- link->id,
- prog_tag,
- prog->aux->id);
+ link->id);
+ if (prog) {
+ bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
+ seq_printf(m,
+ "prog_tag:\t%s\n"
+ "prog_id:\t%u\n",
+ prog_tag,
+ prog->aux->id);
+ }
if (link->ops->show_fdinfo)
link->ops->show_fdinfo(link, m);
}
@@ -3097,6 +3122,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
if (err)
goto out_unlock;
+ if (tgt_info.tgt_mod) {
+ module_put(prog->aux->mod);
+ prog->aux->mod = tgt_info.tgt_mod;
+ }
+
tr = bpf_trampoline_get(key, &tgt_info);
if (!tr) {
err = -ENOMEM;
@@ -4290,7 +4320,8 @@ static int bpf_link_get_info_by_fd(struct file *file,
info.type = link->type;
info.id = link->id;
- info.prog_id = link->prog->aux->id;
+ if (link->prog)
+ info.prog_id = link->prog->aux->id;
if (link->ops->fill_link_info) {
err = link->ops->fill_link_info(link, &info);
@@ -4340,9 +4371,9 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
return err;
}
-#define BPF_BTF_LOAD_LAST_FIELD btf_log_level
+#define BPF_BTF_LOAD_LAST_FIELD btf_log_true_size
-static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr)
+static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
if (CHECK_ATTR(BPF_BTF_LOAD))
return -EINVAL;
@@ -4350,7 +4381,7 @@ static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr)
if (!bpf_capable())
return -EPERM;
- return btf_new_fd(attr, uattr);
+ return btf_new_fd(attr, uattr, uattr_size);
}
#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
@@ -4553,6 +4584,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
if (CHECK_ATTR(BPF_LINK_CREATE))
return -EINVAL;
+ if (attr->link_create.attach_type == BPF_STRUCT_OPS)
+ return bpf_struct_ops_link_create(attr);
+
prog = bpf_prog_get(attr->link_create.prog_fd);
if (IS_ERR(prog))
return PTR_ERR(prog);
@@ -4651,6 +4685,35 @@ out:
return ret;
}
+static int link_update_map(struct bpf_link *link, union bpf_attr *attr)
+{
+ struct bpf_map *new_map, *old_map = NULL;
+ int ret;
+
+ new_map = bpf_map_get(attr->link_update.new_map_fd);
+ if (IS_ERR(new_map))
+ return PTR_ERR(new_map);
+
+ if (attr->link_update.flags & BPF_F_REPLACE) {
+ old_map = bpf_map_get(attr->link_update.old_map_fd);
+ if (IS_ERR(old_map)) {
+ ret = PTR_ERR(old_map);
+ goto out_put;
+ }
+ } else if (attr->link_update.old_map_fd) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ ret = link->ops->update_map(link, new_map, old_map);
+
+ if (old_map)
+ bpf_map_put(old_map);
+out_put:
+ bpf_map_put(new_map);
+ return ret;
+}
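+
From userspace, link_update_map() would be reached via the BPF_LINK_UPDATE command. A hedged sketch, assuming the uapi union bpf_attr carries the new_map_fd/old_map_fd fields referenced above and that link_fd refers to a struct_ops map link:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int link_update_struct_ops(int link_fd, int new_map_fd, int old_map_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.link_update.link_fd = link_fd;
	attr.link_update.new_map_fd = new_map_fd;
	if (old_map_fd >= 0) {
		/* only replace if the link currently points at old_map */
		attr.link_update.flags = BPF_F_REPLACE;
		attr.link_update.old_map_fd = old_map_fd;
	}
	return syscall(__NR_bpf, BPF_LINK_UPDATE, &attr, sizeof(attr));
}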
+
#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
static int link_update(union bpf_attr *attr)
@@ -4671,6 +4734,11 @@ static int link_update(union bpf_attr *attr)
if (IS_ERR(link))
return PTR_ERR(link);
+ if (link->ops->update_map) {
+ ret = link_update_map(link, attr);
+ goto out_put_link;
+ }
+
new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
if (IS_ERR(new_prog)) {
ret = PTR_ERR(new_prog);
@@ -4991,7 +5059,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
err = map_freeze(&attr);
break;
case BPF_PROG_LOAD:
- err = bpf_prog_load(&attr, uattr);
+ err = bpf_prog_load(&attr, uattr, size);
break;
case BPF_OBJ_PIN:
err = bpf_obj_pin(&attr);
@@ -5036,7 +5104,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
err = bpf_raw_tracepoint_open(&attr);
break;
case BPF_BTF_LOAD:
- err = bpf_btf_load(&attr, uattr);
+ err = bpf_btf_load(&attr, uattr, size);
break;
case BPF_BTF_GET_FD_BY_ID:
err = bpf_btf_get_fd_by_id(&attr);
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index d0ed7d6f5eec..f61d5138b12b 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -9,7 +9,6 @@
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>
-#include <linux/module.h>
#include <linux/static_call.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_lsm.h>
@@ -172,26 +171,6 @@ out:
return tr;
}
-static int bpf_trampoline_module_get(struct bpf_trampoline *tr)
-{
- struct module *mod;
- int err = 0;
-
- preempt_disable();
- mod = __module_text_address((unsigned long) tr->func.addr);
- if (mod && !try_module_get(mod))
- err = -ENOENT;
- preempt_enable();
- tr->mod = mod;
- return err;
-}
-
-static void bpf_trampoline_module_put(struct bpf_trampoline *tr)
-{
- module_put(tr->mod);
- tr->mod = NULL;
-}
-
static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
void *ip = tr->func.addr;
@@ -202,8 +181,6 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
else
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
- if (!ret)
- bpf_trampoline_module_put(tr);
return ret;
}
@@ -238,9 +215,6 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
tr->func.ftrace_managed = true;
}
- if (bpf_trampoline_module_get(tr))
- return -ENOENT;
-
if (tr->func.ftrace_managed) {
ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
ret = register_ftrace_direct_multi(tr->fops, (long)new_addr);
@@ -248,8 +222,6 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
}
- if (ret)
- bpf_trampoline_module_put(tr);
return ret;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b2116ca78d9a..6e12cc296ad8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -24,6 +24,7 @@
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/poison.h>
+#include <linux/module.h>
#include "disasm.h"
@@ -302,6 +303,10 @@ struct bpf_kfunc_call_arg_meta {
enum bpf_dynptr_type type;
u32 id;
} initialized_dynptr;
+ struct {
+ u8 spi;
+ u8 frameno;
+ } iter;
u64 mem_size;
};
@@ -330,61 +335,6 @@ find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
return &linfo[i - 1];
}
-void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
- va_list args)
-{
- unsigned int n;
-
- n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
-
- WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
- "verifier log line truncated - local buffer too short\n");
-
- if (log->level == BPF_LOG_KERNEL) {
- bool newline = n > 0 && log->kbuf[n - 1] == '\n';
-
- pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
- return;
- }
-
- n = min(log->len_total - log->len_used - 1, n);
- log->kbuf[n] = '\0';
- if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
- log->len_used += n;
- else
- log->ubuf = NULL;
-}
-
-static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
-{
- char zero = 0;
-
- if (!bpf_verifier_log_needed(log))
- return;
-
- log->len_used = new_pos;
- if (put_user(zero, log->ubuf + new_pos))
- log->ubuf = NULL;
-}
-
-/* log_level controls verbosity level of eBPF verifier.
- * bpf_verifier_log_write() is used to dump the verification trace to the log,
- * so the user can figure out what's wrong with the program
- */
-__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
- const char *fmt, ...)
-{
- va_list args;
-
- if (!bpf_verifier_log_needed(&env->log))
- return;
-
- va_start(args, fmt);
- bpf_verifier_vlog(&env->log, fmt, args);
- va_end(args);
-}
-EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
-
__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
struct bpf_verifier_env *env = private_data;
@@ -398,20 +348,6 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
va_end(args);
}
-__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
- const char *fmt, ...)
-{
- va_list args;
-
- if (!bpf_verifier_log_needed(log))
- return;
-
- va_start(args, fmt);
- bpf_verifier_vlog(log, fmt, args);
- va_end(args);
-}
-EXPORT_SYMBOL_GPL(bpf_log);
-
static const char *ltrim(const char *s)
{
while (isspace(*s))
@@ -481,8 +417,17 @@ static bool type_is_sk_pointer(enum bpf_reg_type type)
type == PTR_TO_XDP_SOCK;
}
+static bool type_may_be_null(u32 type)
+{
+ return type & PTR_MAYBE_NULL;
+}
+
static bool reg_type_not_null(enum bpf_reg_type type)
{
+ if (type_may_be_null(type))
+ return false;
+
+ type = base_type(type);
return type == PTR_TO_SOCKET ||
type == PTR_TO_TCP_SOCK ||
type == PTR_TO_MAP_VALUE ||
@@ -526,11 +471,6 @@ static bool type_is_rdonly_mem(u32 type)
return type & MEM_RDONLY;
}
-static bool type_may_be_null(u32 type)
-{
- return type & PTR_MAYBE_NULL;
-}
-
static bool is_acquire_function(enum bpf_func_id func_id,
const struct bpf_map *map)
{
@@ -668,6 +608,7 @@ static char slot_type_char[] = {
[STACK_MISC] = 'm',
[STACK_ZERO] = '0',
[STACK_DYNPTR] = 'd',
+ [STACK_ITER] = 'i',
};
static void print_liveness(struct bpf_verifier_env *env,
@@ -742,7 +683,12 @@ static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *re
return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
}
-static const char *kernel_type_name(const struct btf* btf, u32 id)
+static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
+{
+ return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
+}
+
+static const char *btf_type_name(const struct btf *btf, u32 id)
{
return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
}
@@ -766,6 +712,30 @@ static const char *dynptr_type_str(enum bpf_dynptr_type type)
}
}
+static const char *iter_type_str(const struct btf *btf, u32 btf_id)
+{
+ if (!btf || btf_id == 0)
+ return "<invalid>";
+
+ /* we already validated that type is valid and has conforming name */
+ return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1;
+}
+
+static const char *iter_state_str(enum bpf_iter_state state)
+{
+ switch (state) {
+ case BPF_ITER_STATE_ACTIVE:
+ return "active";
+ case BPF_ITER_STATE_DRAINED:
+ return "drained";
+ case BPF_ITER_STATE_INVALID:
+ return "<invalid>";
+ default:
+ WARN_ONCE(1, "unknown iter state %d\n", state);
+ return "<unknown>";
+ }
+}
+
static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
{
env->scratched_regs |= 1U << regno;
@@ -1118,6 +1088,157 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg
}
}
+static void __mark_reg_known_zero(struct bpf_reg_state *reg);
+
+static int mark_stack_slots_iter(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, int insn_idx,
+ struct btf *btf, u32 btf_id, int nr_slots)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi, i, j, id;
+
+ spi = iter_get_spi(env, reg, nr_slots);
+ if (spi < 0)
+ return spi;
+
+ id = acquire_reference_state(env, insn_idx);
+ if (id < 0)
+ return id;
+
+ for (i = 0; i < nr_slots; i++) {
+ struct bpf_stack_state *slot = &state->stack[spi - i];
+ struct bpf_reg_state *st = &slot->spilled_ptr;
+
+ __mark_reg_known_zero(st);
+ st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
+ st->live |= REG_LIVE_WRITTEN;
+ st->ref_obj_id = i == 0 ? id : 0;
+ st->iter.btf = btf;
+ st->iter.btf_id = btf_id;
+ st->iter.state = BPF_ITER_STATE_ACTIVE;
+ st->iter.depth = 0;
+
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ slot->slot_type[j] = STACK_ITER;
+
+ mark_stack_slot_scratched(env, spi - i);
+ }
+
+ return 0;
+}
+
+static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, int nr_slots)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi, i, j;
+
+ spi = iter_get_spi(env, reg, nr_slots);
+ if (spi < 0)
+ return spi;
+
+ for (i = 0; i < nr_slots; i++) {
+ struct bpf_stack_state *slot = &state->stack[spi - i];
+ struct bpf_reg_state *st = &slot->spilled_ptr;
+
+ if (i == 0)
+ WARN_ON_ONCE(release_reference(env, st->ref_obj_id));
+
+ __mark_reg_not_init(env, st);
+
+ /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
+ st->live |= REG_LIVE_WRITTEN;
+
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ slot->slot_type[j] = STACK_INVALID;
+
+ mark_stack_slot_scratched(env, spi - i);
+ }
+
+ return 0;
+}
+
+static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, int nr_slots)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi, i, j;
+
+ /* For -ERANGE (i.e. spi not falling into allocated stack slots), we
+ * will do check_mem_access to check and update stack bounds later, so
+ * return true for that case.
+ */
+ spi = iter_get_spi(env, reg, nr_slots);
+ if (spi == -ERANGE)
+ return true;
+ if (spi < 0)
+ return false;
+
+ for (i = 0; i < nr_slots; i++) {
+ struct bpf_stack_state *slot = &state->stack[spi - i];
+
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ if (slot->slot_type[j] == STACK_ITER)
+ return false;
+ }
+
+ return true;
+}
+
+static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ struct btf *btf, u32 btf_id, int nr_slots)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi, i, j;
+
+ spi = iter_get_spi(env, reg, nr_slots);
+ if (spi < 0)
+ return false;
+
+ for (i = 0; i < nr_slots; i++) {
+ struct bpf_stack_state *slot = &state->stack[spi - i];
+ struct bpf_reg_state *st = &slot->spilled_ptr;
+
+ /* only main (first) slot has ref_obj_id set */
+ if (i == 0 && !st->ref_obj_id)
+ return false;
+ if (i != 0 && st->ref_obj_id)
+ return false;
+ if (st->iter.btf != btf || st->iter.btf_id != btf_id)
+ return false;
+
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ if (slot->slot_type[j] != STACK_ITER)
+ return false;
+ }
+
+ return true;
+}
+
+/* Check if given stack slot is "special":
+ * - spilled register state (STACK_SPILL);
+ * - dynptr state (STACK_DYNPTR);
+ * - iter state (STACK_ITER).
+ */
+static bool is_stack_slot_special(const struct bpf_stack_state *stack)
+{
+ enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
+
+ switch (type) {
+ case STACK_SPILL:
+ case STACK_DYNPTR:
+ case STACK_ITER:
+ return true;
+ case STACK_INVALID:
+ case STACK_MISC:
+ case STACK_ZERO:
+ return false;
+ default:
+ WARN_ONCE(1, "unknown stack slot type %d\n", type);
+ return true;
+ }
+}
+
/* The reg state of a pointer or a bounded scalar was saved when
* it was spilled to the stack.
*/
@@ -1164,7 +1285,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
verbose(env, "%s", reg_type_str(env, t));
if (base_type(t) == PTR_TO_BTF_ID)
- verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
+ verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id));
verbose(env, "(");
/*
* _a stands for append, was shortened to avoid multiline statements below.
@@ -1267,6 +1388,19 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (reg->ref_obj_id)
verbose(env, "(ref_id=%d)", reg->ref_obj_id);
break;
+ case STACK_ITER:
+ /* only main slot has ref_obj_id set; skip others */
+ reg = &state->stack[i].spilled_ptr;
+ if (!reg->ref_obj_id)
+ continue;
+
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, reg->live);
+ verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)",
+ iter_type_str(reg->iter.btf, reg->iter.btf_id),
+ reg->ref_obj_id, iter_state_str(reg->iter.state),
+ reg->iter.depth);
+ break;
case STACK_MISC:
case STACK_ZERO:
default:
@@ -1305,10 +1439,10 @@ static inline u32 vlog_alignment(u32 pos)
static void print_insn_state(struct bpf_verifier_env *env,
const struct bpf_func_state *state)
{
- if (env->prev_log_len && env->prev_log_len == env->log.len_used) {
+ if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) {
/* remove new line character */
- bpf_vlog_reset(&env->log, env->prev_log_len - 1);
- verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' ');
+ bpf_vlog_reset(&env->log, env->prev_log_pos - 1);
+ verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' ');
} else {
verbose(env, "%d:", env->insn_idx);
}
@@ -1616,7 +1750,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
elem->insn_idx = insn_idx;
elem->prev_insn_idx = prev_insn_idx;
elem->next = env->head;
- elem->log_pos = env->log.len_used;
+ elem->log_pos = env->log.end_pos;
env->head = elem;
env->stack_size++;
err = copy_verifier_state(&elem->st, cur);
@@ -1946,9 +2080,9 @@ static void __reg_bound_offset(struct bpf_reg_state *reg)
struct tnum var64_off = tnum_intersect(reg->var_off,
tnum_range(reg->umin_value,
reg->umax_value));
- struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
- tnum_range(reg->u32_min_value,
- reg->u32_max_value));
+ struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
+ tnum_range(reg->u32_min_value,
+ reg->u32_max_value));
reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
}
@@ -2152,7 +2286,7 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
elem->insn_idx = insn_idx;
elem->prev_insn_idx = prev_insn_idx;
elem->next = env->head;
- elem->log_pos = env->log.len_used;
+ elem->log_pos = env->log.end_pos;
env->head = elem;
env->stack_size++;
if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
@@ -2710,6 +2844,25 @@ static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *
state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64);
}
+static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ int spi, int nr_slots)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int err, i;
+
+ for (i = 0; i < nr_slots; i++) {
+ struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr;
+
+ err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64);
+ if (err)
+ return err;
+
+ mark_stack_slot_scratched(env, spi - i);
+ }
+
+ return 0;
+}
+
/* This function is supposed to be used by the following 32-bit optimization
* code only. It returns TRUE if the source or destination register operates
* on 64-bit, otherwise return FALSE.
@@ -3090,6 +3243,21 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
}
} else if (opcode == BPF_EXIT) {
return -ENOTSUPP;
+ } else if (BPF_SRC(insn->code) == BPF_X) {
+ if (!(*reg_mask & (dreg | sreg)))
+ return 0;
+ /* dreg <cond> sreg
+ * Both dreg and sreg need precision before
+ * this insn. If only sreg was marked precise
+ * before it would be equally necessary to
+ * propagate it to dreg.
+ */
+ *reg_mask |= (sreg | dreg);
+ /* else dreg <cond> K
+ * Only dreg still needs precision before
+ * this insn, so for the K-based conditional
+ * there is nothing new to be marked.
+ */
}
} else if (class == BPF_LD) {
if (!(*reg_mask & dreg))
@@ -3691,8 +3859,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
/* regular write of data into stack destroys any spilled ptr */
state->stack[spi].spilled_ptr.type = NOT_INIT;
- /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
- if (is_spilled_reg(&state->stack[spi]))
+ /* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
+ if (is_stack_slot_special(&state->stack[spi]))
for (i = 0; i < BPF_REG_SIZE; i++)
scrub_spilled_slot(&state->stack[spi].slot_type[i]);
@@ -4085,17 +4253,13 @@ static int check_stack_read(struct bpf_verifier_env *env,
}
/* Variable offset is prohibited for unprivileged mode for simplicity
* since it requires corresponding support in Spectre masking for stack
- * ALU. See also retrieve_ptr_limit().
+ * ALU. See also retrieve_ptr_limit(). The check in
+ * check_stack_access_for_ptr_arithmetic() called by
+ * adjust_ptr_min_max_vals() prevents users from creating stack pointers
+ * with variable offsets, therefore no check is required here. Further,
+ * just checking it here would be insufficient as speculative stack
+ * writes could still lead to unsafe speculative behaviour.
*/
- if (!env->bypass_spec_v1 && var_off) {
- char tn_buf[48];
-
- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
- ptr_regno, tn_buf);
- return -EACCES;
- }
-
if (!var_off) {
off += reg->var_off.value;
err = check_stack_read_fixed_off(env, state, off, size,
@@ -4301,7 +4465,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
struct btf_field *kptr_field,
struct bpf_reg_state *reg, u32 regno)
{
- const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
+ const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
const char *reg_name = "";
@@ -4317,7 +4481,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
return -EINVAL;
}
/* We need to verify reg->type and reg->btf, before accessing reg->btf */
- reg_name = kernel_type_name(reg->btf, reg->btf_id);
+ reg_name = btf_type_name(reg->btf, reg->btf_id);
/* For ref_ptr case, release function check should ensure we get one
* referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
@@ -4381,6 +4545,8 @@ static bool in_rcu_cs(struct bpf_verifier_env *env)
BTF_SET_START(rcu_protected_types)
BTF_ID(struct, prog_test_ref_kfunc)
BTF_ID(struct, cgroup)
+BTF_ID(struct, bpf_cpumask)
+BTF_ID(struct, task_struct)
BTF_SET_END(rcu_protected_types)
static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
@@ -4754,6 +4920,11 @@ static bool is_rcu_reg(const struct bpf_reg_state *reg)
return reg->type & MEM_RCU;
}
+static void clear_trusted_flags(enum bpf_type_flag *flag)
+{
+ *flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
+}
+
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int off, int size, bool strict)
@@ -5158,6 +5329,7 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
}
#define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu)
+#define BTF_TYPE_SAFE_RCU_OR_NULL(__type) __PASTE(__type, __safe_rcu_or_null)
#define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted)
/*
@@ -5174,18 +5346,39 @@ BTF_TYPE_SAFE_RCU(struct task_struct) {
struct task_struct *group_leader;
};
+BTF_TYPE_SAFE_RCU(struct cgroup) {
+ /* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
+ struct kernfs_node *kn;
+};
+
BTF_TYPE_SAFE_RCU(struct css_set) {
struct cgroup *dfl_cgrp;
};
+/* RCU trusted: these fields are trusted in RCU CS and can be NULL */
+BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
+ struct file __rcu *exe_file;
+};
+
+/* skb->sk, req->sk are not RCU protected, but we mark them as such
+ * because bpf prog accessible sockets are SOCK_RCU_FREE.
+ */
+BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
+ struct sock *sk;
+};
+
+BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
+ struct sock *sk;
+};
+
/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
- __bpf_md_ptr(struct seq_file *, seq);
+ struct seq_file *seq;
};
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
- __bpf_md_ptr(struct bpf_iter_meta *, meta);
- __bpf_md_ptr(struct task_struct *, task);
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
};
BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
@@ -5207,17 +5400,29 @@ BTF_TYPE_SAFE_TRUSTED(struct socket) {
static bool type_is_rcu(struct bpf_verifier_env *env,
struct bpf_reg_state *reg,
- int off)
+ const char *field_name, u32 btf_id)
{
BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
- return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_rcu");
+ return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
+}
+
+static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ const char *field_name, u32 btf_id)
+{
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
+
+ return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
}
static bool type_is_trusted(struct bpf_verifier_env *env,
struct bpf_reg_state *reg,
- int off)
+ const char *field_name, u32 btf_id)
{
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
@@ -5226,7 +5431,7 @@ static bool type_is_trusted(struct bpf_verifier_env *env,
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
- return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_trusted");
+ return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
}
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
@@ -5238,8 +5443,9 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
struct bpf_reg_state *reg = regs + regno;
const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
const char *tname = btf_name_by_offset(reg->btf, t->name_off);
+ const char *field_name = NULL;
enum bpf_type_flag flag = 0;
- u32 btf_id;
+ u32 btf_id = 0;
int ret;
if (!env->allow_ptr_leaks) {
@@ -5284,12 +5490,12 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EACCES;
}
- if (env->ops->btf_struct_access && !type_is_alloc(reg->type)) {
+ if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
if (!btf_is_kernel(reg->btf)) {
verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
return -EFAULT;
}
- ret = env->ops->btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag);
+ ret = env->ops->btf_struct_access(&env->log, reg, off, size);
} else {
/* Writes are permitted with default btf_struct_access for
* program allocated objects (which always have ref_obj_id > 0),
@@ -5306,7 +5512,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EFAULT;
}
- ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag);
+ ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
}
if (ret < 0)
@@ -5334,20 +5540,21 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
* A regular RCU-protected pointer with __rcu tag can also be deemed
* trusted if we are in an RCU CS. Such pointer can be NULL.
*/
- if (type_is_trusted(env, reg, off)) {
+ if (type_is_trusted(env, reg, field_name, btf_id)) {
flag |= PTR_TRUSTED;
} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
- if (type_is_rcu(env, reg, off)) {
+ if (type_is_rcu(env, reg, field_name, btf_id)) {
/* ignore __rcu tag and mark it MEM_RCU */
flag |= MEM_RCU;
- } else if (flag & MEM_RCU) {
+ } else if (flag & MEM_RCU ||
+ type_is_rcu_or_null(env, reg, field_name, btf_id)) {
/* __rcu tagged pointers can be NULL */
- flag |= PTR_MAYBE_NULL;
+ flag |= MEM_RCU | PTR_MAYBE_NULL;
} else if (flag & (MEM_PERCPU | MEM_USER)) {
/* keep as-is */
} else {
- /* walking unknown pointers yields untrusted pointer */
- flag = PTR_UNTRUSTED;
+ /* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
+ clear_trusted_flags(&flag);
}
} else {
/*
@@ -5361,7 +5568,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
}
} else {
/* Old compat. Deprecated */
- flag &= ~PTR_TRUSTED;
+ clear_trusted_flags(&flag);
}
if (atype == BPF_READ && value_regno >= 0)
@@ -5420,7 +5627,7 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
/* Simulate access to a PTR_TO_BTF_ID */
memset(&map_reg, 0, sizeof(map_reg));
mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
- ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag);
+ ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
if (ret < 0)
return ret;
@@ -6086,6 +6293,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
env,
regno, reg->off, access_size,
zero_size_allowed, ACCESS_HELPER, meta);
+ case PTR_TO_BTF_ID:
+ return check_ptr_to_btf_access(env, regs, regno, reg->off,
+ access_size, BPF_READ, -1);
case PTR_TO_CTX:
/* in case the function doesn't know how to access the context,
* (because we are in a program of type SYSCALL for example), we
@@ -6506,6 +6716,203 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
return err;
}
+static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
+{
+ struct bpf_func_state *state = func(env, reg);
+
+ return state->stack[spi].spilled_ptr.ref_obj_id;
+}
+
+static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
+}
+
+static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_ITER_NEW;
+}
+
+static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_ITER_NEXT;
+}
+
+static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_ITER_DESTROY;
+}
+
+static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg)
+{
+ /* btf_check_iter_kfuncs() guarantees that the first argument of any iter
+ * kfunc is an iter state pointer
+ */
+ return arg == 0 && is_iter_kfunc(meta);
+}
+
+static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ const struct btf_type *t;
+ const struct btf_param *arg;
+ int spi, err, i, nr_slots;
+ u32 btf_id;
+
+ /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */
+ arg = &btf_params(meta->func_proto)[0];
+ t = btf_type_skip_modifiers(meta->btf, arg->type, NULL); /* PTR */
+ t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id); /* STRUCT */
+ nr_slots = t->size / BPF_REG_SIZE;
+
+ if (is_iter_new_kfunc(meta)) {
+ /* bpf_iter_<type>_new() expects pointer to uninit iter state */
+ if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
+ verbose(env, "expected uninitialized iter_%s as arg #%d\n",
+ iter_type_str(meta->btf, btf_id), regno);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
+ err = check_mem_access(env, insn_idx, regno,
+ i, BPF_DW, BPF_WRITE, -1, false);
+ if (err)
+ return err;
+ }
+
+ err = mark_stack_slots_iter(env, reg, insn_idx, meta->btf, btf_id, nr_slots);
+ if (err)
+ return err;
+ } else {
+ /* iter_next() or iter_destroy() expect an initialized iter state */
+ if (!is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots)) {
+ verbose(env, "expected an initialized iter_%s as arg #%d\n",
+ iter_type_str(meta->btf, btf_id), regno);
+ return -EINVAL;
+ }
+
+ spi = iter_get_spi(env, reg, nr_slots);
+ if (spi < 0)
+ return spi;
+
+ err = mark_iter_read(env, reg, spi, nr_slots);
+ if (err)
+ return err;
+
+ /* remember meta->iter info for process_iter_next_call() */
+ meta->iter.spi = spi;
+ meta->iter.frameno = reg->frameno;
+ meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);
+
+ if (is_iter_destroy_kfunc(meta)) {
+ err = unmark_stack_slots_iter(env, reg, nr_slots);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+/* process_iter_next_call() is called when the verifier gets to an iterator's
+ * "next" method (e.g., bpf_iter_num_next() for the numbers iterator) call.
+ * We'll refer to it as just "iter_next()" in comments below.
+ *
+ * The BPF verifier relies on a crucial contract for any iter_next()
+ * implementation: it should *eventually* return NULL, and once that happens
+ * it should keep returning NULL. That is, once the iterator exhausts the
+ * elements to iterate, it should never reset or spuriously return new ones.
+ *
+ * Under the assumption of such a contract, process_iter_next_call() simulates
+ * a fork in the verifier state to validate loop logic correctness and safety
+ * without having to simulate an infinite number of iterations.
+ *
+ * In the current state, we first assume that iter_next() returned NULL and
+ * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
+ * conditions we should not form an infinite loop and should eventually reach
+ * the exit.
+ *
+ * Besides that, we also fork the current state and enqueue it for later
+ * verification. In the forked state we keep the iterator state as ACTIVE
+ * (BPF_ITER_STATE_ACTIVE) and assume a non-NULL return from iter_next(). We
+ * also bump the iteration depth to prevent erroneous infinite loop detection
+ * later on (see the iter_active_depths_differ() comment for details). In this
+ * state we assume that we'll eventually loop back to another iter_next()
+ * call (it could be at exactly the same location or at some other instruction,
+ * it doesn't matter, we don't make any unnecessary assumptions about this,
+ * everything revolves around iterator state in a stack slot, not which
+ * instruction is calling iter_next()). When that happens, we will either come
+ * to iter_next() with an equivalent state and can conclude that the next
+ * iteration will proceed in exactly the same way as we just verified, so it's
+ * safe to assume that the loop converges. If not, we'll go on with another
+ * iteration simulation using a different input state, until all possible
+ * starting states are validated or we reach the maximum-instructions limit.
+ *
+ * This way, we will either exhaustively discover all possible input states
+ * that the iterator loop can start with and eventually converge, or we'll
+ * effectively regress into bounded-loop simulation logic and either reach the
+ * maximum number of instructions if the loop is not provably convergent, or
+ * there is some statically known limit on the number of iterations (e.g., if
+ * there is an explicit `if n > 100 then break;` statement somewhere in the loop).
+ *
+ * One very subtle but very important aspect is that we *always* simulate the
+ * NULL condition first (as the current state) before we simulate the non-NULL
+ * case. This has to do with the intricacies of scalar precision tracking. By
+ * simulating the "exit condition" of iter_next() returning NULL first, we make
+ * sure all the relevant precision marks *that will be set **after** we exit
+ * the iterator loop* are propagated backwards to the common parent state of
+ * the NULL and non-NULL branches. Thanks to that, state equivalence checks
+ * done later in the forked state, when reaching iter_next() for an ACTIVE
+ * iterator, can assume that precision marks are finalized and won't change,
+ * because simulating another ACTIVE iterator iteration won't change them
+ * (given the same input states we'll end up with exactly the same output
+ * states, which we are currently comparing; and verification after the loop
+ * already propagated back what needs to be **additionally** tracked as
+ * precise). It's subtle; grok precision tracking for a more intuitive
+ * understanding.
+ */
+static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ struct bpf_verifier_state *cur_st = env->cur_state, *queued_st;
+ struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
+ struct bpf_reg_state *cur_iter, *queued_iter;
+ int iter_frameno = meta->iter.frameno;
+ int iter_spi = meta->iter.spi;
+
+ BTF_TYPE_EMIT(struct bpf_iter);
+
+ cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
+
+ if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
+ cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
+ verbose(env, "verifier internal error: unexpected iterator state %d (%s)\n",
+ cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
+ return -EFAULT;
+ }
+
+ if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
+ /* branch out active iter state */
+ queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
+ if (!queued_st)
+ return -ENOMEM;
+
+ queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
+ queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
+ queued_iter->iter.depth++;
+
+ queued_fr = queued_st->frame[queued_st->curframe];
+ mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
+ }
+
+ /* mark current iter state as DRAINED (keeping the depth unchanged) and
+ * assume iter_next() returned NULL
+ */
+ cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
+ __mark_reg_const_zero(&cur_fr->regs[BPF_REG_0]);
+
+ return 0;
+}
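+
For reference, the loop shape this simulation validates looks like the following BPF-side sketch (kfunc signatures are assumed from the registrations earlier in this patch, and struct bpf_iter_num is assumed to come from vmlinux.h):

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym;
extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym;
extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;

SEC("raw_tp/sys_enter")
int sum_to_n(const void *ctx)
{
	struct bpf_iter_num it;	/* stack slots tracked as STACK_ITER */
	int *v, sum = 0;

	bpf_iter_num_new(&it, 0, 10);
	/* the verifier forks state at each next() call:
	 * NULL (drained) vs non-NULL (still active)
	 */
	while ((v = bpf_iter_num_next(&it)))
		sum += *v;
	bpf_iter_num_destroy(&it);
	return sum;
}

char _license[] SEC("license") = "GPL";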
+
static bool arg_type_is_mem_size(enum bpf_arg_type type)
{
return type == ARG_CONST_SIZE ||
@@ -6600,6 +7007,7 @@ static const struct bpf_reg_types mem_types = {
PTR_TO_MEM,
PTR_TO_MEM | MEM_RINGBUF,
PTR_TO_BUF,
+ PTR_TO_BTF_ID | PTR_TRUSTED,
},
};
@@ -6709,6 +7117,9 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
if (arg_type & PTR_MAYBE_NULL)
type &= ~PTR_MAYBE_NULL;
+ if (meta->func_id == BPF_FUNC_kptr_xchg && type & MEM_ALLOC)
+ type &= ~MEM_ALLOC;
+
for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
expected = compatible->types[i];
if (expected == NOT_INIT)
@@ -6728,10 +7139,23 @@ found:
if (base_type(reg->type) != PTR_TO_BTF_ID)
return 0;
+ if (compatible == &mem_types) {
+ if (!(arg_type & MEM_RDONLY)) {
+ verbose(env,
+ "%s() may write into memory pointed by R%d type=%s\n",
+ func_id_name(meta->func_id),
+ regno, reg_type_str(env, reg->type));
+ return -EACCES;
+ }
+ return 0;
+ }
+
switch ((int)reg->type) {
case PTR_TO_BTF_ID:
case PTR_TO_BTF_ID | PTR_TRUSTED:
case PTR_TO_BTF_ID | MEM_RCU:
+ case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
+ case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
{
/* For bpf_sk_release, it needs to match against first member
* 'struct sock_common', hence make an exception for it. This
@@ -6740,6 +7164,12 @@ found:
bool strict_type_match = arg_type_is_release(arg_type) &&
meta->func_id != BPF_FUNC_sk_release;
+ if (type_may_be_null(reg->type) &&
+ (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
+ verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno);
+ return -EACCES;
+ }
+
if (!arg_btf_id) {
if (!compatible->btf_id) {
verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
@@ -6763,15 +7193,16 @@ found:
btf_vmlinux, *arg_btf_id,
strict_type_match)) {
verbose(env, "R%d is of type %s but %s is expected\n",
- regno, kernel_type_name(reg->btf, reg->btf_id),
- kernel_type_name(btf_vmlinux, *arg_btf_id));
+ regno, btf_type_name(reg->btf, reg->btf_id),
+ btf_type_name(btf_vmlinux, *arg_btf_id));
return -EACCES;
}
}
break;
}
case PTR_TO_BTF_ID | MEM_ALLOC:
- if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) {
+ if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
+ meta->func_id != BPF_FUNC_kptr_xchg) {
verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
return -EFAULT;
}
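/* Hedged sketch of why bpf_kptr_xchg() is now accepted here: allocated
 * objects created with bpf_obj_new() (MEM_ALLOC) may be swapped into map
 * value kptr fields. Assuming a map value declared roughly as:
 *
 *	struct node { long data; };
 *	struct map_value { struct node __kptr *node; };
 *
 * a program can do:
 *
 *	struct node *n = bpf_obj_new(typeof(*n));
 *	if (!n)
 *		return 0;
 *	n = bpf_kptr_xchg(&v->node, n);	// store new, get back old
 *	if (n)
 *		bpf_obj_drop(n);	// release the previous object
 */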
@@ -6834,7 +7265,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
verbose(env, "R%d must have zero offset when passed to release func\n",
regno);
verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno,
- kernel_type_name(reg->btf, reg->btf_id), reg->off);
+ btf_type_name(reg->btf, reg->btf_id), reg->off);
return -EINVAL;
}
@@ -8737,6 +9168,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (func_id == BPF_FUNC_kptr_xchg) {
ret_btf = meta.kptr_field->kptr.btf;
ret_btf_id = meta.kptr_field->kptr.btf_id;
+ if (!btf_is_kernel(ret_btf))
+ regs[BPF_REG_0].type |= MEM_ALLOC;
} else {
if (fn->ret_btf_id == BPF_PTR_POISON) {
verbose(env, "verifier internal error:");
@@ -8870,7 +9303,7 @@ static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
{
- return meta->kfunc_flags & KF_TRUSTED_ARGS;
+ return (meta->kfunc_flags & KF_TRUSTED_ARGS) || is_kfunc_release(meta);
}
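/* Hedged example of the effect of the change above: a kfunc flagged only
 * KF_RELEASE, e.g. a hypothetical
 *
 *	BTF_ID_FLAGS(func, bpf_foo_put, KF_RELEASE)
 *
 * now has its arguments vetted as if KF_TRUSTED_ARGS were also set, so only
 * trusted refcounted pointers may be passed to release kfuncs.
 */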
static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
@@ -9099,6 +9532,7 @@ enum kfunc_ptr_arg_type {
KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */
KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */
KF_ARG_PTR_TO_DYNPTR,
+ KF_ARG_PTR_TO_ITER,
KF_ARG_PTR_TO_LIST_HEAD,
KF_ARG_PTR_TO_LIST_NODE,
KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */
@@ -9220,6 +9654,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_DYNPTR;
+ if (is_kfunc_arg_iter(meta, argno))
+ return KF_ARG_PTR_TO_ITER;
+
if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_LIST_HEAD;
@@ -9848,6 +10285,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
break;
case KF_ARG_PTR_TO_KPTR:
case KF_ARG_PTR_TO_DYNPTR:
+ case KF_ARG_PTR_TO_ITER:
case KF_ARG_PTR_TO_LIST_HEAD:
case KF_ARG_PTR_TO_LIST_NODE:
case KF_ARG_PTR_TO_RB_ROOT:
@@ -9944,6 +10382,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
break;
}
+ case KF_ARG_PTR_TO_ITER:
+ ret = process_iter_arg(env, regno, insn_idx, meta);
+ if (ret < 0)
+ return ret;
+ break;
case KF_ARG_PTR_TO_LIST_HEAD:
if (reg->type != PTR_TO_MAP_VALUE &&
reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
@@ -10079,24 +10522,21 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return 0;
}
-static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
- int *insn_idx_p)
+static int fetch_kfunc_meta(struct bpf_verifier_env *env,
+ struct bpf_insn *insn,
+ struct bpf_kfunc_call_arg_meta *meta,
+ const char **kfunc_name)
{
- const struct btf_type *t, *func, *func_proto, *ptr_type;
- u32 i, nargs, func_id, ptr_type_id, release_ref_obj_id;
- struct bpf_reg_state *regs = cur_regs(env);
- const char *func_name, *ptr_type_name;
- bool sleepable, rcu_lock, rcu_unlock;
- struct bpf_kfunc_call_arg_meta meta;
- int err, insn_idx = *insn_idx_p;
- const struct btf_param *args;
- const struct btf_type *ret_t;
+ const struct btf_type *func, *func_proto;
+ u32 func_id, *kfunc_flags;
+ const char *func_name;
struct btf *desc_btf;
- u32 *kfunc_flags;
- /* skip for now, but return error when we find this in fixup_kfunc_call */
+ if (kfunc_name)
+ *kfunc_name = NULL;
+
if (!insn->imm)
- return 0;
+ return -EINVAL;
desc_btf = find_kfunc_desc_btf(env, insn->off);
if (IS_ERR(desc_btf))
@@ -10105,22 +10545,53 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
func_id = insn->imm;
func = btf_type_by_id(desc_btf, func_id);
func_name = btf_name_by_offset(desc_btf, func->name_off);
+ if (kfunc_name)
+ *kfunc_name = func_name;
func_proto = btf_type_by_id(desc_btf, func->type);
kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id);
if (!kfunc_flags) {
- verbose(env, "calling kernel function %s is not allowed\n",
- func_name);
return -EACCES;
}
- /* Prepare kfunc call metadata */
- memset(&meta, 0, sizeof(meta));
- meta.btf = desc_btf;
- meta.func_id = func_id;
- meta.kfunc_flags = *kfunc_flags;
- meta.func_proto = func_proto;
- meta.func_name = func_name;
+ memset(meta, 0, sizeof(*meta));
+ meta->btf = desc_btf;
+ meta->func_id = func_id;
+ meta->kfunc_flags = *kfunc_flags;
+ meta->func_proto = func_proto;
+ meta->func_name = func_name;
+
+ return 0;
+}
+
+static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ int *insn_idx_p)
+{
+ const struct btf_type *t, *ptr_type;
+ u32 i, nargs, ptr_type_id, release_ref_obj_id;
+ struct bpf_reg_state *regs = cur_regs(env);
+ const char *func_name, *ptr_type_name;
+ bool sleepable, rcu_lock, rcu_unlock;
+ struct bpf_kfunc_call_arg_meta meta;
+ struct bpf_insn_aux_data *insn_aux;
+ int err, insn_idx = *insn_idx_p;
+ const struct btf_param *args;
+ const struct btf_type *ret_t;
+ struct btf *desc_btf;
+
+ /* skip for now, but return error when we find this in fixup_kfunc_call */
+ if (!insn->imm)
+ return 0;
+
+ err = fetch_kfunc_meta(env, insn, &meta, &func_name);
+ if (err == -EACCES && func_name)
+ verbose(env, "calling kernel function %s is not allowed\n", func_name);
+ if (err)
+ return err;
+ desc_btf = meta.btf;
+ insn_aux = &env->insn_aux_data[insn_idx];
+
+ insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
@@ -10173,7 +10644,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
err = release_reference(env, regs[meta.release_regno].ref_obj_id);
if (err) {
verbose(env, "kfunc %s#%d reference has not been acquired before\n",
- func_name, func_id);
+ func_name, meta.func_id);
return err;
}
}
@@ -10185,14 +10656,14 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
err = ref_convert_owning_non_owning(env, release_ref_obj_id);
if (err) {
verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
- func_name, func_id);
+ func_name, meta.func_id);
return err;
}
err = release_reference(env, release_ref_obj_id);
if (err) {
verbose(env, "kfunc %s#%d reference has not been acquired before\n",
- func_name, func_id);
+ func_name, meta.func_id);
return err;
}
}
@@ -10202,7 +10673,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
set_rbtree_add_callback_state);
if (err) {
verbose(env, "kfunc %s#%d failed callback verification\n",
- func_name, func_id);
+ func_name, meta.func_id);
return err;
}
}
@@ -10211,7 +10682,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
mark_reg_not_init(env, regs, caller_saved[i]);
/* Check return type */
- t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
+ t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
/* Only exception is bpf_obj_new_impl */
@@ -10260,13 +10731,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
regs[BPF_REG_0].btf = ret_btf;
regs[BPF_REG_0].btf_id = ret_btf_id;
- env->insn_aux_data[insn_idx].obj_new_size = ret_t->size;
- env->insn_aux_data[insn_idx].kptr_struct_meta =
+ insn_aux->obj_new_size = ret_t->size;
+ insn_aux->kptr_struct_meta =
btf_find_struct_meta(ret_btf, ret_btf_id);
- } else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
- env->insn_aux_data[insn_idx].kptr_struct_meta =
- btf_find_struct_meta(meta.arg_obj_drop.btf,
- meta.arg_obj_drop.btf_id);
} else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
struct btf_field *field = meta.arg_list_head.field;
@@ -10395,10 +10862,18 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
regs[BPF_REG_0].id = ++env->id_gen;
- } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
+ } else if (btf_type_is_void(t)) {
+ if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
+ if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+ insn_aux->kptr_struct_meta =
+ btf_find_struct_meta(meta.arg_obj_drop.btf,
+ meta.arg_obj_drop.btf_id);
+ }
+ }
+ }
- nargs = btf_type_vlen(func_proto);
- args = (const struct btf_param *)(func_proto + 1);
+ nargs = btf_type_vlen(meta.func_proto);
+ args = (const struct btf_param *)(meta.func_proto + 1);
for (i = 0; i < nargs; i++) {
u32 regno = i + 1;
@@ -10410,6 +10885,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
mark_btf_func_reg_size(env, regno, t->size);
}
+ if (is_iter_next_kfunc(&meta)) {
+ err = process_iter_next_call(env, insn_idx, &meta);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -12116,10 +12597,14 @@ static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
case BPF_JEQ:
if (tnum_is_const(subreg))
return !!tnum_equals_const(subreg, val);
+ else if (val < reg->u32_min_value || val > reg->u32_max_value)
+ return 0;
break;
case BPF_JNE:
if (tnum_is_const(subreg))
return !tnum_equals_const(subreg, val);
+ else if (val < reg->u32_min_value || val > reg->u32_max_value)
+ return 1;
break;
case BPF_JSET:
if ((~subreg.mask & subreg.value) & val)
@@ -12189,10 +12674,14 @@ static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
case BPF_JEQ:
if (tnum_is_const(reg->var_off))
return !!tnum_equals_const(reg->var_off, val);
+ else if (val < reg->umin_value || val > reg->umax_value)
+ return 0;
break;
case BPF_JNE:
if (tnum_is_const(reg->var_off))
return !tnum_equals_const(reg->var_off, val);
+ else if (val < reg->umin_value || val > reg->umax_value)
+ return 1;
break;
case BPF_JSET:
if ((~reg->var_off.mask & reg->var_off.value) & val)
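/* Worked example of the new range-based pruning above: suppose all the
 * verifier knows about r0 is 1 <= r0 <= 10 (no constant tnum). Then:
 *
 *	if r0 == 100 goto l1	// 100 > umax_value: predicted never taken
 *	if r0 != 100 goto l2	// 100 > umax_value: predicted always taken
 *
 * so the dead branch can be pruned even without a constant value.
 */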
@@ -12813,6 +13302,18 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
src_reg->var_off.value,
opcode,
is_jmp32);
+ } else if (dst_reg->type == SCALAR_VALUE &&
+ is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off))) {
+ pred = is_branch_taken(src_reg,
+ tnum_subreg(dst_reg->var_off).value,
+ flip_opcode(opcode),
+ is_jmp32);
+ } else if (dst_reg->type == SCALAR_VALUE &&
+ !is_jmp32 && tnum_is_const(dst_reg->var_off)) {
+ pred = is_branch_taken(src_reg,
+ dst_reg->var_off.value,
+ flip_opcode(opcode),
+ is_jmp32);
} else if (reg_is_pkt_pointer_any(dst_reg) &&
reg_is_pkt_pointer_any(src_reg) &&
!is_jmp32) {
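/* Sketch of the symmetric case handled above: when the dst register holds
 * the constant and the src register carries the range, e.g.
 *
 *	r1 = 100
 *	if r1 > r0 goto l1	// constant on the dst side
 *
 * flip_opcode() rewrites the question as "r0 < 100" so the same
 * is_branch_taken() range logic is reused.
 */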
@@ -13407,6 +13908,17 @@ static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
return env->insn_aux_data[insn_idx].prune_point;
}
+static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
+{
+ env->insn_aux_data[idx].force_checkpoint = true;
+}
+
+static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
+{
+ return env->insn_aux_data[insn_idx].force_checkpoint;
+}
+
enum {
DONE_EXPLORING = 0,
KEEP_EXPLORING = 1,
@@ -13522,6 +14034,26 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
* async state will be pushed for further exploration.
*/
mark_prune_point(env, t);
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ struct bpf_kfunc_call_arg_meta meta;
+
+ ret = fetch_kfunc_meta(env, insn, &meta, NULL);
+ if (ret == 0 && is_iter_next_kfunc(&meta)) {
+ mark_prune_point(env, t);
+ /* Checking and saving state checkpoints at iter_next() call
+ * is crucial for fast convergence of open-coded iterator loop
+ * logic, so we need to force it. If we don't do that,
+ * is_state_visited() might skip saving a checkpoint, causing
+ * unnecessarily long sequence of not checkpointed
+ * instructions and jumps, leading to exhaustion of jump
+ * history buffer, and potentially other undesired outcomes.
+ * It is expected that with correct open-coded iterators
+ * convergence will happen quickly, so we don't run a risk of
+ * exhausting memory.
+ */
+ mark_force_checkpoint(env, t);
+ }
+ }
return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
case BPF_JA:
@@ -14275,6 +14807,8 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
* didn't use them
*/
for (i = 0; i < old->allocated_stack; i++) {
+ struct bpf_reg_state *old_reg, *cur_reg;
+
spi = i / BPF_REG_SIZE;
if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
@@ -14331,9 +14865,6 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
return false;
break;
case STACK_DYNPTR:
- {
- const struct bpf_reg_state *old_reg, *cur_reg;
-
old_reg = &old->stack[spi].spilled_ptr;
cur_reg = &cur->stack[spi].spilled_ptr;
if (old_reg->dynptr.type != cur_reg->dynptr.type ||
@@ -14341,7 +14872,22 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
return false;
break;
- }
+ case STACK_ITER:
+ old_reg = &old->stack[spi].spilled_ptr;
+ cur_reg = &cur->stack[spi].spilled_ptr;
+ /* iter.depth is not compared between states as it
+ * doesn't matter for correctness and would otherwise
+ * prevent convergence; we maintain it only to prevent
+ * infinite loop check triggering, see
+ * iter_active_depths_differ()
+ */
+ if (old_reg->iter.btf != cur_reg->iter.btf ||
+ old_reg->iter.btf_id != cur_reg->iter.btf_id ||
+ old_reg->iter.state != cur_reg->iter.state ||
+ /* ignore {old_reg,cur_reg}->iter.depth, see above */
+ !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
+ return false;
+ break;
case STACK_MISC:
case STACK_ZERO:
case STACK_INVALID:
@@ -14555,10 +15101,11 @@ static int propagate_precision(struct bpf_verifier_env *env,
state_reg = state->regs;
for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
if (state_reg->type != SCALAR_VALUE ||
- !state_reg->precise)
+ !state_reg->precise ||
+ !(state_reg->live & REG_LIVE_READ))
continue;
if (env->log.level & BPF_LOG_LEVEL2)
- verbose(env, "frame %d: propagating r%d\n", i, fr);
+ verbose(env, "frame %d: propagating r%d\n", fr, i);
err = mark_chain_precision_frame(env, fr, i);
if (err < 0)
return err;
@@ -14569,11 +15116,12 @@ static int propagate_precision(struct bpf_verifier_env *env,
continue;
state_reg = &state->stack[i].spilled_ptr;
if (state_reg->type != SCALAR_VALUE ||
- !state_reg->precise)
+ !state_reg->precise ||
+ !(state_reg->live & REG_LIVE_READ))
continue;
if (env->log.level & BPF_LOG_LEVEL2)
verbose(env, "frame %d: propagating fp%d\n",
- (-i - 1) * BPF_REG_SIZE, fr);
+ fr, (-i - 1) * BPF_REG_SIZE);
err = mark_chain_precision_stack_frame(env, fr, i);
if (err < 0)
return err;
@@ -14600,6 +15148,92 @@ static bool states_maybe_looping(struct bpf_verifier_state *old,
return true;
}
+static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
+{
+ return env->insn_aux_data[insn_idx].is_iter_next;
+}
+
+/* is_state_visited() handles iter_next() (see process_iter_next_call() for
+ * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
+ * states to match, which otherwise would look like an infinite loop. So while
+ * iter_next() calls are taken care of, we still need to be careful and
+ * prevent an erroneous and too-eager declaration of "infinite loop" when
+ * iterators are involved.
+ *
+ * Here's a situation in pseudo-BPF assembly form:
+ *
+ * 0: again: ; set up iter_next() call args
+ * 1: r1 = &it ; <CHECKPOINT HERE>
+ * 2: call bpf_iter_num_next ; this is iter_next() call
+ * 3: if r0 == 0 goto done
+ * 4: ... something useful here ...
+ * 5: goto again ; another iteration
+ * 6: done:
+ * 7: r1 = &it
+ * 8: call bpf_iter_num_destroy ; clean up iter state
+ * 9: exit
+ *
+ * This is a typical loop. Let's assume that we have a prune point at 1:,
+ * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
+ * again`, assuming other heuristics don't get in the way).
+ *
+ * When we come to 1: for the first time, let's say we have some state X. We
+ * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
+ * Now we come back to validate that forked ACTIVE state. We proceed through
+ * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
+ * are converging. But the problem is that we don't know that yet, as this
+ * convergence has to happen at iter_next() call site only. So if nothing is
+ * done, at 1: verifier will use bounded loop logic and declare infinite
+ * looping (and would be *technically* correct, if not for iterator's
+ * "eventual sticky NULL" contract, see process_iter_next_call()). But we
+ * don't want that. So when we go on another ACTIVE iteration in
+ * process_iter_next_call(), we bump slot->iter.depth to mark that it's
+ * a different iteration. So when we suspect an infinite loop, we additionally
+ * check if any of the *ACTIVE* iterator states depths differ. If yes, we
+ * pretend we are not looping and wait for next iter_next() call.
+ *
+ * This only applies to ACTIVE state. In DRAINED state we don't expect to
+ * loop, because that would actually mean infinite loop, as DRAINED state is
+ * "sticky", and so we'll keep returning into the same instruction with the
+ * same state (at least in one of possible code paths).
+ *
+ * This approach allows us to keep the infinite loop heuristic even in the
+ * face of an active iterator. E.g., the C snippet below is (and will be)
+ * detected as infinitely looping:
+ *
+ * struct bpf_iter_num it;
+ * int *p, x;
+ *
+ * bpf_iter_num_new(&it, 0, 10);
+ * while ((p = bpf_iter_num_next(&it))) {
+ * x = *p;
+ * while (x--) {} // <<-- infinite loop here
+ * }
+ *
+ */
+static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
+{
+ struct bpf_reg_state *slot, *cur_slot;
+ struct bpf_func_state *state;
+ int i, fr;
+
+ for (fr = old->curframe; fr >= 0; fr--) {
+ state = old->frame[fr];
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ if (state->stack[i].slot_type[0] != STACK_ITER)
+ continue;
+
+ slot = &state->stack[i].spilled_ptr;
+ if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
+ continue;
+
+ cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
+ if (cur_slot->iter.depth != slot->iter.depth)
+ return true;
+ }
+ }
+ return false;
+}
+
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
@@ -14607,7 +15241,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
struct bpf_verifier_state_list *sl, **pprev;
struct bpf_verifier_state *cur = env->cur_state, *new;
int i, j, err, states_cnt = 0;
- bool add_new_state = env->test_state_freq ? true : false;
+ bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
+ bool add_new_state = force_new_state;
/* bpf progs typically have pruning point every 4 instructions
* http://vger.kernel.org/bpfconf2019.html#session-1
@@ -14647,8 +15282,46 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
* Since the verifier still needs to catch infinite loops
* inside async callbacks.
*/
- } else if (states_maybe_looping(&sl->state, cur) &&
- states_equal(env, &sl->state, cur)) {
+ goto skip_inf_loop_check;
+ }
+ /* Loop detection for BPF open-coded iterators is special.
+ * states_maybe_looping() logic is too simplistic in detecting
+ * states that *might* be equivalent, because it doesn't know
+ * about ID remapping, so don't even perform it.
+ * See process_iter_next_call() and iter_active_depths_differ()
+ * for overview of the logic. When current and one of parent
+ * states are detected as equivalent, it's a good thing: we prove
+ * convergence and can stop simulating further iterations.
+ * It's safe to assume that the iterator loop will finish, taking into
+ * account the iter_next() contract of eventually returning a
+ * sticky NULL result.
+ */
+ if (is_iter_next_insn(env, insn_idx)) {
+ if (states_equal(env, &sl->state, cur)) {
+ struct bpf_func_state *cur_frame;
+ struct bpf_reg_state *iter_state, *iter_reg;
+ int spi;
+
+ cur_frame = cur->frame[cur->curframe];
+ /* btf_check_iter_kfuncs() enforces that
+ * iter state pointer is always the first arg
+ */
+ iter_reg = &cur_frame->regs[BPF_REG_1];
+ /* current state is valid due to states_equal(),
+ * so we can assume valid iter and reg state,
+ * no need for extra (re-)validations
+ */
+ spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
+ iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
+ if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE)
+ goto hit;
+ }
+ goto skip_inf_loop_check;
+ }
+ /* attempt to detect infinite loop to avoid unnecessary doomed work */
+ if (states_maybe_looping(&sl->state, cur) &&
+ states_equal(env, &sl->state, cur) &&
+ !iter_active_depths_differ(&sl->state, cur)) {
verbose_linfo(env, insn_idx, "; ");
verbose(env, "infinite loop detected at insn %d\n", insn_idx);
return -EINVAL;
@@ -14665,13 +15338,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
* This threshold shouldn't be too high either, since states
* at the end of the loop are likely to be useful in pruning.
*/
- if (!env->test_state_freq &&
+skip_inf_loop_check:
+ if (!force_new_state &&
env->jmps_processed - env->prev_jmps_processed < 20 &&
env->insn_processed - env->prev_insn_processed < 100)
add_new_state = false;
goto miss;
}
if (states_equal(env, &sl->state, cur)) {
+hit:
sl->hit_cnt++;
/* reached equivalent register/stack state,
* prune the search.
@@ -14978,11 +15653,11 @@ static int do_check(struct bpf_verifier_env *env)
print_insn_state(env, state->frame[state->curframe]);
verbose_linfo(env, env->insn_idx, "; ");
- env->prev_log_len = env->log.len_used;
+ env->prev_log_pos = env->log.end_pos;
verbose(env, "%d: ", env->insn_idx);
print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
- env->prev_insn_print_len = env->log.len_used - env->prev_log_len;
- env->prev_log_len = env->log.len_used;
+ env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
+ env->prev_log_pos = env->log.end_pos;
}
if (bpf_prog_is_offloaded(env->prog->aux)) {
@@ -15301,8 +15976,8 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
goto err_put;
}
- if (!btf_type_is_var(t)) {
- verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
+ if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
+ verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
err = -EINVAL;
goto err_put;
}
@@ -15315,6 +15990,14 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
err = -ENOENT;
goto err_put;
}
+ insn[0].imm = (u32)addr;
+ insn[1].imm = addr >> 32;
+
+ if (btf_type_is_func(t)) {
+ aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
+ aux->btf_var.mem_size = 0;
+ goto check_btf;
+ }
datasec_id = find_btf_percpu_datasec(btf);
if (datasec_id > 0) {
@@ -15327,9 +16010,6 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
}
}
- insn[0].imm = (u32)addr;
- insn[1].imm = addr >> 32;
-
type = t->type;
t = btf_type_skip_modifiers(btf, type, NULL);
if (percpu) {
@@ -15357,7 +16037,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
aux->btf_var.btf = btf;
aux->btf_var.btf_id = type;
}
-
+check_btf:
/* check whether we recorded this BTF (and maybe module) already */
for (i = 0; i < env->used_btf_cnt; i++) {
if (env->used_btfs[i].btf == btf) {
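/* Hedged usage sketch for the KIND_FUNC ldimm64 support added above: a
 * program may now take the address of a kernel function declared as a weak
 * ksym and branch on its existence, e.g.
 *
 *	void bpf_rcu_read_lock(void) __ksym __weak;
 *
 *	if (bpf_rcu_read_lock)		// ldimm64 of a KIND_FUNC btf_id,
 *		bpf_rcu_read_lock();	// typed PTR_TO_MEM | MEM_RDONLY
 */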
@@ -17032,21 +17712,21 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
(void *(*)(struct bpf_map *map, void *key))NULL));
BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
- (int (*)(struct bpf_map *map, void *key))NULL));
+ (long (*)(struct bpf_map *map, void *key))NULL));
BUILD_BUG_ON(!__same_type(ops->map_update_elem,
- (int (*)(struct bpf_map *map, void *key, void *value,
+ (long (*)(struct bpf_map *map, void *key, void *value,
u64 flags))NULL));
BUILD_BUG_ON(!__same_type(ops->map_push_elem,
- (int (*)(struct bpf_map *map, void *value,
+ (long (*)(struct bpf_map *map, void *value,
u64 flags))NULL));
BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
- (int (*)(struct bpf_map *map, void *value))NULL));
+ (long (*)(struct bpf_map *map, void *value))NULL));
BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
- (int (*)(struct bpf_map *map, void *value))NULL));
+ (long (*)(struct bpf_map *map, void *value))NULL));
BUILD_BUG_ON(!__same_type(ops->map_redirect,
- (int (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
+ (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
- (int (*)(struct bpf_map *map,
+ (long (*)(struct bpf_map *map,
bpf_callback_t callback_fn,
void *callback_ctx,
u64 flags))NULL));
@@ -17654,6 +18334,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
const char *tname;
struct btf *btf;
long addr = 0;
+ struct module *mod = NULL;
if (!btf_id) {
bpf_log(log, "Tracing programs must provide btf_id\n");
@@ -17827,8 +18508,17 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
else
addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
} else {
- addr = kallsyms_lookup_name(tname);
+ if (btf_is_module(btf)) {
+ mod = btf_try_get_module(btf);
+ if (mod)
+ addr = find_kallsyms_symbol_value(mod, tname);
+ else
+ addr = 0;
+ } else {
+ addr = kallsyms_lookup_name(tname);
+ }
if (!addr) {
+ module_put(mod);
bpf_log(log,
"The address of function %s cannot be found\n",
tname);
@@ -17868,11 +18558,13 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
break;
}
if (ret) {
+ module_put(mod);
bpf_log(log, "%s is not sleepable\n", tname);
return ret;
}
} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
if (tgt_prog) {
+ module_put(mod);
bpf_log(log, "can't modify return codes of BPF programs\n");
return -EINVAL;
}
@@ -17881,6 +18573,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
!check_attach_modify_return(addr, tname))
ret = 0;
if (ret) {
+ module_put(mod);
bpf_log(log, "%s() is not modifiable\n", tname);
return ret;
}
@@ -17891,6 +18584,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
tgt_info->tgt_addr = addr;
tgt_info->tgt_name = tname;
tgt_info->tgt_type = t;
+ tgt_info->tgt_mod = mod;
return 0;
}
@@ -17970,6 +18664,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
/* store info about the attachment target that will be used later */
prog->aux->attach_func_proto = tgt_info.tgt_type;
prog->aux->attach_func_name = tgt_info.tgt_name;
+ prog->aux->mod = tgt_info.tgt_mod;
if (tgt_prog) {
prog->aux->saved_dst_prog_type = tgt_prog->type;
@@ -18014,12 +18709,12 @@ struct btf *bpf_get_btf_vmlinux(void)
return btf_vmlinux;
}
-int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
+int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
u64 start_time = ktime_get_ns();
struct bpf_verifier_env *env;
- struct bpf_verifier_log *log;
- int i, len, ret = -EINVAL;
+ int i, len, ret = -EINVAL, err;
+ u32 log_true_size;
bool is_priv;
/* no program is valid */
@@ -18032,7 +18727,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
if (!env)
return -ENOMEM;
- log = &env->log;
len = (*prog)->len;
env->insn_aux_data =
@@ -18053,20 +18747,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
if (!is_priv)
mutex_lock(&bpf_verifier_lock);
- if (attr->log_level || attr->log_buf || attr->log_size) {
- /* user requested verbose verifier output
- * and supplied buffer to store the verification trace
- */
- log->level = attr->log_level;
- log->ubuf = (char __user *) (unsigned long) attr->log_buf;
- log->len_total = attr->log_size;
-
- /* log attributes have to be sane */
- if (!bpf_verifier_log_attr_valid(log)) {
- ret = -EINVAL;
- goto err_unlock;
- }
- }
+ /* user could have requested verbose verifier output
+ * and supplied buffer to store the verification trace
+ */
+ ret = bpf_vlog_init(&env->log, attr->log_level,
+ (char __user *) (unsigned long) attr->log_buf,
+ attr->log_size);
+ if (ret)
+ goto err_unlock;
mark_verifier_state_clean(env);
@@ -18180,9 +18868,14 @@ skip_full_check:
print_verification_stats(env);
env->prog->aux->verified_insns = env->insn_processed;
- if (log->level && bpf_verifier_log_full(log))
- ret = -ENOSPC;
- if (log->level && !log->ubuf) {
+ /* preserve original error even if log finalization is successful */
+ err = bpf_vlog_finalize(&env->log, &log_true_size);
+ if (err)
+ ret = err;
+
+ if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
+ copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
+ &log_true_size, sizeof(log_true_size))) {
ret = -EFAULT;
goto err_release_maps;
}
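/* Hedged userspace sketch of the new log_true_size contract: after a load
 * attempt the kernel reports how big the log buffer needed to be, whether
 * or not verification succeeded:
 *
 *	union bpf_attr attr = {};
 *	// ... fill insns, insn_cnt, license, prog_type ...
 *	attr.log_level = 1;
 *	attr.log_buf = (__u64)(unsigned long)buf;
 *	attr.log_size = sizeof(buf);
 *	fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *	// -ENOSPC means buf was too small; attr.log_true_size now holds
 *	// the size that would have fit the full log.
 */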
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 935e8121b21e..8a5294f4ce72 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1465,8 +1465,18 @@ static struct cgroup *current_cgns_cgroup_dfl(void)
{
struct css_set *cset;
- cset = current->nsproxy->cgroup_ns->root_cset;
- return __cset_cgroup_from_root(cset, &cgrp_dfl_root);
+ if (current->nsproxy) {
+ cset = current->nsproxy->cgroup_ns->root_cset;
+ return __cset_cgroup_from_root(cset, &cgrp_dfl_root);
+ } else {
+ /*
+ * NOTE: This function may be called from bpf_cgroup_from_id()
+ * on a task which has already passed exit_task_namespaces() and
+ * nsproxy == NULL. Fall back to cgrp_dfl_root which will make all
+ * cgroups visible for lookups.
+ */
+ return &cgrp_dfl_root.cgrp;
+ }
}
/* look up cgroup associated with given css_set on the specified hierarchy */
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 636f1c682ac0..505d86b16642 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1513,7 +1513,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
spin_unlock_irq(&callback_lock);
if (adding || deleting)
- update_tasks_cpumask(parent, tmp->new_cpus);
+ update_tasks_cpumask(parent, tmp->addmask);
/*
* Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary.
@@ -1770,10 +1770,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
/*
* Use the cpumasks in trialcs for tmpmasks when they are pointers
* to allocated cpumasks.
+ *
+ * Note that update_parent_subparts_cpumask() uses only addmask &
+ * delmask, but not new_cpus.
*/
tmp.addmask = trialcs->subparts_cpus;
tmp.delmask = trialcs->effective_cpus;
- tmp.new_cpus = trialcs->cpus_allowed;
+ tmp.new_cpus = NULL;
#endif
retval = validate_change(cs, trialcs);
@@ -1838,6 +1841,11 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
}
spin_unlock_irq(&callback_lock);
+#ifdef CONFIG_CPUMASK_OFFSTACK
+ /* Now trialcs->cpus_allowed is available */
+ tmp.new_cpus = trialcs->cpus_allowed;
+#endif
+
/* effective_cpus will be updated here */
update_cpumasks_hier(cs, &tmp, false);
@@ -2445,6 +2453,20 @@ static int fmeter_getrate(struct fmeter *fmp)
static struct cpuset *cpuset_attach_old_cs;
+/*
+ * Check to see if a cpuset can accept a new task
+ * For v1, cpus_allowed and mems_allowed can't be empty.
+ * For v2, effective_cpus can't be empty.
+ * Note that in v1, effective_cpus = cpus_allowed.
+ */
+static int cpuset_can_attach_check(struct cpuset *cs)
+{
+ if (cpumask_empty(cs->effective_cpus) ||
+ (!is_in_v2_mode() && nodes_empty(cs->mems_allowed)))
+ return -ENOSPC;
+ return 0;
+}
+
/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
static int cpuset_can_attach(struct cgroup_taskset *tset)
{
@@ -2459,16 +2481,9 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
percpu_down_write(&cpuset_rwsem);
- /* allow moving tasks into an empty cpuset if on default hierarchy */
- ret = -ENOSPC;
- if (!is_in_v2_mode() &&
- (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
- goto out_unlock;
-
- /*
- * Task cannot be moved to a cpuset with empty effective cpus.
- */
- if (cpumask_empty(cs->effective_cpus))
+ /* Check to see if task is allowed in the cpuset */
+ ret = cpuset_can_attach_check(cs);
+ if (ret)
goto out_unlock;
cgroup_taskset_for_each(task, css, tset) {
@@ -2485,7 +2500,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
* changes which zero cpus/mems_allowed.
*/
cs->attach_in_progress++;
- ret = 0;
out_unlock:
percpu_up_write(&cpuset_rwsem);
return ret;
@@ -2494,25 +2508,47 @@ out_unlock:
static void cpuset_cancel_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
+ struct cpuset *cs;
cgroup_taskset_first(tset, &css);
+ cs = css_cs(css);
percpu_down_write(&cpuset_rwsem);
- css_cs(css)->attach_in_progress--;
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem);
}
/*
- * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
+ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
* but we can't allocate it dynamically there. Define it global and
* allocate from cpuset_init().
*/
static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_to;
+
+static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
+{
+ percpu_rwsem_assert_held(&cpuset_rwsem);
+
+ if (cs != &top_cpuset)
+ guarantee_online_cpus(task, cpus_attach);
+ else
+ cpumask_andnot(cpus_attach, task_cpu_possible_mask(task),
+ cs->subparts_cpus);
+ /*
+ * can_attach beforehand should guarantee that this doesn't
+ * fail. TODO: have a better way to handle failure here
+ */
+ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+
+ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+ cpuset_update_task_spread_flags(cs, task);
+}
static void cpuset_attach(struct cgroup_taskset *tset)
{
- /* static buf protected by cpuset_rwsem */
- static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
struct task_struct *leader;
struct cgroup_subsys_state *css;
@@ -2543,20 +2579,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
- cgroup_taskset_for_each(task, css, tset) {
- if (cs != &top_cpuset)
- guarantee_online_cpus(task, cpus_attach);
- else
- cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
- /*
- * can_attach beforehand should guarantee that this doesn't
- * fail. TODO: have a better way to handle failure here
- */
- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
-
- cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
- cpuset_update_task_spread_flags(cs, task);
- }
+ cgroup_taskset_for_each(task, css, tset)
+ cpuset_attach_task(cs, task);
/*
* Change mm for all threadgroup leaders. This is expensive and may
@@ -3248,17 +3272,101 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
}
/*
+ * In case the child is cloned into a cpuset different from its parent,
+ * additional checks are done to see if the move is allowed.
+ */
+static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
+{
+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
+ bool same_cs;
+ int ret;
+
+ rcu_read_lock();
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs)
+ return 0;
+
+ lockdep_assert_held(&cgroup_mutex);
+ percpu_down_write(&cpuset_rwsem);
+
+ /* Check to see if task is allowed in the cpuset */
+ ret = cpuset_can_attach_check(cs);
+ if (ret)
+ goto out_unlock;
+
+ ret = task_can_attach(task, cs->effective_cpus);
+ if (ret)
+ goto out_unlock;
+
+ ret = security_task_setscheduler(task);
+ if (ret)
+ goto out_unlock;
+
+ /*
+ * Mark attach is in progress. This makes validate_change() fail
+ * changes which zero cpus/mems_allowed.
+ */
+ cs->attach_in_progress++;
+out_unlock:
+ percpu_up_write(&cpuset_rwsem);
+ return ret;
+}
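/* Hedged sketch of when cpuset_can_fork() sees same_cs == false: a child
 * cloned directly into another cgroup via clone3(), e.g.
 *
 *	int cgfd = open("/sys/fs/cgroup/other", O_RDONLY);
 *	struct clone_args args = {
 *		.flags		= CLONE_INTO_CGROUP,
 *		.exit_signal	= SIGCHLD,
 *		.cgroup		= cgfd,
 *	};
 *	pid_t pid = syscall(__NR_clone3, &args, sizeof(args));
 *
 * Such a fork is effectively an attach that bypasses cgroup migration,
 * hence the cpuset_can_attach()-style checks above.
 */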
+
+static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
+{
+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
+ bool same_cs;
+
+ rcu_read_lock();
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs)
+ return;
+
+ percpu_down_write(&cpuset_rwsem);
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
+ percpu_up_write(&cpuset_rwsem);
+}
+
+/*
* Make sure the new task conform to the current state of its parent,
* which could have been changed by cpuset just after it inherits the
* state from the parent and before it sits on the cgroup's task list.
*/
static void cpuset_fork(struct task_struct *task)
{
- if (task_css_is_root(task, cpuset_cgrp_id))
+ struct cpuset *cs;
+ bool same_cs;
+
+ rcu_read_lock();
+ cs = task_cs(task);
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs) {
+ if (cs == &top_cpuset)
+ return;
+
+ set_cpus_allowed_ptr(task, current->cpus_ptr);
+ task->mems_allowed = current->mems_allowed;
return;
+ }
+
+ /* CLONE_INTO_CGROUP */
+ percpu_down_write(&cpuset_rwsem);
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+ cpuset_attach_task(cs, task);
+
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
- set_cpus_allowed_ptr(task, current->cpus_ptr);
- task->mems_allowed = current->mems_allowed;
+ percpu_up_write(&cpuset_rwsem);
}
struct cgroup_subsys cpuset_cgrp_subsys = {
@@ -3271,6 +3379,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind,
+ .can_fork = cpuset_can_fork,
+ .cancel_fork = cpuset_cancel_fork,
.fork = cpuset_fork,
.legacy_cftypes = legacy_files,
.dfl_cftypes = dfl_files,
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
index 1b6b21851e9d..936473203a6b 100644
--- a/kernel/cgroup/legacy_freezer.c
+++ b/kernel/cgroup/legacy_freezer.c
@@ -22,6 +22,7 @@
#include <linux/freezer.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
+#include <linux/cpu.h>
/*
* A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
@@ -350,7 +351,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
if (freeze) {
if (!(freezer->state & CGROUP_FREEZING))
- static_branch_inc(&freezer_active);
+ static_branch_inc_cpuslocked(&freezer_active);
freezer->state |= state;
freeze_cgroup(freezer);
} else {
@@ -361,7 +362,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
if (!(freezer->state & CGROUP_FREEZING)) {
freezer->state &= ~CGROUP_FROZEN;
if (was_freezing)
- static_branch_dec(&freezer_active);
+ static_branch_dec_cpuslocked(&freezer_active);
unfreeze_cgroup(freezer);
}
}
@@ -379,6 +380,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
{
struct cgroup_subsys_state *pos;
+ cpus_read_lock();
/*
* Update all its descendants in pre-order traversal. Each
* descendant will try to inherit its parent's FREEZING state as
@@ -407,6 +409,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
}
rcu_read_unlock();
mutex_unlock(&freezer_mutex);
+ cpus_read_unlock();
}
static ssize_t freezer_write(struct kernfs_open_file *of,
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 831f1f472bb8..0a2b4967e333 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -457,9 +457,7 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
struct task_cputime *cputime = &bstat->cputime;
int i;
- cputime->stime = 0;
- cputime->utime = 0;
- cputime->sum_exec_runtime = 0;
+ memset(bstat, 0, sizeof(*bstat));
for_each_possible_cpu(i) {
struct kernel_cpustat kcpustat;
u64 *cpustat = kcpustat.cpustat;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 03e3251cd9d2..dac42a2ad588 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -623,10 +623,10 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
unsigned long max_slots = get_max_slots(boundary_mask);
unsigned int iotlb_align_mask =
- dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
+ dma_get_min_align_mask(dev) | alloc_align_mask;
unsigned int nslots = nr_slots(alloc_size), stride;
- unsigned int index, wrap, count = 0, i;
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int index, slots_checked, count = 0, i;
unsigned long flags;
unsigned int slot_base;
unsigned int slot_index;
@@ -635,29 +635,34 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
BUG_ON(area_index >= mem->nareas);
/*
+ * For allocations of PAGE_SIZE or larger only look for page aligned
+ * allocations.
+ */
+ if (alloc_size >= PAGE_SIZE)
+ iotlb_align_mask |= ~PAGE_MASK;
+ iotlb_align_mask &= ~(IO_TLB_SIZE - 1);
+
+ /*
* For mappings with an alignment requirement don't bother looping to
- * unaligned slots once we found an aligned one. For allocations of
- * PAGE_SIZE or larger only look for page aligned allocations.
+ * unaligned slots once we found an aligned one.
*/
stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
- if (alloc_size >= PAGE_SIZE)
- stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
- stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
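/* Worked example of the stride computation above, assuming IO_TLB_SHIFT
 * == 11 (2 KiB slots) and 4 KiB pages: a PAGE_SIZE allocation ORs in
 * ~PAGE_MASK (0xfff), masking off the low slot bits leaves
 * iotlb_align_mask == 0x800, and stride == (0x800 >> 11) + 1 == 2, i.e.
 * only every second, page-aligned slot is probed.
 */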
spin_lock_irqsave(&area->lock, flags);
if (unlikely(nslots > mem->area_nslabs - area->used))
goto not_found;
slot_base = area_index * mem->area_nslabs;
- index = wrap = wrap_area_index(mem, ALIGN(area->index, stride));
+ index = area->index;
- do {
+ for (slots_checked = 0; slots_checked < mem->area_nslabs; ) {
slot_index = slot_base + index;
if (orig_addr &&
(slot_addr(tbl_dma_addr, slot_index) &
iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
index = wrap_area_index(mem, index + 1);
+ slots_checked++;
continue;
}
@@ -673,7 +678,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
goto found;
}
index = wrap_area_index(mem, index + stride);
- } while (index != wrap);
+ slots_checked += stride;
+ }
not_found:
spin_unlock_irqrestore(&area->lock, flags);
@@ -693,10 +699,7 @@ found:
/*
* Update the indices to avoid searching in the next round.
*/
- if (index + nslots < mem->area_nslabs)
- area->index = index + nslots;
- else
- area->index = 0;
+ area->index = wrap_area_index(mem, index + nslots);
area->used += nslots;
spin_unlock_irqrestore(&area->lock, flags);
return slot_index;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fb3e436bcd4a..435815d3be3f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -12173,7 +12173,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
/*
* If its not a per-cpu rb, it must be the same task.
*/
- if (output_event->cpu == -1 && output_event->ctx != event->ctx)
+ if (output_event->cpu == -1 && output_event->hw.target != event->hw.target)
goto out;
/*
@@ -12893,12 +12893,14 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
__perf_pmu_remove(src_ctx, src_cpu, pmu, &src_ctx->pinned_groups, &events);
__perf_pmu_remove(src_ctx, src_cpu, pmu, &src_ctx->flexible_groups, &events);
- /*
- * Wait for the events to quiesce before re-instating them.
- */
- synchronize_rcu();
+ if (!list_empty(&events)) {
+ /*
+ * Wait for the events to quiesce before re-instating them.
+ */
+ synchronize_rcu();
- __perf_pmu_install(dst_ctx, dst_cpu, pmu, &events);
+ __perf_pmu_install(dst_ctx, dst_cpu, pmu, &events);
+ }
mutex_unlock(&dst_ctx->mutex);
mutex_unlock(&src_ctx->mutex);
diff --git a/kernel/fork.c b/kernel/fork.c
index c0257cbee093..ea332319dffe 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -617,6 +617,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
if (retval)
goto out;
+ mt_clear_in_rcu(vmi.mas.tree);
for_each_vma(old_vmi, mpnt) {
struct file *file;
@@ -700,6 +701,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
retval = arch_dup_mmap(oldmm, mm);
loop_out:
vma_iter_free(&vmi);
+ if (!retval)
+ mt_set_in_rcu(vmi.mas.tree);
out:
mmap_write_unlock(mm);
flush_tlb_mm(oldmm);
@@ -1171,6 +1174,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
fail_pcpu:
while (i > 0)
percpu_counter_destroy(&mm->rss_stat[--i]);
+ destroy_context(mm);
fail_nocontext:
mm_free_pgd(mm);
fail_nopgd:
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index 2e2bf236f558..1c877561a7d2 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -246,7 +246,6 @@ static inline void kmemleak_load_module(const struct module *mod,
void init_build_id(struct module *mod, const struct load_info *info);
void layout_symtab(struct module *mod, struct load_info *info);
void add_kallsyms(struct module *mod, const struct load_info *info);
-unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name);
static inline bool sect_empty(const Elf_Shdr *sect)
{
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
index ab2376a1be88..bdc911dbcde5 100644
--- a/kernel/module/kallsyms.c
+++ b/kernel/module/kallsyms.c
@@ -442,7 +442,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
}
/* Given a module and name of symbol, find and return the symbol's value */
-unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name)
+static unsigned long __find_kallsyms_symbol_value(struct module *mod, const char *name)
{
unsigned int i;
struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
@@ -466,7 +466,7 @@ static unsigned long __module_kallsyms_lookup_name(const char *name)
if (colon) {
mod = find_module_all(name, colon - name, false);
if (mod)
- return find_kallsyms_symbol_value(mod, colon + 1);
+ return __find_kallsyms_symbol_value(mod, colon + 1);
return 0;
}
@@ -475,7 +475,7 @@ static unsigned long __module_kallsyms_lookup_name(const char *name)
if (mod->state == MODULE_STATE_UNFORMED)
continue;
- ret = find_kallsyms_symbol_value(mod, name);
+ ret = __find_kallsyms_symbol_value(mod, name);
if (ret)
return ret;
}
@@ -494,6 +494,16 @@ unsigned long module_kallsyms_lookup_name(const char *name)
return ret;
}
+unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name)
+{
+ unsigned long ret;
+
+ preempt_disable();
+ ret = __find_kallsyms_symbol_value(mod, name);
+ preempt_enable();
+ return ret;
+}
+
int module_kallsyms_on_each_symbol(const char *modname,
int (*fn)(void *, const char *,
struct module *, unsigned long),
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8e880c09ab59..7b95ee98a1a5 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3024,6 +3024,18 @@ need_offload_krc(struct kfree_rcu_cpu *krcp)
return !!READ_ONCE(krcp->head);
}
+static bool
+need_wait_for_krwp_work(struct kfree_rcu_cpu_work *krwp)
+{
+ int i;
+
+ for (i = 0; i < FREE_N_CHANNELS; i++)
+ if (!list_empty(&krwp->bulk_head_free[i]))
+ return true;
+
+ return !!krwp->head_free;
+}
+
static int krc_count(struct kfree_rcu_cpu *krcp)
{
int sum = atomic_read(&krcp->head_count);
@@ -3107,15 +3119,14 @@ static void kfree_rcu_monitor(struct work_struct *work)
for (i = 0; i < KFREE_N_BATCHES; i++) {
struct kfree_rcu_cpu_work *krwp = &(krcp->krw_arr[i]);
- // Try to detach bulk_head or head and attach it over any
- // available corresponding free channel. It can be that
- // a previous RCU batch is in progress, it means that
- // immediately to queue another one is not possible so
- // in that case the monitor work is rearmed.
- if ((!list_empty(&krcp->bulk_head[0]) && list_empty(&krwp->bulk_head_free[0])) ||
- (!list_empty(&krcp->bulk_head[1]) && list_empty(&krwp->bulk_head_free[1])) ||
- (READ_ONCE(krcp->head) && !krwp->head_free)) {
+ // Try to detach bulk_head or head and attach it, but only when
+ // all channels are free. If any channel is not free, this krwp
+ // still has on-going RCU work handling a previous free batch.
+ if (need_wait_for_krwp_work(krwp))
+ continue;
+ // kvfree_rcu_drain_ready() might handle this krcp, if so give up.
+ if (need_offload_krc(krcp)) {
// Channel 1 corresponds to the SLAB-pointer bulk path.
// Channel 2 corresponds to vmalloc-pointer bulk path.
for (j = 0; j < FREE_N_CHANNELS; j++) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6986ea31c984..5f6587d94c1d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10238,6 +10238,16 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
sds->total_capacity;
+
+ /*
+ * If the local group is more loaded than the average system
+ * load, don't try to pull any tasks.
+ */
+ if (local->avg_load >= sds->avg_load) {
+ env->imbalance = 0;
+ return;
+ }
+
}
/*
diff --git a/kernel/sys.c b/kernel/sys.c
index 495cd87d9bf4..351de7916302 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -664,6 +664,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
struct cred *new;
int retval;
kuid_t kruid, keuid, ksuid;
+ bool ruid_new, euid_new, suid_new;
kruid = make_kuid(ns, ruid);
keuid = make_kuid(ns, euid);
@@ -678,25 +679,29 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
if ((suid != (uid_t) -1) && !uid_valid(ksuid))
return -EINVAL;
+ old = current_cred();
+
+ /* check for no-op */
+ if ((ruid == (uid_t) -1 || uid_eq(kruid, old->uid)) &&
+ (euid == (uid_t) -1 || (uid_eq(keuid, old->euid) &&
+ uid_eq(keuid, old->fsuid))) &&
+ (suid == (uid_t) -1 || uid_eq(ksuid, old->suid)))
+ return 0;
+
+ ruid_new = ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
+ !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid);
+ euid_new = euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
+ !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid);
+ suid_new = suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
+ !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid);
+ if ((ruid_new || euid_new || suid_new) &&
+ !ns_capable_setid(old->user_ns, CAP_SETUID))
+ return -EPERM;
+
new = prepare_creds();
if (!new)
return -ENOMEM;
- old = current_cred();
-
- retval = -EPERM;
- if (!ns_capable_setid(old->user_ns, CAP_SETUID)) {
- if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
- !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
- goto error;
- if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
- !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
- goto error;
- if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
- !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
- goto error;
- }
-
if (ruid != (uid_t) -1) {
new->uid = kruid;
if (!uid_eq(kruid, old->uid)) {
@@ -761,6 +766,7 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
struct cred *new;
int retval;
kgid_t krgid, kegid, ksgid;
+ bool rgid_new, egid_new, sgid_new;
krgid = make_kgid(ns, rgid);
kegid = make_kgid(ns, egid);
@@ -773,23 +779,28 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
return -EINVAL;
+ old = current_cred();
+
+ /* check for no-op */
+ if ((rgid == (gid_t) -1 || gid_eq(krgid, old->gid)) &&
+ (egid == (gid_t) -1 || (gid_eq(kegid, old->egid) &&
+ gid_eq(kegid, old->fsgid))) &&
+ (sgid == (gid_t) -1 || gid_eq(ksgid, old->sgid)))
+ return 0;
+
+ rgid_new = rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
+ !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid);
+ egid_new = egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
+ !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid);
+ sgid_new = sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
+ !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid);
+ if ((rgid_new || egid_new || sgid_new) &&
+ !ns_capable_setid(old->user_ns, CAP_SETGID))
+ return -EPERM;
+
new = prepare_creds();
if (!new)
return -ENOMEM;
- old = current_cred();
-
- retval = -EPERM;
- if (!ns_capable_setid(old->user_ns, CAP_SETGID)) {
- if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
- !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
- goto error;
- if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
- !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
- goto error;
- if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
- !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
- goto error;
- }
if (rgid != (gid_t) -1)
new->gid = krgid;
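/* Hedged examples of the no-op fast path added above (assuming fsgid still
 * equals egid):
 *
 *	setresgid(-1, -1, -1);			// returns 0, no capability check
 *	setresgid(getgid(), getegid(), -1);	// likewise a no-op
 *
 * Only a request that would actually change some id still requires
 * CAP_SETGID, and that is now checked before prepare_creds().
 */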
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0feea145bb29..c67bcc89a771 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5667,12 +5667,15 @@ int modify_ftrace_direct(unsigned long ip,
ret = 0;
}
- if (unlikely(ret && new_direct)) {
- direct->count++;
- list_del_rcu(&new_direct->next);
- synchronize_rcu_tasks();
- kfree(new_direct);
- ftrace_direct_func_count--;
+ if (ret) {
+ direct->addr = old_addr;
+ if (unlikely(new_direct)) {
+ direct->count++;
+ list_del_rcu(&new_direct->next);
+ synchronize_rcu_tasks();
+ kfree(new_direct);
+ ftrace_direct_func_count--;
+ }
}
out_unlock:
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c6f47b6cfd5f..76a2d91eecad 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3098,6 +3098,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
if (RB_WARN_ON(cpu_buffer,
rb_is_reader_page(cpu_buffer->tail_page)))
return;
+ /*
+ * No need for a memory barrier here, as the update
+ * of the tail_page did it for this page.
+ */
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
rb_inc_page(&cpu_buffer->commit_page);
@@ -3107,6 +3111,8 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
while (rb_commit_index(cpu_buffer) !=
rb_page_write(cpu_buffer->commit_page)) {
+ /* Make sure the readers see the content of what is committed. */
+ smp_wmb();
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
RB_WARN_ON(cpu_buffer,
@@ -4684,7 +4690,12 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
/*
* Make sure we see any padding after the write update
- * (see rb_reset_tail())
+ * (see rb_reset_tail()).
+ *
+ * In addition, a writer may be writing on the reader page
+ * if the page has not been fully filled, so the read barrier
+ * is also needed to make sure we see the content of what is
+ * committed by the writer (see rb_set_commit_to_write()).
*/
smp_rmb();
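/* Illustrative pairing of the barriers referenced above:
 *
 *	writer (rb_set_commit_to_write)	reader (rb_get_reader_page)
 *	-------------------------------	---------------------------
 *	write event data
 *	smp_wmb()
 *	update ->commit index		read ->commit index
 *					smp_rmb()
 *					read event data
 *
 * The smp_rmb() guarantees the reader sees everything the writer published
 * before it advanced the commit index.
 */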
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 937e9676dfd4..36a6037823cd 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1149,22 +1149,22 @@ static void tracing_snapshot_instance_cond(struct trace_array *tr,
unsigned long flags;
if (in_nmi()) {
- internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
- internal_trace_puts("*** snapshot is being ignored ***\n");
+ trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
+ trace_array_puts(tr, "*** snapshot is being ignored ***\n");
return;
}
if (!tr->allocated_snapshot) {
- internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
- internal_trace_puts("*** stopping trace here! ***\n");
- tracing_off();
+ trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
+ trace_array_puts(tr, "*** stopping trace here! ***\n");
+ tracer_tracing_off(tr);
return;
}
/* Note, snapshot can not be used when the tracer uses it */
if (tracer->use_max_tr) {
- internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
- internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
+ trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
+ trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
return;
}
@@ -9516,6 +9516,7 @@ static int __remove_instance(struct trace_array *tr)
tracefs_remove(tr->dir);
free_percpu(tr->last_func_repeats);
free_trace_buffers(tr);
+ clear_tracing_err_log(tr);
for (i = 0; i < tr->nr_topts; i++) {
kfree(tr->topts[i].topts);
@@ -10393,19 +10394,20 @@ out:
void __init ftrace_boot_snapshot(void)
{
+#ifdef CONFIG_TRACER_MAX_TRACE
struct trace_array *tr;
- if (snapshot_at_boot) {
- tracing_snapshot();
- internal_trace_puts("** Boot snapshot taken **\n");
- }
+ if (!snapshot_at_boot)
+ return;
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
- if (tr == &global_trace)
+ if (!tr->allocated_snapshot)
continue;
- trace_array_puts(tr, "** Boot snapshot taken **\n");
+
tracing_snapshot_instance(tr);
+ trace_array_puts(tr, "** Boot snapshot taken **\n");
}
+#endif
}
void __init early_trace_init(void)
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 46d0abb32d0f..d6a70aff2410 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -44,14 +44,21 @@ enum { ERRORS };
static const char *err_text[] = { ERRORS };
+static DEFINE_MUTEX(lastcmd_mutex);
static char *last_cmd;
static int errpos(const char *str)
{
+ int ret = 0;
+
+ mutex_lock(&lastcmd_mutex);
if (!str || !last_cmd)
- return 0;
+ goto out;
- return err_pos(last_cmd, str);
+ ret = err_pos(last_cmd, str);
+ out:
+ mutex_unlock(&lastcmd_mutex);
+ return ret;
}
static void last_cmd_set(const char *str)
@@ -59,18 +66,22 @@ static void last_cmd_set(const char *str)
if (!str)
return;
+ mutex_lock(&lastcmd_mutex);
kfree(last_cmd);
-
last_cmd = kstrdup(str, GFP_KERNEL);
+ mutex_unlock(&lastcmd_mutex);
}
static void synth_err(u8 err_type, u16 err_pos)
{
+ mutex_lock(&lastcmd_mutex);
if (!last_cmd)
- return;
+ goto out;
tracing_log_err(NULL, "synthetic_events", last_cmd, err_text,
err_type, err_pos);
+ out:
+ mutex_unlock(&lastcmd_mutex);
}
static int create_synth_event(const char *raw_command);
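
The new lastcmd_mutex closes a window in which one path could kfree() last_cmd while another was still dereferencing it. The underlying pattern is a mutex-guarded string slot; a standalone sketch, with do_something() as a stand-in consumer:

	static DEFINE_MUTEX(slot_mutex);
	static char *slot;

	static void slot_set(const char *str)
	{
		mutex_lock(&slot_mutex);
		kfree(slot);			/* free the old value under the lock */
		slot = kstrdup(str, GFP_KERNEL);
		mutex_unlock(&slot_mutex);
	}

	static int slot_use(const char *str)
	{
		int ret = 0;

		mutex_lock(&slot_mutex);
		if (slot)			/* cannot be freed concurrently */
			ret = do_something(slot, str);
		mutex_unlock(&slot_mutex);
		return ret;
	}
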
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 9176bb7a9bb4..4496975f2029 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1296,7 +1296,7 @@ static void notify_new_max_latency(u64 latency)
rcu_read_lock();
list_for_each_entry_rcu(inst, &osnoise_instances, list) {
tr = inst->tr;
- if (tr->max_latency < latency) {
+ if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
tr->max_latency = latency;
latency_fsnotify(tr);
}
@@ -1738,6 +1738,8 @@ static int timerlat_main(void *data)
trace_timerlat_sample(&s);
+ notify_new_max_latency(diff);
+
timerlat_dump_stack(time_to_us(diff));
tlat->tracing_thread = false;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c8b379e2e9ad..39d1d93164bd 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1143,7 +1143,7 @@ menu "Scheduler Debugging"
config SCHED_DEBUG
bool "Collect scheduler debugging info"
- depends on DEBUG_KERNEL && PROC_FS
+ depends on DEBUG_KERNEL && DEBUG_FS
default y
help
If you say Y here, the /sys/kernel/debug/sched file will be provided
@@ -1392,7 +1392,7 @@ config LOCKDEP_STACK_TRACE_HASH_BITS
range 10 30
default 14
help
- Try increasing this value if you need large MAX_STACK_TRACE_ENTRIES.
+ Try increasing this value if you need large STACK_TRACE_HASH_SIZE.
config LOCKDEP_CIRCULAR_QUEUE_BITS
int "Bitsize for elements in circular_queue struct"
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 9e2735cbc2b4..1281a40d5735 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -185,7 +185,7 @@ static void mt_free_rcu(struct rcu_head *head)
*/
static void ma_free_rcu(struct maple_node *node)
{
- node->parent = ma_parent_ptr(node);
+ WARN_ON(node->parent != ma_parent_ptr(node));
call_rcu(&node->rcu, mt_free_rcu);
}
@@ -539,11 +539,14 @@ static inline struct maple_node *mte_parent(const struct maple_enode *enode)
*/
static inline bool ma_dead_node(const struct maple_node *node)
{
- struct maple_node *parent = (void *)((unsigned long)
- node->parent & ~MAPLE_NODE_MASK);
+ struct maple_node *parent;
+ /* Do not reorder reads from the node prior to the parent check */
+ smp_rmb();
+ parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);
return (parent == node);
}
+
/*
* mte_dead_node() - check if the @enode is dead.
* @enode: The encoded maple node
@@ -555,6 +558,8 @@ static inline bool mte_dead_node(const struct maple_enode *enode)
struct maple_node *parent, *node;
node = mte_to_node(enode);
+ /* Do not reorder reads from the node prior to the parent check */
+ smp_rmb();
parent = mte_parent(enode);
return (parent == node);
}
@@ -625,6 +630,8 @@ static inline unsigned int mas_alloc_req(const struct ma_state *mas)
* @node - the maple node
* @type - the node type
*
+ * In the event of a dead node, this array may be %NULL
+ *
* Return: A pointer to the maple node pivots
*/
static inline unsigned long *ma_pivots(struct maple_node *node,
@@ -817,6 +824,11 @@ static inline void *mt_slot(const struct maple_tree *mt,
return rcu_dereference_check(slots[offset], mt_locked(mt));
}
+static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots,
+ unsigned char offset)
+{
+ return rcu_dereference_protected(slots[offset], mt_locked(mt));
+}
/*
* mas_slot_locked() - Get the slot value when holding the maple tree lock.
* @mas: The maple state
@@ -828,7 +840,7 @@ static inline void *mt_slot(const struct maple_tree *mt,
static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots,
unsigned char offset)
{
- return rcu_dereference_protected(slots[offset], mt_locked(mas->tree));
+ return mt_slot_locked(mas->tree, slots, offset);
}
/*
@@ -900,6 +912,45 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
}
/*
+ * mt_clear_meta() - clear the metadata information of a node, if it exists
+ * @mt: The maple tree
+ * @mn: The maple node
+ * @type: The maple node type
+ */
+static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
+ enum maple_type type)
+{
+ struct maple_metadata *meta;
+ unsigned long *pivots;
+ void __rcu **slots;
+ void *next;
+
+ switch (type) {
+ case maple_range_64:
+ pivots = mn->mr64.pivot;
+ if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
+ slots = mn->mr64.slot;
+ next = mt_slot_locked(mt, slots,
+ MAPLE_RANGE64_SLOTS - 1);
+ if (unlikely((mte_to_node(next) &&
+ mte_node_type(next))))
+ return; /* no metadata, could be node */
+ }
+ fallthrough;
+ case maple_arange_64:
+ meta = ma_meta(mn, type);
+ break;
+ default:
+ return;
+ }
+
+ meta->gap = 0;
+ meta->end = 0;
+}
+
+/*
* ma_meta_end() - Get the data end of a node from the metadata
* @mn: The maple node
* @mt: The maple node type
@@ -1096,8 +1147,11 @@ static int mas_ascend(struct ma_state *mas)
a_type = mas_parent_enum(mas, p_enode);
a_node = mte_parent(p_enode);
a_slot = mte_parent_slot(p_enode);
- pivots = ma_pivots(a_node, a_type);
a_enode = mt_mk_node(a_node, a_type);
+ pivots = ma_pivots(a_node, a_type);
+
+ if (unlikely(ma_dead_node(a_node)))
+ return 1;
if (!set_min && a_slot) {
set_min = true;
@@ -1249,26 +1303,21 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
node = mas->alloc;
node->request_count = 0;
while (requested) {
- max_req = MAPLE_ALLOC_SLOTS;
- if (node->node_count) {
- unsigned int offset = node->node_count;
-
- slots = (void **)&node->slot[offset];
- max_req -= offset;
- } else {
- slots = (void **)&node->slot;
- }
-
+ max_req = MAPLE_ALLOC_SLOTS - node->node_count;
+ slots = (void **)&node->slot[node->node_count];
max_req = min(requested, max_req);
count = mt_alloc_bulk(gfp, max_req, slots);
if (!count)
goto nomem_bulk;
+ if (node->node_count == 0) {
+ node->slot[0]->node_count = 0;
+ node->slot[0]->request_count = 0;
+ }
+
node->node_count += count;
allocated += count;
node = node->slot[0];
- node->node_count = 0;
- node->request_count = 0;
requested -= count;
}
mas->alloc->total = allocated;
@@ -1354,12 +1403,16 @@ static inline struct maple_enode *mas_start(struct ma_state *mas)
mas->max = ULONG_MAX;
mas->depth = 0;
+retry:
root = mas_root(mas);
/* Tree with nodes */
if (likely(xa_is_node(root))) {
mas->depth = 1;
mas->node = mte_safe_root(root);
mas->offset = 0;
+ if (mte_dead_node(mas->node))
+ goto retry;
+
return NULL;
}
@@ -1401,6 +1454,9 @@ static inline unsigned char ma_data_end(struct maple_node *node,
{
unsigned char offset;
+ if (!pivots)
+ return 0;
+
if (type == maple_arange_64)
return ma_meta_end(node, type);
@@ -1436,6 +1492,9 @@ static inline unsigned char mas_data_end(struct ma_state *mas)
return ma_meta_end(node, type);
pivots = ma_pivots(node, type);
+ if (unlikely(ma_dead_node(node)))
+ return 0;
+
offset = mt_pivots[type] - 1;
if (likely(!pivots[offset]))
return ma_meta_end(node, type);
@@ -1724,8 +1783,10 @@ static inline void mas_replace(struct ma_state *mas, bool advanced)
rcu_assign_pointer(slots[offset], mas->node);
}
- if (!advanced)
+ if (!advanced) {
+ mte_set_node_dead(old_enode);
mas_free(mas, old_enode);
+ }
}
/*
@@ -3659,10 +3720,9 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry)
slot++;
mas->depth = 1;
mas_set_height(mas);
-
+ ma_set_meta(node, maple_leaf_64, 0, slot);
/* swap the new root into the tree */
rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
- ma_set_meta(node, maple_leaf_64, 0, slot);
return slot;
}
@@ -3875,18 +3935,13 @@ static inline void *mtree_lookup_walk(struct ma_state *mas)
end = ma_data_end(node, type, pivots, max);
if (unlikely(ma_dead_node(node)))
goto dead_node;
-
- if (pivots[offset] >= mas->index)
- goto next;
-
do {
- offset++;
- } while ((offset < end) && (pivots[offset] < mas->index));
-
- if (likely(offset > end))
- max = pivots[offset];
+ if (pivots[offset] >= mas->index) {
+ max = pivots[offset];
+ break;
+ }
+ } while (++offset < end);
-next:
slots = ma_slots(node, type);
next = mt_slot(mas->tree, slots, offset);
if (unlikely(ma_dead_node(node)))
@@ -4164,6 +4219,7 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
done:
mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end);
if (in_rcu) {
+ mte_set_node_dead(mas->node);
mas->node = mt_mk_node(newnode, wr_mas->type);
mas_replace(mas, false);
} else {
@@ -4505,6 +4561,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
node = mas_mn(mas);
slots = ma_slots(node, mt);
pivots = ma_pivots(node, mt);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
mas->max = pivots[offset];
if (offset)
mas->min = pivots[offset - 1] + 1;
@@ -4526,6 +4585,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
slots = ma_slots(node, mt);
pivots = ma_pivots(node, mt);
offset = ma_data_end(node, mt, pivots, mas->max);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
if (offset)
mas->min = pivots[offset - 1] + 1;
@@ -4574,6 +4636,7 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
struct maple_enode *enode;
int level = 0;
unsigned char offset;
+ unsigned char node_end;
enum maple_type mt;
void __rcu **slots;
@@ -4597,7 +4660,11 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
node = mas_mn(mas);
mt = mte_node_type(mas->node);
pivots = ma_pivots(node, mt);
- } while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max)));
+ node_end = ma_data_end(node, mt, pivots, mas->max);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
+ } while (unlikely(offset == node_end));
slots = ma_slots(node, mt);
pivot = mas_safe_pivot(mas, pivots, ++offset, mt);
@@ -4613,6 +4680,9 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
mt = mte_node_type(mas->node);
slots = ma_slots(node, mt);
pivots = ma_pivots(node, mt);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
offset = 0;
pivot = pivots[0];
}
@@ -4659,11 +4729,14 @@ static inline void *mas_next_nentry(struct ma_state *mas,
return NULL;
}
- pivots = ma_pivots(node, type);
slots = ma_slots(node, type);
- mas->index = mas_safe_min(mas, pivots, mas->offset);
+ pivots = ma_pivots(node, type);
count = ma_data_end(node, type, pivots, mas->max);
- if (ma_dead_node(node))
+ if (unlikely(ma_dead_node(node)))
+ return NULL;
+
+ mas->index = mas_safe_min(mas, pivots, mas->offset);
+ if (unlikely(ma_dead_node(node)))
return NULL;
if (mas->index > max)
@@ -4817,6 +4890,11 @@ retry:
slots = ma_slots(mn, mt);
pivots = ma_pivots(mn, mt);
+ if (unlikely(ma_dead_node(mn))) {
+ mas_rewalk(mas, index);
+ goto retry;
+ }
+
if (offset == mt_pivots[mt])
pivot = mas->max;
else
@@ -4887,7 +4965,8 @@ not_found:
* Return: True if found in a leaf, false otherwise.
*
*/
-static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
+static bool mas_rev_awalk(struct ma_state *mas, unsigned long size,
+ unsigned long *gap_min, unsigned long *gap_max)
{
enum maple_type type = mte_node_type(mas->node);
struct maple_node *node = mas_mn(mas);
@@ -4952,8 +5031,8 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
if (unlikely(ma_is_leaf(type))) {
mas->offset = offset;
- mas->min = min;
- mas->max = min + gap - 1;
+ *gap_min = min;
+ *gap_max = min + gap - 1;
return true;
}
@@ -4977,10 +5056,10 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
{
enum maple_type type = mte_node_type(mas->node);
unsigned long pivot, min, gap = 0;
- unsigned char offset;
- unsigned long *gaps;
- unsigned long *pivots = ma_pivots(mas_mn(mas), type);
- void __rcu **slots = ma_slots(mas_mn(mas), type);
+ unsigned char offset, data_end;
+ unsigned long *gaps, *pivots;
+ void __rcu **slots;
+ struct maple_node *node;
bool found = false;
if (ma_is_dense(type)) {
@@ -4988,13 +5067,15 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
return true;
}
- gaps = ma_gaps(mte_to_node(mas->node), type);
+ node = mas_mn(mas);
+ pivots = ma_pivots(node, type);
+ slots = ma_slots(node, type);
+ gaps = ma_gaps(node, type);
offset = mas->offset;
min = mas_safe_min(mas, pivots, offset);
- for (; offset < mt_slots[type]; offset++) {
- pivot = mas_safe_pivot(mas, pivots, offset, type);
- if (offset && !pivot)
- break;
+ data_end = ma_data_end(node, type, pivots, mas->max);
+ for (; offset <= data_end; offset++) {
+ pivot = mas_logical_pivot(mas, pivots, offset, type);
/* Not within lower bounds */
if (mas->index > pivot)
@@ -5229,6 +5310,9 @@ int mas_empty_area(struct ma_state *mas, unsigned long min,
unsigned long *pivots;
enum maple_type mt;
+ if (min >= max)
+ return -EINVAL;
+
if (mas_is_start(mas))
mas_start(mas);
else if (mas->offset >= 2)
@@ -5283,6 +5367,9 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
{
struct maple_enode *last = mas->node;
+ if (min >= max)
+ return -EINVAL;
+
if (mas_is_start(mas)) {
mas_start(mas);
mas->offset = mas_data_end(mas);
@@ -5302,7 +5389,7 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
mas->index = min;
mas->last = max;
- while (!mas_rev_awalk(mas, size)) {
+ while (!mas_rev_awalk(mas, size, &min, &max)) {
if (last == mas->node) {
if (!mas_rewind_node(mas))
return -EBUSY;
@@ -5317,17 +5404,9 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
return -EBUSY;
- /*
- * mas_rev_awalk() has set mas->min and mas->max to the gap values. If
- * the maximum is outside the window we are searching, then use the last
- * location in the search.
- * mas->max and mas->min is the range of the gap.
- * mas->index and mas->last are currently set to the search range.
- */
-
/* Trim the upper limit to the max. */
- if (mas->max <= mas->last)
- mas->last = mas->max;
+ if (max <= mas->last)
+ mas->last = max;
mas->index = mas->last - size + 1;
return 0;
@@ -5400,24 +5479,26 @@ no_gap:
}
/*
- * mas_dead_leaves() - Mark all leaves of a node as dead.
+ * mte_dead_leaves() - Mark all leaves of a node as dead.
* @enode: The maple encoded node
+ * @mt: The maple tree
* @slots: Pointer to the slot array
*
* Must hold the write lock.
*
* Return: The number of leaves marked as dead.
*/
static inline
-unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
+unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
+ void __rcu **slots)
{
struct maple_node *node;
enum maple_type type;
void *entry;
int offset;
- for (offset = 0; offset < mt_slot_count(mas->node); offset++) {
- entry = mas_slot_locked(mas, slots, offset);
+ for (offset = 0; offset < mt_slot_count(enode); offset++) {
+ entry = mt_slot(mt, slots, offset);
type = mte_node_type(entry);
node = mte_to_node(entry);
/* Use both node and type to catch LE & BE metadata */
@@ -5425,7 +5506,6 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
break;
mte_set_node_dead(entry);
- smp_wmb(); /* Needed for RCU */
node->type = type;
rcu_assign_pointer(slots[offset], node);
}
@@ -5433,151 +5513,160 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
return offset;
}
-static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset)
+/**
+ * mte_dead_walk() - Walk down a dead tree to just before the leaves
+ * @enode: The maple encoded node
+ * @offset: The starting offset
+ *
+ * Note: This can only be used from the RCU callback context.
+ */
+static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
{
struct maple_node *node, *next;
void __rcu **slots = NULL;
- next = mas_mn(mas);
+ next = mte_to_node(*enode);
do {
- mas->node = ma_enode_ptr(next);
- node = mas_mn(mas);
+ *enode = ma_enode_ptr(next);
+ node = mte_to_node(*enode);
slots = ma_slots(node, node->type);
- next = mas_slot_locked(mas, slots, offset);
+ next = rcu_dereference_protected(slots[offset],
+ lock_is_held(&rcu_callback_map));
offset = 0;
} while (!ma_is_leaf(next->type));
return slots;
}
+/**
+ * mt_free_walk() - Walk & free a tree in the RCU callback context
+ * @head: The RCU head that's within the node.
+ *
+ * Note: This can only be used from the RCU callback context.
+ */
static void mt_free_walk(struct rcu_head *head)
{
void __rcu **slots;
struct maple_node *node, *start;
- struct maple_tree mt;
+ struct maple_enode *enode;
unsigned char offset;
enum maple_type type;
- MA_STATE(mas, &mt, 0, 0);
node = container_of(head, struct maple_node, rcu);
if (ma_is_leaf(node->type))
goto free_leaf;
- mt_init_flags(&mt, node->ma_flags);
- mas_lock(&mas);
start = node;
- mas.node = mt_mk_node(node, node->type);
- slots = mas_dead_walk(&mas, 0);
- node = mas_mn(&mas);
+ enode = mt_mk_node(node, node->type);
+ slots = mte_dead_walk(&enode, 0);
+ node = mte_to_node(enode);
do {
mt_free_bulk(node->slot_len, slots);
offset = node->parent_slot + 1;
- mas.node = node->piv_parent;
- if (mas_mn(&mas) == node)
- goto start_slots_free;
-
- type = mte_node_type(mas.node);
- slots = ma_slots(mte_to_node(mas.node), type);
- if ((offset < mt_slots[type]) && (slots[offset]))
- slots = mas_dead_walk(&mas, offset);
-
- node = mas_mn(&mas);
+ enode = node->piv_parent;
+ if (mte_to_node(enode) == node)
+ goto free_leaf;
+
+ type = mte_node_type(enode);
+ slots = ma_slots(mte_to_node(enode), type);
+ if ((offset < mt_slots[type]) &&
+ rcu_dereference_protected(slots[offset],
+ lock_is_held(&rcu_callback_map)))
+ slots = mte_dead_walk(&enode, offset);
+ node = mte_to_node(enode);
} while ((node != start) || (node->slot_len < offset));
slots = ma_slots(node, node->type);
mt_free_bulk(node->slot_len, slots);
-start_slots_free:
- mas_unlock(&mas);
free_leaf:
mt_free_rcu(&node->rcu);
}
-static inline void __rcu **mas_destroy_descend(struct ma_state *mas,
- struct maple_enode *prev, unsigned char offset)
+static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
+ struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
{
struct maple_node *node;
- struct maple_enode *next = mas->node;
+ struct maple_enode *next = *enode;
void __rcu **slots = NULL;
+ enum maple_type type;
+ unsigned char next_offset = 0;
do {
- mas->node = next;
- node = mas_mn(mas);
- slots = ma_slots(node, mte_node_type(mas->node));
- next = mas_slot_locked(mas, slots, 0);
+ *enode = next;
+ node = mte_to_node(*enode);
+ type = mte_node_type(*enode);
+ slots = ma_slots(node, type);
+ next = mt_slot_locked(mt, slots, next_offset);
if ((mte_dead_node(next)))
- next = mas_slot_locked(mas, slots, 1);
+ next = mt_slot_locked(mt, slots, ++next_offset);
- mte_set_node_dead(mas->node);
- node->type = mte_node_type(mas->node);
+ mte_set_node_dead(*enode);
+ node->type = type;
node->piv_parent = prev;
node->parent_slot = offset;
- offset = 0;
- prev = mas->node;
+ offset = next_offset;
+ next_offset = 0;
+ prev = *enode;
} while (!mte_is_leaf(next));
return slots;
}
-static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags,
+static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
bool free)
{
void __rcu **slots;
struct maple_node *node = mte_to_node(enode);
struct maple_enode *start;
- struct maple_tree mt;
- MA_STATE(mas, &mt, 0, 0);
-
- if (mte_is_leaf(enode))
+ if (mte_is_leaf(enode)) {
+ node->type = mte_node_type(enode);
goto free_leaf;
+ }
- mt_init_flags(&mt, ma_flags);
- mas_lock(&mas);
-
- mas.node = start = enode;
- slots = mas_destroy_descend(&mas, start, 0);
- node = mas_mn(&mas);
+ start = enode;
+ slots = mte_destroy_descend(&enode, mt, start, 0);
+ node = mte_to_node(enode); /* updated by the call above */
do {
enum maple_type type;
unsigned char offset;
struct maple_enode *parent, *tmp;
- node->slot_len = mas_dead_leaves(&mas, slots);
+ node->slot_len = mte_dead_leaves(enode, mt, slots);
if (free)
mt_free_bulk(node->slot_len, slots);
offset = node->parent_slot + 1;
- mas.node = node->piv_parent;
- if (mas_mn(&mas) == node)
- goto start_slots_free;
+ enode = node->piv_parent;
+ if (mte_to_node(enode) == node)
+ goto free_leaf;
- type = mte_node_type(mas.node);
- slots = ma_slots(mte_to_node(mas.node), type);
+ type = mte_node_type(enode);
+ slots = ma_slots(mte_to_node(enode), type);
if (offset >= mt_slots[type])
goto next;
- tmp = mas_slot_locked(&mas, slots, offset);
+ tmp = mt_slot_locked(mt, slots, offset);
if (mte_node_type(tmp) && mte_to_node(tmp)) {
- parent = mas.node;
- mas.node = tmp;
- slots = mas_destroy_descend(&mas, parent, offset);
+ parent = enode;
+ enode = tmp;
+ slots = mte_destroy_descend(&enode, mt, parent, offset);
}
next:
- node = mas_mn(&mas);
- } while (start != mas.node);
+ node = mte_to_node(enode);
+ } while (start != enode);
- node = mas_mn(&mas);
- node->slot_len = mas_dead_leaves(&mas, slots);
+ node = mte_to_node(enode);
+ node->slot_len = mte_dead_leaves(enode, mt, slots);
if (free)
mt_free_bulk(node->slot_len, slots);
-start_slots_free:
- mas_unlock(&mas);
-
free_leaf:
if (free)
mt_free_rcu(&node->rcu);
+ else
+ mt_clear_meta(mt, node, node->type);
}
/*
@@ -5593,10 +5682,10 @@ static inline void mte_destroy_walk(struct maple_enode *enode,
struct maple_node *node = mte_to_node(enode);
if (mt_in_rcu(mt)) {
- mt_destroy_walk(enode, mt->ma_flags, false);
+ mt_destroy_walk(enode, mt, false);
call_rcu(&node->rcu, mt_free_walk);
} else {
- mt_destroy_walk(enode, mt->ma_flags, true);
+ mt_destroy_walk(enode, mt, true);
}
}
@@ -6617,11 +6706,11 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
while (likely(!ma_is_leaf(mt))) {
MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
slots = ma_slots(mn, mt);
- pivots = ma_pivots(mn, mt);
- max = pivots[0];
entry = mas_slot(mas, slots, 0);
+ pivots = ma_pivots(mn, mt);
if (unlikely(ma_dead_node(mn)))
return NULL;
+ max = pivots[0];
mas->node = entry;
mn = mas_mn(mas);
mt = mte_node_type(mas->node);
@@ -6641,13 +6730,13 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
if (likely(entry))
return entry;
- pivots = ma_pivots(mn, mt);
- mas->index = pivots[0] + 1;
mas->offset = 1;
entry = mas_slot(mas, slots, 1);
+ pivots = ma_pivots(mn, mt);
if (unlikely(ma_dead_node(mn)))
return NULL;
+ mas->index = pivots[0] + 1;
if (mas->index > limit)
goto none;
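
A single discipline runs through these maple tree changes: under RCU, read a node's pivots and slots first, then re-check ma_dead_node(), which now contains an smp_rmb(), before trusting anything, and restart the walk if the node died underneath the reader. The idiom, reduced to its core (mas_rewalk() restarts from the root):

	retry:
		node = mas_mn(mas);
		type = mte_node_type(mas->node);
		slots = ma_slots(node, type);
		pivots = ma_pivots(node, type);
		/*
		 * The smp_rmb() inside ma_dead_node() keeps the reads above
		 * from being reordered past the check; if the node has been
		 * freed and reused, everything read so far is discarded.
		 */
		if (unlikely(ma_dead_node(node))) {
			mas_rewalk(mas, index);
			goto retry;
		}
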
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index a53b9360b72e..30d2d0386fdb 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -507,6 +507,15 @@ static LIST_HEAD(offline_cgwbs);
static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
+static void cgwb_free_rcu(struct rcu_head *rcu_head)
+{
+ struct bdi_writeback *wb = container_of(rcu_head,
+ struct bdi_writeback, rcu);
+
+ percpu_ref_exit(&wb->refcnt);
+ kfree(wb);
+}
+
static void cgwb_release_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
@@ -529,11 +538,10 @@ static void cgwb_release_workfn(struct work_struct *work)
list_del(&wb->offline_node);
spin_unlock_irq(&cgwb_lock);
- percpu_ref_exit(&wb->refcnt);
wb_exit(wb);
bdi_put(bdi);
WARN_ON_ONCE(!list_empty(&wb->b_attached));
- kfree_rcu(wb, rcu);
+ call_rcu(&wb->rcu, cgwb_free_rcu);
}
static void cgwb_release(struct percpu_ref *refcnt)
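
kfree_rcu() can only free memory; it cannot run percpu_ref_exit() first. An object whose percpu_ref may still be poked by RCU-protected readers therefore needs an explicit call_rcu() callback that tears the ref down before freeing. The general shape, sketched:

	struct obj {
		struct percpu_ref refcnt;
		struct rcu_head rcu;
	};

	static void obj_free_rcu(struct rcu_head *rcu_head)
	{
		struct obj *o = container_of(rcu_head, struct obj, rcu);

		percpu_ref_exit(&o->refcnt);	/* safe: readers are done */
		kfree(o);
	}

	/* release path: defer both the exit and the free past a grace period */
	call_rcu(&o->rcu, obj_free_rcu);
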
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 032fb0ef9cd1..3fae2d2496ab 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1838,10 +1838,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (is_swap_pmd(*pmd)) {
swp_entry_t entry = pmd_to_swp_entry(*pmd);
struct page *page = pfn_swap_entry_to_page(entry);
+ pmd_t newpmd;
VM_BUG_ON(!is_pmd_migration_entry(*pmd));
if (is_writable_migration_entry(entry)) {
- pmd_t newpmd;
/*
* A protection check is difficult so
* just be safe and disable write
@@ -1855,8 +1855,16 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
newpmd = pmd_swp_mksoft_dirty(newpmd);
if (pmd_swp_uffd_wp(*pmd))
newpmd = pmd_swp_mkuffd_wp(newpmd);
- set_pmd_at(mm, addr, pmd, newpmd);
+ } else {
+ newpmd = *pmd;
}
+
+ if (uffd_wp)
+ newpmd = pmd_swp_mkuffd_wp(newpmd);
+ else if (uffd_wp_resolve)
+ newpmd = pmd_swp_clear_uffd_wp(newpmd);
+ if (!pmd_same(*pmd, newpmd))
+ set_pmd_at(mm, addr, pmd, newpmd);
goto unlock;
}
#endif
@@ -2657,9 +2665,10 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
is_hzp = is_huge_zero_page(&folio->page);
- VM_WARN_ON_ONCE_FOLIO(is_hzp, folio);
- if (is_hzp)
+ if (is_hzp) {
+ pr_warn_ratelimited("Called split_huge_page for huge zero page\n");
return -EBUSY;
+ }
if (folio_test_writeback(folio))
return -EBUSY;
@@ -3251,6 +3260,8 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
pmdswp = swp_entry_to_pmd(entry);
if (pmd_soft_dirty(pmdval))
pmdswp = pmd_swp_mksoft_dirty(pmdswp);
+ if (pmd_uffd_wp(pmdval))
+ pmdswp = pmd_swp_mkuffd_wp(pmdswp);
set_pmd_at(mm, address, pvmw->pmd, pmdswp);
page_remove_rmap(page, vma, true);
put_page(page);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 07abcb6eb203..245038a9fe4e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5478,7 +5478,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
struct folio *pagecache_folio, spinlock_t *ptl)
{
const bool unshare = flags & FAULT_FLAG_UNSHARE;
- pte_t pte;
+ pte_t pte = huge_ptep_get(ptep);
struct hstate *h = hstate_vma(vma);
struct page *old_page;
struct folio *new_folio;
@@ -5488,6 +5488,17 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
struct mmu_notifier_range range;
/*
+ * Never handle CoW for uffd-wp protected pages. It should only be
+ * handled when the uffd-wp protection is removed.
+ *
+ * Note that only the CoW optimization path (in hugetlb_no_page())
+ * can trigger this, because hugetlb_fault() will always resolve
+ * uffd-wp bit first.
+ */
+ if (!unshare && huge_pte_uffd_wp(pte))
+ return 0;
+
+ /*
* hugetlb does not support FOLL_FORCE-style write faults that keep the
* PTE mapped R/O such as maybe_mkwrite() would do.
*/
@@ -5500,7 +5511,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
return 0;
}
- pte = huge_ptep_get(ptep);
old_page = pte_page(pte);
delayacct_wpcopy_start();
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 79c94ee55f97..1065e0568d05 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -556,15 +556,11 @@ static unsigned long kfence_init_pool(void)
* enters __slab_free() slow-path.
*/
for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
- struct slab *slab = page_slab(&pages[i]);
+ struct slab *slab = page_slab(nth_page(pages, i));
if (!i || (i % 2))
continue;
- /* Verify we do not have a compound head page. */
- if (WARN_ON(compound_head(&pages[i]) != &pages[i]))
- return addr;
-
__folio_set_slab(slab_folio(slab));
#ifdef CONFIG_MEMCG
slab->memcg_data = (unsigned long)&kfence_metadata[i / 2 - 1].objcg |
@@ -597,12 +593,26 @@ static unsigned long kfence_init_pool(void)
/* Protect the right redzone. */
if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
- return addr;
+ goto reset_slab;
addr += 2 * PAGE_SIZE;
}
return 0;
+
+reset_slab:
+ for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
+ struct slab *slab = page_slab(nth_page(pages, i));
+
+ if (!i || (i % 2))
+ continue;
+#ifdef CONFIG_MEMCG
+ slab->memcg_data = 0;
+#endif
+ __folio_clear_slab(slab_folio(slab));
+ }
+
+ return addr;
}
static bool __init kfence_init_pool_early(void)
@@ -632,16 +642,6 @@ static bool __init kfence_init_pool_early(void)
* fails for the first page, and therefore expect addr==__kfence_pool in
* most failure cases.
*/
- for (char *p = (char *)addr; p < __kfence_pool + KFENCE_POOL_SIZE; p += PAGE_SIZE) {
- struct slab *slab = virt_to_slab(p);
-
- if (!slab)
- continue;
-#ifdef CONFIG_MEMCG
- slab->memcg_data = 0;
-#endif
- __folio_clear_slab(slab_folio(slab));
- }
memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
__kfence_pool = NULL;
return false;
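
The kfence error path now unwinds exactly what kfence_init_pool() has already done, walking the same pages with the same stride and clearing the slab state set earlier, rather than leaving the caller to guess the pool's state. As a generic init/unwind pair (setup_item() and teardown_item() are hypothetical):

	static int init_all(struct item *items, int n)
	{
		int i;

		for (i = 0; i < n; i++) {
			if (setup_item(&items[i]))
				goto unwind;
		}
		return 0;

	unwind:
		while (i--)			/* tear down in reverse order */
			teardown_item(&items[i]);
		return -ENOMEM;
	}
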
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 92e6f56a932d..0ec69b96b497 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -572,6 +572,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
result = SCAN_PTE_NON_PRESENT;
goto out;
}
+ if (pte_uffd_wp(pteval)) {
+ result = SCAN_PTE_UFFD_WP;
+ goto out;
+ }
page = vm_normal_page(vma, address, pteval);
if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
result = SCAN_PAGE_NULL;
diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c
index 3807502766a3..ec0da72e65aa 100644
--- a/mm/kmsan/hooks.c
+++ b/mm/kmsan/hooks.c
@@ -148,35 +148,74 @@ void kmsan_vunmap_range_noflush(unsigned long start, unsigned long end)
* into the virtual memory. If those physical pages already had shadow/origin,
* those are ignored.
*/
-void kmsan_ioremap_page_range(unsigned long start, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int page_shift)
+int kmsan_ioremap_page_range(unsigned long start, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int page_shift)
{
gfp_t gfp_mask = GFP_KERNEL | __GFP_ZERO;
struct page *shadow, *origin;
unsigned long off = 0;
- int nr;
+ int nr, err = 0, clean = 0, mapped;
if (!kmsan_enabled || kmsan_in_runtime())
- return;
+ return 0;
nr = (end - start) / PAGE_SIZE;
kmsan_enter_runtime();
- for (int i = 0; i < nr; i++, off += PAGE_SIZE) {
+ for (int i = 0; i < nr; i++, off += PAGE_SIZE, clean = i) {
shadow = alloc_pages(gfp_mask, 1);
origin = alloc_pages(gfp_mask, 1);
- __vmap_pages_range_noflush(
+ if (!shadow || !origin) {
+ err = -ENOMEM;
+ goto ret;
+ }
+ mapped = __vmap_pages_range_noflush(
vmalloc_shadow(start + off),
vmalloc_shadow(start + off + PAGE_SIZE), prot, &shadow,
PAGE_SHIFT);
- __vmap_pages_range_noflush(
+ if (mapped) {
+ err = mapped;
+ goto ret;
+ }
+ shadow = NULL;
+ mapped = __vmap_pages_range_noflush(
vmalloc_origin(start + off),
vmalloc_origin(start + off + PAGE_SIZE), prot, &origin,
PAGE_SHIFT);
+ if (mapped) {
+ __vunmap_range_noflush(
+ vmalloc_shadow(start + off),
+ vmalloc_shadow(start + off + PAGE_SIZE));
+ err = mapped;
+ goto ret;
+ }
+ origin = NULL;
+ }
+ /* Page mapping loop finished normally, nothing to clean up. */
+ clean = 0;
+
+ret:
+ if (clean > 0) {
+ /*
+ * Something went wrong. Clean up shadow/origin pages allocated
+ * on the last loop iteration, then delete mappings created
+ * during the previous iterations.
+ */
+ if (shadow)
+ __free_pages(shadow, 1);
+ if (origin)
+ __free_pages(origin, 1);
+ __vunmap_range_noflush(
+ vmalloc_shadow(start),
+ vmalloc_shadow(start + clean * PAGE_SIZE));
+ __vunmap_range_noflush(
+ vmalloc_origin(start),
+ vmalloc_origin(start + clean * PAGE_SIZE));
}
flush_cache_vmap(vmalloc_shadow(start), vmalloc_shadow(end));
flush_cache_vmap(vmalloc_origin(start), vmalloc_origin(end));
kmsan_leave_runtime();
+ return err;
}
void kmsan_iounmap_page_range(unsigned long start, unsigned long end)
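
kmsan_ioremap_page_range() now propagates allocation and mapping failures instead of silently ignoring them. The clean counter is updated in the for-loop's increment, so it always equals the number of fully completed iterations and tells the exit path how much to unwind. Compressed to the control flow, with map_one() and unmap_first() as stand-ins for the per-page shadow/origin steps:

	int i, err = 0, clean = 0;

	for (i = 0; i < nr; i++, clean = i) {
		if (map_one(i)) {		/* shadow + origin for one page */
			err = -ENOMEM;
			goto ret;
		}
	}
	clean = 0;				/* full success: nothing to unwind */
	ret:
	if (clean > 0)
		unmap_first(clean);		/* undo only completed iterations */
	return err;
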
diff --git a/mm/kmsan/shadow.c b/mm/kmsan/shadow.c
index a787c04e9583..b8bb95eea5e3 100644
--- a/mm/kmsan/shadow.c
+++ b/mm/kmsan/shadow.c
@@ -216,27 +216,29 @@ void kmsan_free_page(struct page *page, unsigned int order)
kmsan_leave_runtime();
}
-void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
- pgprot_t prot, struct page **pages,
- unsigned int page_shift)
+int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
+ pgprot_t prot, struct page **pages,
+ unsigned int page_shift)
{
unsigned long shadow_start, origin_start, shadow_end, origin_end;
struct page **s_pages, **o_pages;
- int nr, mapped;
+ int nr, mapped, err = 0;
if (!kmsan_enabled)
- return;
+ return 0;
shadow_start = vmalloc_meta((void *)start, KMSAN_META_SHADOW);
shadow_end = vmalloc_meta((void *)end, KMSAN_META_SHADOW);
if (!shadow_start)
- return;
+ return 0;
nr = (end - start) / PAGE_SIZE;
s_pages = kcalloc(nr, sizeof(*s_pages), GFP_KERNEL);
o_pages = kcalloc(nr, sizeof(*o_pages), GFP_KERNEL);
- if (!s_pages || !o_pages)
+ if (!s_pages || !o_pages) {
+ err = -ENOMEM;
goto ret;
+ }
for (int i = 0; i < nr; i++) {
s_pages[i] = shadow_page_for(pages[i]);
o_pages[i] = origin_page_for(pages[i]);
@@ -249,10 +251,16 @@ void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
kmsan_enter_runtime();
mapped = __vmap_pages_range_noflush(shadow_start, shadow_end, prot,
s_pages, page_shift);
- KMSAN_WARN_ON(mapped);
+ if (mapped) {
+ err = mapped;
+ goto ret;
+ }
mapped = __vmap_pages_range_noflush(origin_start, origin_end, prot,
o_pages, page_shift);
- KMSAN_WARN_ON(mapped);
+ if (mapped) {
+ err = mapped;
+ goto ret;
+ }
kmsan_leave_runtime();
flush_tlb_kernel_range(shadow_start, shadow_end);
flush_tlb_kernel_range(origin_start, origin_end);
@@ -262,6 +270,7 @@ void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
ret:
kfree(s_pages);
kfree(o_pages);
+ return err;
}
/* Allocate metadata for pages allocated at boot time. */
diff --git a/mm/maccess.c b/mm/maccess.c
index 074f6b086671..518a25667323 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -5,6 +5,7 @@
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
+#include <asm/tlb.h>
bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src,
size_t size)
@@ -113,11 +114,16 @@ Efault:
long copy_from_user_nofault(void *dst, const void __user *src, size_t size)
{
long ret = -EFAULT;
- if (access_ok(src, size)) {
- pagefault_disable();
- ret = __copy_from_user_inatomic(dst, src, size);
- pagefault_enable();
- }
+
+ if (!__access_ok(src, size))
+ return ret;
+
+ if (!nmi_uaccess_okay())
+ return ret;
+
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(dst, src, size);
+ pagefault_enable();
if (ret)
return -EFAULT;
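
copy_from_user_nofault() can be reached from NMI context (perf, BPF probes), where the current mm's user page tables may not be live; the added nmi_uaccess_okay() check refuses the access rather than reading through a stale mm. Callers already have to treat the copy as best-effort, for example:

	/* illustrative caller: best-effort user read from a probe */
	static u64 probe_read_user_u64(const void __user *uaddr)
	{
		u64 val = 0;

		/* may legitimately fail in NMI context; 0 means "unavailable" */
		if (copy_from_user_nofault(&val, uaddr, sizeof(val)))
			return 0;
		return val;
	}
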
diff --git a/mm/memory.c b/mm/memory.c
index f456f3b5049c..01a23ad48a04 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3563,8 +3563,21 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
struct mmu_notifier_range range;
- if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags))
+ /*
+ * We need a reference to lock the folio because we don't hold
+ * the PTL so a racing thread can remove the device-exclusive
+ * entry and unmap it. If the folio is free the entry must
+ * have been removed already. If it happens to have already
+ * been re-allocated after being freed all we do is lock and
+ * unlock it.
+ */
+ if (!folio_try_get(folio))
+ return 0;
+
+ if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) {
+ folio_put(folio);
return VM_FAULT_RETRY;
+ }
mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0,
vma->vm_mm, vmf->address & PAGE_MASK,
(vmf->address & PAGE_MASK) + PAGE_SIZE, NULL);
@@ -3577,6 +3590,7 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
folio_unlock(folio);
+ folio_put(folio);
mmu_notifier_invalidate_range_end(&range);
return 0;
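
Without the PTL held, nothing pins the folio here, so taking the folio lock directly could race with the folio being freed; folio_try_get() fails harmlessly in that case, and every exit after it must drop the reference it took. The complete take/lock/put pattern from this hunk:

	if (!folio_try_get(folio))		/* already being freed */
		return 0;

	if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) {
		folio_put(folio);		/* drop the ref we just took */
		return VM_FAULT_RETRY;
	}

	/* ... operate on the locked folio ... */

	folio_unlock(folio);
	folio_put(folio);
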
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a256a241fd1d..2068b594dc88 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -790,61 +790,50 @@ static int vma_replace_policy(struct vm_area_struct *vma,
return err;
}
-/* Step 2: apply policy to a range and do splits. */
-static int mbind_range(struct mm_struct *mm, unsigned long start,
- unsigned long end, struct mempolicy *new_pol)
+/* Split or merge the VMA (if required) and apply the new policy */
+static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ struct vm_area_struct **prev, unsigned long start,
+ unsigned long end, struct mempolicy *new_pol)
{
- VMA_ITERATOR(vmi, mm, start);
- struct vm_area_struct *prev;
- struct vm_area_struct *vma;
- int err = 0;
+ struct vm_area_struct *merged;
+ unsigned long vmstart, vmend;
pgoff_t pgoff;
+ int err;
- prev = vma_prev(&vmi);
- vma = vma_find(&vmi, end);
- if (WARN_ON(!vma))
+ vmend = min(end, vma->vm_end);
+ if (start > vma->vm_start) {
+ *prev = vma;
+ vmstart = start;
+ } else {
+ vmstart = vma->vm_start;
+ }
+
+ if (mpol_equal(vma_policy(vma), new_pol))
return 0;
- if (start > vma->vm_start)
- prev = vma;
-
- do {
- unsigned long vmstart = max(start, vma->vm_start);
- unsigned long vmend = min(end, vma->vm_end);
-
- if (mpol_equal(vma_policy(vma), new_pol))
- goto next;
-
- pgoff = vma->vm_pgoff +
- ((vmstart - vma->vm_start) >> PAGE_SHIFT);
- prev = vma_merge(&vmi, mm, prev, vmstart, vmend, vma->vm_flags,
- vma->anon_vma, vma->vm_file, pgoff,
- new_pol, vma->vm_userfaultfd_ctx,
- anon_vma_name(vma));
- if (prev) {
- vma = prev;
- goto replace;
- }
- if (vma->vm_start != vmstart) {
- err = split_vma(&vmi, vma, vmstart, 1);
- if (err)
- goto out;
- }
- if (vma->vm_end != vmend) {
- err = split_vma(&vmi, vma, vmend, 0);
- if (err)
- goto out;
- }
-replace:
- err = vma_replace_policy(vma, new_pol);
+ pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT);
+ merged = vma_merge(vmi, vma->vm_mm, *prev, vmstart, vmend, vma->vm_flags,
+ vma->anon_vma, vma->vm_file, pgoff, new_pol,
+ vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+ if (merged) {
+ *prev = merged;
+ return vma_replace_policy(merged, new_pol);
+ }
+
+ if (vma->vm_start != vmstart) {
+ err = split_vma(vmi, vma, vmstart, 1);
if (err)
- goto out;
-next:
- prev = vma;
- } for_each_vma_range(vmi, vma, end);
+ return err;
+ }
-out:
- return err;
+ if (vma->vm_end != vmend) {
+ err = split_vma(vmi, vma, vmend, 0);
+ if (err)
+ return err;
+ }
+
+ *prev = vma;
+ return vma_replace_policy(vma, new_pol);
}
/* Set the process memory policy */
@@ -1259,6 +1248,8 @@ static long do_mbind(unsigned long start, unsigned long len,
nodemask_t *nmask, unsigned long flags)
{
struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma, *prev;
+ struct vma_iterator vmi;
struct mempolicy *new;
unsigned long end;
int err;
@@ -1328,7 +1319,13 @@ static long do_mbind(unsigned long start, unsigned long len,
goto up_out;
}
- err = mbind_range(mm, start, end, new);
+ vma_iter_init(&vmi, mm, start);
+ prev = vma_prev(&vmi);
+ for_each_vma_range(vmi, vma, end) {
+ err = mbind_range(&vmi, vma, &prev, start, end, new);
+ if (err)
+ break;
+ }
if (!err) {
int nr_failed = 0;
@@ -1489,10 +1486,8 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
unsigned long, home_node, unsigned long, flags)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev;
struct mempolicy *new, *old;
- unsigned long vmstart;
- unsigned long vmend;
unsigned long end;
int err = -ENOENT;
VMA_ITERATOR(vmi, mm, start);
@@ -1521,6 +1516,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
if (end == start)
return 0;
mmap_write_lock(mm);
+ prev = vma_prev(&vmi);
for_each_vma_range(vmi, vma, end) {
/*
* If any vma in the range got policy other than MPOL_BIND
@@ -1541,9 +1537,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
}
new->home_node = home_node;
- vmstart = max(start, vma->vm_start);
- vmend = min(end, vma->vm_end);
- err = mbind_range(mm, vmstart, vmend, new);
+ err = mbind_range(&vmi, vma, &prev, start, end, new);
mpol_put(new);
if (err)
break;
diff --git a/mm/mmap.c b/mm/mmap.c
index ad499f7b767f..d5475fbf5729 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1518,7 +1518,8 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
*/
static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
- unsigned long length, gap;
+ unsigned long length, gap, low_limit;
+ struct vm_area_struct *tmp;
MA_STATE(mas, &current->mm->mm_mt, 0, 0);
@@ -1527,12 +1528,29 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
if (length < info->length)
return -ENOMEM;
- if (mas_empty_area(&mas, info->low_limit, info->high_limit - 1,
- length))
+ low_limit = info->low_limit;
+retry:
+ if (mas_empty_area(&mas, low_limit, info->high_limit - 1, length))
return -ENOMEM;
gap = mas.index;
gap += (info->align_offset - gap) & info->align_mask;
+ tmp = mas_next(&mas, ULONG_MAX);
+ if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+ if (vm_start_gap(tmp) < gap + length - 1) {
+ low_limit = tmp->vm_end;
+ mas_reset(&mas);
+ goto retry;
+ }
+ } else {
+ tmp = mas_prev(&mas, 0);
+ if (tmp && vm_end_gap(tmp) > gap) {
+ low_limit = vm_end_gap(tmp);
+ mas_reset(&mas);
+ goto retry;
+ }
+ }
+
return gap;
}
@@ -1548,7 +1566,8 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
*/
static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
{
- unsigned long length, gap;
+ unsigned long length, gap, high_limit, gap_end;
+ struct vm_area_struct *tmp;
MA_STATE(mas, &current->mm->mm_mt, 0, 0);
/* Adjust search length to account for worst case alignment overhead */
@@ -1556,12 +1575,31 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
if (length < info->length)
return -ENOMEM;
- if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1,
+ high_limit = info->high_limit;
+retry:
+ if (mas_empty_area_rev(&mas, info->low_limit, high_limit - 1,
length))
return -ENOMEM;
gap = mas.last + 1 - info->length;
gap -= (gap - info->align_offset) & info->align_mask;
+ gap_end = mas.last;
+ tmp = mas_next(&mas, ULONG_MAX);
+ if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+ if (vm_start_gap(tmp) <= gap_end) {
+ high_limit = vm_start_gap(tmp);
+ mas_reset(&mas);
+ goto retry;
+ }
+ } else {
+ tmp = mas_prev(&mas, 0);
+ if (tmp && vm_end_gap(tmp) > gap) {
+ high_limit = tmp->vm_start;
+ mas_reset(&mas);
+ goto retry;
+ }
+ }
+
return gap;
}
@@ -2277,7 +2315,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
int count = 0;
int error = -ENOMEM;
MA_STATE(mas_detach, &mt_detach, 0, 0);
- mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN);
+ mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
mt_set_external_lock(&mt_detach, &mm->mmap_lock);
/*
@@ -3037,6 +3075,7 @@ void exit_mmap(struct mm_struct *mm)
*/
set_bit(MMF_OOM_SKIP, &mm->flags);
mmap_write_lock(mm);
+ mt_clear_in_rcu(&mm->mm_mt);
free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS,
USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb);
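
Both search directions in mm/mmap.c now validate the gap the maple tree hands back against neighbouring VMAs: a following VM_GROWSDOWN stack needs its guard gap (vm_start_gap()), and a preceding mapping its end gap (vm_end_gap()); on collision the limit is tightened and the search retried. A simplified forward-search skeleton (the real code also re-checks the previous VMA):

	retry:
		if (mas_empty_area(&mas, low_limit, info->high_limit - 1, length))
			return -ENOMEM;
		gap = mas.index;
		gap += (info->align_offset - gap) & info->align_mask;
		tmp = mas_next(&mas, ULONG_MAX);
		if (tmp && (tmp->vm_flags & VM_GROWSDOWN) &&
		    vm_start_gap(tmp) < gap + length - 1) {
			low_limit = tmp->vm_end;	/* skip past the stack's guard */
			mas_reset(&mas);
			goto retry;
		}
		return gap;
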
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 13e84d8c0797..36351a00c0e8 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -838,7 +838,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
}
tlb_finish_mmu(&tlb);
- if (vma_iter_end(&vmi) < end)
+ if (!error && vma_iter_end(&vmi) < end)
error = -ENOMEM;
out:
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7136c36c5d01..8e39705c7bdc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6632,7 +6632,21 @@ static void __build_all_zonelists(void *data)
int nid;
int __maybe_unused cpu;
pg_data_t *self = data;
+ unsigned long flags;
+ /*
+ * Explicitly disable this CPU's interrupts before taking seqlock
+ * to prevent any IRQ handler from calling into the page allocator
+ * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock.
+ */
+ local_irq_save(flags);
+ /*
+ * Explicitly disable this CPU's synchronous printk() before taking
+ * seqlock to prevent any printk() from trying to hold port->lock, for
+ * tty_insert_flip_string_and_push_buffer() on other CPU might be
+ * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held.
+ */
+ printk_deferred_enter();
write_seqlock(&zonelist_update_seq);
#ifdef CONFIG_NUMA
@@ -6671,6 +6685,8 @@ static void __build_all_zonelists(void *data)
}
write_sequnlock(&zonelist_update_seq);
+ printk_deferred_exit();
+ local_irq_restore(flags);
}
static noinline void __init
@@ -9450,6 +9466,9 @@ static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
if (PageReserved(page))
return false;
+
+ if (PageHuge(page))
+ return false;
}
return true;
}
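
The zonelist seqlock writer must exclude two deadlock sources for the whole write section: an IRQ on this CPU performing a GFP_ATOMIC allocation, which would spin in zonelist_iter_begin() against the held seqlock, and synchronous printk(), which may take port->lock while another CPU holds it around a GFP_ATOMIC kmalloc(). Hence the bracketing:

	local_irq_save(flags);		/* no IRQ can enter the allocator here */
	printk_deferred_enter();	/* no synchronous console output */
	write_seqlock(&zonelist_update_seq);

	/* ... rebuild the zonelists ... */

	write_sequnlock(&zonelist_update_seq);
	printk_deferred_exit();
	local_irq_restore(flags);
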
diff --git a/mm/swap.c b/mm/swap.c
index 57cb01b042f6..423199ee8478 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -222,7 +222,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
if (lruvec)
unlock_page_lruvec_irqrestore(lruvec, flags);
folios_put(fbatch->folios, folio_batch_count(fbatch));
- folio_batch_init(fbatch);
+ folio_batch_reinit(fbatch);
}
static void folio_batch_add_and_move(struct folio_batch *fbatch,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 62ba2bf577d7..2c718f45745f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -679,6 +679,7 @@ static void __del_from_avail_list(struct swap_info_struct *p)
{
int nid;
+ assert_spin_locked(&p->lock);
for_each_node(nid)
plist_del(&p->avail_lists[nid], &swap_avail_heads[nid]);
}
@@ -2434,8 +2435,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
spin_unlock(&swap_lock);
goto out_dput;
}
- del_from_avail_list(p);
spin_lock(&p->lock);
+ del_from_avail_list(p);
if (p->prio < 0) {
struct swap_info_struct *si = p;
int nid;
diff --git a/mm/usercopy.c b/mm/usercopy.c
index 4c3164beacec..83c164aba6e0 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -173,7 +173,7 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
return;
}
- if (is_vmalloc_addr(ptr)) {
+ if (is_vmalloc_addr(ptr) && !pagefault_disabled()) {
struct vmap_area *area = find_vmap_area(addr);
if (!area)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index bef6cf2b4d46..31ff782d368b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -313,8 +313,8 @@ int ioremap_page_range(unsigned long addr, unsigned long end,
ioremap_max_page_shift);
flush_cache_vmap(addr, end);
if (!err)
- kmsan_ioremap_page_range(addr, end, phys_addr, prot,
- ioremap_max_page_shift);
+ err = kmsan_ioremap_page_range(addr, end, phys_addr, prot,
+ ioremap_max_page_shift);
return err;
}
@@ -605,7 +605,11 @@ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
pgprot_t prot, struct page **pages, unsigned int page_shift)
{
- kmsan_vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
+ int ret = kmsan_vmap_pages_range_noflush(addr, end, prot, pages,
+ page_shift);
+
+ if (ret)
+ return ret;
return __vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
}
@@ -3042,9 +3046,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
* allocation request, free them via vfree() if any.
*/
if (area->nr_pages != nr_small_pages) {
- warn_alloc(gfp_mask, NULL,
- "vmalloc error: size %lu, page order %u, failed to allocate pages",
- area->nr_pages * PAGE_SIZE, page_order);
+ /* vm_area_alloc_pages() can also fail due to a fatal signal */
+ if (!fatal_signal_pending(current))
+ warn_alloc(gfp_mask, NULL,
+ "vmalloc error: size %lu, page order %u, failed to allocate pages",
+ area->nr_pages * PAGE_SIZE, page_order);
goto fail;
}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 5920544e93e8..870e4935d6e6 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -26,6 +26,7 @@
#include <linux/ethtool.h>
#include <linux/phy.h>
#include <net/arp.h>
+#include <net/macsec.h>
#include "vlan.h"
#include "vlanproc.h"
@@ -572,6 +573,9 @@ static int vlan_dev_init(struct net_device *dev)
NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
NETIF_F_ALL_FCOE;
+ if (real_dev->vlan_features & NETIF_F_HW_MACSEC)
+ dev->hw_features |= NETIF_F_HW_MACSEC;
+
dev->features |= dev->hw_features | NETIF_F_LLTX;
netif_inherit_tso_max(dev, real_dev);
if (dev->features & NETIF_F_VLAN_FEATURES)
@@ -803,6 +807,241 @@ static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
return 0;
}
+#if IS_ENABLED(CONFIG_MACSEC)
+
+static const struct macsec_ops *vlan_get_macsec_ops(const struct macsec_context *ctx)
+{
+ return vlan_dev_priv(ctx->netdev)->real_dev->macsec_ops;
+}
+
+static int vlan_macsec_offload(int (* const func)(struct macsec_context *),
+ struct macsec_context *ctx)
+{
+ if (unlikely(!func))
+ return 0;
+
+ return (*func)(ctx);
+}
+
+static int vlan_macsec_dev_open(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_dev_open, ctx);
+}
+
+static int vlan_macsec_dev_stop(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_dev_stop, ctx);
+}
+
+static int vlan_macsec_add_secy(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_add_secy, ctx);
+}
+
+static int vlan_macsec_upd_secy(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_upd_secy, ctx);
+}
+
+static int vlan_macsec_del_secy(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_del_secy, ctx);
+}
+
+static int vlan_macsec_add_rxsc(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_add_rxsc, ctx);
+}
+
+static int vlan_macsec_upd_rxsc(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_upd_rxsc, ctx);
+}
+
+static int vlan_macsec_del_rxsc(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_del_rxsc, ctx);
+}
+
+static int vlan_macsec_add_rxsa(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_add_rxsa, ctx);
+}
+
+static int vlan_macsec_upd_rxsa(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_upd_rxsa, ctx);
+}
+
+static int vlan_macsec_del_rxsa(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_del_rxsa, ctx);
+}
+
+static int vlan_macsec_add_txsa(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_add_txsa, ctx);
+}
+
+static int vlan_macsec_upd_txsa(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_upd_txsa, ctx);
+}
+
+static int vlan_macsec_del_txsa(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_del_txsa, ctx);
+}
+
+static int vlan_macsec_get_dev_stats(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_get_dev_stats, ctx);
+}
+
+static int vlan_macsec_get_tx_sc_stats(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_get_tx_sc_stats, ctx);
+}
+
+static int vlan_macsec_get_tx_sa_stats(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_get_tx_sa_stats, ctx);
+}
+
+static int vlan_macsec_get_rx_sc_stats(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_get_rx_sc_stats, ctx);
+}
+
+static int vlan_macsec_get_rx_sa_stats(struct macsec_context *ctx)
+{
+ const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ return vlan_macsec_offload(ops->mdo_get_rx_sa_stats, ctx);
+}
+
+static const struct macsec_ops macsec_offload_ops = {
+ /* Device wide */
+ .mdo_dev_open = vlan_macsec_dev_open,
+ .mdo_dev_stop = vlan_macsec_dev_stop,
+ /* SecY */
+ .mdo_add_secy = vlan_macsec_add_secy,
+ .mdo_upd_secy = vlan_macsec_upd_secy,
+ .mdo_del_secy = vlan_macsec_del_secy,
+ /* Security channels */
+ .mdo_add_rxsc = vlan_macsec_add_rxsc,
+ .mdo_upd_rxsc = vlan_macsec_upd_rxsc,
+ .mdo_del_rxsc = vlan_macsec_del_rxsc,
+ /* Security associations */
+ .mdo_add_rxsa = vlan_macsec_add_rxsa,
+ .mdo_upd_rxsa = vlan_macsec_upd_rxsa,
+ .mdo_del_rxsa = vlan_macsec_del_rxsa,
+ .mdo_add_txsa = vlan_macsec_add_txsa,
+ .mdo_upd_txsa = vlan_macsec_upd_txsa,
+ .mdo_del_txsa = vlan_macsec_del_txsa,
+ /* Statistics */
+ .mdo_get_dev_stats = vlan_macsec_get_dev_stats,
+ .mdo_get_tx_sc_stats = vlan_macsec_get_tx_sc_stats,
+ .mdo_get_tx_sa_stats = vlan_macsec_get_tx_sa_stats,
+ .mdo_get_rx_sc_stats = vlan_macsec_get_rx_sc_stats,
+ .mdo_get_rx_sa_stats = vlan_macsec_get_rx_sa_stats,
+};
+
+#endif
+
static const struct ethtool_ops vlan_ethtool_ops = {
.get_link_ksettings = vlan_ethtool_get_link_ksettings,
.get_drvinfo = vlan_ethtool_get_drvinfo,
@@ -869,6 +1108,9 @@ void vlan_setup(struct net_device *dev)
dev->priv_destructor = vlan_dev_free;
dev->ethtool_ops = &vlan_ethtool_ops;
+#if IS_ENABLED(CONFIG_MACSEC)
+ dev->macsec_ops = &macsec_offload_ops;
+#endif
dev->min_mtu = 0;
dev->max_mtu = ETH_MAX_MTU;
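
Every vlan_macsec_* handler above is the same two steps: look up the real device's macsec_ops, then forward the call, returning -EOPNOTSUPP when the lower device has no offload at all and 0 (via vlan_macsec_offload()) when the ops table exists but lacks that particular hook. The whole family could be generated from one macro; a hypothetical sketch, not the code as merged:

	#define VLAN_MACSEC_FWD(name)						\
	static int vlan_macsec_##name(struct macsec_context *ctx)		\
	{									\
		const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);	\
										\
		if (!ops)							\
			return -EOPNOTSUPP;					\
		return vlan_macsec_offload(ops->mdo_##name, ctx);		\
	}

	VLAN_MACSEC_FWD(dev_open)	/* expands to vlan_macsec_dev_open() */
	VLAN_MACSEC_FWD(add_secy)
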
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index c64050e839ac..1fffe2bed5b0 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -280,6 +280,10 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
write_unlock(&xen_9pfs_lock);
for (i = 0; i < priv->num_rings; i++) {
+ struct xen_9pfs_dataring *ring = &priv->rings[i];
+
+ cancel_work_sync(&ring->work);
+
if (!priv->rings[i].intf)
break;
if (priv->rings[i].irq > 0)
diff --git a/net/Kconfig b/net/Kconfig
index f806722bccf4..7d39c1773eb4 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -68,6 +68,26 @@ source "net/iucv/Kconfig"
source "net/smc/Kconfig"
source "net/xdp/Kconfig"
+config NET_HANDSHAKE
+ bool
+ depends on SUNRPC || NVME_TARGET_TCP || NVME_TCP
+ default y
+
+config NET_HANDSHAKE_KUNIT_TEST
+ tristate "KUnit tests for the handshake upcall mechanism" if !KUNIT_ALL_TESTS
+ default KUNIT_ALL_TESTS
+ depends on KUNIT
+ help
+ This builds the KUnit tests for the handshake upcall mechanism.
+
+ KUnit tests run during boot and output the results to the debug
+ log in TAP format (https://testanything.org/). This is only useful
+ for kernel developers running the KUnit test harness; it is not
+ intended for inclusion in a production build.
+
+ For more information on KUnit and unit tests in general, refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
config INET
bool "TCP/IP networking"
help
diff --git a/net/Makefile b/net/Makefile
index 0914bea9c335..4c4dc535453d 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -24,7 +24,7 @@ obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/
obj-$(CONFIG_NET_DEVLINK) += devlink/
-obj-$(CONFIG_NET_DSA) += dsa/
+obj-y += dsa/
obj-$(CONFIG_ATALK) += appletalk/
obj-$(CONFIG_X25) += x25/
obj-$(CONFIG_LAPB) += lapb/
@@ -79,3 +79,4 @@ obj-$(CONFIG_NET_NCSI) += ncsi/
obj-$(CONFIG_XDP_SOCKETS) += xdp/
obj-$(CONFIG_MPTCP) += mptcp/
obj-$(CONFIG_MCTP) += mctp/
+obj-$(CONFIG_NET_HANDSHAKE) += handshake/
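The new NET_HANDSHAKE_KUNIT_TEST option above follows the usual KUnit
conventions, so the tests can also be run under the KUnit wrapper. A
minimal sketch of a .kunitconfig fragment (hedged: the option chosen to
satisfy NET_HANDSHAKE's dependency, and whatever that option itself
requires, varies by tree):

    CONFIG_KUNIT=y
    CONFIG_NET=y
    CONFIG_INET=y
    # One of NET_HANDSHAKE's dependencies; NVME_TCP is used here only
    # as an illustration and pulls in its own requirements.
    CONFIG_NVME_TCP=y
    CONFIG_NET_HANDSHAKE=y
    CONFIG_NET_HANDSHAKE_KUNIT_TEST=y

Saved as a .kunitconfig, this can be passed to
tools/testing/kunit/kunit.py run via --kunitconfig.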
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 17b946f9ba31..8455ba141ee6 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -68,7 +68,7 @@ static const struct sco_param esco_param_msbc[] = {
};
/* This function requires the caller holds hdev->lock */
-static void hci_connect_le_scan_cleanup(struct hci_conn *conn)
+static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
{
struct hci_conn_params *params;
struct hci_dev *hdev = conn->hdev;
@@ -88,9 +88,28 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn)
params = hci_pend_le_action_lookup(&hdev->pend_le_conns, bdaddr,
bdaddr_type);
- if (!params || !params->explicit_connect)
+ if (!params)
return;
+ if (params->conn) {
+ hci_conn_drop(params->conn);
+ hci_conn_put(params->conn);
+ params->conn = NULL;
+ }
+
+ if (!params->explicit_connect)
+ return;
+
+ /* If the status indicates successful cancellation of
+ * the attempt (i.e. Unknown Connection Id) there's no point in
+ * notifying failure, since we'll go back to trying to
+ * connect. The only exception is explicit connect requests,
+ * where a timeout + cancel does indicate an actual failure.
+ */
+ if (status && status != HCI_ERROR_UNKNOWN_CONN_ID)
+ mgmt_connect_failed(hdev, &conn->dst, conn->type,
+ conn->dst_type, status);
+
/* The connection attempt was doing scan for new RPA, and is
* in scan phase. If params are not associated with any other
* autoconnect action, remove them completely. If they are, just unmark
@@ -178,7 +197,7 @@ static void le_scan_cleanup(struct work_struct *work)
rcu_read_unlock();
if (c == conn) {
- hci_connect_le_scan_cleanup(conn);
+ hci_connect_le_scan_cleanup(conn, 0x00);
hci_conn_cleanup(conn);
}
@@ -1049,6 +1068,17 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
return conn;
}
+static bool hci_conn_unlink(struct hci_conn *conn)
+{
+ if (!conn->link)
+ return false;
+
+ conn->link->link = NULL;
+ conn->link = NULL;
+
+ return true;
+}
+
int hci_conn_del(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
@@ -1060,15 +1090,16 @@ int hci_conn_del(struct hci_conn *conn)
cancel_delayed_work_sync(&conn->idle_work);
if (conn->type == ACL_LINK) {
- struct hci_conn *sco = conn->link;
- if (sco) {
- sco->link = NULL;
+ struct hci_conn *link = conn->link;
+
+ if (link) {
+ hci_conn_unlink(conn);
/* Due to race, SCO connection might be not established
* yet at this point. Delete it now, otherwise it is
* possible for it to be stuck and can't be deleted.
*/
- if (sco->handle == HCI_CONN_HANDLE_UNSET)
- hci_conn_del(sco);
+ if (link->handle == HCI_CONN_HANDLE_UNSET)
+ hci_conn_del(link);
}
/* Unacked frames */
@@ -1084,7 +1115,7 @@ int hci_conn_del(struct hci_conn *conn)
struct hci_conn *acl = conn->link;
if (acl) {
- acl->link = NULL;
+ hci_conn_unlink(conn);
hci_conn_drop(acl);
}
@@ -1179,31 +1210,8 @@ EXPORT_SYMBOL(hci_get_route);
static void hci_le_conn_failed(struct hci_conn *conn, u8 status)
{
struct hci_dev *hdev = conn->hdev;
- struct hci_conn_params *params;
- params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
- conn->dst_type);
- if (params && params->conn) {
- hci_conn_drop(params->conn);
- hci_conn_put(params->conn);
- params->conn = NULL;
- }
-
- /* If the status indicates successful cancellation of
- * the attempt (i.e. Unknown Connection Id) there's no point of
- * notifying failure since we'll go back to keep trying to
- * connect. The only exception is explicit connect requests
- * where a timeout + cancel does indicate an actual failure.
- */
- if (status != HCI_ERROR_UNKNOWN_CONN_ID ||
- (params && params->explicit_connect))
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, status);
-
- /* Since we may have temporarily stopped the background scanning in
- * favor of connection establishment, we should restart it.
- */
- hci_update_passive_scan(hdev);
+ hci_connect_le_scan_cleanup(conn, status);
/* Enable advertising in case this was a failed connection
* attempt as a peripheral.
@@ -1237,15 +1245,15 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
{
struct hci_conn *conn = data;
+ bt_dev_dbg(hdev, "err %d", err);
+
hci_dev_lock(hdev);
if (!err) {
- hci_connect_le_scan_cleanup(conn);
+ hci_connect_le_scan_cleanup(conn, 0x00);
goto done;
}
- bt_dev_err(hdev, "request failed to create LE connection: err %d", err);
-
/* Check if connection is still pending */
if (conn != hci_lookup_le_connect(hdev))
goto done;
@@ -2438,6 +2446,12 @@ void hci_conn_hash_flush(struct hci_dev *hdev)
c->state = BT_CLOSED;
hci_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM);
+
+ /* Unlink before deleting, otherwise it is possible that
+ * hci_conn_del removes the link, which may leave the list
+ * containing already-freed items.
+ */
+ hci_conn_unlink(c);
hci_conn_del(c);
}
}
@@ -2775,6 +2789,9 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
{
int r = 0;
+ if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
+ return 0;
+
switch (conn->state) {
case BT_CONNECTED:
case BT_CONFIG:
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index ad92a4be5851..e87c928c9e17 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2881,16 +2881,6 @@ static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr,
conn->resp_addr_type = peer_addr_type;
bacpy(&conn->resp_addr, peer_addr);
-
- /* We don't want the connection attempt to stick around
- * indefinitely since LE doesn't have a page timeout concept
- * like BR/EDR. Set a timer for any connection that doesn't use
- * the accept list for connecting.
- */
- if (filter_policy == HCI_LE_USE_PEER_ADDR)
- queue_delayed_work(conn->hdev->workqueue,
- &conn->le_conn_timeout,
- conn->conn_timeout);
}
static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
@@ -5902,6 +5892,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
if (status)
goto unlock;
+ /* Drop the connection if it has been aborted */
+ if (test_bit(HCI_CONN_CANCEL, &conn->flags)) {
+ hci_conn_drop(conn);
+ goto unlock;
+ }
+
if (conn->dst_type == ADDR_LE_DEV_PUBLIC)
addr_type = BDADDR_LE_PUBLIC;
else
@@ -6995,7 +6991,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis->iso_qos.in.latency = le16_to_cpu(ev->interval) * 125 / 100;
bis->iso_qos.in.sdu = le16_to_cpu(ev->max_pdu);
- hci_connect_cfm(bis, ev->status);
+ hci_iso_setup_path(bis);
}
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 5a6aa1627791..632be1267288 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -246,8 +246,9 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
skb = __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, sk);
if (IS_ERR(skb)) {
- bt_dev_err(hdev, "Opcode 0x%4x failed: %ld", opcode,
- PTR_ERR(skb));
+ if (!event)
+ bt_dev_err(hdev, "Opcode 0x%4x failed: %ld", opcode,
+ PTR_ERR(skb));
return PTR_ERR(skb);
}
@@ -5126,8 +5127,11 @@ static int hci_le_connect_cancel_sync(struct hci_dev *hdev,
if (test_bit(HCI_CONN_SCANNING, &conn->flags))
return 0;
+ if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
+ return 0;
+
return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL,
- 6, &conn->dst, HCI_CMD_TIMEOUT);
+ 0, NULL, HCI_CMD_TIMEOUT);
}
static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn)
@@ -6102,6 +6106,9 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
conn->conn_timeout, NULL);
done:
+ if (err == -ETIMEDOUT)
+ hci_le_connect_cancel_sync(hdev, conn);
+
/* Re-enable advertising after the connection attempt is finished. */
hci_resume_advertising_sync(hdev);
return err;
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index bed1a7b9205c..707f229f896a 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -433,7 +433,7 @@ static void hidp_set_timer(struct hidp_session *session)
static void hidp_del_timer(struct hidp_session *session)
{
if (session->idle_to > 0)
- del_timer(&session->timer);
+ del_timer_sync(&session->timer);
}
static void hidp_process_report(struct hidp_session *session, int type,
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 49926f59cc12..55a7226233f9 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4652,33 +4652,27 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid);
- mutex_lock(&conn->chan_lock);
-
- chan = __l2cap_get_chan_by_scid(conn, dcid);
+ chan = l2cap_get_chan_by_scid(conn, dcid);
if (!chan) {
- mutex_unlock(&conn->chan_lock);
cmd_reject_invalid_cid(conn, cmd->ident, dcid, scid);
return 0;
}
- l2cap_chan_hold(chan);
- l2cap_chan_lock(chan);
-
rsp.dcid = cpu_to_le16(chan->scid);
rsp.scid = cpu_to_le16(chan->dcid);
l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp);
chan->ops->set_shutdown(chan);
+ mutex_lock(&conn->chan_lock);
l2cap_chan_del(chan, ECONNRESET);
+ mutex_unlock(&conn->chan_lock);
chan->ops->close(chan);
l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
- mutex_unlock(&conn->chan_lock);
-
return 0;
}
@@ -4698,33 +4692,27 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn,
BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid);
- mutex_lock(&conn->chan_lock);
-
- chan = __l2cap_get_chan_by_scid(conn, scid);
+ chan = l2cap_get_chan_by_scid(conn, scid);
if (!chan) {
- mutex_unlock(&conn->chan_lock);
return 0;
}
- l2cap_chan_hold(chan);
- l2cap_chan_lock(chan);
-
if (chan->state != BT_DISCONN) {
l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
- mutex_unlock(&conn->chan_lock);
return 0;
}
+ mutex_lock(&conn->chan_lock);
l2cap_chan_del(chan, 0);
+ mutex_unlock(&conn->chan_lock);
chan->ops->close(chan);
l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
- mutex_unlock(&conn->chan_lock);
-
return 0;
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 1111da4e2f2b..cd1a27ac555d 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -235,27 +235,41 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
return err;
}
-static int sco_connect(struct hci_dev *hdev, struct sock *sk)
+static int sco_connect(struct sock *sk)
{
struct sco_conn *conn;
struct hci_conn *hcon;
+ struct hci_dev *hdev;
int err, type;
BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst);
+ hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR);
+ if (!hdev)
+ return -EHOSTUNREACH;
+
+ hci_dev_lock(hdev);
+
if (lmp_esco_capable(hdev) && !disable_esco)
type = ESCO_LINK;
else
type = SCO_LINK;
if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT &&
- (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev)))
- return -EOPNOTSUPP;
+ (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst,
sco_pi(sk)->setting, &sco_pi(sk)->codec);
- if (IS_ERR(hcon))
- return PTR_ERR(hcon);
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto unlock;
+ }
+
+ hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
conn = sco_conn_add(hcon);
if (!conn) {
@@ -263,13 +277,15 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk)
return -ENOMEM;
}
- /* Update source addr of the socket */
- bacpy(&sco_pi(sk)->src, &hcon->src);
-
err = sco_chan_add(conn, sk, NULL);
if (err)
return err;
+ lock_sock(sk);
+
+ /* Update source addr of the socket */
+ bacpy(&sco_pi(sk)->src, &hcon->src);
+
if (hcon->state == BT_CONNECTED) {
sco_sock_clear_timer(sk);
sk->sk_state = BT_CONNECTED;
@@ -278,6 +294,13 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk)
sco_sock_set_timer(sk, sk->sk_sndtimeo);
}
+ release_sock(sk);
+
+ return err;
+
+unlock:
+ hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
return err;
}
@@ -565,7 +588,6 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
- struct hci_dev *hdev;
int err;
BT_DBG("sk %p", sk);
@@ -574,37 +596,26 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
- lock_sock(sk);
- if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
- err = -EBADFD;
- goto done;
- }
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
+ return -EBADFD;
- if (sk->sk_type != SOCK_SEQPACKET) {
- err = -EINVAL;
- goto done;
- }
+ if (sk->sk_type != SOCK_SEQPACKET)
+ return -EINVAL;
-
- hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
- if (!hdev) {
- err = -EHOSTUNREACH;
- goto done;
- }
- hci_dev_lock(hdev);
+ lock_sock(sk);
/* Set destination address and psm */
bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
+ release_sock(sk);
- err = sco_connect(hdev, sk);
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
+ err = sco_connect(sk);
if (err)
- goto done;
+ return err;
+
+ lock_sock(sk);
err = bt_sock_wait_state(sk, BT_CONNECTED,
sock_sndtimeo(sk, flags & O_NONBLOCK));
-done:
release_sock(sk);
return err;
}
@@ -1129,6 +1140,8 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
break;
}
+ release_sock(sk);
+
/* find total buffer size required to copy codec + caps */
hci_dev_lock(hdev);
list_for_each_entry(c, &hdev->local_codecs, list) {
@@ -1146,15 +1159,13 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
buf_len += sizeof(struct bt_codecs);
if (buf_len > len) {
hci_dev_put(hdev);
- err = -ENOBUFS;
- break;
+ return -ENOBUFS;
}
ptr = optval;
if (put_user(num_codecs, ptr)) {
hci_dev_put(hdev);
- err = -EFAULT;
- break;
+ return -EFAULT;
}
ptr += sizeof(num_codecs);
@@ -1194,12 +1205,14 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
ptr += len;
}
- if (!err && put_user(buf_len, optlen))
- err = -EFAULT;
-
hci_dev_unlock(hdev);
hci_dev_put(hdev);
+ lock_sock(sk);
+
+ if (!err && put_user(buf_len, optlen))
+ err = -EFAULT;
+
break;
default:
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index ff4f89a2b02a..5918d1b32e19 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -173,14 +173,11 @@ static int bpf_dummy_ops_check_member(const struct btf_type *t,
static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id,
- enum bpf_type_flag *flag)
+ int off, int size)
{
const struct btf_type *state;
const struct btf_type *t;
s32 type_id;
- int err;
type_id = btf_find_by_name_kind(reg->btf, "bpf_dummy_ops_state",
BTF_KIND_STRUCT);
@@ -194,11 +191,12 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
return -EACCES;
}
- err = btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
- if (err < 0)
- return err;
+ if (off + size > sizeof(struct bpf_dummy_ops_state)) {
+ bpf_log(log, "write access at off %d with size %d\n", off, size);
+ return -EACCES;
+ }
- return atype == BPF_READ ? err : NOT_INIT;
+ return NOT_INIT;
}
static const struct bpf_verifier_ops bpf_dummy_verifier_ops = {
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index d350f31c7a3d..0b9bd9b39990 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -215,6 +215,16 @@ static void xdp_test_run_teardown(struct xdp_test_data *xdp)
kfree(xdp->skbs);
}
+static bool frame_was_changed(const struct xdp_page_head *head)
+{
+ /* xdp_scrub_frame() zeroes the data pointer, and flags is the last field,
+ * i.e. it has the highest chance of being overwritten. If those two are
+ * untouched, it's most likely safe to skip the context reset.
+ */
+ return head->frame->data != head->orig_ctx.data ||
+ head->frame->flags != head->orig_ctx.flags;
+}
+
static bool ctx_was_changed(struct xdp_page_head *head)
{
return head->orig_ctx.data != head->ctx.data ||
@@ -224,7 +234,7 @@ static bool ctx_was_changed(struct xdp_page_head *head)
static void reset_ctx(struct xdp_page_head *head)
{
- if (likely(!ctx_was_changed(head)))
+ if (likely(!frame_was_changed(head) && !ctx_was_changed(head)))
return;
head->ctx.data = head->orig_ctx.data;
@@ -538,6 +548,11 @@ int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
return (long)arg->a;
}
+__bpf_kfunc u32 bpf_fentry_test9(u32 *a)
+{
+ return *a;
+}
+
__bpf_kfunc int bpf_modify_return_test(int a, int *b)
{
*b += 1;
@@ -567,6 +582,11 @@ long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d)
return (long)a + (long)b + (long)c + d;
}
+int noinline bpf_fentry_shadow_test(int a)
+{
+ return a + 1;
+}
+
struct prog_test_member1 {
int a;
};
@@ -598,6 +618,11 @@ bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
return &prog_test_struct;
}
+__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p)
+{
+ WARN_ON_ONCE(1);
+}
+
__bpf_kfunc struct prog_test_member *
bpf_kfunc_call_memb_acquire(void)
{
@@ -607,9 +632,6 @@ bpf_kfunc_call_memb_acquire(void)
__bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
{
- if (!p)
- return;
-
refcount_dec(&p->cnt);
}
@@ -795,6 +817,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
BTF_SET8_END(test_sk_check_kfunc_ids)
static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
@@ -844,7 +867,8 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 ||
bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 ||
- bpf_fentry_test8(&arg) != 0)
+ bpf_fentry_test8(&arg) != 0 ||
+ bpf_fentry_test9(&retval) != 0)
goto out;
break;
case BPF_MODIFY_RETURN:
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index b45c00c01dea..c7869a286df4 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -30,7 +30,7 @@ void br_recalculate_neigh_suppress_enabled(struct net_bridge *br)
bool neigh_suppress = false;
list_for_each_entry(p, &br->port_list, list) {
- if (p->flags & BR_NEIGH_SUPPRESS) {
+ if (p->flags & (BR_NEIGH_SUPPRESS | BR_NEIGH_VLAN_SUPPRESS)) {
neigh_suppress = true;
break;
}
@@ -158,7 +158,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
return;
if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
- if (p && (p->flags & BR_NEIGH_SUPPRESS))
+ if (br_is_neigh_suppress_enabled(p, vid))
return;
if (parp->ar_op != htons(ARPOP_RREQUEST) &&
parp->ar_op != htons(ARPOP_RREPLY) &&
@@ -202,8 +202,8 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
bool replied = false;
if ((p && (p->flags & BR_PROXYARP)) ||
- (f->dst && (f->dst->flags & (BR_PROXYARP_WIFI |
- BR_NEIGH_SUPPRESS)))) {
+ (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)) ||
+ br_is_neigh_suppress_enabled(f->dst, vid)) {
if (!vid)
br_arp_send(br, p, skb->dev, sip, tip,
sha, n->ha, sha, 0, 0);
@@ -407,7 +407,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0;
- if (p && (p->flags & BR_NEIGH_SUPPRESS))
+ if (br_is_neigh_suppress_enabled(p, vid))
return;
if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT &&
@@ -461,7 +461,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
if (f) {
bool replied = false;
- if (f->dst && (f->dst->flags & BR_NEIGH_SUPPRESS)) {
+ if (br_is_neigh_suppress_enabled(f->dst, vid)) {
if (vid != 0)
br_nd_send(br, p, skb, n,
skb->vlan_proto,
@@ -483,3 +483,24 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
}
}
#endif
+
+bool br_is_neigh_suppress_enabled(const struct net_bridge_port *p, u16 vid)
+{
+ if (!p)
+ return false;
+
+ if (!vid)
+ return !!(p->flags & BR_NEIGH_SUPPRESS);
+
+ if (p->flags & BR_NEIGH_VLAN_SUPPRESS) {
+ struct net_bridge_vlan_group *vg = nbp_vlan_group_rcu(p);
+ struct net_bridge_vlan *v;
+
+ v = br_vlan_find(vg, vid);
+ if (!v)
+ return false;
+ return !!(v->priv_flags & BR_VLFLAG_NEIGH_SUPPRESS_ENABLED);
+ } else {
+ return !!(p->flags & BR_NEIGH_SUPPRESS);
+ }
+}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index df47c876230e..8eca8a5c80c6 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -80,10 +80,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
dest = eth_hdr(skb)->h_dest;
if (is_broadcast_ether_addr(dest)) {
- br_flood(br, skb, BR_PKT_BROADCAST, false, true);
+ br_flood(br, skb, BR_PKT_BROADCAST, false, true, vid);
} else if (is_multicast_ether_addr(dest)) {
if (unlikely(netpoll_tx_running(dev))) {
- br_flood(br, skb, BR_PKT_MULTICAST, false, true);
+ br_flood(br, skb, BR_PKT_MULTICAST, false, true, vid);
goto out;
}
if (br_multicast_rcv(&brmctx, &pmctx_null, vlan, skb, vid)) {
@@ -96,11 +96,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst))
br_multicast_flood(mdst, skb, brmctx, false, true);
else
- br_flood(br, skb, BR_PKT_MULTICAST, false, true);
+ br_flood(br, skb, BR_PKT_MULTICAST, false, true, vid);
} else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) {
br_forward(dst->dst, skb, false, true);
} else {
- br_flood(br, skb, BR_PKT_UNICAST, false, true);
+ br_flood(br, skb, BR_PKT_UNICAST, false, true, vid);
}
out:
rcu_read_unlock();
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 02bb620d3b8d..57744704ff69 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -197,7 +197,8 @@ out:
/* called under rcu_read_lock */
void br_flood(struct net_bridge *br, struct sk_buff *skb,
- enum br_pkt_type pkt_type, bool local_rcv, bool local_orig)
+ enum br_pkt_type pkt_type, bool local_rcv, bool local_orig,
+ u16 vid)
{
struct net_bridge_port *prev = NULL;
struct net_bridge_port *p;
@@ -224,8 +225,9 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
/* Do not flood to ports that enable proxy ARP */
if (p->flags & BR_PROXYARP)
continue;
- if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) &&
- BR_INPUT_SKB_CB(skb)->proxyarp_replied)
+ if (BR_INPUT_SKB_CB(skb)->proxyarp_replied &&
+ ((p->flags & BR_PROXYARP_WIFI) ||
+ br_is_neigh_suppress_enabled(p, vid)))
continue;
prev = maybe_deliver(prev, p, skb, local_orig);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 24f01ff113f0..3f04b40f6056 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -759,7 +759,7 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
if (mask & BR_AUTO_MASK)
nbp_update_port_count(br);
- if (mask & BR_NEIGH_SUPPRESS)
+ if (mask & (BR_NEIGH_SUPPRESS | BR_NEIGH_VLAN_SUPPRESS))
br_recalculate_neigh_suppress_enabled(br);
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 3027e8f6be15..fc17b9fd93e6 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -207,7 +207,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
br_forward(dst->dst, skb, local_rcv, false);
} else {
if (!mcast_hit)
- br_flood(br, skb, pkt_type, local_rcv, false);
+ br_flood(br, skb, pkt_type, local_rcv, false, vid);
else
br_multicast_flood(mdst, skb, brmctx, local_rcv, false);
}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 3e3065bc0465..1a801fab9543 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -869,12 +869,17 @@ static unsigned int ip_sabotage_in(void *priv,
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
- if (nf_bridge && !nf_bridge->in_prerouting &&
- !netif_is_l3_master(skb->dev) &&
- !netif_is_l3_slave(skb->dev)) {
- nf_bridge_info_free(skb);
- state->okfn(state->net, state->sk, skb);
- return NF_STOLEN;
+ if (nf_bridge) {
+ if (nf_bridge->sabotage_in_done)
+ return NF_ACCEPT;
+
+ if (!nf_bridge->in_prerouting &&
+ !netif_is_l3_master(skb->dev) &&
+ !netif_is_l3_slave(skb->dev)) {
+ nf_bridge->sabotage_in_done = 1;
+ state->okfn(state->net, state->sk, skb);
+ return NF_STOLEN;
+ }
}
return NF_ACCEPT;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index fefb1c0e248b..05c5863d2e20 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -189,6 +189,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(1) /* IFLA_BRPORT_ISOLATED */
+ nla_total_size(1) /* IFLA_BRPORT_LOCKED */
+ nla_total_size(1) /* IFLA_BRPORT_MAB */
+ + nla_total_size(1) /* IFLA_BRPORT_NEIGH_VLAN_SUPPRESS */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
@@ -278,7 +279,9 @@ static int br_port_fill_attrs(struct sk_buff *skb,
!!(p->flags & BR_MRP_LOST_IN_CONT)) ||
nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)) ||
nla_put_u8(skb, IFLA_BRPORT_LOCKED, !!(p->flags & BR_PORT_LOCKED)) ||
- nla_put_u8(skb, IFLA_BRPORT_MAB, !!(p->flags & BR_PORT_MAB)))
+ nla_put_u8(skb, IFLA_BRPORT_MAB, !!(p->flags & BR_PORT_MAB)) ||
+ nla_put_u8(skb, IFLA_BRPORT_NEIGH_VLAN_SUPPRESS,
+ !!(p->flags & BR_NEIGH_VLAN_SUPPRESS)))
return -EMSGSIZE;
timerval = br_timer_value(&p->message_age_timer);
@@ -891,6 +894,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
[IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
[IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
+ [IFLA_BRPORT_NEIGH_VLAN_SUPPRESS] = NLA_POLICY_MAX(NLA_U8, 1),
};
/* Change the state of the port and notify spanning tree */
@@ -957,6 +961,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED);
br_set_port_flag(p, tb, IFLA_BRPORT_LOCKED, BR_PORT_LOCKED);
br_set_port_flag(p, tb, IFLA_BRPORT_MAB, BR_PORT_MAB);
+ br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_VLAN_SUPPRESS,
+ BR_NEIGH_VLAN_SUPPRESS);
if ((p->flags & BR_PORT_MAB) &&
(!(p->flags & BR_PORT_LOCKED) || !(p->flags & BR_LEARNING))) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 7264fd40f82f..2119729ded2b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -178,6 +178,7 @@ enum {
BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1),
BR_VLFLAG_MCAST_ENABLED = BIT(2),
BR_VLFLAG_GLOBAL_MCAST_ENABLED = BIT(3),
+ BR_VLFLAG_NEIGH_SUPPRESS_ENABLED = BIT(4),
};
/**
@@ -849,7 +850,8 @@ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb,
bool local_rcv, bool local_orig);
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
void br_flood(struct net_bridge *br, struct sk_buff *skb,
- enum br_pkt_type pkt_type, bool local_rcv, bool local_orig);
+ enum br_pkt_type pkt_type, bool local_rcv, bool local_orig,
+ u16 vid);
/* return true if both source port and dest port are isolated */
static inline bool br_skb_isolated(const struct net_bridge_port *to,
@@ -2218,4 +2220,5 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
u16 vid, struct net_bridge_port *p, struct nd_msg *msg);
struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m);
+bool br_is_neigh_suppress_enabled(const struct net_bridge_port *p, u16 vid);
#endif
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index de18e9c1d7a7..ba95c4d74a60 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -148,6 +148,17 @@ br_switchdev_fdb_notify(struct net_bridge *br,
if (test_bit(BR_FDB_LOCKED, &fdb->flags))
return;
+ /* Entries with these flags were created using ndm_state == NUD_REACHABLE,
+ * ndm_flags == NTF_MASTER( | NTF_STICKY), ext_flags == 0 by something
+ * equivalent to 'bridge fdb add ... master dynamic (sticky)'.
+ * Drivers don't know how to deal with these, so don't notify them to
+ * avoid confusing them.
+ */
+ if (test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags) &&
+ !test_bit(BR_FDB_STATIC, &fdb->flags) &&
+ !test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
+ return;
+
br_switchdev_fdb_populate(br, &item, fdb, NULL);
switch (type) {
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 8a3dbc09ba38..15f44d026e75 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -2134,6 +2134,7 @@ static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] =
[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER] = { .type = NLA_U8 },
[BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS] = { .type = NLA_REJECT },
[BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
+ [BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS] = NLA_POLICY_MAX(NLA_U8, 1),
};
static int br_vlan_rtm_process_one(struct net_device *dev,
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index e378c2f3a9e2..8fa89b04ee94 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -52,7 +52,9 @@ bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
const struct net_bridge_port *p)
{
if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, br_vlan_get_state(v)) ||
- !__vlan_tun_put(skb, v))
+ !__vlan_tun_put(skb, v) ||
+ nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS,
+ !!(v->priv_flags & BR_VLFLAG_NEIGH_SUPPRESS_ENABLED)))
return false;
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
@@ -80,6 +82,7 @@ size_t br_vlan_opts_nl_size(void)
+ nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS */
+ nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS */
#endif
+ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS */
+ 0;
}
@@ -239,6 +242,21 @@ static int br_vlan_process_one_opts(const struct net_bridge *br,
}
#endif
+ if (tb[BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS]) {
+ bool enabled = v->priv_flags & BR_VLFLAG_NEIGH_SUPPRESS_ENABLED;
+ bool val = nla_get_u8(tb[BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS]);
+
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't set neigh_suppress for non-port vlans");
+ return -EINVAL;
+ }
+
+ if (val != enabled) {
+ v->priv_flags ^= BR_VLFLAG_NEIGH_SUPPRESS_ENABLED;
+ *changed = true;
+ }
+ }
+
return 0;
}
diff --git a/net/can/isotp.c b/net/can/isotp.c
index f1ea91772ae7..a750259cb79c 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -130,7 +130,8 @@ enum {
ISOTP_WAIT_FIRST_FC,
ISOTP_WAIT_FC,
ISOTP_WAIT_DATA,
- ISOTP_SENDING
+ ISOTP_SENDING,
+ ISOTP_SHUTDOWN,
};
struct tpcon {
@@ -903,8 +904,8 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
txtimer);
struct sock *sk = &so->sk;
- /* don't handle timeouts in IDLE state */
- if (so->tx.state == ISOTP_IDLE)
+ /* don't handle timeouts in IDLE or SHUTDOWN state */
+ if (so->tx.state == ISOTP_IDLE || so->tx.state == ISOTP_SHUTDOWN)
return HRTIMER_NORESTART;
/* we did not get any flow control or echo frame in time */
@@ -941,7 +942,6 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
struct isotp_sock *so = isotp_sk(sk);
- u32 old_state = so->tx.state;
struct sk_buff *skb;
struct net_device *dev;
struct canfd_frame *cf;
@@ -951,23 +951,24 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
int off;
int err;
- if (!so->bound)
+ if (!so->bound || so->tx.state == ISOTP_SHUTDOWN)
return -EADDRNOTAVAIL;
+wait_free_buffer:
/* we do not support multiple buffers - for now */
- if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE ||
- wq_has_sleeper(&so->wait)) {
- if (msg->msg_flags & MSG_DONTWAIT) {
- err = -EAGAIN;
- goto err_out;
- }
+ if (wq_has_sleeper(&so->wait) && (msg->msg_flags & MSG_DONTWAIT))
+ return -EAGAIN;
- /* wait for complete transmission of current pdu */
- err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
- if (err)
- goto err_out;
+ /* wait for complete transmission of current pdu */
+ err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ if (err)
+ goto err_event_drop;
- so->tx.state = ISOTP_SENDING;
+ if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE) {
+ if (so->tx.state == ISOTP_SHUTDOWN)
+ return -EADDRNOTAVAIL;
+
+ goto wait_free_buffer;
}
/* PDU size > default => try max_pdu_size */
@@ -1107,7 +1108,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (wait_tx_done) {
/* wait for complete transmission of current pdu */
- wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ if (err)
+ goto err_event_drop;
if (sk->sk_err)
return -sk->sk_err;
@@ -1115,13 +1118,15 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
return size;
+err_event_drop:
+ /* got signal: force tx state machine to be idle */
+ so->tx.state = ISOTP_IDLE;
+ hrtimer_cancel(&so->txfrtimer);
+ hrtimer_cancel(&so->txtimer);
err_out_drop:
/* drop this PDU and unlock a potential wait queue */
- old_state = ISOTP_IDLE;
-err_out:
- so->tx.state = old_state;
- if (so->tx.state == ISOTP_IDLE)
- wake_up_interruptible(&so->wait);
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
return err;
}
@@ -1153,7 +1158,7 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (ret < 0)
goto out_err;
- sock_recv_timestamp(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name) {
__sockaddr_check_size(ISOTP_MIN_NAMELEN);
@@ -1183,10 +1188,12 @@ static int isotp_release(struct socket *sock)
net = sock_net(sk);
/* wait for complete transmission of current pdu */
- wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ while (wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE) == 0 &&
+ cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SHUTDOWN) != ISOTP_IDLE)
+ ;
/* force state machines to be idle also when a signal occurred */
- so->tx.state = ISOTP_IDLE;
+ so->tx.state = ISOTP_SHUTDOWN;
so->rx.state = ISOTP_IDLE;
spin_lock(&isotp_notifier_lock);
@@ -1652,6 +1659,21 @@ static int isotp_init(struct sock *sk)
return 0;
}
+static __poll_t isotp_poll(struct file *file, struct socket *sock, poll_table *wait)
+{
+ struct sock *sk = sock->sk;
+ struct isotp_sock *so = isotp_sk(sk);
+
+ __poll_t mask = datagram_poll(file, sock, wait);
+ poll_wait(file, &so->wait, wait);
+
+ /* Check for false positives due to TX state */
+ if ((mask & EPOLLWRNORM) && (so->tx.state != ISOTP_IDLE))
+ mask &= ~(EPOLLOUT | EPOLLWRNORM);
+
+ return mask;
+}
+
static int isotp_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd,
unsigned long arg)
{
@@ -1667,7 +1689,7 @@ static const struct proto_ops isotp_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = isotp_getname,
- .poll = datagram_poll,
+ .poll = isotp_poll,
.ioctl = isotp_sock_no_ioctlcmd,
.gettstamp = sock_gettstamp,
.listen = sock_no_listen,
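The isotp_poll() addition above changes what userspace observes:
EPOLLOUT/EPOLLWRNORM is masked while a PDU is still being transmitted,
so poll() no longer reports a false write-ready state. A hedged
userspace sketch of the resulting pattern (assumes an ISO-TP capable
kernel, an existing vcan0 interface, and illustrative CAN IDs; error
handling omitted for brevity):

    #include <linux/can.h>
    #include <linux/can/isotp.h>
    #include <net/if.h>
    #include <poll.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
            struct sockaddr_can addr = { .can_family = AF_CAN };
            struct pollfd pfd;
            char pdu[32] = "example payload";
            int s = socket(PF_CAN, SOCK_DGRAM, CAN_ISOTP);

            addr.can_ifindex = if_nametoindex("vcan0");     /* assumed interface */
            addr.can_addr.tp.tx_id = 0x123;                 /* illustrative IDs */
            addr.can_addr.tp.rx_id = 0x321;
            bind(s, (struct sockaddr *)&addr, sizeof(addr));

            /* With the isotp_poll() change, POLLOUT is reported only once
             * the previous PDU has left the TX state machine, so the
             * non-blocking send below should not race a busy transmitter.
             */
            pfd.fd = s;
            pfd.events = POLLOUT;
            if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLOUT))
                    send(s, pdu, sizeof(pdu), MSG_DONTWAIT);

            close(s);
            return 0;
    }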
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index fb92c3609e17..fe3df23a2595 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -604,7 +604,10 @@ sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv,
/* reserve CAN header */
skb_reserve(skb, offsetof(struct can_frame, data));
- memcpy(skb->cb, re_skcb, sizeof(skb->cb));
+ /* skb->cb must be large enough to hold a j1939_sk_buff_cb structure */
+ BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*re_skcb));
+
+ memcpy(skb->cb, re_skcb, sizeof(*re_skcb));
skcb = j1939_skb_to_cb(skb);
if (swap_src_dst)
j1939_skbcb_swap(skcb);
diff --git a/net/compat.c b/net/compat.c
index 161b7bea1f62..6564720f32b7 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -113,7 +113,7 @@ int get_compat_msghdr(struct msghdr *kmsg,
#define CMSG_COMPAT_FIRSTHDR(msg) \
(((msg)->msg_controllen) >= sizeof(struct compat_cmsghdr) ? \
- (struct compat_cmsghdr __user *)((msg)->msg_control) : \
+ (struct compat_cmsghdr __user *)((msg)->msg_control_user) : \
(struct compat_cmsghdr __user *)NULL)
#define CMSG_COMPAT_OK(ucmlen, ucmsg, mhdr) \
@@ -126,7 +126,7 @@ static inline struct compat_cmsghdr __user *cmsg_compat_nxthdr(struct msghdr *ms
struct compat_cmsghdr __user *cmsg, int cmsg_len)
{
char __user *ptr = (char __user *)cmsg + CMSG_COMPAT_ALIGN(cmsg_len);
- if ((unsigned long)(ptr + 1 - (char __user *)msg->msg_control) >
+ if ((unsigned long)(ptr + 1 - (char __user *)msg->msg_control_user) >
msg->msg_controllen)
return NULL;
return (struct compat_cmsghdr __user *)ptr;
@@ -211,6 +211,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
goto Einval;
/* Ok, looks like we made it. Hook it up and return success. */
+ kmsg->msg_control_is_user = false;
kmsg->msg_control = kcmsg_base;
kmsg->msg_controllen = kcmlen;
return 0;
@@ -225,7 +226,7 @@ Efault:
int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data)
{
- struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
+ struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control_user;
struct compat_cmsghdr cmhdr;
struct old_timeval32 ctv;
struct old_timespec32 cts[3];
@@ -274,7 +275,7 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
cmlen = CMSG_COMPAT_SPACE(len);
if (kmsg->msg_controllen < cmlen)
cmlen = kmsg->msg_controllen;
- kmsg->msg_control += cmlen;
+ kmsg->msg_control_user += cmlen;
kmsg->msg_controllen -= cmlen;
return 0;
}
@@ -289,7 +290,7 @@ static int scm_max_fds_compat(struct msghdr *msg)
void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm)
{
struct compat_cmsghdr __user *cm =
- (struct compat_cmsghdr __user *)msg->msg_control;
+ (struct compat_cmsghdr __user *)msg->msg_control_user;
unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
int fdmax = min_t(int, scm_max_fds_compat(msg), scm->fp->count);
int __user *cmsg_data = CMSG_COMPAT_DATA(cm);
@@ -313,7 +314,7 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm)
cmlen = CMSG_COMPAT_SPACE(i * sizeof(int));
if (msg->msg_controllen < cmlen)
cmlen = msg->msg_controllen;
- msg->msg_control += cmlen;
+ msg->msg_control_user += cmlen;
msg->msg_controllen -= cmlen;
}
}
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 7a36353dbc22..d4172534dfa8 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -40,7 +40,7 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata), true);
+ bpf_selem_unlink(SELEM(sdata), false);
return 0;
}
@@ -49,7 +49,6 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
void bpf_sk_storage_free(struct sock *sk)
{
struct bpf_local_storage *sk_storage;
- bool free_sk_storage = false;
rcu_read_lock();
sk_storage = rcu_dereference(sk->sk_bpf_storage);
@@ -58,13 +57,8 @@ void bpf_sk_storage_free(struct sock *sk)
return;
}
- raw_spin_lock_bh(&sk_storage->lock);
- free_sk_storage = bpf_local_storage_unlink_nolock(sk_storage);
- raw_spin_unlock_bh(&sk_storage->lock);
+ bpf_local_storage_destroy(sk_storage);
rcu_read_unlock();
-
- if (free_sk_storage)
- kfree_rcu(sk_storage, rcu);
}
static void bpf_sk_storage_map_free(struct bpf_map *map)
@@ -74,7 +68,7 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
- return bpf_local_storage_map_alloc(attr, &sk_cache);
+ return bpf_local_storage_map_alloc(attr, &sk_cache, false);
}
static int notsupp_get_next_key(struct bpf_map *map, void *key,
@@ -100,8 +94,8 @@ static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
return ERR_PTR(err);
}
-static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags)
+static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
{
struct bpf_local_storage_data *sdata;
struct socket *sock;
@@ -120,7 +114,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
return err;
}
-static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
+static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
{
struct socket *sock;
int fd, err;
@@ -203,7 +197,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
} else {
ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
if (ret) {
- kfree(copy_selem);
+ bpf_selem_free(copy_selem, smap, true);
atomic_sub(smap->elem_size,
&newsk->sk_omem_alloc);
bpf_map_put(map);
@@ -418,7 +412,7 @@ const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
@@ -430,7 +424,7 @@ const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
.allowed = bpf_sk_storage_tracing_allowed,
};
diff --git a/net/core/dev.c b/net/core/dev.c
index 7ce5985be84b..1551aabac343 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1612,7 +1612,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
- N(XDP_FEAT_CHANGE) N(PRE_CHANGE_HWTSTAMP)
+ N(XDP_FEAT_CHANGE)
}
#undef N
return "UNKNOWN_NETDEV_EVENT";
@@ -3197,6 +3197,7 @@ static u16 skb_tx_hash(const struct net_device *dev,
}
if (skb_rx_queue_recorded(skb)) {
+ DEBUG_NET_WARN_ON_ONCE(qcount == 0);
hash = skb_get_rx_queue(skb);
if (hash >= qoffset)
hash -= qoffset;
@@ -3314,8 +3315,7 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
skb->len - start, ~(__u32)0,
crc32c_csum_stub));
*(__le32 *)(skb->data + offset) = crc32c_csum;
- skb->ip_summed = CHECKSUM_NONE;
- skb->csum_not_inet = 0;
+ skb_reset_csum_not_inet(skb);
out:
return ret;
}
@@ -4358,6 +4358,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
}
list_add_tail(&napi->poll_list, &sd->poll_list);
+ WRITE_ONCE(napi->list_owner, smp_processor_id());
/* If not called from net_rx_action()
* we have to raise NET_RX_SOFTIRQ.
*/
@@ -5039,7 +5040,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
__kfree_skb(skb);
else
- __kfree_skb_defer(skb);
+ __napi_kfree_skb(skb,
+ get_kfree_skb_cb(skb)->reason);
}
}
@@ -6068,6 +6070,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
list_del_init(&n->poll_list);
local_irq_restore(flags);
}
+ WRITE_ONCE(n->list_owner, -1);
val = READ_ONCE(n->state);
do {
@@ -6383,6 +6386,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
#ifdef CONFIG_NETPOLL
napi->poll_owner = -1;
#endif
+ napi->list_owner = -1;
set_bit(NAPI_STATE_SCHED, &napi->state);
set_bit(NAPI_STATE_NPSVC, &napi->state);
list_add_rcu(&napi->dev_list, &dev->napi_list);
@@ -10870,7 +10874,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
GFP_KERNEL, NULL, 0,
- portid, nlmsg_seq(nlh));
+ portid, nlh);
/*
* Flush the unicast and multicast chains
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 6d772837eb3f..3730945ee294 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -7,7 +7,7 @@
#include <linux/net_tstamp.h>
#include <linux/wireless.h>
#include <linux/if_bridge.h>
-#include <net/dsa.h>
+#include <net/dsa_stubs.h>
#include <net/wext.h>
#include "dev.h"
@@ -259,9 +259,6 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
- struct netdev_notifier_hwtstamp_info info = {
- .info.dev = dev,
- };
struct kernel_hwtstamp_config kernel_cfg;
struct netlink_ext_ack extack = {};
struct hwtstamp_config cfg;
@@ -276,12 +273,7 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
if (err)
return err;
- info.info.extack = &extack;
- info.config = &kernel_cfg;
-
- err = call_netdevice_notifiers_info(NETDEV_PRE_CHANGE_HWTSTAMP,
- &info.info);
- err = notifier_to_errno(err);
+ err = dsa_master_hwtstamp_validate(dev, &kernel_cfg, &extack);
if (err) {
if (extack._msg)
netdev_err(dev, "%s\n", extack._msg);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 5a782d1d8fd3..aff31cd944c2 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -21,6 +21,7 @@
#include <linux/workqueue.h>
#include <linux/netlink.h>
#include <linux/net_dropmon.h>
+#include <linux/bitfield.h>
#include <linux/percpu.h>
#include <linux/timer.h>
#include <linux/bitops.h>
@@ -29,6 +30,7 @@
#include <net/genetlink.h>
#include <net/netevent.h>
#include <net/flow_offload.h>
+#include <net/dropreason.h>
#include <net/devlink.h>
#include <trace/events/skb.h>
@@ -504,8 +506,6 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
if (!nskb)
return;
- if (unlikely(reason >= SKB_DROP_REASON_MAX || reason <= 0))
- reason = SKB_DROP_REASON_NOT_SPECIFIED;
cb = NET_DM_SKB_CB(nskb);
cb->reason = reason;
cb->pc = location;
@@ -552,9 +552,9 @@ static size_t net_dm_in_port_size(void)
}
#define NET_DM_MAX_SYMBOL_LEN 40
+#define NET_DM_MAX_REASON_LEN 50
-static size_t net_dm_packet_report_size(size_t payload_len,
- enum skb_drop_reason reason)
+static size_t net_dm_packet_report_size(size_t payload_len)
{
size_t size;
@@ -576,7 +576,7 @@ static size_t net_dm_packet_report_size(size_t payload_len,
/* NET_DM_ATTR_PROTO */
nla_total_size(sizeof(u16)) +
/* NET_DM_ATTR_REASON */
- nla_total_size(strlen(drop_reasons[reason]) + 1) +
+ nla_total_size(NET_DM_MAX_REASON_LEN + 1) +
/* NET_DM_ATTR_PAYLOAD */
nla_total_size(payload_len);
}
@@ -610,6 +610,8 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
size_t payload_len)
{
struct net_dm_skb_cb *cb = NET_DM_SKB_CB(skb);
+ const struct drop_reason_list *list = NULL;
+ unsigned int subsys, subsys_reason;
char buf[NET_DM_MAX_SYMBOL_LEN];
struct nlattr *attr;
void *hdr;
@@ -627,9 +629,24 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
NET_DM_ATTR_PAD))
goto nla_put_failure;
+ rcu_read_lock();
+ subsys = u32_get_bits(cb->reason, SKB_DROP_REASON_SUBSYS_MASK);
+ if (subsys < SKB_DROP_REASON_SUBSYS_NUM)
+ list = rcu_dereference(drop_reasons_by_subsys[subsys]);
+ subsys_reason = cb->reason & ~SKB_DROP_REASON_SUBSYS_MASK;
+ if (!list ||
+ subsys_reason >= list->n_reasons ||
+ !list->reasons[subsys_reason] ||
+ strlen(list->reasons[subsys_reason]) > NET_DM_MAX_REASON_LEN) {
+ list = rcu_dereference(drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_CORE]);
+ subsys_reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ }
if (nla_put_string(msg, NET_DM_ATTR_REASON,
- drop_reasons[cb->reason]))
+ list->reasons[subsys_reason])) {
+ rcu_read_unlock();
goto nla_put_failure;
+ }
+ rcu_read_unlock();
snprintf(buf, sizeof(buf), "%pS", cb->pc);
if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
@@ -687,9 +704,7 @@ static void net_dm_packet_report(struct sk_buff *skb)
if (net_dm_trunc_len)
payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
- msg = nlmsg_new(net_dm_packet_report_size(payload_len,
- NET_DM_SKB_CB(skb)->reason),
- GFP_KERNEL);
+ msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
if (!msg)
goto out;
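The reason-string lookup above walks drop_reasons_by_subsys, which
subsystems populate through the registration API this patch adds in
net/core/skbuff.c further below. A hedged sketch of a registration
(SKB_DROP_REASON_SUBSYS_EXAMPLE and the reason strings are purely
illustrative, not real enum values):

    /* Must be statically allocated: drop_reasons_register_subsys()
     * only RCU-publishes the pointer, it does not copy the list.
     */
    static const char * const example_drop_reasons[] = {
            "EXAMPLE_NOT_SPECIFIED",
            "EXAMPLE_QUEUE_FULL",
    };

    static const struct drop_reason_list example_drop_reason_list = {
            .reasons = example_drop_reasons,
            .n_reasons = ARRAY_SIZE(example_drop_reasons),
    };

    /* typically from module init ... */
    drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_EXAMPLE,
                                 &example_drop_reason_list);

    /* ... and module exit; this synchronize_rcu()s before returning */
    drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_EXAMPLE);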
diff --git a/net/core/filter.c b/net/core/filter.c
index a8c8fd96c822..df0df59814ae 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5002,7 +5002,7 @@ const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto = {
.func = bpf_get_socket_ptr_cookie,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON | PTR_MAYBE_NULL,
};
BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
@@ -8746,23 +8746,18 @@ EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock);
int (*nfct_btf_struct_access)(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag);
+ int off, int size);
EXPORT_SYMBOL_GPL(nfct_btf_struct_access);
static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag)
+ int off, int size)
{
int ret = -EACCES;
- if (atype == BPF_READ)
- return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
-
mutex_lock(&nf_conn_btf_access_lock);
if (nfct_btf_struct_access)
- ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
+ ret = nfct_btf_struct_access(log, reg, off, size);
mutex_unlock(&nf_conn_btf_access_lock);
return ret;
@@ -8829,17 +8824,13 @@ EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
static int xdp_btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag)
+ int off, int size)
{
int ret = -EACCES;
- if (atype == BPF_READ)
- return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
-
mutex_lock(&nf_conn_btf_access_lock);
if (nfct_btf_struct_access)
- ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
+ ret = nfct_btf_struct_access(log, reg, off, size);
mutex_unlock(&nf_conn_btf_access_lock);
return ret;
@@ -9189,7 +9180,7 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
__u8 tmp_reg = BPF_REG_AX;
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
- PKT_VLAN_PRESENT_OFFSET);
+ SKB_BF_MONO_TC_OFFSET);
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
SKB_MONO_DELIVERY_TIME_MASK, 2);
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
@@ -9236,7 +9227,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
/* AX is needed because src_reg and dst_reg could be the same */
__u8 tmp_reg = BPF_REG_AX;
- *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET);
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
*insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
@@ -9271,14 +9262,14 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
if (!prog->tstamp_type_access) {
__u8 tmp_reg = BPF_REG_AX;
- *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET);
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
/* Writing __sk_buff->tstamp as ingress, goto <clear> */
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
/* goto <store> */
*insn++ = BPF_JMP_A(2);
/* <clear>: mono_delivery_time */
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
- *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, PKT_VLAN_PRESENT_OFFSET);
+ *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET);
}
#endif
diff --git a/net/core/gro.c b/net/core/gro.c
index a606705a0859..2d84165cb4f1 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -633,7 +633,7 @@ static gro_result_t napi_skb_finish(struct napi_struct *napi,
else if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
__kfree_skb(skb);
else
- __kfree_skb_defer(skb);
+ __napi_kfree_skb(skb, SKB_CONSUMED);
break;
case GRO_HELD:
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a089b704b986..e6a739b1afa9 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -137,6 +137,20 @@ static void queue_process(struct work_struct *work)
}
}
+static int netif_local_xmit_active(struct net_device *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+ if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id())
+ return 1;
+ }
+
+ return 0;
+}
+
static void poll_one_napi(struct napi_struct *napi)
{
int work;
@@ -183,7 +197,10 @@ void netpoll_poll_dev(struct net_device *dev)
if (!ni || down_trylock(&ni->dev_lock))
return;
- if (!netif_running(dev)) {
+ /* Some drivers will take the same locks in poll and xmit,
+ * so we can't poll if the local CPU is already in xmit.
+ */
+ if (!netif_running(dev) || netif_local_xmit_active(dev)) {
up(&ni->dev_lock);
return;
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 193c18799865..e212e9d7edcb 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -19,6 +19,7 @@
#include <linux/mm.h> /* for put_page() */
#include <linux/poison.h>
#include <linux/ethtool.h>
+#include <linux/netdevice.h>
#include <trace/events/page_pool.h>
@@ -315,7 +316,8 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
*/
dma = dma_map_page_attrs(pool->p.dev, page, 0,
(PAGE_SIZE << pool->p.order),
- pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
+ pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC |
+ DMA_ATTR_WEAK_ORDERING);
if (dma_mapping_error(pool->p.dev, dma))
return false;
@@ -483,7 +485,7 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
/* When page is unmapped, it cannot be returned to our pool */
dma_unmap_page_attrs(pool->p.dev, dma,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
- DMA_ATTR_SKIP_CPU_SYNC);
+ DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
page_pool_clear_pp_info(page);
@@ -837,6 +839,21 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
pool->xdp_mem_id = mem->id;
}
+void page_pool_unlink_napi(struct page_pool *pool)
+{
+ if (!pool->p.napi)
+ return;
+
+ /* To avoid races with recycling and additional barriers, make sure
+ * pool and NAPI are unlinked when NAPI is disabled.
+ */
+ WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state) ||
+ READ_ONCE(pool->p.napi->list_owner) != -1);
+
+ WRITE_ONCE(pool->p.napi, NULL);
+}
+EXPORT_SYMBOL(page_pool_unlink_napi);
+
void page_pool_destroy(struct page_pool *pool)
{
if (!pool)
@@ -845,6 +862,7 @@ void page_pool_destroy(struct page_pool *pool)
if (!page_pool_put(pool))
return;
+ page_pool_unlink_napi(pool);
page_pool_free_frag(pool);
if (!page_pool_release(pool))
@@ -874,9 +892,11 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
}
EXPORT_SYMBOL(page_pool_update_nid);
-bool page_pool_return_skb_page(struct page *page)
+bool page_pool_return_skb_page(struct page *page, bool napi_safe)
{
+ struct napi_struct *napi;
struct page_pool *pp;
+ bool allow_direct;
page = compound_head(page);
@@ -892,12 +912,20 @@ bool page_pool_return_skb_page(struct page *page)
pp = page->pp;
+ /* Allow direct recycle if we have reasons to believe that we are
+ * in the same context in which the consumer would run, so there's
+ * no possible race.
+ */
+ napi = READ_ONCE(pp->p.napi);
+ allow_direct = napi_safe && napi &&
+ READ_ONCE(napi->list_owner) == smp_processor_id();
+
/* Driver set this to memory recycling info. Reset it on recycle.
* This will *not* work for NIC using a split-page memory model.
* The page will be returned to the pool here regardless of the
* 'flipped' fragment being in use or not.
*/
- page_pool_put_full_page(pp, page, false);
+ page_pool_put_full_page(pp, page, allow_direct);
return true;
}
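Since page_pool_destroy() now calls page_pool_unlink_napi(), the
WARN_ON() above effectively documents the required teardown order: the
NAPI must already be disabled, and off every poll list, before the pool
goes away. A hedged driver-shutdown sketch ('ring' is an illustrative
driver structure, not an API):

    /* napi_disable() leaves NAPI_STATE_SCHED held and the NAPI off the
     * poll lists (list_owner == -1), satisfying the assertions in
     * page_pool_unlink_napi().
     */
    napi_disable(&ring->napi);

    /* Destroys the pool; with this patch it also clears pool->p.napi so
     * no further napi_safe direct recycling can observe a stale NAPI.
     */
    page_pool_destroy(ring->page_pool);
    netif_napi_del(&ring->napi);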
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 906aebdc566b..653901a1bf75 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -61,7 +61,7 @@
#include "dev.h"
#define RTNL_MAX_TYPE 50
-#define RTNL_SLAVE_MAX_TYPE 42
+#define RTNL_SLAVE_MAX_TYPE 43
struct rtnl_link {
rtnl_doit_func doit;
@@ -3975,16 +3975,23 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned int change,
u32 event, gfp_t flags, int *new_nsid,
- int new_ifindex, u32 portid, u32 seq)
+ int new_ifindex, u32 portid,
+ const struct nlmsghdr *nlh)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
+ u32 seq = 0;
skb = nlmsg_new(if_nlmsg_size(dev, 0), flags);
if (skb == NULL)
goto errout;
+ if (nlmsg_report(nlh))
+ seq = nlmsg_seq(nlh);
+ else
+ portid = 0;
+
err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
type, portid, seq, change, 0, 0, event,
new_nsid, new_ifindex, -1, flags);
@@ -4020,7 +4027,7 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
return;
skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid,
- new_ifindex, portid, nlmsg_seq(nlh));
+ new_ifindex, portid, nlh);
if (skb)
rtmsg_ifinfo_send(skb, dev, flags, portid, nlh);
}
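Passing the request nlh down lets the notification path apply the usual NLM_F_ECHO rule in one place. A sketch (not part of this patch) of the rule the hunk above encodes:

/* If the triggering request asked for an echo, the notification
 * carries that request's portid and sequence number; otherwise it is
 * an ordinary multicast notification with portid 0 and seq 0.
 */
static void rtnl_echo_ids(const struct nlmsghdr *nlh, u32 *portid, u32 *seq)
{
	if (nlmsg_report(nlh)) {	/* NLM_F_ECHO was set */
		*seq = nlmsg_seq(nlh);
	} else {
		*portid = 0;
		*seq = 0;
	}
}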
diff --git a/net/core/scm.c b/net/core/scm.c
index acb7d776fa6e..3cd7dd377e53 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -250,7 +250,10 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
}
cmlen = min(CMSG_SPACE(len), msg->msg_controllen);
- msg->msg_control += cmlen;
+ if (msg->msg_control_is_user)
+ msg->msg_control_user += cmlen;
+ else
+ msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
return 0;
@@ -299,7 +302,7 @@ static int scm_max_fds(struct msghdr *msg)
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
{
struct cmsghdr __user *cm =
- (__force struct cmsghdr __user *)msg->msg_control;
+ (__force struct cmsghdr __user *)msg->msg_control_user;
unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count);
int __user *cmsg_data = CMSG_USER_DATA(cm);
@@ -332,7 +335,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
cmlen = CMSG_SPACE(i * sizeof(int));
if (msg->msg_controllen < cmlen)
cmlen = msg->msg_controllen;
- msg->msg_control += cmlen;
+ msg->msg_control_user += cmlen;
msg->msg_controllen -= cmlen;
}
}
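The split above exists because struct msghdr keeps the control buffer pointer in a union discriminated by msg_control_is_user. A condensed sketch (not part of this patch) of the invariant put_cmsg() and scm_detach_fds() now maintain:

static void scm_advance_cmsg(struct msghdr *msg, size_t cmlen)
{
	if (msg->msg_control_is_user)
		msg->msg_control_user += cmlen;	/* void __user * member */
	else
		msg->msg_control += cmlen;	/* kernel pointer member */
	msg->msg_controllen -= cmlen;
}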
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 050a875d09c5..0d998806b377 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -58,6 +58,7 @@
#include <linux/scatterlist.h>
#include <linux/errqueue.h>
#include <linux/prefetch.h>
+#include <linux/bitfield.h>
#include <linux/if_vlan.h>
#include <linux/mpls.h>
#include <linux/kcov.h>
@@ -72,6 +73,7 @@
#include <net/mptcp.h>
#include <net/mctp.h>
#include <net/page_pool.h>
+#include <net/dropreason.h>
#include <linux/uaccess.h>
#include <trace/events/skb.h>
@@ -122,11 +124,59 @@ EXPORT_SYMBOL(sysctl_max_skb_frags);
#undef FN
#define FN(reason) [SKB_DROP_REASON_##reason] = #reason,
-const char * const drop_reasons[] = {
+static const char * const drop_reasons[] = {
[SKB_CONSUMED] = "CONSUMED",
DEFINE_DROP_REASON(FN, FN)
};
-EXPORT_SYMBOL(drop_reasons);
+
+static const struct drop_reason_list drop_reasons_core = {
+ .reasons = drop_reasons,
+ .n_reasons = ARRAY_SIZE(drop_reasons),
+};
+
+const struct drop_reason_list __rcu *
+drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_NUM] = {
+ [SKB_DROP_REASON_SUBSYS_CORE] = RCU_INITIALIZER(&drop_reasons_core),
+};
+EXPORT_SYMBOL(drop_reasons_by_subsys);
+
+/**
+ * drop_reasons_register_subsys - register another drop reason subsystem
+ * @subsys: the subsystem to register, must not be the core
+ * @list: the list of drop reasons within the subsystem, must point to
+ * a statically initialized list
+ */
+void drop_reasons_register_subsys(enum skb_drop_reason_subsys subsys,
+ const struct drop_reason_list *list)
+{
+ if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE ||
+ subsys >= ARRAY_SIZE(drop_reasons_by_subsys),
+ "invalid subsystem %d\n", subsys))
+ return;
+
+ /* must point to statically allocated memory, so INIT is OK */
+ RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], list);
+}
+EXPORT_SYMBOL_GPL(drop_reasons_register_subsys);
+
+/**
+ * drop_reasons_unregister_subsys - unregister a drop reason subsystem
+ * @subsys: the subsystem to remove, must not be the core
+ *
+ * Note: This will synchronize_rcu() to ensure there are no users when it returns.
+ */
+void drop_reasons_unregister_subsys(enum skb_drop_reason_subsys subsys)
+{
+ if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE ||
+ subsys >= ARRAY_SIZE(drop_reasons_by_subsys),
+ "invalid subsystem %d\n", subsys))
+ return;
+
+ RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], NULL);
+
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(drop_reasons_unregister_subsys);
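A sketch (not part of this patch) of how a subsystem would hook its own reason strings into the table; the SKB_DROP_REASON_SUBSYS_FOO slot and the FOO_* strings are hypothetical:

static const char * const foo_drop_reasons[] = {
	"FOO_BAD_HDR",
	"FOO_NO_ROUTE",
};

static const struct drop_reason_list foo_drop_reason_list = {
	.reasons   = foo_drop_reasons,
	.n_reasons = ARRAY_SIZE(foo_drop_reasons),
};

static int __init foo_init(void)
{
	/* SKB_DROP_REASON_SUBSYS_FOO: hypothetical subsystem slot */
	drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_FOO,
				     &foo_drop_reason_list);
	return 0;
}

static void __exit foo_exit(void)
{
	/* synchronizes RCU before returning, see above */
	drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_FOO);
}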
/**
* skb_panic - private function for out-of-line support
@@ -839,11 +889,11 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
-static bool skb_pp_recycle(struct sk_buff *skb, void *data)
+static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
{
if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
return false;
- return page_pool_return_skb_page(virt_to_page(data));
+ return page_pool_return_skb_page(virt_to_page(data), napi_safe);
}
static void skb_kfree_head(void *head, unsigned int end_offset)
@@ -856,12 +906,12 @@ static void skb_kfree_head(void *head, unsigned int end_offset)
kfree(head);
}
-static void skb_free_head(struct sk_buff *skb)
+static void skb_free_head(struct sk_buff *skb, bool napi_safe)
{
unsigned char *head = skb->head;
if (skb->head_frag) {
- if (skb_pp_recycle(skb, head))
+ if (skb_pp_recycle(skb, head, napi_safe))
return;
skb_free_frag(head);
} else {
@@ -869,7 +919,8 @@ static void skb_free_head(struct sk_buff *skb)
}
}
-static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason)
+static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
+ bool napi_safe)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
int i;
@@ -888,13 +939,13 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason)
}
for (i = 0; i < shinfo->nr_frags; i++)
- __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
+ napi_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe);
free_head:
if (shinfo->frag_list)
kfree_skb_list_reason(shinfo->frag_list, reason);
- skb_free_head(skb);
+ skb_free_head(skb, napi_safe);
exit:
/* When we clone an SKB we copy the recycling bit. The pp_recycle
* bit is only set on the head though, so in order to avoid races
@@ -955,11 +1006,12 @@ void skb_release_head_state(struct sk_buff *skb)
}
/* Free everything but the sk_buff shell. */
-static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason)
+static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason,
+ bool napi_safe)
{
skb_release_head_state(skb);
if (likely(skb->head))
- skb_release_data(skb, reason);
+ skb_release_data(skb, reason, napi_safe);
}
/**
@@ -973,7 +1025,7 @@ static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason)
void __kfree_skb(struct sk_buff *skb)
{
- skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
+ skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false);
kfree_skbmem(skb);
}
EXPORT_SYMBOL(__kfree_skb);
@@ -984,7 +1036,10 @@ bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
if (unlikely(!skb_unref(skb)))
return false;
- DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX);
+ DEBUG_NET_WARN_ON_ONCE(reason == SKB_NOT_DROPPED_YET ||
+ u32_get_bits(reason,
+ SKB_DROP_REASON_SUBSYS_MASK) >=
+ SKB_DROP_REASON_SUBSYS_NUM);
if (reason == SKB_CONSUMED)
trace_consume_skb(skb, __builtin_return_address(0));
@@ -1027,7 +1082,7 @@ static void kfree_skb_add_bulk(struct sk_buff *skb,
return;
}
- skb_release_all(skb, reason);
+ skb_release_all(skb, reason, false);
sa->skb_array[sa->skb_count++] = skb;
if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
@@ -1201,7 +1256,7 @@ EXPORT_SYMBOL(consume_skb);
void __consume_stateless_skb(struct sk_buff *skb)
{
trace_consume_skb(skb, __builtin_return_address(0));
- skb_release_data(skb, SKB_CONSUMED);
+ skb_release_data(skb, SKB_CONSUMED, false);
kfree_skbmem(skb);
}
@@ -1224,9 +1279,9 @@ static void napi_skb_cache_put(struct sk_buff *skb)
}
}
-void __kfree_skb_defer(struct sk_buff *skb)
+void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason)
{
- skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
+ skb_release_all(skb, reason, true);
napi_skb_cache_put(skb);
}
@@ -1264,7 +1319,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
return;
}
- skb_release_all(skb, SKB_CONSUMED);
+ skb_release_all(skb, SKB_CONSUMED, !!budget);
napi_skb_cache_put(skb);
}
EXPORT_SYMBOL(napi_consume_skb);
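The ``!!budget`` above works because TX completion handlers are called with a budget of 0 when invoked from netpoll, where the per-CPU skb cache must not be touched. A driver-side sketch (not part of this patch; the ring helpers are hypothetical):

static void foo_clean_tx_ring(struct foo_tx_ring *ring, int budget)
{
	struct sk_buff *skb;

	/* budget > 0 guarantees softirq/NAPI context, so
	 * napi_consume_skb() may use the lockless skb cache.
	 */
	while ((skb = foo_next_completed_skb(ring)) != NULL)
		napi_consume_skb(skb, budget);
}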
@@ -1395,7 +1450,7 @@ EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
*/
struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
{
- skb_release_all(dst, SKB_CONSUMED);
+ skb_release_all(dst, SKB_CONSUMED, false);
return __skb_clone(dst, src);
}
EXPORT_SYMBOL_GPL(skb_morph);
@@ -2018,9 +2073,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
- skb_release_data(skb, SKB_CONSUMED);
+ skb_release_data(skb, SKB_CONSUMED, false);
} else {
- skb_free_head(skb);
+ skb_free_head(skb, false);
}
off = (data + nhead) - skb->head;
@@ -5187,6 +5242,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
}
EXPORT_SYMBOL_GPL(skb_tstamp_tx);
+#ifdef CONFIG_WIRELESS
void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
{
struct sock *sk = skb->sk;
@@ -5212,6 +5268,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
+#endif /* CONFIG_WIRELESS */
/**
* skb_partial_csum_set - set up and verify partial csum values for packet
@@ -5597,18 +5654,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (skb_cloned(to))
return false;
- /* In general, avoid mixing slab allocated and page_pool allocated
- * pages within the same SKB. However when @to is not pp_recycle and
- * @from is cloned, we can transition frag pages from page_pool to
- * reference counted.
- *
- * On the other hand, don't allow coalescing two pp_recycle SKBs if
- * @from is cloned, in case the SKB is using page_pool fragment
+ /* In general, avoid mixing page_pool and non-page_pool allocated
+ * pages within the same SKB. Additionally avoid dealing with clones
+ * with page_pool pages, in case the SKB is using page_pool fragment
* references (PP_FLAG_PAGE_FRAG). Since we only take full page
* references for cloned SKBs at the moment that would result in
* inconsistent reference counts.
+ * In theory we could take full references if @from is cloned and
+ * !@to->pp_recycle but it's tricky (due to a potential race with
+ * the clone disappearing) and rare, so not worth dealing with.
*/
- if (to->pp_recycle != (from->pp_recycle && !skb_cloned(from)))
+ if (to->pp_recycle != from->pp_recycle ||
+ (from->pp_recycle && skb_cloned(from)))
return false;
if (len <= skb_tailroom(to)) {
@@ -6389,12 +6446,12 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
skb_frag_ref(skb, i);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
- skb_release_data(skb, SKB_CONSUMED);
+ skb_release_data(skb, SKB_CONSUMED, false);
} else {
/* we can reuse the existing refcount - all we did was
* relocate values
*/
- skb_free_head(skb);
+ skb_free_head(skb, false);
}
skb->head = data;
@@ -6529,7 +6586,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
skb_kfree_head(data, size);
return -ENOMEM;
}
- skb_release_data(skb, SKB_CONSUMED);
+ skb_release_data(skb, SKB_CONSUMED, false);
skb->head = data;
skb->head_frag = 0;
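Restating the tightened skb_try_coalesce() precondition from earlier in this file as a standalone predicate may help; this sketch is not part of the patch:

static bool skb_pp_coalesce_ok(const struct sk_buff *to,
			       const struct sk_buff *from)
{
	/* Both sides must agree on page_pool vs. slab pages... */
	if (to->pp_recycle != from->pp_recycle)
		return false;

	/* ...and a cloned pp_recycle source may hold page fragment
	 * references, which coalescing cannot account for.
	 */
	return !(from->pp_recycle && skb_cloned(from));
}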
diff --git a/net/core/sock.c b/net/core/sock.c
index c25888795390..5440e67bcfe3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1396,15 +1396,10 @@ set_sndbuf:
#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_BUSY_POLL:
- /* allow unprivileged users to decrease the value */
- if ((val > sk->sk_ll_usec) && !sockopt_capable(CAP_NET_ADMIN))
- ret = -EPERM;
- else {
- if (val < 0)
- ret = -EINVAL;
- else
- WRITE_ONCE(sk->sk_ll_usec, val);
- }
+ if (val < 0)
+ ret = -EINVAL;
+ else
+ WRITE_ONCE(sk->sk_ll_usec, val);
break;
case SO_PREFER_BUSY_POLL:
if (valbool && !sockopt_capable(CAP_NET_ADMIN))
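With the CAP_NET_ADMIN check dropped, unprivileged processes can now raise their own busy-poll budget. A userspace sketch (not part of this patch):

#include <sys/socket.h>

static int enable_busy_poll(int fd, int busy_poll_usecs)
{
	/* Raising the value beyond the current setting used to require
	 * CAP_NET_ADMIN; now only negative values are rejected.
	 */
	return setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL,
			  &busy_poll_usecs, sizeof(busy_poll_usecs));
}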
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 9b854e236d23..7c189c2e2fbf 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -437,7 +437,7 @@ static void sock_map_delete_from_link(struct bpf_map *map, struct sock *sk,
__sock_map_delete(stab, sk, link_raw);
}
-static int sock_map_delete_elem(struct bpf_map *map, void *key)
+static long sock_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
u32 i = *(u32 *)key;
@@ -587,8 +587,8 @@ out:
return ret;
}
-static int sock_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 flags)
+static long sock_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
{
struct sock *sk = (struct sock *)value;
int ret;
@@ -925,7 +925,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
raw_spin_unlock_bh(&bucket->lock);
}
-static int sock_hash_delete_elem(struct bpf_map *map, void *key)
+static long sock_hash_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 hash, key_size = map->key_size;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 528d4b37983d..41e5ca8643ec 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -531,21 +531,6 @@ out:
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
-/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
-void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
-{
- struct xdp_mem_allocator *xa;
- struct page *page;
-
- rcu_read_lock();
- xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
- page = virt_to_head_page(data);
- if (xa)
- page_pool_release_page(xa->page_pool, page);
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(__xdp_release_frame);
-
void xdp_attachment_setup(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{
@@ -658,8 +643,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
* - RX ring dev queue index (skb_record_rx_queue)
*/
- /* Until page_pool get SKB return path, release DMA here */
- xdp_release_frame(xdpf);
+ if (xdpf->mem.type == MEM_TYPE_PAGE_POOL)
+ skb_mark_for_recycle(skb);
/* Allow SKB to reuse area used by xdp_frame */
xdp_scrub_frame(xdpf);
@@ -734,13 +719,21 @@ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *tim
* bpf_xdp_metadata_rx_hash - Read XDP frame RX hash.
* @ctx: XDP context pointer.
* @hash: Return value pointer.
+ * @rss_type: Return value pointer for RSS type.
+ *
+ * The RSS hash type (@rss_type) specifies what portion of the packet headers the
+ * NIC hardware used when calculating the RSS hash value. The RSS type can be
+ * decoded via &enum xdp_rss_hash_type, either by matching on the individual
+ * L3/L4 bits ``XDP_RSS_L*`` or on the combined traditional *RSS Hashing Types*
+ * ``XDP_RSS_TYPE_L*``.
*
* Return:
* * Returns 0 on success or ``-errno`` on error.
* * ``-EOPNOTSUPP`` : means device driver doesn't implement kfunc
* * ``-ENODATA`` : means no RX-hash available for this frame
*/
-__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
+__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type)
{
return -EOPNOTSUPP;
}
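A BPF-side sketch (not part of this patch) of consuming the extended kfunc; the declarations mirror the prototype above and assume the usual vmlinux.h plus libbpf SEC()/__ksym conventions:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
				    enum xdp_rss_hash_type *rss_type) __ksym;

SEC("xdp")
int rx_hash_example(struct xdp_md *ctx)
{
	enum xdp_rss_hash_type rss_type;
	__u32 hash;

	if (bpf_xdp_metadata_rx_hash(ctx, &hash, &rss_type))
		return XDP_PASS;	/* -EOPNOTSUPP or -ENODATA */

	if (rss_type & XDP_RSS_L4)	/* hash covers L4 ports */
		bpf_printk("L4-aware hash 0x%x", hash);

	return XDP_PASS;
}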
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index cc7e93a562fe..12e305824a96 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,4 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
+
+# the stubs are built-in whenever DSA is built-in or a module
+ifdef CONFIG_NET_DSA
+obj-y := stubs.o
+endif
+
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
dsa_core-y += \
@@ -10,7 +16,8 @@ dsa_core-y += \
slave.o \
switch.o \
tag.o \
- tag_8021q.o
+ tag_8021q.o \
+ trace.o
# tagging formats
obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o
@@ -31,3 +38,6 @@ obj-$(CONFIG_NET_DSA_TAG_RZN1_A5PSW) += tag_rzn1_a5psw.o
obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o
obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o
+
+# for tracing framework to find trace.h
+CFLAGS_trace.o := -I$(src)
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index e5f156940c67..ab1afe67fd18 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -17,6 +17,7 @@
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/of_net.h>
+#include <net/dsa_stubs.h>
#include <net/sch_generic.h>
#include "devlink.h"
@@ -1702,6 +1703,20 @@ bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port,
}
EXPORT_SYMBOL_GPL(dsa_mdb_present_in_other_db);
+static const struct dsa_stubs __dsa_stubs = {
+ .master_hwtstamp_validate = __dsa_master_hwtstamp_validate,
+};
+
+static void dsa_register_stubs(void)
+{
+ dsa_stubs = &__dsa_stubs;
+}
+
+static void dsa_unregister_stubs(void)
+{
+ dsa_stubs = NULL;
+}
+
static int __init dsa_init_module(void)
{
int rc;
@@ -1721,6 +1736,8 @@ static int __init dsa_init_module(void)
if (rc)
goto netlink_register_fail;
+ dsa_register_stubs();
+
return 0;
netlink_register_fail:
@@ -1735,6 +1752,8 @@ module_init(dsa_init_module);
static void __exit dsa_cleanup_module(void)
{
+ dsa_unregister_stubs();
+
rtnl_link_unregister(&dsa_link_ops);
dsa_slave_unregister_notifier();
diff --git a/net/dsa/master.c b/net/dsa/master.c
index c2cabe6248b1..6be89ab0cc01 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -198,7 +198,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
/* Deny PTP operations on master if there is at least one switch in the tree
* that is PTP capable.
*/
-int dsa_master_pre_change_hwtstamp(struct net_device *dev,
+int __dsa_master_hwtstamp_validate(struct net_device *dev,
const struct kernel_hwtstamp_config *config,
struct netlink_ext_ack *extack)
{
diff --git a/net/dsa/master.h b/net/dsa/master.h
index 80842f4e27f7..76e39d3ec909 100644
--- a/net/dsa/master.h
+++ b/net/dsa/master.h
@@ -15,7 +15,7 @@ int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp,
struct netlink_ext_ack *extack);
void dsa_master_lag_teardown(struct net_device *lag_dev,
struct dsa_port *cpu_dp);
-int dsa_master_pre_change_hwtstamp(struct net_device *dev,
+int __dsa_master_hwtstamp_validate(struct net_device *dev,
const struct kernel_hwtstamp_config *config,
struct netlink_ext_ack *extack);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 8abc1658ac47..165bb2cb8431 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -3289,7 +3289,6 @@ static int dsa_master_changeupper(struct net_device *dev,
static int dsa_slave_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
- struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
switch (event) {
@@ -3419,16 +3418,6 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
return NOTIFY_OK;
}
- case NETDEV_PRE_CHANGE_HWTSTAMP: {
- struct netdev_notifier_hwtstamp_info *info = ptr;
- int err;
-
- if (!netdev_uses_dsa(dev))
- return NOTIFY_DONE;
-
- err = dsa_master_pre_change_hwtstamp(dev, info->config, extack);
- return notifier_from_errno(err);
- }
default:
break;
}
diff --git a/net/dsa/stubs.c b/net/dsa/stubs.c
new file mode 100644
index 000000000000..2ed8a6c85fbf
--- /dev/null
+++ b/net/dsa/stubs.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Stubs for DSA functionality called by the core network stack.
+ * These are necessary because CONFIG_NET_DSA can be a module, and built-in
+ * code cannot directly call symbols exported by modules.
+ */
+#include <net/dsa_stubs.h>
+
+const struct dsa_stubs *dsa_stubs;
+EXPORT_SYMBOL_GPL(dsa_stubs);
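The consuming side lives behind a static-inline wrapper, presumably in <net/dsa_stubs.h>, whose body is not shown in this diff. A sketch of the expected call pattern:

static inline int dsa_master_hwtstamp_validate(struct net_device *dev,
					       const struct kernel_hwtstamp_config *config,
					       struct netlink_ext_ack *extack)
{
	if (!netdev_uses_dsa(dev))
		return 0;

	/* dsa_stubs is non-NULL only while DSA is loaded and registered;
	 * netdev_uses_dsa() returning true implies that it is.
	 */
	return dsa_stubs->master_hwtstamp_validate(dev, config, extack);
}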
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index d5bc4bb7310d..8c9a9f94b756 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -18,6 +18,7 @@
#include "slave.h"
#include "switch.h"
#include "tag_8021q.h"
+#include "trace.h"
static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
unsigned int ageing_time)
@@ -164,14 +165,20 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
int err = 0;
/* No need to bother with refcounting for user ports */
- if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_mdb_add(ds, port, mdb, db);
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) {
+ err = ds->ops->port_mdb_add(ds, port, mdb, db);
+ trace_dsa_mdb_add_hw(dp, mdb->addr, mdb->vid, &db, err);
+
+ return err;
+ }
mutex_lock(&dp->addr_lists_lock);
a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db);
if (a) {
refcount_inc(&a->refcount);
+ trace_dsa_mdb_add_bump(dp, mdb->addr, mdb->vid, &db,
+ &a->refcount);
goto out;
}
@@ -182,6 +189,7 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
}
err = ds->ops->port_mdb_add(ds, port, mdb, db);
+ trace_dsa_mdb_add_hw(dp, mdb->addr, mdb->vid, &db, err);
if (err) {
kfree(a);
goto out;
@@ -209,21 +217,30 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp,
int err = 0;
/* No need to bother with refcounting for user ports */
- if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_mdb_del(ds, port, mdb, db);
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) {
+ err = ds->ops->port_mdb_del(ds, port, mdb, db);
+ trace_dsa_mdb_del_hw(dp, mdb->addr, mdb->vid, &db, err);
+
+ return err;
+ }
mutex_lock(&dp->addr_lists_lock);
a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db);
if (!a) {
+ trace_dsa_mdb_del_not_found(dp, mdb->addr, mdb->vid, &db);
err = -ENOENT;
goto out;
}
- if (!refcount_dec_and_test(&a->refcount))
+ if (!refcount_dec_and_test(&a->refcount)) {
+ trace_dsa_mdb_del_drop(dp, mdb->addr, mdb->vid, &db,
+ &a->refcount);
goto out;
+ }
err = ds->ops->port_mdb_del(ds, port, mdb, db);
+ trace_dsa_mdb_del_hw(dp, mdb->addr, mdb->vid, &db, err);
if (err) {
refcount_set(&a->refcount, 1);
goto out;
@@ -247,14 +264,19 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
int err = 0;
/* No need to bother with refcounting for user ports */
- if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_fdb_add(ds, port, addr, vid, db);
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) {
+ err = ds->ops->port_fdb_add(ds, port, addr, vid, db);
+ trace_dsa_fdb_add_hw(dp, addr, vid, &db, err);
+
+ return err;
+ }
mutex_lock(&dp->addr_lists_lock);
a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db);
if (a) {
refcount_inc(&a->refcount);
+ trace_dsa_fdb_add_bump(dp, addr, vid, &db, &a->refcount);
goto out;
}
@@ -265,6 +287,7 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
}
err = ds->ops->port_fdb_add(ds, port, addr, vid, db);
+ trace_dsa_fdb_add_hw(dp, addr, vid, &db, err);
if (err) {
kfree(a);
goto out;
@@ -291,21 +314,29 @@ static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
int err = 0;
/* No need to bother with refcounting for user ports */
- if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_fdb_del(ds, port, addr, vid, db);
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) {
+ err = ds->ops->port_fdb_del(ds, port, addr, vid, db);
+ trace_dsa_fdb_del_hw(dp, addr, vid, &db, err);
+
+ return err;
+ }
mutex_lock(&dp->addr_lists_lock);
a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db);
if (!a) {
+ trace_dsa_fdb_del_not_found(dp, addr, vid, &db);
err = -ENOENT;
goto out;
}
- if (!refcount_dec_and_test(&a->refcount))
+ if (!refcount_dec_and_test(&a->refcount)) {
+ trace_dsa_fdb_del_drop(dp, addr, vid, &db, &a->refcount);
goto out;
+ }
err = ds->ops->port_fdb_del(ds, port, addr, vid, db);
+ trace_dsa_fdb_del_hw(dp, addr, vid, &db, err);
if (err) {
refcount_set(&a->refcount, 1);
goto out;
@@ -332,6 +363,8 @@ static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag,
a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db);
if (a) {
refcount_inc(&a->refcount);
+ trace_dsa_lag_fdb_add_bump(lag->dev, addr, vid, &db,
+ &a->refcount);
goto out;
}
@@ -342,6 +375,7 @@ static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag,
}
err = ds->ops->lag_fdb_add(ds, *lag, addr, vid, db);
+ trace_dsa_lag_fdb_add_hw(lag->dev, addr, vid, &db, err);
if (err) {
kfree(a);
goto out;
@@ -370,14 +404,19 @@ static int dsa_switch_do_lag_fdb_del(struct dsa_switch *ds, struct dsa_lag *lag,
a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db);
if (!a) {
+ trace_dsa_lag_fdb_del_not_found(lag->dev, addr, vid, &db);
err = -ENOENT;
goto out;
}
- if (!refcount_dec_and_test(&a->refcount))
+ if (!refcount_dec_and_test(&a->refcount)) {
+ trace_dsa_lag_fdb_del_drop(lag->dev, addr, vid, &db,
+ &a->refcount);
goto out;
+ }
err = ds->ops->lag_fdb_del(ds, *lag, addr, vid, db);
+ trace_dsa_lag_fdb_del_hw(lag->dev, addr, vid, &db, err);
if (err) {
refcount_set(&a->refcount, 1);
goto out;
@@ -656,8 +695,12 @@ static int dsa_port_do_vlan_add(struct dsa_port *dp,
int err = 0;
/* No need to bother with refcounting for user ports. */
- if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_vlan_add(ds, port, vlan, extack);
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) {
+ err = ds->ops->port_vlan_add(ds, port, vlan, extack);
+ trace_dsa_vlan_add_hw(dp, vlan, err);
+
+ return err;
+ }
/* No need to propagate on shared ports the existing VLANs that were
* re-notified after just the flags have changed. This would cause a
@@ -672,6 +715,7 @@ static int dsa_port_do_vlan_add(struct dsa_port *dp,
v = dsa_vlan_find(&dp->vlans, vlan);
if (v) {
refcount_inc(&v->refcount);
+ trace_dsa_vlan_add_bump(dp, vlan, &v->refcount);
goto out;
}
@@ -682,6 +726,7 @@ static int dsa_port_do_vlan_add(struct dsa_port *dp,
}
err = ds->ops->port_vlan_add(ds, port, vlan, extack);
+ trace_dsa_vlan_add_hw(dp, vlan, err);
if (err) {
kfree(v);
goto out;
@@ -706,21 +751,29 @@ static int dsa_port_do_vlan_del(struct dsa_port *dp,
int err = 0;
/* No need to bother with refcounting for user ports */
- if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_vlan_del(ds, port, vlan);
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) {
+ err = ds->ops->port_vlan_del(ds, port, vlan);
+ trace_dsa_vlan_del_hw(dp, vlan, err);
+
+ return err;
+ }
mutex_lock(&dp->vlans_lock);
v = dsa_vlan_find(&dp->vlans, vlan);
if (!v) {
+ trace_dsa_vlan_del_not_found(dp, vlan);
err = -ENOENT;
goto out;
}
- if (!refcount_dec_and_test(&v->refcount))
+ if (!refcount_dec_and_test(&v->refcount)) {
+ trace_dsa_vlan_del_drop(dp, vlan, &v->refcount);
goto out;
+ }
err = ds->ops->port_vlan_del(ds, port, vlan);
+ trace_dsa_vlan_del_hw(dp, vlan, err);
if (err) {
refcount_set(&v->refcount, 1);
goto out;
diff --git a/net/dsa/trace.c b/net/dsa/trace.c
new file mode 100644
index 000000000000..1b107165d331
--- /dev/null
+++ b/net/dsa/trace.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright 2022-2023 NXP
+ */
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+void dsa_db_print(const struct dsa_db *db, char buf[DSA_DB_BUFSIZ])
+{
+ switch (db->type) {
+ case DSA_DB_PORT:
+ sprintf(buf, "port %s", db->dp->name);
+ break;
+ case DSA_DB_LAG:
+ sprintf(buf, "lag %s id %d", db->lag.dev->name, db->lag.id);
+ break;
+ case DSA_DB_BRIDGE:
+ sprintf(buf, "bridge %s num %d", db->bridge.dev->name,
+ db->bridge.num);
+ break;
+ default:
+ sprintf(buf, "unknown");
+ break;
+ }
+}
+
+const char *dsa_port_kind(const struct dsa_port *dp)
+{
+ switch (dp->type) {
+ case DSA_PORT_TYPE_USER:
+ return "user";
+ case DSA_PORT_TYPE_CPU:
+ return "cpu";
+ case DSA_PORT_TYPE_DSA:
+ return "dsa";
+ default:
+ return "unused";
+ }
+}
diff --git a/net/dsa/trace.h b/net/dsa/trace.h
new file mode 100644
index 000000000000..567f29a39707
--- /dev/null
+++ b/net/dsa/trace.h
@@ -0,0 +1,447 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright 2022-2023 NXP
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM dsa
+
+#if !defined(_NET_DSA_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _NET_DSA_TRACE_H
+
+#include <net/dsa.h>
+#include <net/switchdev.h>
+#include <linux/etherdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/refcount.h>
+#include <linux/tracepoint.h>
+
+/* Enough to fit "bridge %s num %d" where num has 3 digits */
+#define DSA_DB_BUFSIZ (IFNAMSIZ + 16)
+
+void dsa_db_print(const struct dsa_db *db, char buf[DSA_DB_BUFSIZ]);
+const char *dsa_port_kind(const struct dsa_port *dp);
+
+DECLARE_EVENT_CLASS(dsa_port_addr_op_hw,
+
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, u16 vid,
+ const struct dsa_db *db, int err),
+
+ TP_ARGS(dp, addr, vid, db, err),
+
+ TP_STRUCT__entry(
+ __string(dev, dev_name(dp->ds->dev))
+ __string(kind, dsa_port_kind(dp))
+ __field(int, port)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __array(char, db_buf, DSA_DB_BUFSIZ)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev, dev_name(dp->ds->dev));
+ __assign_str(kind, dsa_port_kind(dp));
+ __entry->port = dp->index;
+ ether_addr_copy(__entry->addr, addr);
+ __entry->vid = vid;
+ dsa_db_print(db, __entry->db_buf);
+ __entry->err = err;
+ ),
+
+ TP_printk("%s %s port %d addr %pM vid %u db \"%s\" err %d",
+ __get_str(dev), __get_str(kind), __entry->port, __entry->addr,
+ __entry->vid, __entry->db_buf, __entry->err)
+);
+
+/* Add unicast/multicast address to hardware, either on user ports
+ * (where no refcounting is kept), or on shared ports when the entry
+ * is first seen and its refcount is 1.
+ */
+DEFINE_EVENT(dsa_port_addr_op_hw, dsa_fdb_add_hw,
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db, int err),
+ TP_ARGS(dp, addr, vid, db, err));
+
+DEFINE_EVENT(dsa_port_addr_op_hw, dsa_mdb_add_hw,
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db, int err),
+ TP_ARGS(dp, addr, vid, db, err));
+
+/* Delete unicast/multicast address from hardware, either on user ports or
+ * when the refcount on shared ports reaches 0
+ */
+DEFINE_EVENT(dsa_port_addr_op_hw, dsa_fdb_del_hw,
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db, int err),
+ TP_ARGS(dp, addr, vid, db, err));
+
+DEFINE_EVENT(dsa_port_addr_op_hw, dsa_mdb_del_hw,
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db, int err),
+ TP_ARGS(dp, addr, vid, db, err));
+
+DECLARE_EVENT_CLASS(dsa_port_addr_op_refcount,
+
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, u16 vid,
+ const struct dsa_db *db, const refcount_t *refcount),
+
+ TP_ARGS(dp, addr, vid, db, refcount),
+
+ TP_STRUCT__entry(
+ __string(dev, dev_name(dp->ds->dev))
+ __string(kind, dsa_port_kind(dp))
+ __field(int, port)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __array(char, db_buf, DSA_DB_BUFSIZ)
+ __field(unsigned int, refcount)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev, dev_name(dp->ds->dev));
+ __assign_str(kind, dsa_port_kind(dp));
+ __entry->port = dp->index;
+ ether_addr_copy(__entry->addr, addr);
+ __entry->vid = vid;
+ dsa_db_print(db, __entry->db_buf);
+ __entry->refcount = refcount_read(refcount);
+ ),
+
+ TP_printk("%s %s port %d addr %pM vid %u db \"%s\" refcount %u",
+ __get_str(dev), __get_str(kind), __entry->port, __entry->addr,
+ __entry->vid, __entry->db_buf, __entry->refcount)
+);
+
+/* Bump the refcount of an existing unicast/multicast address on shared ports */
+DEFINE_EVENT(dsa_port_addr_op_refcount, dsa_fdb_add_bump,
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db,
+ const refcount_t *refcount),
+ TP_ARGS(dp, addr, vid, db, refcount));
+
+DEFINE_EVENT(dsa_port_addr_op_refcount, dsa_mdb_add_bump,
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db,
+ const refcount_t *refcount),
+ TP_ARGS(dp, addr, vid, db, refcount));
+
+/* Drop the refcount of a unicast/multicast address that we still keep on
+ * shared ports
+ */
+DEFINE_EVENT(dsa_port_addr_op_refcount, dsa_fdb_del_drop,
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db,
+ const refcount_t *refcount),
+ TP_ARGS(dp, addr, vid, db, refcount));
+
+DEFINE_EVENT(dsa_port_addr_op_refcount, dsa_mdb_del_drop,
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db,
+ const refcount_t *refcount),
+ TP_ARGS(dp, addr, vid, db, refcount));
+
+DECLARE_EVENT_CLASS(dsa_port_addr_del_not_found,
+
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, u16 vid,
+ const struct dsa_db *db),
+
+ TP_ARGS(dp, addr, vid, db),
+
+ TP_STRUCT__entry(
+ __string(dev, dev_name(dp->ds->dev))
+ __string(kind, dsa_port_kind(dp))
+ __field(int, port)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __array(char, db_buf, DSA_DB_BUFSIZ)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev, dev_name(dp->ds->dev));
+ __assign_str(kind, dsa_port_kind(dp));
+ __entry->port = dp->index;
+ ether_addr_copy(__entry->addr, addr);
+ __entry->vid = vid;
+ dsa_db_print(db, __entry->db_buf);
+ ),
+
+ TP_printk("%s %s port %d addr %pM vid %u db \"%s\"",
+ __get_str(dev), __get_str(kind), __entry->port,
+ __entry->addr, __entry->vid, __entry->db_buf)
+);
+
+/* Attempt to delete a unicast/multicast address on shared ports for which
+ * the delete operation was called more times than the add operation
+ */
+DEFINE_EVENT(dsa_port_addr_del_not_found, dsa_fdb_del_not_found,
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db),
+ TP_ARGS(dp, addr, vid, db));
+
+DEFINE_EVENT(dsa_port_addr_del_not_found, dsa_mdb_del_not_found,
+ TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db),
+ TP_ARGS(dp, addr, vid, db));
+
+TRACE_EVENT(dsa_lag_fdb_add_hw,
+
+ TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db, int err),
+
+ TP_ARGS(lag_dev, addr, vid, db, err),
+
+ TP_STRUCT__entry(
+ __string(dev, lag_dev->name)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __array(char, db_buf, DSA_DB_BUFSIZ)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev, lag_dev->name);
+ ether_addr_copy(__entry->addr, addr);
+ __entry->vid = vid;
+ dsa_db_print(db, __entry->db_buf);
+ __entry->err = err;
+ ),
+
+ TP_printk("%s addr %pM vid %u db \"%s\" err %d",
+ __get_str(dev), __entry->addr, __entry->vid,
+ __entry->db_buf, __entry->err)
+);
+
+TRACE_EVENT(dsa_lag_fdb_add_bump,
+
+ TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db, const refcount_t *refcount),
+
+ TP_ARGS(lag_dev, addr, vid, db, refcount),
+
+ TP_STRUCT__entry(
+ __string(dev, lag_dev->name)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __array(char, db_buf, DSA_DB_BUFSIZ)
+ __field(unsigned int, refcount)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev, lag_dev->name);
+ ether_addr_copy(__entry->addr, addr);
+ __entry->vid = vid;
+ dsa_db_print(db, __entry->db_buf);
+ __entry->refcount = refcount_read(refcount);
+ ),
+
+ TP_printk("%s addr %pM vid %u db \"%s\" refcount %u",
+ __get_str(dev), __entry->addr, __entry->vid,
+ __entry->db_buf, __entry->refcount)
+);
+
+TRACE_EVENT(dsa_lag_fdb_del_hw,
+
+ TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db, int err),
+
+ TP_ARGS(lag_dev, addr, vid, db, err),
+
+ TP_STRUCT__entry(
+ __string(dev, lag_dev->name)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __array(char, db_buf, DSA_DB_BUFSIZ)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev, lag_dev->name);
+ ether_addr_copy(__entry->addr, addr);
+ __entry->vid = vid;
+ dsa_db_print(db, __entry->db_buf);
+ __entry->err = err;
+ ),
+
+ TP_printk("%s addr %pM vid %u db \"%s\" err %d",
+ __get_str(dev), __entry->addr, __entry->vid,
+ __entry->db_buf, __entry->err)
+);
+
+TRACE_EVENT(dsa_lag_fdb_del_drop,
+
+ TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db, const refcount_t *refcount),
+
+ TP_ARGS(lag_dev, addr, vid, db, refcount),
+
+ TP_STRUCT__entry(
+ __string(dev, lag_dev->name)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __array(char, db_buf, DSA_DB_BUFSIZ)
+ __field(unsigned int, refcount)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev, lag_dev->name);
+ ether_addr_copy(__entry->addr, addr);
+ __entry->vid = vid;
+ dsa_db_print(db, __entry->db_buf);
+ __entry->refcount = refcount_read(refcount);
+ ),
+
+ TP_printk("%s addr %pM vid %u db \"%s\" refcount %u",
+ __get_str(dev), __entry->addr, __entry->vid,
+ __entry->db_buf, __entry->refcount)
+);
+
+TRACE_EVENT(dsa_lag_fdb_del_not_found,
+
+ TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr,
+ u16 vid, const struct dsa_db *db),
+
+ TP_ARGS(lag_dev, addr, vid, db),
+
+ TP_STRUCT__entry(
+ __string(dev, lag_dev->name)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __array(char, db_buf, DSA_DB_BUFSIZ)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev, lag_dev->name);
+ ether_addr_copy(__entry->addr, addr);
+ __entry->vid = vid;
+ dsa_db_print(db, __entry->db_buf);
+ ),
+
+ TP_printk("%s addr %pM vid %u db \"%s\"",
+ __get_str(dev), __entry->addr, __entry->vid, __entry->db_buf)
+);
+
+DECLARE_EVENT_CLASS(dsa_vlan_op_hw,
+
+ TP_PROTO(const struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan, int err),
+
+ TP_ARGS(dp, vlan, err),
+
+ TP_STRUCT__entry(
+ __string(dev, dev_name(dp->ds->dev))
+ __string(kind, dsa_port_kind(dp))
+ __field(int, port)
+ __field(u16, vid)
+ __field(u16, flags)
+ __field(bool, changed)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev, dev_name(dp->ds->dev));
+ __assign_str(kind, dsa_port_kind(dp));
+ __entry->port = dp->index;
+ __entry->vid = vlan->vid;
+ __entry->flags = vlan->flags;
+ __entry->changed = vlan->changed;
+ __entry->err = err;
+ ),
+
+ TP_printk("%s %s port %d vid %u%s%s%s",
+ __get_str(dev), __get_str(kind), __entry->port, __entry->vid,
+ __entry->flags & BRIDGE_VLAN_INFO_PVID ? " pvid" : "",
+ __entry->flags & BRIDGE_VLAN_INFO_UNTAGGED ? " untagged" : "",
+ __entry->changed ? " (changed)" : "")
+);
+
+DEFINE_EVENT(dsa_vlan_op_hw, dsa_vlan_add_hw,
+ TP_PROTO(const struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan, int err),
+ TP_ARGS(dp, vlan, err));
+
+DEFINE_EVENT(dsa_vlan_op_hw, dsa_vlan_del_hw,
+ TP_PROTO(const struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan, int err),
+ TP_ARGS(dp, vlan, err));
+
+DECLARE_EVENT_CLASS(dsa_vlan_op_refcount,
+
+ TP_PROTO(const struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ const refcount_t *refcount),
+
+ TP_ARGS(dp, vlan, refcount),
+
+ TP_STRUCT__entry(
+ __string(dev, dev_name(dp->ds->dev))
+ __string(kind, dsa_port_kind(dp))
+ __field(int, port)
+ __field(u16, vid)
+ __field(u16, flags)
+ __field(bool, changed)
+ __field(unsigned int, refcount)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev, dev_name(dp->ds->dev));
+ __assign_str(kind, dsa_port_kind(dp));
+ __entry->port = dp->index;
+ __entry->vid = vlan->vid;
+ __entry->flags = vlan->flags;
+ __entry->changed = vlan->changed;
+ __entry->refcount = refcount_read(refcount);
+ ),
+
+ TP_printk("%s %s port %d vid %u%s%s%s refcount %u",
+ __get_str(dev), __get_str(kind), __entry->port, __entry->vid,
+ __entry->flags & BRIDGE_VLAN_INFO_PVID ? " pvid" : "",
+ __entry->flags & BRIDGE_VLAN_INFO_UNTAGGED ? " untagged" : "",
+ __entry->changed ? " (changed)" : "", __entry->refcount)
+);
+
+DEFINE_EVENT(dsa_vlan_op_refcount, dsa_vlan_add_bump,
+ TP_PROTO(const struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ const refcount_t *refcount),
+ TP_ARGS(dp, vlan, refcount));
+
+DEFINE_EVENT(dsa_vlan_op_refcount, dsa_vlan_del_drop,
+ TP_PROTO(const struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ const refcount_t *refcount),
+ TP_ARGS(dp, vlan, refcount));
+
+TRACE_EVENT(dsa_vlan_del_not_found,
+
+ TP_PROTO(const struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan),
+
+ TP_ARGS(dp, vlan),
+
+ TP_STRUCT__entry(
+ __string(dev, dev_name(dp->ds->dev))
+ __string(kind, dsa_port_kind(dp))
+ __field(int, port)
+ __field(u16, vid)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev, dev_name(dp->ds->dev));
+ __assign_str(kind, dsa_port_kind(dp));
+ __entry->port = dp->index;
+ __entry->vid = vlan->vid;
+ ),
+
+ TP_printk("%s %s port %d vid %u",
+ __get_str(dev), __get_str(kind), __entry->port, __entry->vid)
+);
+
+#endif /* _NET_DSA_TRACE_H */
+
+/* We don't want to use include/trace/events */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index fab66c169b9f..20165e07ef90 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -270,11 +270,12 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb,
"lanes configuration not supported by device");
return -EOPNOTSUPP;
}
- } else if (!lsettings->autoneg) {
- /* If autoneg is off and lanes parameter is not passed from user,
- * set the lanes parameter to 0.
+ } else if (!lsettings->autoneg && ksettings->lanes) {
+ /* If autoneg is off and the lanes parameter was not passed from user
+ * space but was defined previously, then set the lanes parameter to 0.
*/
ksettings->lanes = 0;
+ *mod = true;
}
ret = ethnl_update_bitset(ksettings->link_modes.advertising,
diff --git a/net/ethtool/mm.c b/net/ethtool/mm.c
index fce3cc2734f9..4058a557b5a4 100644
--- a/net/ethtool/mm.c
+++ b/net/ethtool/mm.c
@@ -214,6 +214,16 @@ static int ethnl_set_mm(struct ethnl_req_info *req_info, struct genl_info *info)
return -ERANGE;
}
+ if (cfg.verify_enabled && !cfg.tx_enabled) {
+ NL_SET_ERR_MSG(extack, "Verification requires TX enabled");
+ return -EINVAL;
+ }
+
+ if (cfg.tx_enabled && !cfg.pmac_enabled) {
+ NL_SET_ERR_MSG(extack, "TX enabled requires pMAC enabled");
+ return -EINVAL;
+ }
+
ret = dev->ethtool_ops->set_mm(dev, &cfg, extack);
return ret < 0 ? ret : 1;
}
@@ -249,3 +259,26 @@ bool __ethtool_dev_mm_supported(struct net_device *dev)
return !ret;
}
+
+bool ethtool_dev_mm_supported(struct net_device *dev)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ bool supported;
+ int ret;
+
+ ASSERT_RTNL();
+
+ if (!ops)
+ return false;
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return false;
+
+ supported = __ethtool_dev_mm_supported(dev);
+
+ ethnl_ops_complete(dev);
+
+ return supported;
+}
+EXPORT_SYMBOL_GPL(ethtool_dev_mm_supported);
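ethtool_dev_mm_supported() is exported for module callers that must check MAC merge support before configuring frame preemption. A sketch (not part of this patch; the foo_* names are hypothetical):

static int foo_enable_frame_preemption(struct net_device *dev)
{
	int err;

	rtnl_lock();	/* ethtool_dev_mm_supported() asserts RTNL */
	if (!ethtool_dev_mm_supported(dev))
		err = -EOPNOTSUPP;
	else
		err = foo_program_preemptible_tcs(dev);	/* hypothetical */
	rtnl_unlock();

	return err;
}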
diff --git a/net/handshake/.kunitconfig b/net/handshake/.kunitconfig
new file mode 100644
index 000000000000..5c48cf4abca2
--- /dev/null
+++ b/net/handshake/.kunitconfig
@@ -0,0 +1,11 @@
+CONFIG_KUNIT=y
+CONFIG_UBSAN=y
+CONFIG_STACKTRACE=y
+CONFIG_NET=y
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_INET=y
+CONFIG_MULTIUSER=y
+CONFIG_NFS_FS=y
+CONFIG_SUNRPC=y
+CONFIG_NET_HANDSHAKE=y
+CONFIG_NET_HANDSHAKE_KUNIT_TEST=y
diff --git a/net/handshake/Makefile b/net/handshake/Makefile
new file mode 100644
index 000000000000..247d73c6ff6e
--- /dev/null
+++ b/net/handshake/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the Generic HANDSHAKE service
+#
+# Author: Chuck Lever <chuck.lever@oracle.com>
+#
+# Copyright (c) 2023, Oracle and/or its affiliates.
+#
+
+obj-y += handshake.o
+handshake-y := genl.o netlink.o request.o tlshd.o trace.o
+
+obj-$(CONFIG_NET_HANDSHAKE_KUNIT_TEST) += handshake-test.o
diff --git a/net/handshake/genl.c b/net/handshake/genl.c
new file mode 100644
index 000000000000..9f29efb1493e
--- /dev/null
+++ b/net/handshake/genl.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/handshake.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "genl.h"
+
+#include <linux/handshake.h>
+
+/* HANDSHAKE_CMD_ACCEPT - do */
+static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HANDLER_CLASS + 1] = {
+ [HANDSHAKE_A_ACCEPT_HANDLER_CLASS] = NLA_POLICY_MAX(NLA_U32, 2),
+};
+
+/* HANDSHAKE_CMD_DONE - do */
+static const struct nla_policy handshake_done_nl_policy[HANDSHAKE_A_DONE_REMOTE_AUTH + 1] = {
+ [HANDSHAKE_A_DONE_STATUS] = { .type = NLA_U32, },
+ [HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_U32, },
+ [HANDSHAKE_A_DONE_REMOTE_AUTH] = { .type = NLA_U32, },
+};
+
+/* Ops table for handshake */
+static const struct genl_split_ops handshake_nl_ops[] = {
+ {
+ .cmd = HANDSHAKE_CMD_ACCEPT,
+ .doit = handshake_nl_accept_doit,
+ .policy = handshake_accept_nl_policy,
+ .maxattr = HANDSHAKE_A_ACCEPT_HANDLER_CLASS,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = HANDSHAKE_CMD_DONE,
+ .doit = handshake_nl_done_doit,
+ .policy = handshake_done_nl_policy,
+ .maxattr = HANDSHAKE_A_DONE_REMOTE_AUTH,
+ .flags = GENL_CMD_CAP_DO,
+ },
+};
+
+static const struct genl_multicast_group handshake_nl_mcgrps[] = {
+ [HANDSHAKE_NLGRP_NONE] = { "none", },
+ [HANDSHAKE_NLGRP_TLSHD] = { "tlshd", },
+};
+
+struct genl_family handshake_nl_family __ro_after_init = {
+ .name = HANDSHAKE_FAMILY_NAME,
+ .version = HANDSHAKE_FAMILY_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .split_ops = handshake_nl_ops,
+ .n_split_ops = ARRAY_SIZE(handshake_nl_ops),
+ .mcgrps = handshake_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(handshake_nl_mcgrps),
+};
diff --git a/net/handshake/genl.h b/net/handshake/genl.h
new file mode 100644
index 000000000000..2c1f1aa6a02a
--- /dev/null
+++ b/net/handshake/genl.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/handshake.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_HANDSHAKE_GEN_H
+#define _LINUX_HANDSHAKE_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <linux/handshake.h>
+
+int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info);
+int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info);
+
+enum {
+ HANDSHAKE_NLGRP_NONE,
+ HANDSHAKE_NLGRP_TLSHD,
+};
+
+extern struct genl_family handshake_nl_family;
+
+#endif /* _LINUX_HANDSHAKE_GEN_H */
diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c
new file mode 100644
index 000000000000..e6adc5dec11a
--- /dev/null
+++ b/net/handshake/handshake-test.c
@@ -0,0 +1,523 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Oracle and/or its affiliates.
+ *
+ * KUnit test of the handshake upcall mechanism.
+ */
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include <linux/kernel.h>
+
+#include <net/sock.h>
+#include <net/genetlink.h>
+#include <net/netns/generic.h>
+
+#include <uapi/linux/handshake.h>
+#include "handshake.h"
+
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+
+static int test_accept_func(struct handshake_req *req, struct genl_info *info,
+ int fd)
+{
+ return 0;
+}
+
+static void test_done_func(struct handshake_req *req, unsigned int status,
+ struct genl_info *info)
+{
+}
+
+struct handshake_req_alloc_test_param {
+ const char *desc;
+ struct handshake_proto *proto;
+ gfp_t gfp;
+ bool expect_success;
+};
+
+static struct handshake_proto handshake_req_alloc_proto_2 = {
+ .hp_handler_class = HANDSHAKE_HANDLER_CLASS_NONE,
+};
+
+static struct handshake_proto handshake_req_alloc_proto_3 = {
+ .hp_handler_class = HANDSHAKE_HANDLER_CLASS_MAX,
+};
+
+static struct handshake_proto handshake_req_alloc_proto_4 = {
+ .hp_handler_class = HANDSHAKE_HANDLER_CLASS_TLSHD,
+};
+
+static struct handshake_proto handshake_req_alloc_proto_5 = {
+ .hp_handler_class = HANDSHAKE_HANDLER_CLASS_TLSHD,
+ .hp_accept = test_accept_func,
+};
+
+static struct handshake_proto handshake_req_alloc_proto_6 = {
+ .hp_handler_class = HANDSHAKE_HANDLER_CLASS_TLSHD,
+ .hp_privsize = UINT_MAX,
+ .hp_accept = test_accept_func,
+ .hp_done = test_done_func,
+};
+
+static struct handshake_proto handshake_req_alloc_proto_good = {
+ .hp_handler_class = HANDSHAKE_HANDLER_CLASS_TLSHD,
+ .hp_accept = test_accept_func,
+ .hp_done = test_done_func,
+};
+
+static const
+struct handshake_req_alloc_test_param handshake_req_alloc_params[] = {
+ {
+ .desc = "handshake_req_alloc NULL proto",
+ .proto = NULL,
+ .gfp = GFP_KERNEL,
+ .expect_success = false,
+ },
+ {
+ .desc = "handshake_req_alloc CLASS_NONE",
+ .proto = &handshake_req_alloc_proto_2,
+ .gfp = GFP_KERNEL,
+ .expect_success = false,
+ },
+ {
+ .desc = "handshake_req_alloc CLASS_MAX",
+ .proto = &handshake_req_alloc_proto_3,
+ .gfp = GFP_KERNEL,
+ .expect_success = false,
+ },
+ {
+ .desc = "handshake_req_alloc no callbacks",
+ .proto = &handshake_req_alloc_proto_4,
+ .gfp = GFP_KERNEL,
+ .expect_success = false,
+ },
+ {
+ .desc = "handshake_req_alloc no done callback",
+ .proto = &handshake_req_alloc_proto_5,
+ .gfp = GFP_KERNEL,
+ .expect_success = false,
+ },
+ {
+ .desc = "handshake_req_alloc excessive privsize",
+ .proto = &handshake_req_alloc_proto_6,
+ .gfp = GFP_KERNEL,
+ .expect_success = false,
+ },
+ {
+ .desc = "handshake_req_alloc all good",
+ .proto = &handshake_req_alloc_proto_good,
+ .gfp = GFP_KERNEL,
+ .expect_success = true,
+ },
+};
+
+static void
+handshake_req_alloc_get_desc(const struct handshake_req_alloc_test_param *param,
+ char *desc)
+{
+ strscpy(desc, param->desc, KUNIT_PARAM_DESC_SIZE);
+}
+
+/* Creates the function handshake_req_alloc_gen_params */
+KUNIT_ARRAY_PARAM(handshake_req_alloc, handshake_req_alloc_params,
+ handshake_req_alloc_get_desc);
+
+static void handshake_req_alloc_case(struct kunit *test)
+{
+ const struct handshake_req_alloc_test_param *param = test->param_value;
+ struct handshake_req *result;
+
+ /* Arrange */
+
+ /* Act */
+ result = handshake_req_alloc(param->proto, param->gfp);
+
+ /* Assert */
+ if (param->expect_success)
+ KUNIT_EXPECT_NOT_NULL(test, result);
+ else
+ KUNIT_EXPECT_NULL(test, result);
+
+ kfree(result);
+}
+
+static void handshake_req_submit_test1(struct kunit *test)
+{
+ struct socket *sock;
+ int err, result;
+
+ /* Arrange */
+ err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
+ &sock, 1);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Act */
+ result = handshake_req_submit(sock, NULL, GFP_KERNEL);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ(test, result, -EINVAL);
+
+ sock_release(sock);
+}
+
+static void handshake_req_submit_test2(struct kunit *test)
+{
+ struct handshake_req *req;
+ int result;
+
+ /* Arrange */
+ req = handshake_req_alloc(&handshake_req_alloc_proto_good, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, req);
+
+ /* Act */
+ result = handshake_req_submit(NULL, req, GFP_KERNEL);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ(test, result, -EINVAL);
+
+ /* handshake_req_submit() destroys @req on error */
+}
+
+static void handshake_req_submit_test3(struct kunit *test)
+{
+ struct handshake_req *req;
+ struct socket *sock;
+ int err, result;
+
+ /* Arrange */
+ req = handshake_req_alloc(&handshake_req_alloc_proto_good, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, req);
+
+ err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
+ &sock, 1);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ sock->file = NULL;
+
+ /* Act */
+ result = handshake_req_submit(sock, req, GFP_KERNEL);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ(test, result, -EINVAL);
+
+ /* handshake_req_submit() destroys @req on error */
+ sock_release(sock);
+}
+
+static void handshake_req_submit_test4(struct kunit *test)
+{
+ struct handshake_req *req, *result;
+ struct socket *sock;
+ int err;
+
+ /* Arrange */
+ req = handshake_req_alloc(&handshake_req_alloc_proto_good, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, req);
+
+ err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
+ &sock, 1);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ KUNIT_ASSERT_NOT_NULL(test, sock->sk);
+
+ err = handshake_req_submit(sock, req, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Act */
+ result = handshake_req_hash_lookup(sock->sk);
+
+ /* Assert */
+ KUNIT_EXPECT_NOT_NULL(test, result);
+ KUNIT_EXPECT_PTR_EQ(test, req, result);
+
+ handshake_req_cancel(sock->sk);
+ sock_release(sock);
+}
+
+static void handshake_req_submit_test5(struct kunit *test)
+{
+ struct handshake_req *req;
+ struct handshake_net *hn;
+ struct socket *sock;
+ struct net *net;
+ int saved, err;
+
+ /* Arrange */
+ req = handshake_req_alloc(&handshake_req_alloc_proto_good, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, req);
+
+ err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
+ &sock, 1);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ KUNIT_ASSERT_NOT_NULL(test, sock->sk);
+
+ net = sock_net(sock->sk);
+ hn = handshake_pernet(net);
+ KUNIT_ASSERT_NOT_NULL(test, hn);
+
+ saved = hn->hn_pending;
+ hn->hn_pending = hn->hn_pending_max + 1;
+
+ /* Act */
+ err = handshake_req_submit(sock, req, GFP_KERNEL);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ(test, err, -EAGAIN);
+
+ sock_release(sock);
+ hn->hn_pending = saved;
+}
+
+static void handshake_req_submit_test6(struct kunit *test)
+{
+ struct handshake_req *req1, *req2;
+ struct socket *sock;
+ int err;
+
+ /* Arrange */
+ req1 = handshake_req_alloc(&handshake_req_alloc_proto_good, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, req1);
+ req2 = handshake_req_alloc(&handshake_req_alloc_proto_good, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, req2);
+
+ err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
+ &sock, 1);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ KUNIT_ASSERT_NOT_NULL(test, sock->sk);
+
+ /* Act */
+ err = handshake_req_submit(sock, req1, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ err = handshake_req_submit(sock, req2, GFP_KERNEL);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ(test, err, -EBUSY);
+
+ handshake_req_cancel(sock->sk);
+ sock_release(sock);
+}
+
+static void handshake_req_cancel_test1(struct kunit *test)
+{
+ struct handshake_req *req;
+ struct socket *sock;
+ bool result;
+ int err;
+
+ /* Arrange */
+ req = handshake_req_alloc(&handshake_req_alloc_proto_good, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, req);
+
+ err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
+ &sock, 1);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+
+ err = handshake_req_submit(sock, req, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* NB: handshake_req hasn't been accepted */
+
+ /* Act */
+ result = handshake_req_cancel(sock->sk);
+
+ /* Assert */
+ KUNIT_EXPECT_TRUE(test, result);
+
+ sock_release(sock);
+}
+
+static void handshake_req_cancel_test2(struct kunit *test)
+{
+ struct handshake_req *req, *next;
+ struct handshake_net *hn;
+ struct socket *sock;
+ struct net *net;
+ bool result;
+ int err;
+
+ /* Arrange */
+ req = handshake_req_alloc(&handshake_req_alloc_proto_good, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, req);
+
+ err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
+ &sock, 1);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+
+ err = handshake_req_submit(sock, req, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ net = sock_net(sock->sk);
+ hn = handshake_pernet(net);
+ KUNIT_ASSERT_NOT_NULL(test, hn);
+
+ /* Pretend to accept this request */
+ next = handshake_req_next(hn, HANDSHAKE_HANDLER_CLASS_TLSHD);
+ KUNIT_ASSERT_PTR_EQ(test, req, next);
+
+ /* Act */
+ result = handshake_req_cancel(sock->sk);
+
+ /* Assert */
+ KUNIT_EXPECT_TRUE(test, result);
+
+ sock_release(sock);
+}
+
+static void handshake_req_cancel_test3(struct kunit *test)
+{
+ struct handshake_req *req, *next;
+ struct handshake_net *hn;
+ struct socket *sock;
+ struct net *net;
+ bool result;
+ int err;
+
+ /* Arrange */
+ req = handshake_req_alloc(&handshake_req_alloc_proto_good, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, req);
+
+ err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
+ &sock, 1);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+
+ err = handshake_req_submit(sock, req, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ net = sock_net(sock->sk);
+ hn = handshake_pernet(net);
+ KUNIT_ASSERT_NOT_NULL(test, hn);
+
+ /* Pretend to accept this request */
+ next = handshake_req_next(hn, HANDSHAKE_HANDLER_CLASS_TLSHD);
+ KUNIT_ASSERT_PTR_EQ(test, req, next);
+
+ /* Pretend to complete this request */
+ handshake_complete(next, -ETIMEDOUT, NULL);
+
+ /* Act */
+ result = handshake_req_cancel(sock->sk);
+
+ /* Assert */
+ KUNIT_EXPECT_FALSE(test, result);
+
+ sock_release(sock);
+}
+
+static struct handshake_req *handshake_req_destroy_test;
+
+static void test_destroy_func(struct handshake_req *req)
+{
+ handshake_req_destroy_test = req;
+}
+
+static struct handshake_proto handshake_req_alloc_proto_destroy = {
+ .hp_handler_class = HANDSHAKE_HANDLER_CLASS_TLSHD,
+ .hp_accept = test_accept_func,
+ .hp_done = test_done_func,
+ .hp_destroy = test_destroy_func,
+};
+
+static void handshake_req_destroy_test1(struct kunit *test)
+{
+ struct handshake_req *req;
+ struct socket *sock;
+ int err;
+
+ /* Arrange */
+ handshake_req_destroy_test = NULL;
+
+ req = handshake_req_alloc(&handshake_req_alloc_proto_destroy, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, req);
+
+ err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
+ &sock, 1);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+
+ err = handshake_req_submit(sock, req, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ handshake_req_cancel(sock->sk);
+
+ /* Act */
+ sock_release(sock);
+
+ /* Assert */
+ KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req);
+}
+
+static struct kunit_case handshake_api_test_cases[] = {
+ {
+ .name = "req_alloc API fuzzing",
+ .run_case = handshake_req_alloc_case,
+ .generate_params = handshake_req_alloc_gen_params,
+ },
+ {
+ .name = "req_submit NULL req arg",
+ .run_case = handshake_req_submit_test1,
+ },
+ {
+ .name = "req_submit NULL sock arg",
+ .run_case = handshake_req_submit_test2,
+ },
+ {
+ .name = "req_submit NULL sock->file",
+ .run_case = handshake_req_submit_test3,
+ },
+ {
+ .name = "req_lookup works",
+ .run_case = handshake_req_submit_test4,
+ },
+ {
+ .name = "req_submit max pending",
+ .run_case = handshake_req_submit_test5,
+ },
+ {
+ .name = "req_submit multiple",
+ .run_case = handshake_req_submit_test6,
+ },
+ {
+ .name = "req_cancel before accept",
+ .run_case = handshake_req_cancel_test1,
+ },
+ {
+ .name = "req_cancel after accept",
+ .run_case = handshake_req_cancel_test2,
+ },
+ {
+ .name = "req_cancel after done",
+ .run_case = handshake_req_cancel_test3,
+ },
+ {
+ .name = "req_destroy works",
+ .run_case = handshake_req_destroy_test1,
+ },
+ {}
+};
+
+static struct kunit_suite handshake_api_suite = {
+ .name = "Handshake API tests",
+ .test_cases = handshake_api_test_cases,
+};
+
+kunit_test_suites(&handshake_api_suite);
+
+MODULE_DESCRIPTION("Test handshake upcall API functions");
+MODULE_LICENSE("GPL");
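
The first case in the table above is parameterized: KUnit calls its
.generate_params hook repeatedly, once per parameter, until the hook returns
NULL. As a rough sketch only (this generator is hypothetical and not part of
this patch), such a hook looks like:

	#include <kunit/test.h>
	#include <linux/kernel.h>

	/* Hypothetical KUnit parameter generator: walks a static table
	 * and returns NULL after the last entry to end the iteration.
	 */
	static const void *my_gen_params(const void *prev, char *desc)
	{
		static const int params[] = { -1, 0, 1 };
		const int *p = prev;

		if (!p)
			p = &params[0];
		else if (++p == &params[ARRAY_SIZE(params)])
			return NULL;

		snprintf(desc, KUNIT_PARAM_DESC_SIZE, "param %d", *p);
		return p;
	}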
diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h
new file mode 100644
index 000000000000..4dac965c99df
--- /dev/null
+++ b/net/handshake/handshake.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Generic netlink handshake service
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#ifndef _INTERNAL_HANDSHAKE_H
+#define _INTERNAL_HANDSHAKE_H
+
+/* Per-net namespace context */
+struct handshake_net {
+ spinlock_t hn_lock; /* protects next 3 fields */
+ int hn_pending;
+ int hn_pending_max;
+ struct list_head hn_requests;
+
+ unsigned long hn_flags;
+};
+
+enum hn_flags_bits {
+ HANDSHAKE_F_NET_DRAINING,
+};
+
+struct handshake_proto;
+
+/* One handshake request */
+struct handshake_req {
+ struct list_head hr_list;
+ struct rhash_head hr_rhash;
+ unsigned long hr_flags;
+ const struct handshake_proto *hr_proto;
+ struct sock *hr_sk;
+ void (*hr_odestruct)(struct sock *sk);
+
+ /* Always the last field */
+ char hr_priv[];
+};
+
+enum hr_flags_bits {
+ HANDSHAKE_F_REQ_COMPLETED,
+};
+
+/* Invariants for all handshake requests for one transport layer
+ * security protocol
+ */
+struct handshake_proto {
+ int hp_handler_class;
+ size_t hp_privsize;
+ unsigned long hp_flags;
+
+ int (*hp_accept)(struct handshake_req *req,
+ struct genl_info *info, int fd);
+ void (*hp_done)(struct handshake_req *req,
+ unsigned int status,
+ struct genl_info *info);
+ void (*hp_destroy)(struct handshake_req *req);
+};
+
+enum hp_flags_bits {
+ HANDSHAKE_F_PROTO_NOTIFY,
+};
+
+/* netlink.c */
+int handshake_genl_notify(struct net *net, const struct handshake_proto *proto,
+ gfp_t flags);
+struct nlmsghdr *handshake_genl_put(struct sk_buff *msg,
+ struct genl_info *info);
+struct handshake_net *handshake_pernet(struct net *net);
+
+/* request.c */
+struct handshake_req *handshake_req_alloc(const struct handshake_proto *proto,
+ gfp_t flags);
+int handshake_req_hash_init(void);
+void handshake_req_hash_destroy(void);
+void *handshake_req_private(struct handshake_req *req);
+struct handshake_req *handshake_req_hash_lookup(struct sock *sk);
+struct handshake_req *handshake_req_next(struct handshake_net *hn, int class);
+int handshake_req_submit(struct socket *sock, struct handshake_req *req,
+ gfp_t flags);
+void handshake_complete(struct handshake_req *req, unsigned int status,
+ struct genl_info *info);
+bool handshake_req_cancel(struct sock *sk);
+
+#endif /* _INTERNAL_HANDSHAKE_H */
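
Note that struct handshake_req above ends in a flexible array member, hr_priv;
hp_privsize tells handshake_req_alloc() how much space to reserve there, and
handshake_req_private() returns its address. A minimal sketch, assuming a
hypothetical protocol (my_tls_req and my_done are illustrative names, not part
of this patch):

	/* Hypothetical per-protocol private data kept in req->hr_priv[] */
	struct my_tls_req {
		int	my_auth_mode;
		int	my_timeout_ms;
	};

	static void my_done(struct handshake_req *req, unsigned int status,
			    struct genl_info *info)
	{
		/* Valid because the proto set .hp_privsize =
		 * sizeof(struct my_tls_req) before allocation.
		 */
		struct my_tls_req *treq = handshake_req_private(req);

		treq->my_auth_mode = 0;
	}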
diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c
new file mode 100644
index 000000000000..8ea0ff993f9f
--- /dev/null
+++ b/net/handshake/netlink.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic netlink handshake service
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/mm.h>
+
+#include <net/sock.h>
+#include <net/genetlink.h>
+#include <net/netns/generic.h>
+
+#include <kunit/visibility.h>
+
+#include <uapi/linux/handshake.h>
+#include "handshake.h"
+#include "genl.h"
+
+#include <trace/events/handshake.h>
+
+/**
+ * handshake_genl_notify - Notify handlers that a request is waiting
+ * @net: target network namespace
+ * @proto: handshake protocol
+ * @flags: memory allocation control flags
+ *
+ * Returns zero on success or a negative errno if notification failed.
+ */
+int handshake_genl_notify(struct net *net, const struct handshake_proto *proto,
+ gfp_t flags)
+{
+ struct sk_buff *msg;
+ void *hdr;
+
+ /* Disable notifications during unit testing */
+ if (!test_bit(HANDSHAKE_F_PROTO_NOTIFY, &proto->hp_flags))
+ return 0;
+
+ if (!genl_has_listeners(&handshake_nl_family, net,
+ proto->hp_handler_class))
+ return -ESRCH;
+
+ msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, flags);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, 0, 0, &handshake_nl_family, 0,
+ HANDSHAKE_CMD_READY);
+ if (!hdr)
+ goto out_free;
+
+ if (nla_put_u32(msg, HANDSHAKE_A_ACCEPT_HANDLER_CLASS,
+ proto->hp_handler_class) < 0) {
+ genlmsg_cancel(msg, hdr);
+ goto out_free;
+ }
+
+ genlmsg_end(msg, hdr);
+ return genlmsg_multicast_netns(&handshake_nl_family, net, msg,
+ 0, proto->hp_handler_class, flags);
+
+out_free:
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+}
+
+/**
+ * handshake_genl_put - Create a generic netlink message header
+ * @msg: buffer in which to create the header
+ * @info: generic netlink message context
+ *
+ * Returns a ready-to-use header, or NULL.
+ */
+struct nlmsghdr *handshake_genl_put(struct sk_buff *msg,
+ struct genl_info *info)
+{
+ return genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &handshake_nl_family, 0, info->genlhdr->cmd);
+}
+EXPORT_SYMBOL(handshake_genl_put);
+
+/*
+ * dup() a kernel socket for use as a user space file descriptor
+ * in the current process. The kernel socket must have an
+ * instantiated struct file.
+ *
+ * Implicit argument: "current()"
+ */
+static int handshake_dup(struct socket *sock)
+{
+ struct file *file;
+ int newfd;
+
+ if (!sock->file)
+ return -EBADF;
+
+ file = get_file(sock->file);
+ newfd = get_unused_fd_flags(O_CLOEXEC);
+ if (newfd < 0) {
+ fput(file);
+ return newfd;
+ }
+
+ fd_install(newfd, file);
+ return newfd;
+}
+
+int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = sock_net(skb->sk);
+ struct handshake_net *hn = handshake_pernet(net);
+ struct handshake_req *req = NULL;
+ struct socket *sock;
+ int class, fd, err;
+
+ err = -EOPNOTSUPP;
+ if (!hn)
+ goto out_status;
+
+ err = -EINVAL;
+ if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_ACCEPT_HANDLER_CLASS))
+ goto out_status;
+ class = nla_get_u32(info->attrs[HANDSHAKE_A_ACCEPT_HANDLER_CLASS]);
+
+ err = -EAGAIN;
+ req = handshake_req_next(hn, class);
+ if (!req)
+ goto out_status;
+
+ sock = req->hr_sk->sk_socket;
+ fd = handshake_dup(sock);
+ if (fd < 0) {
+ err = fd;
+ goto out_complete;
+ }
+ err = req->hr_proto->hp_accept(req, info, fd);
+ if (err)
+ goto out_complete;
+
+ trace_handshake_cmd_accept(net, req, req->hr_sk, fd);
+ return 0;
+
+out_complete:
+ handshake_complete(req, -EIO, NULL);
+ fput(sock->file);
+out_status:
+ trace_handshake_cmd_accept_err(net, req, NULL, err);
+ return err;
+}
+
+int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = sock_net(skb->sk);
+ struct socket *sock = NULL;
+ struct handshake_req *req = NULL;
+ int fd, status, err;
+
+ if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))
+ return -EINVAL;
+ fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]);
+
+ err = 0;
+ sock = sockfd_lookup(fd, &err);
+ if (err) {
+ err = -EBADF;
+ goto out_status;
+ }
+
+ req = handshake_req_hash_lookup(sock->sk);
+ if (!req) {
+ err = -EBUSY;
+ fput(sock->file);
+ goto out_status;
+ }
+
+ trace_handshake_cmd_done(net, req, sock->sk, fd);
+
+ status = -EIO;
+ if (info->attrs[HANDSHAKE_A_DONE_STATUS])
+ status = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]);
+
+ handshake_complete(req, status, info);
+ fput(sock->file);
+ return 0;
+
+out_status:
+ trace_handshake_cmd_done_err(net, req, sock->sk, err);
+ return err;
+}
+
+static unsigned int handshake_net_id;
+
+static int __net_init handshake_net_init(struct net *net)
+{
+ struct handshake_net *hn = net_generic(net, handshake_net_id);
+ unsigned long tmp;
+ struct sysinfo si;
+
+ /*
+ * Arbitrary limit to prevent handshakes that do not make
+ * progress from clogging up the system. The cap scales up
+ * with the amount of physical memory on the system.
+ */
+ si_meminfo(&si);
+ tmp = si.totalram / (25 * si.mem_unit);
+ hn->hn_pending_max = clamp(tmp, 3UL, 50UL);
+
+ spin_lock_init(&hn->hn_lock);
+ hn->hn_pending = 0;
+ hn->hn_flags = 0;
+ INIT_LIST_HEAD(&hn->hn_requests);
+ return 0;
+}
+
+static void __net_exit handshake_net_exit(struct net *net)
+{
+ struct handshake_net *hn = net_generic(net, handshake_net_id);
+ struct handshake_req *req;
+ LIST_HEAD(requests);
+
+ /*
+ * Drain the net's pending list. Requests that have been
+ * accepted and are in progress will be destroyed when
+ * the socket is closed.
+ */
+ spin_lock(&hn->hn_lock);
+ set_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags);
+ list_splice_init(&hn->hn_requests, &requests);
+ spin_unlock(&hn->hn_lock);
+
+ while (!list_empty(&requests)) {
+ req = list_first_entry(&requests, struct handshake_req, hr_list);
+ list_del(&req->hr_list);
+
+ /*
+ * Requests on this list have not yet been
+ * accepted, so they do not have an fd to put.
+ */
+
+ handshake_complete(req, -ETIMEDOUT, NULL);
+ }
+}
+
+static struct pernet_operations __net_initdata handshake_genl_net_ops = {
+ .init = handshake_net_init,
+ .exit = handshake_net_exit,
+ .id = &handshake_net_id,
+ .size = sizeof(struct handshake_net),
+};
+
+/**
+ * handshake_pernet - Get the handshake private per-net structure
+ * @net: network namespace
+ *
+ * Returns a pointer to the net's private per-net structure for the
+ * handshake module, or NULL if handshake_init() failed.
+ */
+struct handshake_net *handshake_pernet(struct net *net)
+{
+ return handshake_net_id ?
+ net_generic(net, handshake_net_id) : NULL;
+}
+EXPORT_SYMBOL_IF_KUNIT(handshake_pernet);
+
+static int __init handshake_init(void)
+{
+ int ret;
+
+ ret = handshake_req_hash_init();
+ if (ret) {
+ pr_warn("handshake: hash initialization failed (%d)\n", ret);
+ return ret;
+ }
+
+ ret = genl_register_family(&handshake_nl_family);
+ if (ret) {
+ pr_warn("handshake: netlink registration failed (%d)\n", ret);
+ handshake_req_hash_destroy();
+ return ret;
+ }
+
+ /*
+ * ORDER: register_pernet_subsys must be done last.
+ *
+ * If initialization does not make it past pernet_subsys
+ * registration, then handshake_net_id will remain 0. That
+ * shunts the handshake consumer API to return -EOPNOTSUPP
+ * to prevent it from dereferencing something that hasn't
+ * been allocated.
+ */
+ ret = register_pernet_subsys(&handshake_genl_net_ops);
+ if (ret) {
+ pr_warn("handshake: pernet registration failed (%d)\n", ret);
+ genl_unregister_family(&handshake_nl_family);
+ handshake_req_hash_destroy();
+ }
+
+ return ret;
+}
+
+static void __exit handshake_exit(void)
+{
+ unregister_pernet_subsys(&handshake_genl_net_ops);
+ handshake_net_id = 0;
+
+ handshake_req_hash_destroy();
+ genl_unregister_family(&handshake_nl_family);
+}
+
+module_init(handshake_init);
+module_exit(handshake_exit);
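
The cap computed in handshake_net_init() works out to roughly one pending
handshake per 400 MiB of RAM with 4 KiB pages, clamped to the range [3, 50]:
si.totalram counts pages and si.mem_unit is the bytes-per-page unit, so the
divisor is 25 * 4096 = 102400 pages. A userspace sketch of the same
arithmetic, for illustration only:

	#include <stdio.h>

	/* Mirrors the hn_pending_max computation above */
	static unsigned long pending_max(unsigned long totalram_pages,
					 unsigned long mem_unit)
	{
		unsigned long tmp = totalram_pages / (25 * mem_unit);

		return tmp < 3 ? 3 : tmp > 50 ? 50 : tmp; /* clamp(tmp, 3UL, 50UL) */
	}

	int main(void)
	{
		/* 16 GiB machine: 4194304 pages / 102400 = 40 */
		printf("%lu\n", pending_max(4194304UL, 4096UL));
		return 0;
	}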
diff --git a/net/handshake/request.c b/net/handshake/request.c
new file mode 100644
index 000000000000..94d5cef3e048
--- /dev/null
+++ b/net/handshake/request.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Handshake request lifetime events
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+#include <linux/fdtable.h>
+#include <linux/rhashtable.h>
+
+#include <net/sock.h>
+#include <net/genetlink.h>
+#include <net/netns/generic.h>
+
+#include <kunit/visibility.h>
+
+#include <uapi/linux/handshake.h>
+#include "handshake.h"
+
+#include <trace/events/handshake.h>
+
+/*
+ * We need both a handshake_req -> sock mapping, and a sock ->
+ * handshake_req mapping. Both are one-to-one.
+ *
+ * To avoid adding another pointer field to struct sock, net/handshake
+ * maintains a hash table, indexed by the memory address of @sock, to
+ * find the struct handshake_req outstanding for that socket. The
+ * reverse direction uses a simple pointer field in the handshake_req
+ * struct.
+ */
+
+static struct rhashtable handshake_rhashtbl ____cacheline_aligned_in_smp;
+
+static const struct rhashtable_params handshake_rhash_params = {
+ .key_len = sizeof_field(struct handshake_req, hr_sk),
+ .key_offset = offsetof(struct handshake_req, hr_sk),
+ .head_offset = offsetof(struct handshake_req, hr_rhash),
+ .automatic_shrinking = true,
+};
+
+int handshake_req_hash_init(void)
+{
+ return rhashtable_init(&handshake_rhashtbl, &handshake_rhash_params);
+}
+
+void handshake_req_hash_destroy(void)
+{
+ rhashtable_destroy(&handshake_rhashtbl);
+}
+
+struct handshake_req *handshake_req_hash_lookup(struct sock *sk)
+{
+ return rhashtable_lookup_fast(&handshake_rhashtbl, &sk,
+ handshake_rhash_params);
+}
+EXPORT_SYMBOL_IF_KUNIT(handshake_req_hash_lookup);
+
+static bool handshake_req_hash_add(struct handshake_req *req)
+{
+ int ret;
+
+ ret = rhashtable_lookup_insert_fast(&handshake_rhashtbl,
+ &req->hr_rhash,
+ handshake_rhash_params);
+ return ret == 0;
+}
+
+static void handshake_req_destroy(struct handshake_req *req)
+{
+ if (req->hr_proto->hp_destroy)
+ req->hr_proto->hp_destroy(req);
+ rhashtable_remove_fast(&handshake_rhashtbl, &req->hr_rhash,
+ handshake_rhash_params);
+ kfree(req);
+}
+
+static void handshake_sk_destruct(struct sock *sk)
+{
+ void (*sk_destruct)(struct sock *sk);
+ struct handshake_req *req;
+
+ req = handshake_req_hash_lookup(sk);
+ if (!req)
+ return;
+
+ trace_handshake_destruct(sock_net(sk), req, sk);
+ sk_destruct = req->hr_odestruct;
+ handshake_req_destroy(req);
+ if (sk_destruct)
+ sk_destruct(sk);
+}
+
+/**
+ * handshake_req_alloc - Allocate a handshake request
+ * @proto: security protocol
+ * @flags: memory allocation flags
+ *
+ * Returns an initialized handshake_req or NULL.
+ */
+struct handshake_req *handshake_req_alloc(const struct handshake_proto *proto,
+ gfp_t flags)
+{
+ struct handshake_req *req;
+
+ if (!proto)
+ return NULL;
+ if (proto->hp_handler_class <= HANDSHAKE_HANDLER_CLASS_NONE)
+ return NULL;
+ if (proto->hp_handler_class >= HANDSHAKE_HANDLER_CLASS_MAX)
+ return NULL;
+ if (!proto->hp_accept || !proto->hp_done)
+ return NULL;
+
+ req = kzalloc(struct_size(req, hr_priv, proto->hp_privsize), flags);
+ if (!req)
+ return NULL;
+
+ INIT_LIST_HEAD(&req->hr_list);
+ req->hr_proto = proto;
+ return req;
+}
+EXPORT_SYMBOL(handshake_req_alloc);
+
+/**
+ * handshake_req_private - Get per-handshake private data
+ * @req: handshake arguments
+ * Returns a pointer to the private data area of @req.
+ */
+void *handshake_req_private(struct handshake_req *req)
+{
+ return (void *)&req->hr_priv;
+}
+EXPORT_SYMBOL(handshake_req_private);
+
+static bool __add_pending_locked(struct handshake_net *hn,
+ struct handshake_req *req)
+{
+ if (WARN_ON_ONCE(!list_empty(&req->hr_list)))
+ return false;
+ hn->hn_pending++;
+ list_add_tail(&req->hr_list, &hn->hn_requests);
+ return true;
+}
+
+static void __remove_pending_locked(struct handshake_net *hn,
+ struct handshake_req *req)
+{
+ hn->hn_pending--;
+ list_del_init(&req->hr_list);
+}
+
+/*
+ * Returns %true if the request was found on @hn's pending list,
+ * otherwise %false.
+ *
+ * If @req was on a pending list, it has not yet been accepted.
+ */
+static bool remove_pending(struct handshake_net *hn, struct handshake_req *req)
+{
+ bool ret = false;
+
+ spin_lock(&hn->hn_lock);
+ if (!list_empty(&req->hr_list)) {
+ __remove_pending_locked(hn, req);
+ ret = true;
+ }
+ spin_unlock(&hn->hn_lock);
+
+ return ret;
+}
+
+struct handshake_req *handshake_req_next(struct handshake_net *hn, int class)
+{
+ struct handshake_req *req, *pos;
+
+ req = NULL;
+ spin_lock(&hn->hn_lock);
+ list_for_each_entry(pos, &hn->hn_requests, hr_list) {
+ if (pos->hr_proto->hp_handler_class != class)
+ continue;
+ __remove_pending_locked(hn, pos);
+ req = pos;
+ break;
+ }
+ spin_unlock(&hn->hn_lock);
+
+ return req;
+}
+EXPORT_SYMBOL_IF_KUNIT(handshake_req_next);
+
+/**
+ * handshake_req_submit - Submit a handshake request
+ * @sock: open socket on which to perform the handshake
+ * @req: handshake arguments
+ * @flags: memory allocation flags
+ *
+ * Return values:
+ * %0: Request queued
+ * %-EINVAL: Invalid argument
+ * %-EBUSY: A handshake is already under way for this socket
+ * %-ESRCH: No handshake agent is available
+ * %-EAGAIN: Too many pending handshake requests
+ * %-ENOMEM: Failed to allocate memory
+ * %-EMSGSIZE: Failed to construct notification message
+ * %-EOPNOTSUPP: Handshake module not initialized
+ *
+ * A zero return value from handshake_req_submit() means that
+ * exactly one subsequent completion callback is guaranteed.
+ *
+ * A negative return value from handshake_req_submit() means that
+ * no completion callback will be done and that @req has been
+ * destroyed.
+ */
+int handshake_req_submit(struct socket *sock, struct handshake_req *req,
+ gfp_t flags)
+{
+ struct handshake_net *hn;
+ struct net *net;
+ int ret;
+
+ if (!sock || !req || !sock->file) {
+ kfree(req);
+ return -EINVAL;
+ }
+
+ req->hr_sk = sock->sk;
+ if (!req->hr_sk) {
+ kfree(req);
+ return -EINVAL;
+ }
+ req->hr_odestruct = req->hr_sk->sk_destruct;
+ req->hr_sk->sk_destruct = handshake_sk_destruct;
+
+ ret = -EOPNOTSUPP;
+ net = sock_net(req->hr_sk);
+ hn = handshake_pernet(net);
+ if (!hn)
+ goto out_err;
+
+ ret = -EAGAIN;
+ if (READ_ONCE(hn->hn_pending) >= hn->hn_pending_max)
+ goto out_err;
+
+ spin_lock(&hn->hn_lock);
+ ret = -EOPNOTSUPP;
+ if (test_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags))
+ goto out_unlock;
+ ret = -EBUSY;
+ if (!handshake_req_hash_add(req))
+ goto out_unlock;
+ if (!__add_pending_locked(hn, req))
+ goto out_unlock;
+ spin_unlock(&hn->hn_lock);
+
+ ret = handshake_genl_notify(net, req->hr_proto, flags);
+ if (ret) {
+ trace_handshake_notify_err(net, req, req->hr_sk, ret);
+ if (remove_pending(hn, req))
+ goto out_err;
+ }
+
+ /* Prevent socket release while a handshake request is pending */
+ sock_hold(req->hr_sk);
+
+ trace_handshake_submit(net, req, req->hr_sk);
+ return 0;
+
+out_unlock:
+ spin_unlock(&hn->hn_lock);
+out_err:
+ trace_handshake_submit_err(net, req, req->hr_sk, ret);
+ handshake_req_destroy(req);
+ return ret;
+}
+EXPORT_SYMBOL(handshake_req_submit);
+
+void handshake_complete(struct handshake_req *req, unsigned int status,
+ struct genl_info *info)
+{
+ struct sock *sk = req->hr_sk;
+ struct net *net = sock_net(sk);
+
+ if (!test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) {
+ trace_handshake_complete(net, req, sk, status);
+ req->hr_proto->hp_done(req, status, info);
+
+ /* Handshake request is no longer pending */
+ sock_put(sk);
+ }
+}
+EXPORT_SYMBOL_IF_KUNIT(handshake_complete);
+
+/**
+ * handshake_req_cancel - Cancel an in-progress handshake
+ * @sk: socket on which there is an ongoing handshake
+ *
+ * Request cancellation races with request completion. To determine
+ * who won, callers examine the return value from this function.
+ *
+ * Return values:
+ * %true - Uncompleted handshake request was canceled
+ * %false - Handshake request already completed or not found
+ */
+bool handshake_req_cancel(struct sock *sk)
+{
+ struct handshake_req *req;
+ struct handshake_net *hn;
+ struct net *net;
+
+ net = sock_net(sk);
+ req = handshake_req_hash_lookup(sk);
+ if (!req) {
+ trace_handshake_cancel_none(net, req, sk);
+ return false;
+ }
+
+ hn = handshake_pernet(net);
+ if (hn && remove_pending(hn, req)) {
+ /* Request hadn't been accepted */
+ goto out_true;
+ }
+ if (test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) {
+ /* Request already completed */
+ trace_handshake_cancel_busy(net, req, sk);
+ return false;
+ }
+
+out_true:
+ trace_handshake_cancel(net, req, sk);
+
+ /* Handshake request is no longer pending */
+ sock_put(sk);
+ return true;
+}
+EXPORT_SYMBOL(handshake_req_cancel);
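
Taken together, the request.c API gives a consumer a simple lifecycle:
allocate, submit, then either exactly one completion callback runs or the
consumer wins the cancellation race. A minimal sketch under those rules (all
my_* names are hypothetical, not part of this patch):

	#include <net/sock.h>
	#include <uapi/linux/handshake.h>
	#include "handshake.h"

	static int my_accept(struct handshake_req *req,
			     struct genl_info *info, int fd)
	{
		return 0;	/* a real protocol builds its CMD_ACCEPT reply here */
	}

	static void my_done(struct handshake_req *req, unsigned int status,
			    struct genl_info *info)
	{
		/* Runs exactly once per successfully submitted request */
	}

	static const struct handshake_proto my_proto = {
		.hp_handler_class	= HANDSHAKE_HANDLER_CLASS_TLSHD,
		.hp_accept		= my_accept,
		.hp_done		= my_done,
	};

	static int my_start(struct socket *sock)
	{
		struct handshake_req *req;

		req = handshake_req_alloc(&my_proto, GFP_KERNEL);
		if (!req)
			return -ENOMEM;

		/* On failure, submit has already destroyed req */
		return handshake_req_submit(sock, req, GFP_KERNEL);
	}

	static void my_stop(struct socket *sock)
	{
		/* %true: cancel won the race; ->hp_done will never run */
		if (handshake_req_cancel(sock->sk))
			pr_debug("handshake canceled\n");
		sock_release(sock);
	}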
diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c
new file mode 100644
index 000000000000..fcbeb63b4eb1
--- /dev/null
+++ b/net/handshake/tlshd.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Establish a TLS session for a kernel socket consumer
+ * using the tlshd user space handler.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2021-2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/key.h>
+
+#include <net/sock.h>
+#include <net/handshake.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/keyctl.h>
+#include <uapi/linux/handshake.h>
+#include "handshake.h"
+
+struct tls_handshake_req {
+ void (*th_consumer_done)(void *data, int status,
+ key_serial_t peerid);
+ void *th_consumer_data;
+
+ int th_type;
+ unsigned int th_timeout_ms;
+ int th_auth_mode;
+ key_serial_t th_keyring;
+ key_serial_t th_certificate;
+ key_serial_t th_privkey;
+
+ unsigned int th_num_peerids;
+ key_serial_t th_peerid[5];
+};
+
+static struct tls_handshake_req *
+tls_handshake_req_init(struct handshake_req *req,
+ const struct tls_handshake_args *args)
+{
+ struct tls_handshake_req *treq = handshake_req_private(req);
+
+ treq->th_timeout_ms = args->ta_timeout_ms;
+ treq->th_consumer_done = args->ta_done;
+ treq->th_consumer_data = args->ta_data;
+ treq->th_keyring = args->ta_keyring;
+ treq->th_num_peerids = 0;
+ treq->th_certificate = TLS_NO_CERT;
+ treq->th_privkey = TLS_NO_PRIVKEY;
+ return treq;
+}
+
+static void tls_handshake_remote_peerids(struct tls_handshake_req *treq,
+ struct genl_info *info)
+{
+ struct nlattr *head = nlmsg_attrdata(info->nlhdr, GENL_HDRLEN);
+ int rem, len = nlmsg_attrlen(info->nlhdr, GENL_HDRLEN);
+ struct nlattr *nla;
+ unsigned int i;
+
+ i = 0;
+ nla_for_each_attr(nla, head, len, rem) {
+ if (nla_type(nla) == HANDSHAKE_A_DONE_REMOTE_AUTH)
+ i++;
+ }
+ if (!i)
+ return;
+ treq->th_num_peerids = min_t(unsigned int, i,
+ ARRAY_SIZE(treq->th_peerid));
+
+ i = 0;
+ nla_for_each_attr(nla, head, len, rem) {
+ if (nla_type(nla) == HANDSHAKE_A_DONE_REMOTE_AUTH)
+ treq->th_peerid[i++] = nla_get_u32(nla);
+ if (i >= treq->th_num_peerids)
+ break;
+ }
+}
+
+/**
+ * tls_handshake_done - callback to handle a CMD_DONE request
+ * @req: handshake request that completed
+ * @status: session status code
+ * @info: full results of session establishment
+ *
+ */
+static void tls_handshake_done(struct handshake_req *req,
+ unsigned int status, struct genl_info *info)
+{
+ struct tls_handshake_req *treq = handshake_req_private(req);
+
+ treq->th_peerid[0] = TLS_NO_PEERID;
+ if (info)
+ tls_handshake_remote_peerids(treq, info);
+
+ treq->th_consumer_done(treq->th_consumer_data, -status,
+ treq->th_peerid[0]);
+}
+
+#if IS_ENABLED(CONFIG_KEYS)
+static int tls_handshake_private_keyring(struct tls_handshake_req *treq)
+{
+ key_ref_t process_keyring_ref, keyring_ref;
+ int ret;
+
+ if (treq->th_keyring == TLS_NO_KEYRING)
+ return 0;
+
+ process_keyring_ref = lookup_user_key(KEY_SPEC_PROCESS_KEYRING,
+ KEY_LOOKUP_CREATE,
+ KEY_NEED_WRITE);
+ if (IS_ERR(process_keyring_ref)) {
+ ret = PTR_ERR(process_keyring_ref);
+ goto out;
+ }
+
+ keyring_ref = lookup_user_key(treq->th_keyring, KEY_LOOKUP_CREATE,
+ KEY_NEED_LINK);
+ if (IS_ERR(keyring_ref)) {
+ ret = PTR_ERR(keyring_ref);
+ goto out_put_key;
+ }
+
+ ret = key_link(key_ref_to_ptr(process_keyring_ref),
+ key_ref_to_ptr(keyring_ref));
+
+ key_ref_put(keyring_ref);
+out_put_key:
+ key_ref_put(process_keyring_ref);
+out:
+ return ret;
+}
+#else
+static int tls_handshake_private_keyring(struct tls_handshake_req *treq)
+{
+ return 0;
+}
+#endif
+
+static int tls_handshake_put_peer_identity(struct sk_buff *msg,
+ struct tls_handshake_req *treq)
+{
+ unsigned int i;
+
+ for (i = 0; i < treq->th_num_peerids; i++)
+ if (nla_put_u32(msg, HANDSHAKE_A_ACCEPT_PEER_IDENTITY,
+ treq->th_peerid[i]) < 0)
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int tls_handshake_put_certificate(struct sk_buff *msg,
+ struct tls_handshake_req *treq)
+{
+ struct nlattr *entry_attr;
+
+ if (treq->th_certificate == TLS_NO_CERT &&
+ treq->th_privkey == TLS_NO_PRIVKEY)
+ return 0;
+
+ entry_attr = nla_nest_start(msg, HANDSHAKE_A_ACCEPT_CERTIFICATE);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(msg, HANDSHAKE_A_X509_CERT,
+ treq->th_certificate) ||
+ nla_put_u32(msg, HANDSHAKE_A_X509_PRIVKEY,
+ treq->th_privkey)) {
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+ }
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+}
+
+/**
+ * tls_handshake_accept - callback to construct a CMD_ACCEPT response
+ * @req: handshake parameters to return
+ * @info: generic netlink message context
+ * @fd: file descriptor to be returned
+ *
+ * Returns zero on success, or a negative errno on failure.
+ */
+static int tls_handshake_accept(struct handshake_req *req,
+ struct genl_info *info, int fd)
+{
+ struct tls_handshake_req *treq = handshake_req_private(req);
+ struct nlmsghdr *hdr;
+ struct sk_buff *msg;
+ int ret;
+
+ ret = tls_handshake_private_keyring(treq);
+ if (ret < 0)
+ goto out;
+
+ ret = -ENOMEM;
+ msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ goto out;
+ hdr = handshake_genl_put(msg, info);
+ if (!hdr)
+ goto out_cancel;
+
+ ret = -EMSGSIZE;
+ ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_SOCKFD, fd);
+ if (ret < 0)
+ goto out_cancel;
+ ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_MESSAGE_TYPE, treq->th_type);
+ if (ret < 0)
+ goto out_cancel;
+ if (treq->th_timeout_ms) {
+ ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_TIMEOUT, treq->th_timeout_ms);
+ if (ret < 0)
+ goto out_cancel;
+ }
+
+ ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_AUTH_MODE,
+ treq->th_auth_mode);
+ if (ret < 0)
+ goto out_cancel;
+ switch (treq->th_auth_mode) {
+ case HANDSHAKE_AUTH_PSK:
+ ret = tls_handshake_put_peer_identity(msg, treq);
+ if (ret < 0)
+ goto out_cancel;
+ break;
+ case HANDSHAKE_AUTH_X509:
+ ret = tls_handshake_put_certificate(msg, treq);
+ if (ret < 0)
+ goto out_cancel;
+ break;
+ }
+
+ genlmsg_end(msg, hdr);
+ return genlmsg_reply(msg, info);
+
+out_cancel:
+ genlmsg_cancel(msg, hdr);
+out:
+ return ret;
+}
+
+static const struct handshake_proto tls_handshake_proto = {
+ .hp_handler_class = HANDSHAKE_HANDLER_CLASS_TLSHD,
+ .hp_privsize = sizeof(struct tls_handshake_req),
+ .hp_flags = BIT(HANDSHAKE_F_PROTO_NOTIFY),
+
+ .hp_accept = tls_handshake_accept,
+ .hp_done = tls_handshake_done,
+};
+
+/**
+ * tls_client_hello_anon - request an anonymous TLS handshake on a socket
+ * @args: socket and handshake parameters for this request
+ * @flags: memory allocation control flags
+ *
+ * Return values:
+ * %0: Handshake request enqueued; ->done will be called when complete
+ * %-ESRCH: No user agent is available
+ * %-ENOMEM: Memory allocation failed
+ */
+int tls_client_hello_anon(const struct tls_handshake_args *args, gfp_t flags)
+{
+ struct tls_handshake_req *treq;
+ struct handshake_req *req;
+
+ req = handshake_req_alloc(&tls_handshake_proto, flags);
+ if (!req)
+ return -ENOMEM;
+ treq = tls_handshake_req_init(req, args);
+ treq->th_type = HANDSHAKE_MSG_TYPE_CLIENTHELLO;
+ treq->th_auth_mode = HANDSHAKE_AUTH_UNAUTH;
+
+ return handshake_req_submit(args->ta_sock, req, flags);
+}
+EXPORT_SYMBOL(tls_client_hello_anon);
+
+/**
+ * tls_client_hello_x509 - request an x.509-based TLS handshake on a socket
+ * @args: socket and handshake parameters for this request
+ * @flags: memory allocation control flags
+ *
+ * Return values:
+ * %0: Handshake request enqueued; ->done will be called when complete
+ * %-ESRCH: No user agent is available
+ * %-ENOMEM: Memory allocation failed
+ */
+int tls_client_hello_x509(const struct tls_handshake_args *args, gfp_t flags)
+{
+ struct tls_handshake_req *treq;
+ struct handshake_req *req;
+
+ req = handshake_req_alloc(&tls_handshake_proto, flags);
+ if (!req)
+ return -ENOMEM;
+ treq = tls_handshake_req_init(req, args);
+ treq->th_type = HANDSHAKE_MSG_TYPE_CLIENTHELLO;
+ treq->th_auth_mode = HANDSHAKE_AUTH_X509;
+ treq->th_certificate = args->ta_my_cert;
+ treq->th_privkey = args->ta_my_privkey;
+
+ return handshake_req_submit(args->ta_sock, req, flags);
+}
+EXPORT_SYMBOL(tls_client_hello_x509);
+
+/**
+ * tls_client_hello_psk - request a PSK-based TLS handshake on a socket
+ * @args: socket and handshake parameters for this request
+ * @flags: memory allocation control flags
+ *
+ * Return values:
+ * %0: Handshake request enqueued; ->done will be called when complete
+ * %-EINVAL: Wrong number of local peer IDs
+ * %-ESRCH: No user agent is available
+ * %-ENOMEM: Memory allocation failed
+ */
+int tls_client_hello_psk(const struct tls_handshake_args *args, gfp_t flags)
+{
+ struct tls_handshake_req *treq;
+ struct handshake_req *req;
+ unsigned int i;
+
+ if (!args->ta_num_peerids ||
+ args->ta_num_peerids > ARRAY_SIZE(treq->th_peerid))
+ return -EINVAL;
+
+ req = handshake_req_alloc(&tls_handshake_proto, flags);
+ if (!req)
+ return -ENOMEM;
+ treq = tls_handshake_req_init(req, args);
+ treq->th_type = HANDSHAKE_MSG_TYPE_CLIENTHELLO;
+ treq->th_auth_mode = HANDSHAKE_AUTH_PSK;
+ treq->th_num_peerids = args->ta_num_peerids;
+ for (i = 0; i < args->ta_num_peerids; i++)
+ treq->th_peerid[i] = args->ta_my_peerids[i];
+
+ return handshake_req_submit(args->ta_sock, req, flags);
+}
+EXPORT_SYMBOL(tls_client_hello_psk);
+
+/**
+ * tls_server_hello_x509 - request a server TLS handshake on a socket
+ * @args: socket and handshake parameters for this request
+ * @flags: memory allocation control flags
+ *
+ * Return values:
+ * %0: Handshake request enqueued; ->done will be called when complete
+ * %-ESRCH: No user agent is available
+ * %-ENOMEM: Memory allocation failed
+ */
+int tls_server_hello_x509(const struct tls_handshake_args *args, gfp_t flags)
+{
+ struct tls_handshake_req *treq;
+ struct handshake_req *req;
+
+ req = handshake_req_alloc(&tls_handshake_proto, flags);
+ if (!req)
+ return -ENOMEM;
+ treq = tls_handshake_req_init(req, args);
+ treq->th_type = HANDSHAKE_MSG_TYPE_SERVERHELLO;
+ treq->th_auth_mode = HANDSHAKE_AUTH_X509;
+ treq->th_certificate = args->ta_my_cert;
+ treq->th_privkey = args->ta_my_privkey;
+
+ return handshake_req_submit(args->ta_sock, req, flags);
+}
+EXPORT_SYMBOL(tls_server_hello_x509);
+
+/**
+ * tls_server_hello_psk - request a server TLS handshake on a socket
+ * @args: socket and handshake parameters for this request
+ * @flags: memory allocation control flags
+ *
+ * Return values:
+ * %0: Handshake request enqueued; ->done will be called when complete
+ * %-ESRCH: No user agent is available
+ * %-ENOMEM: Memory allocation failed
+ */
+int tls_server_hello_psk(const struct tls_handshake_args *args, gfp_t flags)
+{
+ struct tls_handshake_req *treq;
+ struct handshake_req *req;
+
+ req = handshake_req_alloc(&tls_handshake_proto, flags);
+ if (!req)
+ return -ENOMEM;
+ treq = tls_handshake_req_init(req, args);
+ treq->th_type = HANDSHAKE_MSG_TYPE_SERVERHELLO;
+ treq->th_auth_mode = HANDSHAKE_AUTH_PSK;
+ treq->th_num_peerids = 1;
+ treq->th_peerid[0] = args->ta_my_peerids[0];
+
+ return handshake_req_submit(args->ta_sock, req, flags);
+}
+EXPORT_SYMBOL(tls_server_hello_psk);
+
+/**
+ * tls_handshake_cancel - cancel a pending handshake
+ * @sk: socket on which there is an ongoing handshake
+ *
+ * Request cancellation races with request completion. To determine
+ * who won, callers examine the return value from this function.
+ *
+ * Return values:
+ * %true - Uncompleted handshake request was canceled
+ * %false - Handshake request already completed or not found
+ */
+bool tls_handshake_cancel(struct sock *sk)
+{
+ return handshake_req_cancel(sk);
+}
+EXPORT_SYMBOL(tls_handshake_cancel);
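
For the consumer-facing entry points above, a hedged sketch of a caller
(hypothetical names; the timeout value is illustrative, and the keyring and
key serials would come from the consumer's configuration):

	#include <linux/completion.h>
	#include <net/handshake.h>
	#include <net/sock.h>

	static void my_tls_done(void *data, int status, key_serial_t peerid)
	{
		struct completion *done = data;

		/* status is zero on success or a negative errno */
		complete(done);
	}

	static int my_start_tls(struct socket *sock, key_serial_t keyring,
				key_serial_t cert, key_serial_t privkey,
				struct completion *done)
	{
		struct tls_handshake_args args = {
			.ta_sock	= sock,
			.ta_done	= my_tls_done,
			.ta_data	= done,
			.ta_timeout_ms	= 30000,
			.ta_keyring	= keyring,
			.ta_my_cert	= cert,
			.ta_my_privkey	= privkey,
		};

		return tls_client_hello_x509(&args, GFP_KERNEL);
	}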
diff --git a/net/handshake/trace.c b/net/handshake/trace.c
new file mode 100644
index 000000000000..1c4d8e27e17a
--- /dev/null
+++ b/net/handshake/trace.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trace points for transport security layer handshakes.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/types.h>
+
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "handshake.h"
+
+#define CREATE_TRACE_POINTS
+
+#include <trace/events/handshake.h>
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 880277c9fd07..b18ba8ef93ad 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -26,7 +26,7 @@ obj-$(CONFIG_IP_MROUTE) += ipmr.o
obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
-fou-y := fou_core.o fou_nl.o
+fou-y := fou_core.o fou_nl.o fou_bpf.o
obj-$(CONFIG_NET_FOU) += fou.o
obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
obj-$(CONFIG_NET_IPGRE) += ip_gre.o
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 13fc0c185cd9..4406d796cc2f 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -72,15 +72,11 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size,
static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag)
+ int off, int size)
{
const struct btf_type *t;
size_t end;
- if (atype == BPF_READ)
- return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
-
t = btf_type_by_id(reg->btf, reg->btf_id);
if (t != tcp_sock_type) {
bpf_log(log, "only read is supported\n");
@@ -113,6 +109,9 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
case offsetof(struct tcp_sock, ecn_flags):
end = offsetofend(struct tcp_sock, ecn_flags);
break;
+ case offsetof(struct tcp_sock, app_limited):
+ end = offsetofend(struct tcp_sock, app_limited);
+ break;
default:
bpf_log(log, "no write support to tcp_sock at off %d\n", off);
return -EACCES;
@@ -239,8 +238,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
if (bpf_obj_name_cpy(tcp_ca->name, utcp_ca->name,
sizeof(tcp_ca->name)) <= 0)
return -EINVAL;
- if (tcp_ca_find(utcp_ca->name))
- return -EEXIST;
return 1;
}
@@ -266,13 +263,25 @@ static void bpf_tcp_ca_unreg(void *kdata)
tcp_unregister_congestion_control(kdata);
}
+static int bpf_tcp_ca_update(void *kdata, void *old_kdata)
+{
+ return tcp_update_congestion_control(kdata, old_kdata);
+}
+
+static int bpf_tcp_ca_validate(void *kdata)
+{
+ return tcp_validate_congestion_control(kdata);
+}
+
struct bpf_struct_ops bpf_tcp_congestion_ops = {
.verifier_ops = &bpf_tcp_ca_verifier_ops,
.reg = bpf_tcp_ca_reg,
.unreg = bpf_tcp_ca_unreg,
+ .update = bpf_tcp_ca_update,
.check_member = bpf_tcp_ca_check_member,
.init_member = bpf_tcp_ca_init_member,
.init = bpf_tcp_ca_init,
+ .validate = bpf_tcp_ca_validate,
.name = "tcp_congestion_ops",
};
diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c
new file mode 100644
index 000000000000..3760a14b6b57
--- /dev/null
+++ b/net/ipv4/fou_bpf.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable Fou Helpers for TC-BPF hook
+ *
+ * These are called from SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface
+ * through which they are exposed to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+
+#include <net/dst_metadata.h>
+#include <net/fou.h>
+
+struct bpf_fou_encap {
+ __be16 sport;
+ __be16 dport;
+};
+
+enum bpf_fou_encap_type {
+ FOU_BPF_ENCAP_FOU,
+ FOU_BPF_ENCAP_GUE,
+};
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in BTF");
+
+/* bpf_skb_set_fou_encap - Set FOU encap parameters
+ *
+ * This function allows for using GUE or FOU encapsulation together with an
+ * ipip device in collect-metadata mode.
+ *
+ * It is meant to be used in BPF tc-hooks, after a call to the
+ * bpf_skb_set_tunnel_key helper, which is responsible for setting IP addresses.
+ *
+ * Parameters:
+ * @skb_ctx Pointer to ctx (__sk_buff) in TC program. Cannot be NULL
+ * @encap Pointer to a `struct bpf_fou_encap` storing UDP src and
+ * dst ports. If sport is set to 0 the kernel will auto-assign a
+ * port. This is similar to using `encap-sport auto`.
+ * Cannot be NULL
+ * @type Encapsulation type for the packet. Valid values are
+ * defined in `enum bpf_fou_encap_type`
+ */
+__bpf_kfunc int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
+ struct bpf_fou_encap *encap, int type)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+
+ if (unlikely(!encap))
+ return -EINVAL;
+
+ if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX)))
+ return -EINVAL;
+
+ switch (type) {
+ case FOU_BPF_ENCAP_FOU:
+ info->encap.type = TUNNEL_ENCAP_FOU;
+ break;
+ case FOU_BPF_ENCAP_GUE:
+ info->encap.type = TUNNEL_ENCAP_GUE;
+ break;
+ default:
+ info->encap.type = TUNNEL_ENCAP_NONE;
+ }
+
+ if (info->key.tun_flags & TUNNEL_CSUM)
+ info->encap.flags |= TUNNEL_ENCAP_FLAG_CSUM;
+
+ info->encap.sport = encap->sport;
+ info->encap.dport = encap->dport;
+
+ return 0;
+}
+
+/* bpf_skb_get_fou_encap - Get FOU encap parameters
+ *
+ * This function allows for reading encap metadata from a packet received
+ * on an ipip device in collect-metadata mode.
+ *
+ * Parameters:
+ * @skb_ctx Pointer to ctx (__sk_buff) in TC program. Cannot be NULL
+ * @encap Pointer to a struct bpf_fou_encap storing UDP source and
+ * destination port. Cannot be NULL
+ */
+__bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
+ struct bpf_fou_encap *encap)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+
+ if (unlikely(!info))
+ return -EINVAL;
+
+ encap->sport = info->encap.sport;
+ encap->dport = info->encap.dport;
+
+ return 0;
+}
+
+__diag_pop()
+
+BTF_SET8_START(fou_kfunc_set)
+BTF_ID_FLAGS(func, bpf_skb_set_fou_encap)
+BTF_ID_FLAGS(func, bpf_skb_get_fou_encap)
+BTF_SET8_END(fou_kfunc_set)
+
+static const struct btf_kfunc_id_set fou_bpf_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &fou_kfunc_set,
+};
+
+int register_fou_bpf(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
+ &fou_bpf_kfunc_set);
+}
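
A hedged sketch of a tc-BPF caller for these kfuncs (the section name,
includes, and __ksym extern declaration follow common libbpf conventions; the
GUE port and addresses are illustrative):

	#include <linux/bpf.h>
	#include <linux/pkt_cls.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_endian.h>

	struct bpf_fou_encap {
		__be16 sport;
		__be16 dport;
	};

	int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
				  struct bpf_fou_encap *encap, int type) __ksym;

	SEC("tc")
	int fou_set_encap(struct __sk_buff *skb)
	{
		struct bpf_tunnel_key key = {};
		struct bpf_fou_encap encap = {
			.sport = 0,			/* kernel auto-assigns */
			.dport = bpf_htons(5555),	/* illustrative GUE port */
		};

		key.remote_ipv4 = bpf_htonl(0x0a000002);	/* 10.0.0.2 */

		if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0) < 0)
			return TC_ACT_SHOT;
		if (bpf_skb_set_fou_encap(skb, &encap, 1 /* FOU_BPF_ENCAP_GUE */) < 0)
			return TC_ACT_SHOT;
		return TC_ACT_OK;
	}

	char _license[] SEC("license") = "GPL";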
diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
index cafec9b4eee0..0c41076e31ed 100644
--- a/net/ipv4/fou_core.c
+++ b/net/ipv4/fou_core.c
@@ -1236,10 +1236,15 @@ static int __init fou_init(void)
if (ret < 0)
goto unregister;
+ ret = register_fou_bpf();
+ if (ret < 0)
+ goto kfunc_failed;
+
ret = ip_tunnel_encap_add_fou_ops();
if (ret == 0)
return 0;
+kfunc_failed:
genl_unregister_family(&fou_nl_family);
unregister:
unregister_pernet_device(&fou_net_ops);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 8cebb476b3ab..b8607763d113 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -749,6 +749,11 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
room = 576;
room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
room -= sizeof(struct icmphdr);
+ /* Guard against tiny mtu. We need to include at least one
+ * IP network header for this message to make any sense.
+ */
+ if (room <= (int)sizeof(struct iphdr))
+ goto ende;
icmp_param.data_len = skb_in->len - icmp_param.offset;
if (icmp_param.data_len > room)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 2541083d49ad..beeae624c412 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -359,6 +359,20 @@ err_dev_set_mtu:
return ERR_PTR(err);
}
+void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct udphdr *udph;
+
+ if (iph->protocol != IPPROTO_UDP)
+ return;
+
+ udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
+ info->encap.sport = udph->source;
+ info->encap.dport = udph->dest;
+}
+EXPORT_SYMBOL(ip_tunnel_md_udp_encap);
+
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
bool log_ecn_error)
@@ -572,7 +586,11 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
dev_net(dev), 0, skb->mark, skb_get_hash(skb),
key->flow_flags);
- if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
+
+ if (!tunnel_hlen)
+ tunnel_hlen = ip_encap_hlen(&tun_info->encap);
+
+ if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
goto tx_error;
use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
@@ -732,7 +750,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
dev_net(dev), tunnel->parms.link,
tunnel->fwmark, skb_get_hash(skb), 0);
- if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
+ if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
goto tx_error;
if (connected && md) {
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index abea77759b7e..27b8f83c6ea2 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -241,6 +241,7 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
if (!tun_dst)
return 0;
+ ip_tunnel_md_udp_encap(skb, &tun_dst->u.tun_info);
}
skb_reset_mac_header(skb);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 409ec2a1f95b..5178a3f3cb53 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1089,13 +1089,13 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
}
void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family)
- __acquires(RCU)
+ __acquires(ping_table.lock)
{
struct ping_iter_state *state = seq->private;
state->bucket = 0;
state->family = family;
- rcu_read_lock();
+ spin_lock(&ping_table.lock);
return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
}
@@ -1121,9 +1121,9 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
EXPORT_SYMBOL_GPL(ping_seq_next);
void ping_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU)
+ __releases(ping_table.lock)
{
- rcu_read_unlock();
+ spin_unlock(&ping_table.lock);
}
EXPORT_SYMBOL_GPL(ping_seq_stop);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 3cf68695b40d..ff712bf2a98d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -91,12 +91,12 @@ EXPORT_SYMBOL_GPL(raw_v4_hashinfo);
int raw_hash_sk(struct sock *sk)
{
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
- struct hlist_nulls_head *hlist;
+ struct hlist_head *hlist;
hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)];
spin_lock(&h->lock);
- __sk_nulls_add_node_rcu(sk, hlist);
+ sk_add_node_rcu(sk, hlist);
sock_set_flag(sk, SOCK_RCU_FREE);
spin_unlock(&h->lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -110,7 +110,7 @@ void raw_unhash_sk(struct sock *sk)
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
spin_lock(&h->lock);
- if (__sk_nulls_del_node_init_rcu(sk))
+ if (sk_del_node_init_rcu(sk))
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
spin_unlock(&h->lock);
}
@@ -163,16 +163,15 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
static int raw_v4_input(struct net *net, struct sk_buff *skb,
const struct iphdr *iph, int hash)
{
- struct hlist_nulls_head *hlist;
- struct hlist_nulls_node *hnode;
int sdif = inet_sdif(skb);
+ struct hlist_head *hlist;
int dif = inet_iif(skb);
int delivered = 0;
struct sock *sk;
hlist = &raw_v4_hashinfo.ht[hash];
rcu_read_lock();
- sk_nulls_for_each(sk, hnode, hlist) {
+ sk_for_each_rcu(sk, hlist) {
if (!raw_v4_match(net, sk, iph->protocol,
iph->saddr, iph->daddr, dif, sdif))
continue;
@@ -264,10 +263,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
{
struct net *net = dev_net(skb->dev);
- struct hlist_nulls_head *hlist;
- struct hlist_nulls_node *hnode;
int dif = skb->dev->ifindex;
int sdif = inet_sdif(skb);
+ struct hlist_head *hlist;
const struct iphdr *iph;
struct sock *sk;
int hash;
@@ -276,7 +274,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
hlist = &raw_v4_hashinfo.ht[hash];
rcu_read_lock();
- sk_nulls_for_each(sk, hnode, hlist) {
+ sk_for_each_rcu(sk, hlist) {
iph = (const struct iphdr *)skb->data;
if (!raw_v4_match(net, sk, iph->protocol,
iph->daddr, iph->saddr, dif, sdif))
@@ -950,14 +948,13 @@ static struct sock *raw_get_first(struct seq_file *seq, int bucket)
{
struct raw_hashinfo *h = pde_data(file_inode(seq->file));
struct raw_iter_state *state = raw_seq_private(seq);
- struct hlist_nulls_head *hlist;
- struct hlist_nulls_node *hnode;
+ struct hlist_head *hlist;
struct sock *sk;
for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE;
++state->bucket) {
hlist = &h->ht[state->bucket];
- sk_nulls_for_each(sk, hnode, hlist) {
+ sk_for_each(sk, hlist) {
if (sock_net(sk) == seq_file_net(seq))
return sk;
}
@@ -970,7 +967,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
struct raw_iter_state *state = raw_seq_private(seq);
do {
- sk = sk_nulls_next(sk);
+ sk = sk_next(sk);
} while (sk && sock_net(sk) != seq_file_net(seq));
if (!sk)
@@ -989,9 +986,12 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
}
void *raw_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(RCU)
+ __acquires(&h->lock)
{
- rcu_read_lock();
+ struct raw_hashinfo *h = pde_data(file_inode(seq->file));
+
+ spin_lock(&h->lock);
+
return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL_GPL(raw_seq_start);
@@ -1010,9 +1010,11 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
EXPORT_SYMBOL_GPL(raw_seq_next);
void raw_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU)
+ __releases(&h->lock)
{
- rcu_read_unlock();
+ struct raw_hashinfo *h = pde_data(file_inode(seq->file));
+
+ spin_unlock(&h->lock);
}
EXPORT_SYMBOL_GPL(raw_seq_stop);
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index bca49a844f01..63a40e4b678f 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -57,8 +57,7 @@ static bool raw_lookup(struct net *net, const struct sock *sk,
static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r)
{
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
- struct hlist_nulls_head *hlist;
- struct hlist_nulls_node *hnode;
+ struct hlist_head *hlist;
struct sock *sk;
int slot;
@@ -68,7 +67,7 @@ static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2
rcu_read_lock();
for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) {
hlist = &hashinfo->ht[slot];
- sk_nulls_for_each(sk, hnode, hlist) {
+ sk_for_each_rcu(sk, hlist) {
if (raw_lookup(net, sk, r)) {
/*
* Grab it and keep until we fill
@@ -142,9 +141,8 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
struct net *net = sock_net(skb->sk);
struct inet_diag_dump_data *cb_data;
- struct hlist_nulls_head *hlist;
- struct hlist_nulls_node *hnode;
int num, s_num, slot, s_slot;
+ struct hlist_head *hlist;
struct sock *sk = NULL;
struct nlattr *bc;
@@ -161,7 +159,7 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
num = 0;
hlist = &hashinfo->ht[slot];
- sk_nulls_for_each(sk, hnode, hlist) {
+ sk_for_each_rcu(sk, hlist) {
struct inet_sock *inet = inet_sk(sk);
if (!net_eq(sock_net(sk), net))
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0d0cc4ef2b85..40fe70fc2015 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -25,6 +25,7 @@ static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31;
+static int tcp_app_win_max = 31;
static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
static int tcp_min_snd_mss_max = 65535;
static int ip_privileged_port_min;
@@ -1198,6 +1199,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &tcp_app_win_max,
},
{
.procname = "tcp_adv_win_scale",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fd68d49490f2..20db115c38c4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2165,7 +2165,7 @@ static void tcp_zc_finalize_rx_tstamp(struct sock *sk,
struct msghdr cmsg_dummy;
msg_control_addr = (unsigned long)zc->msg_control;
- cmsg_dummy.msg_control = (void *)msg_control_addr;
+ cmsg_dummy.msg_control_user = (void __user *)msg_control_addr;
cmsg_dummy.msg_controllen =
(__kernel_size_t)zc->msg_controllen;
cmsg_dummy.msg_flags = in_compat_syscall()
@@ -2176,7 +2176,7 @@ static void tcp_zc_finalize_rx_tstamp(struct sock *sk,
zc->msg_controllen == cmsg_dummy.msg_controllen) {
tcp_recv_timestamp(&cmsg_dummy, sk, tss);
zc->msg_control = (__u64)
- ((uintptr_t)cmsg_dummy.msg_control);
+ ((uintptr_t)cmsg_dummy.msg_control_user);
zc->msg_controllen =
(__u64)cmsg_dummy.msg_controllen;
zc->msg_flags = (__u32)cmsg_dummy.msg_flags;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index db8b4b488c31..1b34050a7538 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -75,14 +75,8 @@ struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
return NULL;
}
-/*
- * Attach new congestion control algorithm to the list
- * of available options.
- */
-int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
+int tcp_validate_congestion_control(struct tcp_congestion_ops *ca)
{
- int ret = 0;
-
/* all algorithms must implement these */
if (!ca->ssthresh || !ca->undo_cwnd ||
!(ca->cong_avoid || ca->cong_control)) {
@@ -90,6 +84,20 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
return -EINVAL;
}
+ return 0;
+}
+
+/* Attach new congestion control algorithm to the list
+ * of available options.
+ */
+int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
+{
+ int ret;
+
+ ret = tcp_validate_congestion_control(ca);
+ if (ret)
+ return ret;
+
ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
spin_lock(&tcp_cong_list_lock);
@@ -130,6 +138,50 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
}
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
+/* Replace a registered old ca with a new one.
+ *
+ * The new ca must have the same name as the old one, that has been
+ * registered.
+ */
+int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca)
+{
+ struct tcp_congestion_ops *existing;
+ int ret;
+
+ ret = tcp_validate_congestion_control(ca);
+ if (ret)
+ return ret;
+
+ ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
+
+ spin_lock(&tcp_cong_list_lock);
+ existing = tcp_ca_find_key(old_ca->key);
+ if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) {
+ pr_notice("%s not registered or non-unique key\n",
+ ca->name);
+ ret = -EINVAL;
+ } else if (existing != old_ca) {
+ pr_notice("invalid old congestion control algorithm to replace\n");
+ ret = -EINVAL;
+ } else {
+ /* Add the new one before removing the old one to keep
+ * one implementation available all the time.
+ */
+ list_add_tail_rcu(&ca->list, &tcp_cong_list);
+ list_del_rcu(&existing->list);
+ pr_debug("%s updated\n", ca->name);
+ }
+ spin_unlock(&tcp_cong_list_lock);
+
+ /* Wait for outstanding readers to complete before the
+ * module or struct_ops gets removed entirely.
+ */
+ if (!ret)
+ synchronize_rcu();
+
+ return ret;
+}
+
u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
{
const struct tcp_congestion_ops *ca;
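
The add-before-delete ordering in tcp_update_congestion_control() is the
standard RCU replacement idiom: readers walking tcp_cong_list under
rcu_read_lock() always find at least one entry for the algorithm, and
synchronize_rcu() defers teardown until every such reader is gone. A generic
sketch of the idiom (illustrative, not from this patch; rculist.h also offers
list_replace_rcu() for a single-step swap):

	#include <linux/rculist.h>
	#include <linux/spinlock.h>

	static void rcu_list_replace(struct list_head *list, spinlock_t *lock,
				     struct list_head *new, struct list_head *old)
	{
		spin_lock(lock);
		list_add_tail_rcu(new, list);	/* readers can now find "new" */
		list_del_rcu(old);		/* readers still on "old" stay safe */
		spin_unlock(lock);

		synchronize_rcu();		/* no reader can reference "old" now */
	}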
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 89daa6b953ff..39bda2b1066e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2780,7 +2780,7 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
{
while (iter->cur_sk < iter->end_sk)
- sock_put(iter->batch[iter->cur_sk++]);
+ sock_gen_put(iter->batch[iter->cur_sk++]);
}
static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
@@ -2941,7 +2941,7 @@ static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
* st->bucket. See tcp_seek_last_pos().
*/
st->offset++;
- sock_put(iter->batch[iter->cur_sk++]);
+ sock_gen_put(iter->batch[iter->cur_sk++]);
}
if (iter->cur_sk < iter->end_sk)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e1b679a590c9..2bbf13216a3d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -952,6 +952,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
+ net->ipv6.sysctl.icmpv6_error_anycast_as_unicast = 0;
/* By default, rate limit error messages.
* Except for pmtu discovery, it would break it.
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 1f53f2a74480..9edf1f45b1ed 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -362,9 +362,10 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
/*
* We won't send icmp if the destination is known
- * anycast.
+ * anycast unless we need to treat anycast as unicast.
*/
- if (ipv6_anycast_destination(dst, &fl6->daddr)) {
+ if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
+ ipv6_anycast_destination(dst, &fl6->daddr)) {
net_dbg_ratelimited("icmp6_send: acast source\n");
dst_release(dst);
return ERR_PTR(-EINVAL);
@@ -1195,6 +1196,15 @@ static struct ctl_table ipv6_icmp_table_template[] = {
.mode = 0644,
.proc_handler = proc_do_large_bitmap,
},
+ {
+ .procname = "error_anycast_as_unicast",
+ .data = &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
{ },
};
@@ -1212,6 +1222,7 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
+ table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
}
return table;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0b6140f0179d..9554cf46ed88 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1965,8 +1965,13 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+ u8 icmp6_type;
- ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
+ if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
+ icmp6_type = fl6->fl6_icmp_type;
+ else
+ icmp6_type = icmp6_hdr(skb)->icmp6_type;
+ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 2917dd8d198c..ae818ff46224 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -716,6 +716,7 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
goto done;
msg.msg_controllen = optlen;
+ msg.msg_control_is_user = false;
msg.msg_control = (void *)(opt+1);
ipc6.opt = opt;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4ab62a9c5c8e..7d0adb612bdd 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -141,10 +141,9 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister);
static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
{
struct net *net = dev_net(skb->dev);
- struct hlist_nulls_head *hlist;
- struct hlist_nulls_node *hnode;
const struct in6_addr *saddr;
const struct in6_addr *daddr;
+ struct hlist_head *hlist;
struct sock *sk;
bool delivered = false;
__u8 hash;
@@ -155,7 +154,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
hash = raw_hashfunc(net, nexthdr);
hlist = &raw_v6_hashinfo.ht[hash];
rcu_read_lock();
- sk_nulls_for_each(sk, hnode, hlist) {
+ sk_for_each_rcu(sk, hlist) {
int filtered;
if (!raw_v6_match(net, sk, nexthdr, daddr, saddr,
@@ -331,15 +330,14 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
u8 type, u8 code, int inner_offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
- struct hlist_nulls_head *hlist;
- struct hlist_nulls_node *hnode;
+ struct hlist_head *hlist;
struct sock *sk;
int hash;
hash = raw_hashfunc(net, nexthdr);
hlist = &raw_v6_hashinfo.ht[hash];
rcu_read_lock();
- sk_nulls_for_each(sk, hnode, hlist) {
+ sk_for_each_rcu(sk, hlist) {
/* Note: ipv6_hdr(skb) != skb->data */
const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data;
diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c
index 488aec9e1a74..d1876f192225 100644
--- a/net/ipv6/rpl.c
+++ b/net/ipv6/rpl.c
@@ -32,7 +32,8 @@ static void *ipv6_rpl_segdata_pos(const struct ipv6_rpl_sr_hdr *hdr, int i)
size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri,
unsigned char cmpre)
{
- return (n * IPV6_PFXTAIL_LEN(cmpri)) + IPV6_PFXTAIL_LEN(cmpre);
+ return sizeof(struct ipv6_rpl_sr_hdr) + (n * IPV6_PFXTAIL_LEN(cmpri)) +
+ IPV6_PFXTAIL_LEN(cmpre);
}
void ipv6_rpl_srh_decompress(struct ipv6_rpl_sr_hdr *outhdr,
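
The size fix is easiest to see with numbers. Assuming IPV6_PFXTAIL_LEN(x)
expands to sizeof(struct in6_addr) - (x) and the fixed part of the RPL SRH
is 8 bytes:

/* n = 2 segments, cmpri = cmpre = 8:
 *   old: 2 * (16 - 8) + (16 - 8)     = 24 bytes
 *   new: 8 + 2 * (16 - 8) + (16 - 8) = 32 bytes
 * The old formula omitted the fixed header, under-sizing every
 * allocation by sizeof(struct ipv6_rpl_sr_hdr).
 */
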
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 70d81bba5093..063560e2cb1a 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1024,7 +1024,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
ttl = iph6->hop_limit;
tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
- if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) {
+ if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) {
ip_rt_put(rt);
goto tx_error;
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4caa70a1b871..e5a337e6b970 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1397,9 +1397,11 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
msg->msg_name = &sin;
msg->msg_namelen = sizeof(sin);
do_udp_sendmsg:
- if (ipv6_only_sock(sk))
- return -ENETUNREACH;
- return udp_sendmsg(sk, msg, len);
+ err = ipv6_only_sock(sk) ?
+ -ENETUNREACH : udp_sendmsg(sk, msg, len);
+ msg->msg_name = sin6;
+ msg->msg_namelen = addr_len;
+ return err;
}
}
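
The restore of msg_name/msg_namelen is, presumably, to keep the caller's
msghdr intact: the IPv4 fallback above temporarily points msg->msg_name at
the on-stack sin, and a caller that keeps the msghdr and resubmits it
(io_uring-style retries) would otherwise chase a dangling stack pointer.
A sketch of the hazard; send_once() is a made-up stand-in for sendmsg():

struct sockaddr_in6 user_addr = { .sin6_family = AF_INET6 };
struct msghdr m = {
	.msg_name	= &user_addr,		/* caller-owned storage */
	.msg_namelen	= sizeof(user_addr),
};

send_once(&m);	/* callee may rewrite m.msg_name internally */
send_once(&m);	/* a retry relies on m.msg_name still being valid */
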
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 4db5a554bdbd..41a74fc84ca1 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -677,8 +677,8 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
MODULE_DESCRIPTION("L2TP over IP");
MODULE_VERSION("1.0");
-/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like
- * enums
+/* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol,
+ * because __stringify doesn't like enums
*/
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP);
-MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_L2TP);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 115, 2);
+MODULE_ALIAS_NET_PF_PROTO(PF_INET, 115);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 2478aa60145f..5137ea1861ce 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -806,8 +806,8 @@ MODULE_AUTHOR("Chris Elston <celston@katalix.com>");
MODULE_DESCRIPTION("L2TP IP encapsulation for IPv6");
MODULE_VERSION("1.0");
-/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like
- * enums
+/* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol,
+ * because __stringify doesn't like enums
*/
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 2, IPPROTO_L2TP);
-MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_L2TP);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 115, 2);
+MODULE_ALIAS_NET_PF_PROTO(PF_INET6, 115);
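
The swap reads naturally once the macro signature is in view:
MODULE_ALIAS_NET_PF_PROTO_TYPE(pf, proto, type) stringifies its arguments
into the alias string the kernel requests when socket(2) misses. Assuming
that expansion:

/* old: MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP)
 *      -> "net-pf-2-proto-2-type-IPPROTO_L2TP"  (proto/type swapped, and
 *         the enum name survives unstringified)
 * new: MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 115, 2)
 *      -> "net-pf-2-proto-115-type-2"           (what modprobe asks for)
 */
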
diff --git a/net/mac80211/drop.h b/net/mac80211/drop.h
new file mode 100644
index 000000000000..49dc809cab29
--- /dev/null
+++ b/net/mac80211/drop.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * mac80211 drop reason list
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+
+#ifndef MAC80211_DROP_H
+#define MAC80211_DROP_H
+#include <net/dropreason.h>
+
+typedef unsigned int __bitwise ieee80211_rx_result;
+
+#define MAC80211_DROP_REASONS_MONITOR(R) \
+ R(RX_DROP_M_UNEXPECTED_4ADDR_FRAME) \
+ R(RX_DROP_M_BAD_BCN_KEYIDX) \
+ R(RX_DROP_M_BAD_MGMT_KEYIDX) \
+/* this line for the trailing \ - add before this */
+
+#define MAC80211_DROP_REASONS_UNUSABLE(R) \
+ R(RX_DROP_U_MIC_FAIL) \
+ R(RX_DROP_U_REPLAY) \
+ R(RX_DROP_U_BAD_MMIE) \
+/* this line for the trailing \ - add before this */
+
+/* having two enums allows for checking ieee80211_rx_result use with sparse */
+enum ___mac80211_drop_reason {
+/* if we get to the end of handlers with RX_CONTINUE this will be the reason */
+ ___RX_CONTINUE = SKB_CONSUMED,
+
+/* this never gets used as an argument to kfree_skb_reason() */
+ ___RX_QUEUED = SKB_NOT_DROPPED_YET,
+
+#define ENUM(x) ___ ## x,
+ ___RX_DROP_MONITOR = SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR <<
+ SKB_DROP_REASON_SUBSYS_SHIFT,
+ MAC80211_DROP_REASONS_MONITOR(ENUM)
+
+ ___RX_DROP_UNUSABLE = SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE <<
+ SKB_DROP_REASON_SUBSYS_SHIFT,
+ MAC80211_DROP_REASONS_UNUSABLE(ENUM)
+#undef ENUM
+};
+
+enum mac80211_drop_reason {
+ RX_CONTINUE = (__force ieee80211_rx_result)___RX_CONTINUE,
+ RX_QUEUED = (__force ieee80211_rx_result)___RX_QUEUED,
+ RX_DROP_MONITOR = (__force ieee80211_rx_result)___RX_DROP_MONITOR,
+ RX_DROP_UNUSABLE = (__force ieee80211_rx_result)___RX_DROP_UNUSABLE,
+#define DEF(x) x = (__force ieee80211_rx_result)___ ## x,
+ MAC80211_DROP_REASONS_MONITOR(DEF)
+ MAC80211_DROP_REASONS_UNUSABLE(DEF)
+#undef DEF
+};
+
+#endif /* MAC80211_DROP_H */
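
The new header is built on the X-macro idiom: a single list of reason
names expands once into enumerators and once (in main.c below) into their
printable strings, so the two can never drift apart. A minimal standalone
sketch of the idiom:

#define DROP_REASONS(R) \
	R(DROP_EXAMPLE_A) \
	R(DROP_EXAMPLE_B)

enum example_drop_reason {
#define ENUM(x) x,
	DROP_REASONS(ENUM)
#undef ENUM
};

static const char * const example_drop_names[] = {
#define NAME(x) #x,			/* stringize the same list */
	DROP_REASONS(NAME)
#undef NAME
};
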
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9b7e184430b8..a0a7839cb961 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -33,6 +33,7 @@
#include "key.h"
#include "sta_info.h"
#include "debug.h"
+#include "drop.h"
extern const struct cfg80211_ops mac80211_config_ops;
@@ -170,13 +171,6 @@ struct ieee80211_tx_data {
unsigned int flags;
};
-
-typedef unsigned __bitwise ieee80211_rx_result;
-#define RX_CONTINUE ((__force ieee80211_rx_result) 0u)
-#define RX_DROP_UNUSABLE ((__force ieee80211_rx_result) 1u)
-#define RX_DROP_MONITOR ((__force ieee80211_rx_result) 2u)
-#define RX_QUEUED ((__force ieee80211_rx_result) 3u)
-
/**
* enum ieee80211_packet_rx_flags - packet RX flags
* @IEEE80211_RX_AMSDU: a-MSDU packet
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ddf2b7811c55..55cdfaef0f5d 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -22,6 +22,7 @@
#include <linux/bitmap.h>
#include <linux/inetdevice.h>
#include <net/net_namespace.h>
+#include <net/dropreason.h>
#include <net/cfg80211.h>
#include <net/addrconf.h>
@@ -1542,6 +1543,28 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
}
EXPORT_SYMBOL(ieee80211_free_hw);
+static const char * const drop_reasons_monitor[] = {
+#define V(x) #x,
+ [0] = "RX_DROP_MONITOR",
+ MAC80211_DROP_REASONS_MONITOR(V)
+};
+
+static struct drop_reason_list drop_reason_list_monitor = {
+ .reasons = drop_reasons_monitor,
+ .n_reasons = ARRAY_SIZE(drop_reasons_monitor),
+};
+
+static const char * const drop_reasons_unusable[] = {
+ [0] = "RX_DROP_UNUSABLE",
+ MAC80211_DROP_REASONS_UNUSABLE(V)
+#undef V
+};
+
+static struct drop_reason_list drop_reason_list_unusable = {
+ .reasons = drop_reasons_unusable,
+ .n_reasons = ARRAY_SIZE(drop_reasons_unusable),
+};
+
static int __init ieee80211_init(void)
{
struct sk_buff *skb;
@@ -1559,6 +1582,11 @@ static int __init ieee80211_init(void)
if (ret)
goto err_netdev;
+ drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR,
+ &drop_reason_list_monitor);
+ drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE,
+ &drop_reason_list_unusable);
+
return 0;
err_netdev:
rc80211_minstrel_exit();
@@ -1574,6 +1602,9 @@ static void __exit ieee80211_exit(void)
ieee80211_iface_exit();
+ drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR);
+ drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE);
+
rcu_barrier();
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index db3451f5f2fb..58222c077898 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1826,7 +1826,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
cfg80211_rx_unexpected_4addr_frame(
rx->sdata->dev, sta->sta.addr,
GFP_ATOMIC);
- return RX_DROP_MONITOR;
+ return RX_DROP_M_UNEXPECTED_4ADDR_FRAME;
}
/*
* Update counter and free packet here to avoid
@@ -1961,7 +1961,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
skb->data,
skb->len);
- return RX_DROP_MONITOR; /* unexpected BIP keyidx */
+ return RX_DROP_M_BAD_BCN_KEYIDX;
}
rx->key = ieee80211_rx_get_bigtk(rx, mmie_keyidx);
@@ -1975,7 +1975,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
if (mmie_keyidx < NUM_DEFAULT_KEYS ||
mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
- return RX_DROP_MONITOR; /* unexpected BIP keyidx */
+ return RX_DROP_M_BAD_MGMT_KEYIDX; /* unexpected BIP keyidx */
if (rx->link_sta) {
if (ieee80211_is_group_privacy_action(skb) &&
test_sta_flag(rx->sta, WLAN_STA_MFP))
@@ -3960,7 +3960,8 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
}
static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
- struct ieee80211_rate *rate)
+ struct ieee80211_rate *rate,
+ ieee80211_rx_result reason)
{
struct ieee80211_sub_if_data *sdata;
struct ieee80211_local *local = rx->local;
@@ -4024,42 +4025,38 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
}
out_free_skb:
- dev_kfree_skb(skb);
+ kfree_skb_reason(skb, (__force u32)reason);
}
static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
ieee80211_rx_result res)
{
- switch (res) {
- case RX_DROP_MONITOR:
- I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
- if (rx->sta)
- rx->link_sta->rx_stats.dropped++;
- fallthrough;
- case RX_CONTINUE: {
- struct ieee80211_rate *rate = NULL;
- struct ieee80211_supported_band *sband;
- struct ieee80211_rx_status *status;
-
- status = IEEE80211_SKB_RXCB((rx->skb));
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_rate *rate = NULL;
- sband = rx->local->hw.wiphy->bands[status->band];
- if (status->encoding == RX_ENC_LEGACY)
- rate = &sband->bitrates[status->rate_idx];
+ if (res == RX_QUEUED) {
+ I802_DEBUG_INC(rx->sdata->local->rx_handlers_queued);
+ return;
+ }
- ieee80211_rx_cooked_monitor(rx, rate);
- break;
- }
- case RX_DROP_UNUSABLE:
+ if (res != RX_CONTINUE) {
I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
if (rx->sta)
rx->link_sta->rx_stats.dropped++;
- dev_kfree_skb(rx->skb);
- break;
- case RX_QUEUED:
- I802_DEBUG_INC(rx->sdata->local->rx_handlers_queued);
- break;
}
+
+ if (u32_get_bits((__force u32)res, SKB_DROP_REASON_SUBSYS_MASK) ==
+ SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE) {
+ kfree_skb_reason(rx->skb, (__force u32)res);
+ return;
+ }
+
+ sband = rx->local->hw.wiphy->bands[status->band];
+ if (status->encoding == RX_ENC_LEGACY)
+ rate = &sband->bitrates[status->rate_idx];
+
+ ieee80211_rx_cooked_monitor(rx, rate, res);
}
static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
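
The subsystem test above works because a drop reason is a packed pair:
high bits name the subsystem, low bits index its reasons[] array. A small
decode sketch, assuming SKB_DROP_REASON_SUBSYS_MASK covers the high bits
as the u32_get_bits() call implies:

static u32 drop_reason_subsys(u32 reason)
{
	return u32_get_bits(reason, SKB_DROP_REASON_SUBSYS_MASK);
}

static u32 drop_reason_index(u32 reason)
{
	return reason & ~SKB_DROP_REASON_SUBSYS_MASK;
}
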
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 20f742b5503b..4133496da378 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -550,7 +550,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
if (res < 0 ||
(!res && !(status->flag & RX_FLAG_ALLOW_SAME_PN))) {
key->u.ccmp.replays++;
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_REPLAY;
}
if (!(status->flag & RX_FLAG_DECRYPTED)) {
@@ -564,7 +564,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN,
data_len,
skb->data + skb->len - mic_len))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_MIC_FAIL;
}
memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN);
@@ -746,7 +746,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
if (res < 0 ||
(!res && !(status->flag & RX_FLAG_ALLOW_SAME_PN))) {
key->u.gcmp.replays++;
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_REPLAY;
}
if (!(status->flag & RX_FLAG_DECRYPTED)) {
@@ -761,7 +761,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
data_len,
skb->data + skb->len -
IEEE80211_GCMP_MIC_LEN))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_MIC_FAIL;
}
memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN);
@@ -930,13 +930,13 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
(skb->data + skb->len - sizeof(*mmie));
if (mmie->element_id != WLAN_EID_MMIE ||
mmie->length != sizeof(*mmie) - 2)
- return RX_DROP_UNUSABLE; /* Invalid MMIE */
+ return RX_DROP_U_BAD_MMIE; /* Invalid MMIE */
bip_ipn_swap(ipn, mmie->sequence_number);
if (memcmp(ipn, key->u.aes_cmac.rx_pn, 6) <= 0) {
key->u.aes_cmac.replays++;
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_REPLAY;
}
if (!(status->flag & RX_FLAG_DECRYPTED)) {
@@ -946,7 +946,7 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
skb->data + 24, skb->len - 24, mic);
if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
key->u.aes_cmac.icverrors++;
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_MIC_FAIL;
}
}
@@ -986,7 +986,7 @@ ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx)
if (memcmp(ipn, key->u.aes_cmac.rx_pn, 6) <= 0) {
key->u.aes_cmac.replays++;
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_REPLAY;
}
if (!(status->flag & RX_FLAG_DECRYPTED)) {
@@ -996,7 +996,7 @@ ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx)
skb->data + 24, skb->len - 24, mic);
if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
key->u.aes_cmac.icverrors++;
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_MIC_FAIL;
}
}
@@ -1079,13 +1079,13 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx)
(skb->data + skb->len - sizeof(*mmie));
if (mmie->element_id != WLAN_EID_MMIE ||
mmie->length != sizeof(*mmie) - 2)
- return RX_DROP_UNUSABLE; /* Invalid MMIE */
+ return RX_DROP_U_BAD_MMIE; /* Invalid MMIE */
bip_ipn_swap(ipn, mmie->sequence_number);
if (memcmp(ipn, key->u.aes_gmac.rx_pn, 6) <= 0) {
key->u.aes_gmac.replays++;
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_REPLAY;
}
if (!(status->flag & RX_FLAG_DECRYPTED)) {
@@ -1104,7 +1104,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx)
crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
key->u.aes_gmac.icverrors++;
kfree(mic);
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_MIC_FAIL;
}
kfree(mic);
}
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index d237d142171c..bceaab8dd8e4 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -9,11 +9,18 @@
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req)
{
- struct sock *ssk = subflow->tcp_sock;
- struct sock *sk = subflow->conn;
+ struct sock *sk, *ssk;
struct sk_buff *skb;
struct tcp_sock *tp;
+ /* on early fallback the subflow context is deleted by
+ * subflow_syn_recv_sock()
+ */
+ if (!subflow)
+ return;
+
+ ssk = subflow->tcp_sock;
+ sk = subflow->conn;
tp = tcp_sk(ssk);
subflow->is_mptfo = 1;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b30cea2fbf3f..19a01b6566f1 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -442,7 +442,6 @@ static void clear_3rdack_retransmission(struct sock *sk)
static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
bool snd_data_fin_enable,
unsigned int *size,
- unsigned int remaining,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -556,7 +555,6 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
bool snd_data_fin_enable,
unsigned int *size,
- unsigned int remaining,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -580,7 +578,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
opts->ext_copy = *mpext;
}
- remaining -= map_size;
dss_size = map_size;
if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
@@ -851,9 +848,9 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
}
snd_data_fin = mptcp_data_fin_enabled(msk);
- if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
+ if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, opts))
ret = true;
- else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
+ else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, opts)) {
unsigned int mp_fail_size;
ret = true;
@@ -1001,7 +998,7 @@ check_notify:
clear_3rdack_retransmission(ssk);
mptcp_pm_subflow_established(msk);
} else {
- mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
+ mptcp_pm_fully_established(msk, ssk);
}
return true;
@@ -1192,9 +1189,8 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
*/
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
if (mp_opt.data_fin && mp_opt.data_len == 1 &&
- mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) &&
- schedule_work(&msk->work))
- sock_hold(subflow->conn);
+ mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64))
+ mptcp_schedule_work((struct sock *)msk);
return true;
}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 70f0ced3ca86..78c924506e83 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -126,7 +126,7 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
return true;
}
-void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp)
+void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
{
struct mptcp_pm_data *pm = &msk->pm;
bool announce = false;
@@ -150,7 +150,7 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk,
spin_unlock_bh(&pm->lock);
if (announce)
- mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp);
+ mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, GFP_ATOMIC);
}
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 1c42bebca39e..bc343dab5e3f 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1035,8 +1035,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
lock_sock(newsk);
ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
release_sock(newsk);
- if (!ssock)
- return -EINVAL;
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index a02d3cbf2a1b..27a275805c06 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -25,8 +25,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk)
}
}
-int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry)
+static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry)
{
DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_pm_addr_entry *match = NULL;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2d26b9114373..08dc53f56bc2 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -49,18 +49,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk);
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
static struct net_device mptcp_napi_dev;
-/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
- * completed yet or has failed, return the subflow socket.
- * Otherwise return NULL.
- */
-struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
-{
- if (!msk->subflow || READ_ONCE(msk->can_ack))
- return NULL;
-
- return msk->subflow;
-}
-
/* Returns end sequence number of the receiver's advertised window */
static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
{
@@ -116,6 +104,31 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
return 0;
}
+/* If the MPC handshake is not started, returns the first subflow,
+ * allocating it if needed.
+ */
+struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
+{
+ struct sock *sk = (struct sock *)msk;
+ int ret;
+
+ if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+ return ERR_PTR(-EINVAL);
+
+ if (!msk->subflow) {
+ if (msk->first)
+ return ERR_PTR(-EINVAL);
+
+ ret = __mptcp_socket_create(msk);
+ if (ret)
+ return ERR_PTR(ret);
+
+ mptcp_sockopt_sync(msk, msk->first);
+ }
+
+ return msk->subflow;
+}
+
static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
{
sk_drops_add(sk, skb);
@@ -1662,13 +1675,31 @@ static void mptcp_set_nospace(struct sock *sk)
static int mptcp_disconnect(struct sock *sk, int flags);
-static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
+static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
size_t len, int *copied_syn)
{
unsigned int saved_flags = msg->msg_flags;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct socket *ssock;
+ struct sock *ssk;
int ret;
+ /* On flags-based fastopen, MPTCP is supposed to create the
+ * first subflow right now. Otherwise we are in the defer_connect
+ * path, and the first subflow must already be present.
+ * Since the defer_connect flag is cleared after the first successful
+ * fastopen attempt, there is no need to check the subflow status again.
+ */
+ if (msg->msg_flags & MSG_FASTOPEN) {
+ ssock = __mptcp_nmpc_socket(msk);
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
+ }
+ if (!msk->first)
+ return -EINVAL;
+
+ ssk = msk->first;
+
lock_sock(ssk);
msg->msg_flags |= MSG_DONTWAIT;
msk->connect_flags = O_NONBLOCK;
@@ -1691,6 +1722,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
} else if (ret && ret != -EINPROGRESS) {
mptcp_disconnect(sk, 0);
}
+ inet_sk(sk)->defer_connect = 0;
return ret;
}
@@ -1699,7 +1731,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct page_frag *pfrag;
- struct socket *ssock;
size_t copied = 0;
int ret = 0;
long timeo;
@@ -1709,12 +1740,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
- msg->msg_flags & MSG_FASTOPEN))) {
+ if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)) {
int copied_syn = 0;
- ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
+ ret = mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn);
copied += copied_syn;
if (ret == -EINPROGRESS && copied_syn > 0)
goto out;
@@ -2315,7 +2344,26 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
unsigned int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- bool need_push, dispose_it;
+ bool dispose_it, need_push = false;
+
+ /* If the first subflow moved to a close state before accept, e.g. due
+ * to an incoming reset, mptcp does one of two things:
+ * - if either the subflow or the msk is dead, destroy the context
+ * (the subflow socket is deleted by inet_child_forget) and the msk
+ * - otherwise do nothing for now and take action at accept and/or
+ * listener shutdown - user-space must be able to accept() the closed
+ * socket.
+ */
+ if (msk->in_accept_queue && msk->first == ssk) {
+ if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
+ return;
+
+ /* ensure later check in mptcp_worker() will dispose the msk */
+ sock_set_flag(sk, SOCK_DEAD);
+ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ mptcp_subflow_drop_ctx(ssk);
+ goto out_release;
+ }
dispose_it = !msk->subflow || ssk != msk->subflow->sk;
if (dispose_it)
@@ -2351,28 +2399,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (!inet_csk(ssk)->icsk_ulp_ops) {
WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
kfree_rcu(subflow, rcu);
- } else if (msk->in_accept_queue && msk->first == ssk) {
- /* if the first subflow moved to a close state, e.g. due to
- * incoming reset and we reach here before inet_child_forget()
- * the TCP stack could later try to close it via
- * inet_csk_listen_stop(), or deliver it to the user space via
- * accept().
- * We can't delete the subflow - or risk a double free - nor let
- * the msk survive - or will be leaked in the non accept scenario:
- * fallback and let TCP cope with the subflow cleanup.
- */
- WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
- mptcp_subflow_drop_ctx(ssk);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
- if (ssk->sk_state == TCP_LISTEN)
+ if (ssk->sk_state == TCP_LISTEN) {
+ tcp_set_state(ssk, TCP_CLOSE);
+ mptcp_subflow_queue_clean(sk, ssk);
+ inet_csk_listen_stop(ssk);
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
+ }
__tcp_close(ssk, 0);
/* close acquired an extra ref */
__sock_put(ssk);
}
+
+out_release:
release_sock(ssk);
sock_put(ssk);
@@ -2427,21 +2469,14 @@ static void __mptcp_close_subflow(struct sock *sk)
mptcp_close_ssk(sk, ssk, subflow);
}
- /* if the MPC subflow has been closed before the msk is accepted,
- * msk will never be accept-ed, close it now
- */
- if (!msk->first && msk->in_accept_queue) {
- sock_set_flag(sk, SOCK_DEAD);
- inet_sk_state_store(sk, TCP_CLOSE);
- }
}
-static bool mptcp_check_close_timeout(const struct sock *sk)
+static bool mptcp_should_close(const struct sock *sk)
{
s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
struct mptcp_subflow_context *subflow;
- if (delta >= TCP_TIMEWAIT_LEN)
+ if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
return true;
/* if all subflows are in closed status don't bother with additional
@@ -2626,7 +2661,7 @@ static void mptcp_worker(struct work_struct *work)
lock_sock(sk);
state = sk->sk_state;
- if (unlikely(state == TCP_CLOSE))
+ if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto unlock;
mptcp_check_data_fin_ack(sk);
@@ -2649,7 +2684,7 @@ static void mptcp_worker(struct work_struct *work)
* even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD)) {
- if (mptcp_check_close_timeout(sk)) {
+ if (mptcp_should_close(sk)) {
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
}
@@ -2728,10 +2763,6 @@ static int mptcp_init_sock(struct sock *sk)
if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net))
return -ENOMEM;
- ret = __mptcp_socket_create(mptcp_sk(sk));
- if (ret)
- return ret;
-
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
@@ -2895,6 +2926,14 @@ static void __mptcp_destroy_sock(struct sock *sk)
sock_put(sk);
}
+void __mptcp_unaccepted_force_close(struct sock *sk)
+{
+ sock_set_flag(sk, SOCK_DEAD);
+ inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_do_fastclose(sk);
+ __mptcp_destroy_sock(sk);
+}
+
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
{
/* Concurrent splices from sk_receive_queue into receive_queue will
@@ -2928,10 +2967,13 @@ bool __mptcp_close(struct sock *sk, long timeout)
goto cleanup;
}
- if (mptcp_check_readable(msk)) {
- /* the msk has read data, do the MPTCP equivalent of TCP reset */
+ if (mptcp_check_readable(msk) || timeout < 0) {
+ /* If the msk has read data, or the caller explicitly asks for it,
+ * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
+ */
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
+ timeout = 0;
} else if (mptcp_close_state(sk)) {
__mptcp_wr_shutdown(sk);
}
@@ -3143,7 +3185,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
struct socket *listener;
struct sock *newsk;
- listener = __mptcp_nmpc_socket(msk);
+ listener = msk->subflow;
if (WARN_ON_ONCE(!listener)) {
*err = -EINVAL;
return NULL;
@@ -3363,7 +3405,7 @@ static int mptcp_get_port(struct sock *sk, unsigned short snum)
struct mptcp_sock *msk = mptcp_sk(sk);
struct socket *ssock;
- ssock = __mptcp_nmpc_socket(msk);
+ ssock = msk->subflow;
pr_debug("msk=%p, subflow=%p", msk, ssock);
if (WARN_ON_ONCE(!ssock))
return -EINVAL;
@@ -3551,8 +3593,8 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int err = -EINVAL;
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock)
- return -EINVAL;
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_SYN_SENT);
@@ -3640,8 +3682,8 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
lock_sock(sock->sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
- err = -EINVAL;
+ if (IS_ERR(ssock)) {
+ err = PTR_ERR(ssock);
goto unlock;
}
@@ -3677,8 +3719,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
- err = -EINVAL;
+ if (IS_ERR(ssock)) {
+ err = PTR_ERR(ssock);
goto unlock;
}
@@ -3709,7 +3751,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
pr_debug("msk=%p", msk);
- ssock = __mptcp_nmpc_socket(msk);
+ /* buggy applications can call accept on socket states other than LISTEN
+ * but no need to allocate the first subflow just to error out.
+ */
+ ssock = msk->subflow;
if (!ssock)
return -EINVAL;
@@ -3733,6 +3778,18 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk->sk_socket)
mptcp_sock_graft(ssk, newsock);
}
+
+ /* Do late cleanup for the first subflow as necessary. Also
+ * deal with bad peers not doing a complete shutdown.
+ */
+ if (msk->first &&
+ unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
+ __mptcp_close_ssk(newsk, msk->first,
+ mptcp_subflow_ctx(msk->first), 0);
+ if (unlikely(list_empty(&msk->conn_list)))
+ inet_sk_state_store(newsk, TCP_CLOSE);
+ }
+
release_sock(newsk);
}
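
A pattern worth noting for the whole MPTCP part of this series:
__mptcp_nmpc_socket() now reports failure through the pointer itself, so
every caller switches from a NULL test to the standard ERR_PTR idiom:

struct socket *ssock;

ssock = __mptcp_nmpc_socket(msk);
if (IS_ERR(ssock))
	return PTR_ERR(ssock);	/* e.g. -EINVAL on a bad socket state */
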
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index e1310bc113be..2d7b2c80a164 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -626,10 +626,12 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
+void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
-struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
+struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
+void __mptcp_unaccepted_force_close(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
@@ -782,7 +784,7 @@ bool mptcp_pm_addr_families_match(const struct sock *sk,
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
-void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
+void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk);
@@ -830,8 +832,6 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *
void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
struct list_head *rm_list);
-int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry);
void mptcp_free_local_addr_list(struct mptcp_sock *msk);
int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info);
int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index b655cebda0f3..d4258869ac48 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -301,9 +301,9 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_BINDTOIFINDEX:
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
+ if (IS_ERR(ssock)) {
release_sock(sk);
- return -EINVAL;
+ return PTR_ERR(ssock);
}
ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
@@ -396,9 +396,9 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
case IPV6_FREEBIND:
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
+ if (IS_ERR(ssock)) {
release_sock(sk);
- return -EINVAL;
+ return PTR_ERR(ssock);
}
ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
@@ -693,9 +693,9 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
+ if (IS_ERR(ssock)) {
release_sock(sk);
- return -EINVAL;
+ return PTR_ERR(ssock);
}
issk = inet_sk(ssock->sk);
@@ -762,13 +762,15 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
{
struct sock *sk = (struct sock *)msk;
struct socket *sock;
- int ret = -EINVAL;
+ int ret;
/* Limit to first subflow, before the connection establishment */
lock_sock(sk);
sock = __mptcp_nmpc_socket(msk);
- if (!sock)
+ if (IS_ERR(sock)) {
+ ret = PTR_ERR(sock);
goto unlock;
+ }
ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
@@ -861,7 +863,7 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
{
struct sock *sk = (struct sock *)msk;
struct socket *ssock;
- int ret = -EINVAL;
+ int ret;
struct sock *ssk;
lock_sock(sk);
@@ -872,8 +874,10 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
}
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock)
+ if (IS_ERR(ssock)) {
+ ret = PTR_ERR(ssock);
goto out;
+ }
ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 33dd27765116..ba065b66551a 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -408,9 +408,8 @@ void mptcp_subflow_reset(struct sock *ssk)
tcp_send_active_reset(ssk, GFP_ATOMIC);
tcp_done(ssk);
- if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
- schedule_work(&mptcp_sk(sk)->work))
- return; /* worker will put sk for us */
+ if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags))
+ mptcp_schedule_work(sk);
sock_put(sk);
}
@@ -716,9 +715,12 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
if (!ctx)
return;
- subflow_ulp_fallback(ssk, ctx);
- if (ctx->conn)
- sock_put(ctx->conn);
+ list_del(&mptcp_subflow_ctx(ssk)->node);
+ if (inet_csk(ssk)->icsk_ulp_ops) {
+ subflow_ulp_fallback(ssk, ctx);
+ if (ctx->conn)
+ sock_put(ctx->conn);
+ }
kfree_rcu(ctx, rcu);
}
@@ -851,7 +853,7 @@ create_child:
*/
if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
mptcp_subflow_fully_established(ctx, &mp_opt);
- mptcp_pm_fully_established(owner, child, GFP_ATOMIC);
+ mptcp_pm_fully_established(owner, child);
ctx->pm_notified = 1;
}
} else if (ctx->mp_join) {
@@ -1101,8 +1103,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
skb_ext_del(skb, SKB_EXT_MPTCP);
return MAPPING_OK;
} else {
- if (updated && schedule_work(&msk->work))
- sock_hold((struct sock *)msk);
+ if (updated)
+ mptcp_schedule_work((struct sock *)msk);
return MAPPING_DATA_FIN;
}
@@ -1205,17 +1207,12 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
/* sched mptcp worker to remove the subflow if no more data is pending */
static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
{
- struct sock *sk = (struct sock *)msk;
-
if (likely(ssk->sk_state != TCP_CLOSE))
return;
if (skb_queue_empty(&ssk->sk_receive_queue) &&
- !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) {
- sock_hold(sk);
- if (!schedule_work(&msk->work))
- sock_put(sk);
- }
+ !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ mptcp_schedule_work((struct sock *)msk);
}
static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
@@ -1808,6 +1805,77 @@ static void subflow_state_change(struct sock *sk)
}
}
+void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
+{
+ struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
+ struct mptcp_sock *msk, *next, *head = NULL;
+ struct request_sock *req;
+ struct sock *sk;
+
+ /* build a list of all unaccepted mptcp sockets */
+ spin_lock_bh(&queue->rskq_lock);
+ for (req = queue->rskq_accept_head; req; req = req->dl_next) {
+ struct mptcp_subflow_context *subflow;
+ struct sock *ssk = req->sk;
+
+ if (!sk_is_mptcp(ssk))
+ continue;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ if (!subflow || !subflow->conn)
+ continue;
+
+ /* skip if already in list */
+ sk = subflow->conn;
+ msk = mptcp_sk(sk);
+ if (msk->dl_next || msk == head)
+ continue;
+
+ sock_hold(sk);
+ msk->dl_next = head;
+ head = msk;
+ }
+ spin_unlock_bh(&queue->rskq_lock);
+ if (!head)
+ return;
+
+ /* can't acquire the msk socket lock under the subflow one,
+ * or we would cause an ABBA deadlock
+ */
+ release_sock(listener_ssk);
+
+ for (msk = head; msk; msk = next) {
+ sk = (struct sock *)msk;
+
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+ next = msk->dl_next;
+ msk->dl_next = NULL;
+
+ __mptcp_unaccepted_force_close(sk);
+ release_sock(sk);
+
+ /* lockdep will report a false positive ABBA deadlock
+ * between cancel_work_sync and the listener socket.
+ * The involved locks belong to different sockets WRT
+ * the existing AB chain.
+ * Using a per socket key is problematic as key
+ * deregistration requires process context and must be
+ * performed at socket disposal time, in atomic
+ * context.
+ * Just tell lockdep to consider the listener socket
+ * released here.
+ */
+ mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
+ mptcp_cancel_work(sk);
+ mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+
+ sock_put(sk);
+ }
+
+ /* we are still under the listener msk socket lock */
+ lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+}
+
static int subflow_ulp_init(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
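
Several hunks above replace an open-coded sock_hold()/schedule_work()/
sock_put() dance with mptcp_schedule_work(). Judging from the removed call
sites, the helper centralizes roughly this reference discipline (a sketch,
not the exact body):

static void mptcp_schedule_work_sketch(struct sock *sk)
{
	sock_hold(sk);
	if (!schedule_work(&mptcp_sk(sk)->work))
		sock_put(sk);	/* already queued elsewhere: drop our ref */
}
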
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index cd99e6dc1f35..3f821b7ba646 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -192,8 +192,7 @@ BTF_ID(struct, nf_conn___init)
/* Check writes into `struct nf_conn` */
static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag)
+ int off, int size)
{
const struct btf_type *ncit, *nct, *t;
size_t end;
@@ -401,8 +400,6 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
*/
__bpf_kfunc void bpf_ct_release(struct nf_conn *nfct)
{
- if (!nfct)
- return;
nf_ct_put(nfct);
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6004d4b24451..e48ab8dfb541 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3447,6 +3447,64 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
return 0;
}
+int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ struct nft_ctx *pctx = (struct nft_ctx *)ctx;
+ const struct nft_data *data;
+ int err;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+ *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
+ return 0;
+
+ data = nft_set_ext_data(ext);
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ pctx->level++;
+ err = nft_chain_validate(ctx, data->verdict.chain);
+ if (err < 0)
+ return err;
+ pctx->level--;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+struct nft_set_elem_catchall {
+ struct list_head list;
+ struct rcu_head rcu;
+ void *elem;
+};
+
+int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+ int ret = 0;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ ret = nft_setelem_validate(ctx, set, NULL, &elem);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
const struct nft_chain *chain,
const struct nlattr *nla);
@@ -4759,12 +4817,6 @@ err_set_name:
return err;
}
-struct nft_set_elem_catchall {
- struct list_head list;
- struct rcu_head rcu;
- void *elem;
-};
-
static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
struct nft_set *set)
{
@@ -6056,7 +6108,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
return err;
- if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
+ if (((flags & NFT_SET_ELEM_CATCHALL) && nla[NFTA_SET_ELEM_KEY]) ||
+ (!(flags & NFT_SET_ELEM_CATCHALL) && !nla[NFTA_SET_ELEM_KEY]))
return -EINVAL;
if (flags != 0) {
@@ -7052,7 +7105,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
}
if (nla[NFTA_OBJ_USERDATA]) {
- obj->udata = nla_memdup(nla[NFTA_OBJ_USERDATA], GFP_KERNEL);
+ obj->udata = nla_memdup(nla[NFTA_OBJ_USERDATA], GFP_KERNEL_ACCOUNT);
if (obj->udata == NULL)
goto err_userdata;
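
The rewritten element check makes the key attribute and the catchall flag
mutually exclusive while still requiring exactly one of them:

/*   NFT_SET_ELEM_CATCHALL   NFTA_SET_ELEM_KEY   result
 *            0                    absent        -EINVAL (no key at all)
 *            0                    present       ok
 *            1                    absent        ok
 *            1                    present       -EINVAL (catchall + key)
 */
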
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index cae5a6724163..cecf8ab90e58 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -199,37 +199,6 @@ nla_put_failure:
return -1;
}
-static int nft_lookup_validate_setelem(const struct nft_ctx *ctx,
- struct nft_set *set,
- const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
-{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
- struct nft_ctx *pctx = (struct nft_ctx *)ctx;
- const struct nft_data *data;
- int err;
-
- if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
- *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
- return 0;
-
- data = nft_set_ext_data(ext);
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- pctx->level++;
- err = nft_chain_validate(ctx, data->verdict.chain);
- if (err < 0)
- return err;
- pctx->level--;
- break;
- default:
- break;
- }
-
- return 0;
-}
-
static int nft_lookup_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **d)
@@ -245,9 +214,12 @@ static int nft_lookup_validate(const struct nft_ctx *ctx,
iter.skip = 0;
iter.count = 0;
iter.err = 0;
- iter.fn = nft_lookup_validate_setelem;
+ iter.fn = nft_setelem_validate;
priv->set->ops->walk(ctx, priv->set, &iter);
+ if (!iter.err)
+ iter.err = nft_set_catchall_validate(ctx, priv->set);
+
if (iter.err < 0)
return iter.err;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 877f1da1a8ac..1db4742e443d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1952,7 +1952,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
struct scm_cookie scm;
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
- size_t copied;
+ size_t copied, max_recvmsg_len;
struct sk_buff *skb, *data_skb;
int err, ret;
@@ -1985,9 +1985,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
#endif
/* Record the max length of recvmsg() calls for future allocations */
- nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);
- nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len,
- SKB_WITH_OVERHEAD(32768));
+ max_recvmsg_len = max(READ_ONCE(nlk->max_recvmsg_len), len);
+ max_recvmsg_len = min_t(size_t, max_recvmsg_len,
+ SKB_WITH_OVERHEAD(32768));
+ WRITE_ONCE(nlk->max_recvmsg_len, max_recvmsg_len);
copied = data_skb->len;
if (len < copied) {
@@ -2234,6 +2235,7 @@ static int netlink_dump(struct sock *sk)
struct netlink_ext_ack extack = {};
struct netlink_callback *cb;
struct sk_buff *skb = NULL;
+ size_t max_recvmsg_len;
struct module *module;
int err = -ENOBUFS;
int alloc_min_size;
@@ -2256,8 +2258,9 @@ static int netlink_dump(struct sock *sk)
cb = &nlk->cb;
alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
- if (alloc_min_size < nlk->max_recvmsg_len) {
- alloc_size = nlk->max_recvmsg_len;
+ max_recvmsg_len = READ_ONCE(nlk->max_recvmsg_len);
+ if (alloc_min_size < max_recvmsg_len) {
+ alloc_size = max_recvmsg_len;
skb = alloc_skb(alloc_size,
(GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
__GFP_NOWARN | __GFP_NORETRY);
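
Both hunks in this file follow the same KCSAN-friendly shape for a field
touched without the socket lock: load once into a local, compute, publish
once. A generic sketch of the pattern (helper name invented here):

static void update_watermark(size_t *field, size_t candidate, size_t cap)
{
	size_t v = max(READ_ONCE(*field), candidate);

	v = min(v, cap);
	WRITE_ONCE(*field, v);	/* pairs with lockless READ_ONCE() readers */
}
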
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ca3ebfdb3023..a8cf9a88758e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -913,7 +913,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
{
struct vport *vport = ovs_vport_rcu(dp, out_port);
- if (likely(vport)) {
+ if (likely(vport && netif_carrier_ok(vport->dev))) {
u16 mru = OVS_CB(skb)->mru;
u32 cutlen = OVS_CB(skb)->cutlen;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 568f8d76e3c1..6080c0db0814 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2090,18 +2090,18 @@ static unsigned int run_filter(struct sk_buff *skb,
}
static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
- size_t *len)
+ size_t *len, int vnet_hdr_sz)
{
- struct virtio_net_hdr vnet_hdr;
+ struct virtio_net_hdr_mrg_rxbuf vnet_hdr = { .num_buffers = 0 };
- if (*len < sizeof(vnet_hdr))
+ if (*len < vnet_hdr_sz)
return -EINVAL;
- *len -= sizeof(vnet_hdr);
+ *len -= vnet_hdr_sz;
- if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
+ if (virtio_net_hdr_from_skb(skb, (struct virtio_net_hdr *)&vnet_hdr, vio_le(), true, 0))
return -EINVAL;
- return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
+ return memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_sz);
}
/*
@@ -2250,7 +2250,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
__u32 ts_status;
bool is_drop_n_account = false;
unsigned int slot_id = 0;
- bool do_vnet = false;
+ int vnet_hdr_sz = 0;
/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
* We may add members to them until current aligned size without forcing
@@ -2308,10 +2308,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
netoff = TPACKET_ALIGN(po->tp_hdrlen +
(maclen < 16 ? 16 : maclen)) +
po->tp_reserve;
- if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) {
- netoff += sizeof(struct virtio_net_hdr);
- do_vnet = true;
- }
+ vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
+ if (vnet_hdr_sz)
+ netoff += vnet_hdr_sz;
macoff = netoff - maclen;
}
if (netoff > USHRT_MAX) {
@@ -2337,7 +2336,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
snaplen = po->rx_ring.frame_size - macoff;
if ((int)snaplen < 0) {
snaplen = 0;
- do_vnet = false;
+ vnet_hdr_sz = 0;
}
}
} else if (unlikely(macoff + snaplen >
@@ -2351,7 +2350,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (unlikely((int)snaplen < 0)) {
snaplen = 0;
macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
- do_vnet = false;
+ vnet_hdr_sz = 0;
}
}
spin_lock(&sk->sk_receive_queue.lock);
@@ -2367,7 +2366,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
__set_bit(slot_id, po->rx_ring.rx_owner_map);
}
- if (do_vnet &&
+ if (vnet_hdr_sz &&
virtio_net_hdr_from_skb(skb, h.raw + macoff -
sizeof(struct virtio_net_hdr),
vio_le(), true, 0)) {
@@ -2551,16 +2550,26 @@ static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
}
static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
- struct virtio_net_hdr *vnet_hdr)
+ struct virtio_net_hdr *vnet_hdr, int vnet_hdr_sz)
{
- if (*len < sizeof(*vnet_hdr))
+ int ret;
+
+ if (*len < vnet_hdr_sz)
return -EINVAL;
- *len -= sizeof(*vnet_hdr);
+ *len -= vnet_hdr_sz;
if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
return -EFAULT;
- return __packet_snd_vnet_parse(vnet_hdr, *len);
+ ret = __packet_snd_vnet_parse(vnet_hdr, *len);
+ if (ret)
+ return ret;
+
+ /* move iter to point to the start of mac header */
+ if (vnet_hdr_sz != sizeof(struct virtio_net_hdr))
+ iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(struct virtio_net_hdr));
+
+ return 0;
}
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
@@ -2722,6 +2731,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
void *ph;
DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
+ int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
unsigned char *addr = NULL;
int tp_len, size_max;
void *data;
@@ -2779,8 +2789,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
- if ((size_max > dev->mtu + reserve + VLAN_HLEN) &&
- !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR))
+ if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz)
size_max = dev->mtu + reserve + VLAN_HLEN;
reinit_completion(&po->skb_completion);
@@ -2809,10 +2818,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
status = TP_STATUS_SEND_REQUEST;
hlen = LL_RESERVED_SPACE(dev);
tlen = dev->needed_tailroom;
- if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) {
+ if (vnet_hdr_sz) {
vnet_hdr = data;
- data += sizeof(*vnet_hdr);
- tp_len -= sizeof(*vnet_hdr);
+ data += vnet_hdr_sz;
+ tp_len -= vnet_hdr_sz;
if (tp_len < 0 ||
__packet_snd_vnet_parse(vnet_hdr, tp_len)) {
tp_len = -EINVAL;
@@ -2837,7 +2846,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
addr, hlen, copylen, &sockc);
if (likely(tp_len >= 0) &&
tp_len > dev->mtu + reserve &&
- !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR) &&
+ !vnet_hdr_sz &&
!packet_extra_vlan_len_allowed(dev, skb))
tp_len = -EMSGSIZE;
@@ -2856,7 +2865,7 @@ tpacket_error:
}
}
- if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) {
+ if (vnet_hdr_sz) {
if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
tp_len = -EINVAL;
goto tpacket_error;
@@ -2946,7 +2955,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
struct virtio_net_hdr vnet_hdr = { 0 };
int offset = 0;
struct packet_sock *po = pkt_sk(sk);
- bool has_vnet_hdr = false;
+ int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
int hlen, tlen, linear;
int extra_len = 0;
@@ -2990,11 +2999,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (sock->type == SOCK_RAW)
reserve = dev->hard_header_len;
- if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) {
- err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
+ if (vnet_hdr_sz) {
+ err = packet_snd_vnet_parse(msg, &len, &vnet_hdr, vnet_hdr_sz);
if (err)
goto out_unlock;
- has_vnet_hdr = true;
}
if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -3064,11 +3072,11 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
packet_parse_headers(skb, sock);
- if (has_vnet_hdr) {
+ if (vnet_hdr_sz) {
err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
if (err)
goto out_free;
- len += sizeof(vnet_hdr);
+ len += vnet_hdr_sz;
virtio_net_hdr_set_proto(skb, &vnet_hdr);
}
@@ -3408,7 +3416,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
- int vnet_hdr_len = 0;
+ int vnet_hdr_len = READ_ONCE(pkt_sk(sk)->vnet_hdr_sz);
unsigned int origlen = 0;
err = -EINVAL;
@@ -3449,11 +3457,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
packet_rcv_try_clear_pressure(pkt_sk(sk));
- if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_HAS_VNET_HDR)) {
- err = packet_rcv_vnet(msg, skb, &len);
+ if (vnet_hdr_len) {
+ err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len);
if (err)
goto out_free;
- vnet_hdr_len = sizeof(struct virtio_net_hdr);
}
/* You lose any data beyond the buffer you gave. If it worries
@@ -3915,8 +3922,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
return 0;
}
case PACKET_VNET_HDR:
+ case PACKET_VNET_HDR_SZ:
{
- int val;
+ int val, hdr_len;
if (sock->type != SOCK_RAW)
return -EINVAL;
@@ -3925,11 +3933,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
+ if (optname == PACKET_VNET_HDR_SZ) {
+ if (val && val != sizeof(struct virtio_net_hdr) &&
+ val != sizeof(struct virtio_net_hdr_mrg_rxbuf))
+ return -EINVAL;
+ hdr_len = val;
+ } else {
+ hdr_len = val ? sizeof(struct virtio_net_hdr) : 0;
+ }
lock_sock(sk);
if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
ret = -EBUSY;
} else {
- packet_sock_flag_set(po, PACKET_SOCK_HAS_VNET_HDR, val);
+ WRITE_ONCE(po->vnet_hdr_sz, hdr_len);
ret = 0;
}
release_sock(sk);
@@ -4062,7 +4078,10 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV);
break;
case PACKET_VNET_HDR:
- val = packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR);
+ val = !!READ_ONCE(po->vnet_hdr_sz);
+ break;
+ case PACKET_VNET_HDR_SZ:
+ val = READ_ONCE(po->vnet_hdr_sz);
break;
case PACKET_VERSION:
val = po->tp_version;
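
PACKET_VNET_HDR_SZ only admits the two layouts from <linux/virtio_net.h>
(or 0 to disable the header); they differ by the trailing 16-bit
num_buffers field. An illustrative compile-time check of the sizes the
setsockopt handler compares against:

#include <linux/virtio_net.h>

static void packet_vnet_hdr_sz_sanity(void)
{
	BUILD_BUG_ON(sizeof(struct virtio_net_hdr) != 10);
	BUILD_BUG_ON(sizeof(struct virtio_net_hdr_mrg_rxbuf) != 12);
}
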
diff --git a/net/packet/diag.c b/net/packet/diag.c
index de4ced5cf3e8..d0c4eda4cdc6 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -27,7 +27,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
pinfo.pdi_flags |= PDI_AUXDATA;
if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV))
pinfo.pdi_flags |= PDI_ORIGDEV;
- if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR))
+ if (READ_ONCE(po->vnet_hdr_sz))
pinfo.pdi_flags |= PDI_VNETHDR;
if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS))
pinfo.pdi_flags |= PDI_LOSS;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 27930f69f368..63f4865202c1 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -118,6 +118,7 @@ struct packet_sock {
struct mutex pg_vec_lock;
unsigned long flags;
int ifindex; /* bound device */
+ u8 vnet_hdr_sz;
__be16 num;
struct packet_rollover *rollover;
struct packet_mclist *mclist;
@@ -139,7 +140,6 @@ enum packet_sock_flags {
PACKET_SOCK_AUXDATA,
PACKET_SOCK_TX_HAS_OFF,
PACKET_SOCK_TP_LOSS,
- PACKET_SOCK_HAS_VNET_HDR,
PACKET_SOCK_RUNNING,
PACKET_SOCK_PRESSURE,
PACKET_SOCK_QDISC_BYPASS,
diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c
index 5c2fb992803b..76f0434d3d06 100644
--- a/net/qrtr/af_qrtr.c
+++ b/net/qrtr/af_qrtr.c
@@ -393,10 +393,12 @@ static struct qrtr_node *qrtr_node_lookup(unsigned int nid)
struct qrtr_node *node;
unsigned long flags;
+ mutex_lock(&qrtr_node_lock);
spin_lock_irqsave(&qrtr_nodes_lock, flags);
node = radix_tree_lookup(&qrtr_nodes, nid);
node = qrtr_node_acquire(node);
spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
+ mutex_unlock(&qrtr_node_lock);
return node;
}
@@ -496,6 +498,11 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
if (!size || len != ALIGN(size, 4) + hdrlen)
goto err;
+ if ((cb->type == QRTR_TYPE_NEW_SERVER ||
+ cb->type == QRTR_TYPE_RESUME_TX) &&
+ size < sizeof(struct qrtr_ctrl_pkt))
+ goto err;
+
if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA &&
cb->type != QRTR_TYPE_RESUME_TX)
goto err;
@@ -508,9 +515,6 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
/* Remote node endpoint can bridge other distant nodes */
const struct qrtr_ctrl_pkt *pkt;
- if (size < sizeof(*pkt))
- goto err;
-
pkt = data + hdrlen;
qrtr_node_assign(node, le32_to_cpu(pkt->server.node));
}
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 722936f7dd98..0f25a386138c 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -274,7 +274,7 @@ err:
return NULL;
}
-static int server_del(struct qrtr_node *node, unsigned int port)
+static int server_del(struct qrtr_node *node, unsigned int port, bool bcast)
{
struct qrtr_lookup *lookup;
struct qrtr_server *srv;
@@ -287,7 +287,7 @@ static int server_del(struct qrtr_node *node, unsigned int port)
radix_tree_delete(&node->servers, port);
/* Broadcast the removal of local servers */
- if (srv->node == qrtr_ns.local_node)
+ if (srv->node == qrtr_ns.local_node && bcast)
service_announce_del(&qrtr_ns.bcast_sq, srv);
/* Announce the service's disappearance to observers */
@@ -373,7 +373,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
}
slot = radix_tree_iter_resume(slot, &iter);
rcu_read_unlock();
- server_del(node, srv->port);
+ server_del(node, srv->port, true);
rcu_read_lock();
}
rcu_read_unlock();
@@ -459,10 +459,13 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
kfree(lookup);
}
- /* Remove the server belonging to this port */
+ /* Remove the server belonging to this port but don't broadcast
+ * DEL_SERVER. Neighbours would've already removed the server belonging
+ * to this port due to the DEL_CLIENT broadcast from qrtr_port_remove().
+ */
node = node_get(node_id);
if (node)
- server_del(node, port);
+ server_del(node, port, false);
/* Advertise the removal of this client to all local servers */
local_node = node_get(qrtr_ns.local_node);
@@ -567,7 +570,7 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from,
if (!node)
return -ENOENT;
- return server_del(node, port);
+ return server_del(node, port, true);
}
static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 95e9304024b7..8ed285023a40 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -376,8 +376,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
sctph->checksum = sctp_compute_cksum(skb,
skb_network_offset(skb) + ihl);
- skb->ip_summed = CHECKSUM_NONE;
- skb->csum_not_inet = 0;
+ skb_reset_csum_not_inet(skb);
return 1;
}
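
A minimal sketch of what the consolidated helper is assumed to do, i.e. wrap exactly the two assignments removed above (the real definition lives in include/linux/skbuff.h and may differ in detail, e.g. config guards):

    static inline void skb_reset_csum_not_inet(struct sk_buff *skb)
    {
            skb->ip_summed = CHECKSUM_NONE;
            skb->csum_not_inet = 0;
    }
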
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2a6b6be0811b..35785a36c802 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3235,6 +3235,9 @@ int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action,
err_miss_alloc:
tcf_exts_destroy(exts);
+#ifdef CONFIG_NET_CLS_ACT
+ exts->actions = NULL;
+#endif
return err;
}
EXPORT_SYMBOL(tcf_exts_init_ex);
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index fdd6a6575a54..dc5a0ff50b14 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -5,6 +5,7 @@
* Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
*/
+#include <linux/ethtool_netlink.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -27,15 +28,19 @@ struct mqprio_sched {
u32 flags;
u64 min_rate[TC_QOPT_MAX_QUEUE];
u64 max_rate[TC_QOPT_MAX_QUEUE];
+ u32 fp[TC_QOPT_MAX_QUEUE];
};
static int mqprio_enable_offload(struct Qdisc *sch,
const struct tc_mqprio_qopt *qopt,
struct netlink_ext_ack *extack)
{
- struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
struct mqprio_sched *priv = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct tc_mqprio_qopt_offload mqprio = {
+ .qopt = *qopt,
+ .extack = extack,
+ };
int err, i;
switch (priv->mode) {
@@ -60,6 +65,8 @@ static int mqprio_enable_offload(struct Qdisc *sch,
return -EINVAL;
}
+ mqprio_fp_to_offload(priv->fp, &mqprio);
+
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO,
&mqprio);
if (err)
@@ -133,48 +140,131 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
/* If ndo_setup_tc is not present then hardware doesn't support offload
* and we should return an error.
*/
- if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
+ if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) {
+ NL_SET_ERR_MSG(extack,
+ "Device does not support hardware offload");
return -EINVAL;
+ }
return 0;
}
+static const struct
+nla_policy mqprio_tc_entry_policy[TCA_MQPRIO_TC_ENTRY_MAX + 1] = {
+ [TCA_MQPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32,
+ TC_QOPT_MAX_QUEUE),
+ [TCA_MQPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32,
+ TC_FP_EXPRESS,
+ TC_FP_PREEMPTIBLE),
+};
+
static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = {
[TCA_MQPRIO_MODE] = { .len = sizeof(u16) },
[TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) },
[TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED },
[TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED },
+ [TCA_MQPRIO_TC_ENTRY] = { .type = NLA_NESTED },
};
-static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
- const struct nla_policy *policy, int len)
+static int mqprio_parse_tc_entry(u32 fp[TC_QOPT_MAX_QUEUE],
+ struct nlattr *opt,
+ unsigned long *seen_tcs,
+ struct netlink_ext_ack *extack)
{
- int nested_len = nla_len(nla) - NLA_ALIGN(len);
+ struct nlattr *tb[TCA_MQPRIO_TC_ENTRY_MAX + 1];
+ int err, tc;
+
+ err = nla_parse_nested(tb, TCA_MQPRIO_TC_ENTRY_MAX, opt,
+ mqprio_tc_entry_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (NL_REQ_ATTR_CHECK(extack, opt, tb, TCA_MQPRIO_TC_ENTRY_INDEX)) {
+ NL_SET_ERR_MSG(extack, "TC entry index missing");
+ return -EINVAL;
+ }
+
+ tc = nla_get_u32(tb[TCA_MQPRIO_TC_ENTRY_INDEX]);
+ if (*seen_tcs & BIT(tc)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_TC_ENTRY_INDEX],
+ "Duplicate tc entry");
+ return -EINVAL;
+ }
+
+ *seen_tcs |= BIT(tc);
- if (nested_len >= nla_attr_size(0))
- return nla_parse_deprecated(tb, maxtype,
- nla_data(nla) + NLA_ALIGN(len),
- nested_len, policy, NULL);
+ if (tb[TCA_MQPRIO_TC_ENTRY_FP])
+ fp[tc] = nla_get_u32(tb[TCA_MQPRIO_TC_ENTRY_FP]);
- memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
return 0;
}
+static int mqprio_parse_tc_entries(struct Qdisc *sch, struct nlattr *nlattr_opt,
+ int nlattr_opt_len,
+ struct netlink_ext_ack *extack)
+{
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ bool have_preemption = false;
+ unsigned long seen_tcs = 0;
+ u32 fp[TC_QOPT_MAX_QUEUE];
+ struct nlattr *n;
+ int tc, rem;
+ int err = 0;
+
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
+ fp[tc] = priv->fp[tc];
+
+ nla_for_each_attr(n, nlattr_opt, nlattr_opt_len, rem) {
+ if (nla_type(n) != TCA_MQPRIO_TC_ENTRY)
+ continue;
+
+ err = mqprio_parse_tc_entry(fp, n, &seen_tcs, extack);
+ if (err)
+ goto out;
+ }
+
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
+ priv->fp[tc] = fp[tc];
+ if (fp[tc] == TC_FP_PREEMPTIBLE)
+ have_preemption = true;
+ }
+
+ if (have_preemption && !ethtool_dev_mm_supported(dev)) {
+ NL_SET_ERR_MSG(extack, "Device does not support preemption");
+ return -EOPNOTSUPP;
+ }
+out:
+ return err;
+}
+
+/* Parse the other netlink attributes that represent the payload of
+ * TCA_OPTIONS, which are appended right after struct tc_mqprio_qopt.
+ */
static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
- struct nlattr *opt)
+ struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
+ struct nlattr *nlattr_opt = nla_data(opt) + NLA_ALIGN(sizeof(*qopt));
+ int nlattr_opt_len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
struct mqprio_sched *priv = qdisc_priv(sch);
- struct nlattr *tb[TCA_MQPRIO_MAX + 1];
+ struct nlattr *tb[TCA_MQPRIO_MAX + 1] = {};
struct nlattr *attr;
int i, rem, err;
- err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
- sizeof(*qopt));
- if (err < 0)
- return err;
+ if (nlattr_opt_len >= nla_attr_size(0)) {
+ err = nla_parse_deprecated(tb, TCA_MQPRIO_MAX, nlattr_opt,
+ nlattr_opt_len, mqprio_policy,
+ NULL);
+ if (err < 0)
+ return err;
+ }
- if (!qopt->hw)
+ if (!qopt->hw) {
+ NL_SET_ERR_MSG(extack,
+ "mqprio TCA_OPTIONS can only contain netlink attributes in hardware mode");
return -EINVAL;
+ }
if (tb[TCA_MQPRIO_MODE]) {
priv->flags |= TC_MQPRIO_F_MODE;
@@ -187,13 +277,19 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
}
if (tb[TCA_MQPRIO_MIN_RATE64]) {
- if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MIN_RATE64],
+ "min_rate accepted only when shaper is in bw_rlimit mode");
return -EINVAL;
+ }
i = 0;
nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
rem) {
- if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
+ if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Attribute type expected to be TCA_MQPRIO_MIN_RATE64");
return -EINVAL;
+ }
if (i >= qopt->num_tc)
break;
priv->min_rate[i] = nla_get_u64(attr);
@@ -203,13 +299,19 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
}
if (tb[TCA_MQPRIO_MAX_RATE64]) {
- if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MAX_RATE64],
+ "max_rate accepted only when shaper is in bw_rlimit mode");
return -EINVAL;
+ }
i = 0;
nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
rem) {
- if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
+ if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Attribute type expected to be TCA_MQPRIO_MAX_RATE64");
return -EINVAL;
+ }
if (i >= qopt->num_tc)
break;
priv->max_rate[i] = nla_get_u64(attr);
@@ -218,6 +320,13 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
priv->flags |= TC_MQPRIO_F_MAX_RATE;
}
+ if (tb[TCA_MQPRIO_TC_ENTRY]) {
+ err = mqprio_parse_tc_entries(sch, nlattr_opt, nlattr_opt_len,
+ extack);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -231,7 +340,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
int i, err = -EOPNOTSUPP;
struct tc_mqprio_qopt *qopt = NULL;
struct tc_mqprio_caps caps;
- int len;
+ int len, tc;
BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
@@ -249,6 +358,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
if (!opt || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
+ priv->fp[tc] = TC_FP_EXPRESS;
+
qdisc_offload_query_caps(dev, TC_SETUP_QDISC_MQPRIO,
&caps, sizeof(caps));
@@ -258,7 +370,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
if (len > 0) {
- err = mqprio_parse_nlattr(sch, qopt, opt);
+ err = mqprio_parse_nlattr(sch, qopt, opt, extack);
if (err)
return err;
}
@@ -399,6 +511,33 @@ nla_put_failure:
return -1;
}
+static int mqprio_dump_tc_entries(struct mqprio_sched *priv,
+ struct sk_buff *skb)
+{
+ struct nlattr *n;
+ int tc;
+
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
+ n = nla_nest_start(skb, TCA_MQPRIO_TC_ENTRY);
+ if (!n)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, TCA_MQPRIO_TC_ENTRY_INDEX, tc))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_MQPRIO_TC_ENTRY_FP, priv->fp[tc]))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, n);
+ }
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, n);
+ return -EMSGSIZE;
+}
+
static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct net_device *dev = qdisc_dev(sch);
@@ -449,6 +588,9 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
(dump_rates(priv, &opt, skb) != 0))
goto nla_put_failure;
+ if (mqprio_dump_tc_entries(priv, skb))
+ goto nla_put_failure;
+
return nla_nest_end(skb, nla);
nla_put_failure:
nlmsg_trim(skb, nla);
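
On the driver side, the new per-TC frame preemption state arrives as the preemptible_tcs bitmask filled in by mqprio_fp_to_offload(). A hypothetical ndo_setup_tc() fragment consuming it (the foo_* names are illustrative, not an existing driver API):

    static int foo_setup_mqprio(struct net_device *dev,
                                struct tc_mqprio_qopt_offload *mqprio)
    {
            int tc;

            for (tc = 0; tc < mqprio->qopt.num_tc; tc++) {
                    bool preemptible = mqprio->preemptible_tcs & BIT(tc);

                    /* Program the MAC merge layer: express vs preemptible */
                    foo_hw_set_tc_preemptible(dev, tc, preemptible);
            }
            return 0;
    }
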
diff --git a/net/sched/sch_mqprio_lib.c b/net/sched/sch_mqprio_lib.c
index c58a533b8ec5..83b3793c4012 100644
--- a/net/sched/sch_mqprio_lib.c
+++ b/net/sched/sch_mqprio_lib.c
@@ -114,4 +114,18 @@ void mqprio_qopt_reconstruct(struct net_device *dev, struct tc_mqprio_qopt *qopt
}
EXPORT_SYMBOL_GPL(mqprio_qopt_reconstruct);
+void mqprio_fp_to_offload(u32 fp[TC_QOPT_MAX_QUEUE],
+ struct tc_mqprio_qopt_offload *mqprio)
+{
+ unsigned long preemptible_tcs = 0;
+ int tc;
+
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
+ if (fp[tc] == TC_FP_PREEMPTIBLE)
+ preemptible_tcs |= BIT(tc);
+
+ mqprio->preemptible_tcs = preemptible_tcs;
+}
+EXPORT_SYMBOL_GPL(mqprio_fp_to_offload);
+
MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_mqprio_lib.h b/net/sched/sch_mqprio_lib.h
index 63f725ab8761..079f597072e3 100644
--- a/net/sched/sch_mqprio_lib.h
+++ b/net/sched/sch_mqprio_lib.h
@@ -14,5 +14,7 @@ int mqprio_validate_qopt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
struct netlink_ext_ack *extack);
void mqprio_qopt_reconstruct(struct net_device *dev,
struct tc_mqprio_qopt *qopt);
+void mqprio_fp_to_offload(u32 fp[TC_QOPT_MAX_QUEUE],
+ struct tc_mqprio_qopt_offload *mqprio);
#endif
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index cf5ebe43b3b4..02098a02943e 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -421,15 +421,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
} else
weight = 1;
- if (tb[TCA_QFQ_LMAX]) {
+ if (tb[TCA_QFQ_LMAX])
lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
- if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
- pr_notice("qfq: invalid max length %u\n", lmax);
- return -EINVAL;
- }
- } else
+ else
lmax = psched_mtu(qdisc_dev(sch));
+ if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
+ pr_notice("qfq: invalid max length %u\n", lmax);
+ return -EINVAL;
+ }
+
inv_w = ONE_FP / weight;
weight = ONE_FP / inv_w;
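
Worth spelling out why the lmax check moved: the default path was previously unvalidated. A worked case, assuming QFQ_MTU_SHIFT is 16 as defined in this file:

    /* Suppose a device with a large MTU, e.g. psched_mtu() returning
     * 66000. Old flow: TCA_QFQ_LMAX absent -> lmax = 66000 with no range
     * check, although 66000 > (1UL << 16) = 65536 exceeds what the
     * scheduler's fixed-point math can represent. New flow: the single
     * check after the if/else rejects it with -EINVAL regardless of
     * whether lmax came from the attribute or from psched_mtu().
     */
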
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 1f469861eae3..76db9a10ef50 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -7,6 +7,7 @@
*/
#include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -96,6 +97,7 @@ struct taprio_sched {
struct list_head taprio_list;
int cur_txq[TC_MAX_QUEUE];
u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
+ u32 fp[TC_QOPT_MAX_QUEUE]; /* only for dump and offloading */
u32 txtime_delay;
};
@@ -1002,6 +1004,9 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
[TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 },
[TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 },
+ [TCA_TAPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32,
+ TC_FP_EXPRESS,
+ TC_FP_PREEMPTIBLE),
};
static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
@@ -1520,22 +1525,31 @@ static int taprio_enable_offload(struct net_device *dev,
return -ENOMEM;
}
offload->enable = 1;
+ offload->extack = extack;
mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
+ offload->mqprio.extack = extack;
taprio_sched_to_offload(dev, sched, offload, &caps);
+ mqprio_fp_to_offload(q->fp, &offload->mqprio);
for (tc = 0; tc < TC_MAX_QUEUE; tc++)
offload->max_sdu[tc] = q->max_sdu[tc];
err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
if (err < 0) {
- NL_SET_ERR_MSG(extack,
- "Device failed to setup taprio offload");
+ NL_SET_ERR_MSG_WEAK(extack,
+ "Device failed to setup taprio offload");
goto done;
}
q->offloaded = true;
done:
+ /* The offload structure may linger around via a reference taken by the
+ * device driver, so clear the netlink extack pointers so that the
+ * driver isn't tempted to dereference data which has stopped being valid.
+ */
+ offload->extack = NULL;
+ offload->mqprio.extack = NULL;
taprio_offload_free(offload);
return err;
@@ -1663,13 +1677,14 @@ out:
static int taprio_parse_tc_entry(struct Qdisc *sch,
struct nlattr *opt,
u32 max_sdu[TC_QOPT_MAX_QUEUE],
+ u32 fp[TC_QOPT_MAX_QUEUE],
unsigned long *seen_tcs,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { };
struct net_device *dev = qdisc_dev(sch);
- u32 val = 0;
int err, tc;
+ u32 val;
err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt,
taprio_tc_policy, extack);
@@ -1694,15 +1709,18 @@ static int taprio_parse_tc_entry(struct Qdisc *sch,
*seen_tcs |= BIT(tc);
- if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU])
+ if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) {
val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]);
+ if (val > dev->max_mtu) {
+ NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
+ return -ERANGE;
+ }
- if (val > dev->max_mtu) {
- NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
- return -ERANGE;
+ max_sdu[tc] = val;
}
- max_sdu[tc] = val;
+ if (tb[TCA_TAPRIO_TC_ENTRY_FP])
+ fp[tc] = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_FP]);
return 0;
}
@@ -1712,29 +1730,51 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
struct netlink_ext_ack *extack)
{
struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
u32 max_sdu[TC_QOPT_MAX_QUEUE];
+ bool have_preemption = false;
unsigned long seen_tcs = 0;
+ u32 fp[TC_QOPT_MAX_QUEUE];
struct nlattr *n;
int tc, rem;
int err = 0;
- for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
max_sdu[tc] = q->max_sdu[tc];
+ fp[tc] = q->fp[tc];
+ }
nla_for_each_nested(n, opt, rem) {
if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
continue;
- err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs,
+ err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs,
extack);
if (err)
- goto out;
+ return err;
}
- for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
q->max_sdu[tc] = max_sdu[tc];
+ q->fp[tc] = fp[tc];
+ if (fp[tc] != TC_FP_EXPRESS)
+ have_preemption = true;
+ }
+
+ if (have_preemption) {
+ if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+ NL_SET_ERR_MSG(extack,
+ "Preemption only supported with full offload");
+ return -EOPNOTSUPP;
+ }
+
+ if (!ethtool_dev_mm_supported(dev)) {
+ NL_SET_ERR_MSG(extack,
+ "Device does not support preemption");
+ return -EOPNOTSUPP;
+ }
+ }
-out:
return err;
}
@@ -2015,7 +2055,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- int i;
+ int i, tc;
spin_lock_init(&q->current_entry_lock);
@@ -2072,6 +2112,9 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
q->qdiscs[i] = qdisc;
}
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
+ q->fp[tc] = TC_FP_EXPRESS;
+
taprio_detect_broken_mqprio(q);
return taprio_change(sch, opt, extack);
@@ -2215,6 +2258,7 @@ error_nest:
}
static int taprio_dump_tc_entries(struct sk_buff *skb,
+ struct taprio_sched *q,
struct sched_gate_list *sched)
{
struct nlattr *n;
@@ -2232,6 +2276,9 @@ static int taprio_dump_tc_entries(struct sk_buff *skb,
sched->max_sdu[tc]))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_FP, q->fp[tc]))
+ goto nla_put_failure;
+
nla_nest_end(skb, n);
}
@@ -2273,7 +2320,7 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
goto options_error;
- if (oper && taprio_dump_tc_entries(skb, oper))
+ if (oper && taprio_dump_tc_entries(skb, q, oper))
goto options_error;
if (oper && dump_schedule(skb, oper))
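
The cleared extack pointers in the taprio_enable_offload() hunk above guard a real lifetime hazard: the offload object is reference-counted, so a driver may keep it long after the netlink request (and its extack) is gone. An illustrative driver-side pattern showing the hazard (foo_* and program_schedule() are hypothetical; taprio_offload_get()/taprio_offload_free() are the real refcount API):

    static struct tc_taprio_qopt_offload *foo_saved;

    static int foo_setup_taprio(struct tc_taprio_qopt_offload *offload)
    {
            foo_saved = taprio_offload_get(offload); /* outlives this call */
            return 0;
    }

    static void foo_deferred_work(void)
    {
            /* Had taprio not NULLed offload->extack before dropping its
             * own reference, this object could still carry a dangling
             * pointer into a long-finished netlink request.
             */
            if (foo_saved->enable)
                    program_schedule(foo_saved);
            taprio_offload_free(foo_saved);
    }
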
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 63ba5551c13f..796529167e8d 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1597,9 +1597,10 @@ int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc,
struct sctp_cookie *cookie,
gfp_t gfp)
{
- int var_size2 = ntohs(cookie->peer_init->chunk_hdr.length);
+ struct sctp_init_chunk *peer_init = (struct sctp_init_chunk *)(cookie + 1);
+ int var_size2 = ntohs(peer_init->chunk_hdr.length);
int var_size3 = cookie->raw_addr_list_len;
- __u8 *raw = (__u8 *)cookie->peer_init + var_size2;
+ __u8 *raw = (__u8 *)peer_init + var_size2;
return sctp_raw_to_bind_addrs(&asoc->base.bind_addr, raw, var_size3,
asoc->ep->base.bind_addr.port, gfp);
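
The SCTP hunks in this series repeatedly swap a flexible/zero-length member access for pointer arithmetic past a fixed header. A generic sketch of the idiom with toy types (not the actual SCTP definitions):

    struct toy_hdr {
            __be16 type;
            __be16 length;  /* header plus trailing payload, in bytes */
    };

    /* The payload sits immediately after the fixed header, so "one whole
     * struct past the pointer" is its first byte.
     */
    static inline void *toy_hdr_payload(struct toy_hdr *h)
    {
            return h + 1;
    }
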
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 34964145514e..c58fffc86a0c 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -738,7 +738,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
tfm = asoc->ep->auth_hmacs[hmac_id];
- digest = auth->auth_hdr.hmac;
+ digest = (u8 *)(&auth->auth_hdr + 1);
if (crypto_shash_setkey(tfm, &asoc_key->data[0], asoc_key->len))
goto free;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 127bf28a6033..2613c4d74b16 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1150,7 +1150,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
init = (struct sctp_init_chunk *)skb->data;
/* Walk the parameters looking for embedded addresses. */
- sctp_walk_params(params, init, init_hdr.params) {
+ sctp_walk_params(params, init) {
/* Note: Ignoring hostname addresses. */
af = sctp_get_af_specific(param_type2af(params.p->type));
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 20831079fb09..0dc6b8ab9963 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1231,7 +1231,7 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc,
unack_data = assoc->next_tsn - assoc->ctsn_ack_point - 1;
- frags = sack->variable;
+ frags = (union sctp_sack_variable *)(sack + 1);
for (i = 0; i < ntohs(sack->num_gap_ack_blocks); i++) {
unack_data -= ((ntohs(frags[i].gab.end) -
ntohs(frags[i].gab.start) + 1));
@@ -1252,7 +1252,6 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
struct sctp_transport *transport;
struct sctp_chunk *tchunk = NULL;
struct list_head *lchunk, *transport_list, *temp;
- union sctp_sack_variable *frags = sack->variable;
__u32 sack_ctsn, ctsn, tsn;
__u32 highest_tsn, highest_new_tsn;
__u32 sack_a_rwnd;
@@ -1313,8 +1312,12 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
/* Get the highest TSN in the sack. */
highest_tsn = sack_ctsn;
- if (gap_ack_blocks)
+ if (gap_ack_blocks) {
+ union sctp_sack_variable *frags =
+ (union sctp_sack_variable *)(sack + 1);
+
highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end);
+ }
if (TSN_lt(asoc->highest_sacked, highest_tsn))
asoc->highest_sacked = highest_tsn;
@@ -1789,7 +1792,7 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn)
* Block are assumed to have been received correctly.
*/
- frags = sack->variable;
+ frags = (union sctp_sack_variable *)(sack + 1);
blocks = ntohs(sack->num_gap_ack_blocks);
tsn_offset = tsn - ctsn;
for (i = 0; i < blocks; ++i) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index c7503fd64915..08527d882e56 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1707,11 +1707,11 @@ static struct sctp_cookie_param *sctp_pack_cookie(
ktime_get_real());
/* Copy the peer's init packet. */
- memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr,
+ memcpy(cookie + 1, init_chunk->chunk_hdr,
ntohs(init_chunk->chunk_hdr->length));
/* Copy the raw local address list of the association. */
- memcpy((__u8 *)&cookie->c.peer_init[0] +
+ memcpy((__u8 *)(cookie + 1) +
ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len);
if (sctp_sk(ep->base.sk)->hmac) {
@@ -2207,7 +2207,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
break;
case SCTP_PARAM_HOST_NAME_ADDRESS:
- /* Tell the peer, we won't support this param. */
+ /* This param has been deprecated; send an ABORT. */
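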
sctp_process_hn_param(asoc, param, chunk, err_chunk);
retval = SCTP_IERROR_ABORT;
break;
@@ -2306,7 +2306,7 @@ int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
ntohl(peer_init->init_hdr.a_rwnd) < SCTP_DEFAULT_MINWINDOW)
return sctp_process_inv_mandatory(asoc, chunk, errp);
- sctp_walk_params(param, peer_init, init_hdr.params) {
+ sctp_walk_params(param, peer_init) {
if (param.p->type == SCTP_PARAM_STATE_COOKIE)
has_cookie = true;
}
@@ -2329,7 +2329,7 @@ int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
chunk, errp);
/* Verify all the variable length parameters */
- sctp_walk_params(param, peer_init, init_hdr.params) {
+ sctp_walk_params(param, peer_init) {
result = sctp_verify_param(net, ep, asoc, param, cid,
chunk, errp);
switch (result) {
@@ -2381,7 +2381,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
src_match = 1;
/* Process the initialization parameters. */
- sctp_walk_params(param, peer_init, init_hdr.params) {
+ sctp_walk_params(param, peer_init) {
if (!src_match &&
(param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
param.p->type == SCTP_PARAM_IPV6_ADDRESS)) {
@@ -2589,10 +2589,6 @@ do_addr_param:
asoc->cookie_life = ktime_add_ms(asoc->cookie_life, stale);
break;
- case SCTP_PARAM_HOST_NAME_ADDRESS:
- pr_debug("%s: unimplemented SCTP_HOST_NAME_ADDRESS\n", __func__);
- break;
-
case SCTP_PARAM_SUPPORTED_ADDRESS_TYPES:
/* Turn off the default values first so we'll know which
* ones are really set by the peer.
@@ -2624,10 +2620,6 @@ do_addr_param:
asoc->peer.ipv6_address = 1;
break;
- case SCTP_PARAM_HOST_NAME_ADDRESS:
- asoc->peer.hostname_address = 1;
- break;
-
default: /* Just ignore anything else. */
break;
}
@@ -3210,7 +3202,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
union sctp_params param;
addip = (struct sctp_addip_chunk *)chunk->chunk_hdr;
- sctp_walk_params(param, addip, addip_hdr.params) {
+ sctp_walk_params(param, addip) {
size_t length = ntohs(param.p->length);
*errp = param.p;
@@ -3223,14 +3215,14 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
/* ensure there is only one addr param and it's in the
* beginning of addip_hdr params, or we reject it.
*/
- if (param.v != addip->addip_hdr.params)
+ if (param.v != (addip + 1))
return false;
addr_param_seen = true;
break;
case SCTP_PARAM_IPV6_ADDRESS:
if (length != sizeof(struct sctp_ipv6addr_param))
return false;
- if (param.v != addip->addip_hdr.params)
+ if (param.v != (addip + 1))
return false;
addr_param_seen = true;
break;
@@ -3310,7 +3302,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
goto done;
/* Process the TLVs contained within the ASCONF chunk. */
- sctp_walk_params(param, addip, addip_hdr.params) {
+ sctp_walk_params(param, addip) {
/* Skip preceding address parameters. */
if (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
param.p->type == SCTP_PARAM_IPV6_ADDRESS)
@@ -3644,7 +3636,7 @@ static struct sctp_chunk *sctp_make_reconf(const struct sctp_association *asoc,
return NULL;
reconf = (struct sctp_reconf_chunk *)retval->chunk_hdr;
- retval->param_hdr.v = reconf->params;
+ retval->param_hdr.v = (u8 *)(reconf + 1);
return retval;
}
@@ -3886,7 +3878,7 @@ bool sctp_verify_reconf(const struct sctp_association *asoc,
__u16 cnt = 0;
hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr;
- sctp_walk_params(param, hdr, params) {
+ sctp_walk_params(param, hdr) {
__u16 length = ntohs(param.p->length);
*errp = param.p;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 463c4a58d2c3..7fbeb99d8d32 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -984,8 +984,7 @@ static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds,
{
struct sctp_chunkhdr *unk_chunk_hdr;
- unk_chunk_hdr = (struct sctp_chunkhdr *)
- err_hdr->variable;
+ unk_chunk_hdr = (struct sctp_chunkhdr *)(err_hdr + 1);
switch (unk_chunk_hdr->type) {
/* ADDIP 4.1 A9) If the peer responds to an ASCONF with
* an ERROR chunk reporting that it did not recognize
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index ce5426171206..97f1155a2045 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -794,8 +794,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
/* This is a brand-new association, so these are not yet side
* effects--it is safe to run them here.
*/
- peer_init = &chunk->subh.cookie_hdr->c.peer_init[0];
-
+ peer_init = (struct sctp_init_chunk *)(chunk->subh.cookie_hdr + 1);
if (!sctp_process_init(new_asoc, chunk,
&chunk->subh.cookie_hdr->c.peer_addr,
peer_init, GFP_ATOMIC))
@@ -1337,7 +1336,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
* throughout the code today.
*/
errhdr = (struct sctp_errhdr *)buffer;
- addrparm = (union sctp_addr_param *)errhdr->variable;
+ addrparm = (union sctp_addr_param *)(errhdr + 1);
/* Copy into a parm format. */
len = af->to_addr_param(ssa, addrparm);
@@ -1869,8 +1868,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
/* new_asoc is a brand-new association, so these are not yet
* side effects--it is safe to run them here.
*/
- peer_init = &chunk->subh.cookie_hdr->c.peer_init[0];
-
+ peer_init = (struct sctp_init_chunk *)(chunk->subh.cookie_hdr + 1);
if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk), peer_init,
GFP_ATOMIC))
goto nomem;
@@ -1990,7 +1988,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
/* new_asoc is a brand-new association, so these are not yet
* side effects--it is safe to run them here.
*/
- peer_init = &chunk->subh.cookie_hdr->c.peer_init[0];
+ peer_init = (struct sctp_init_chunk *)(chunk->subh.cookie_hdr + 1);
if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk), peer_init,
GFP_ATOMIC))
goto nomem;
@@ -4142,7 +4140,7 @@ enum sctp_disposition sctp_sf_do_reconf(struct net *net,
(void *)err_param, commands);
hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr;
- sctp_walk_params(param, hdr, params) {
+ sctp_walk_params(param, hdr) {
struct sctp_chunk *reply = NULL;
struct sctp_ulpevent *ev = NULL;
@@ -4393,7 +4391,7 @@ static enum sctp_ierror sctp_sf_authenticate(
* 3. Compute the new digest
* 4. Compare saved and new digests.
*/
- digest = auth_hdr->hmac;
+ digest = (u8 *)(auth_hdr + 1);
skb_pull(chunk->skb, sig_len);
save_digest = kmemdup(digest, sig_len, GFP_ATOMIC);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b91616f819de..cda8c2874691 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1830,6 +1830,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
if (err)
goto err;
+ if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) {
+ err = -EINVAL;
+ goto err;
+ }
}
if (sctp_state(asoc, CLOSED)) {
@@ -5188,10 +5192,11 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
info->sctpi_peer_rwnd = asoc->peer.rwnd;
info->sctpi_peer_tag = asoc->c.peer_vtag;
- mask = asoc->peer.ecn_capable << 1;
+ mask = asoc->peer.intl_capable << 1;
+ mask = (mask | asoc->peer.ecn_capable) << 1;
mask = (mask | asoc->peer.ipv4_address) << 1;
mask = (mask | asoc->peer.ipv6_address) << 1;
- mask = (mask | asoc->peer.hostname_address) << 1;
+ mask = (mask | asoc->peer.reconf_capable) << 1;
mask = (mask | asoc->peer.asconf_capable) << 1;
mask = (mask | asoc->peer.prsctp_capable) << 1;
mask = (mask | asoc->peer.auth_capable);
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index ee6514af830f..c241cc552e8d 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -491,7 +491,7 @@ static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param(
return NULL;
hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr;
- sctp_walk_params(param, hdr, params) {
+ sctp_walk_params(param, hdr) {
/* sctp_strreset_tsnreq is actually the basic structure
* of all stream reconf params, so it's safe to use it
* to access request_seq.
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index 94727feb07b3..840f24045ae2 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -1153,8 +1153,9 @@ static void sctp_generate_iftsn(struct sctp_outq *q, __u32 ctsn)
}
#define _sctp_walk_ifwdtsn(pos, chunk, end) \
- for (pos = chunk->subh.ifwdtsn_hdr->skip; \
- (void *)pos < (void *)chunk->subh.ifwdtsn_hdr->skip + (end); pos++)
+ for (pos = (void *)(chunk->subh.ifwdtsn_hdr + 1); \
+ (void *)pos <= (void *)(chunk->subh.ifwdtsn_hdr + 1) + (end) - \
+ sizeof(struct sctp_ifwdtsn_skip); pos++)
#define sctp_walk_ifwdtsn(pos, ch) \
_sctp_walk_ifwdtsn((pos), (ch), ntohs((ch)->chunk_hdr->length) - \
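
The reworked walker tightens the loop bound as well as the base pointer. A toy worked example, assuming 4-byte entries for readability (the real sctp_ifwdtsn_skip size differs):

    /* With end = 10 bytes of trailing data and 4-byte entries:
     *   old bound: pos < base + 10      -> visits offsets 0, 4, 8; the
     *              entry at offset 8 reads bytes 8..11, two bytes past
     *              the chunk;
     *   new bound: pos <= base + 10 - 4 -> visits offsets 0 and 4 only,
     *              so every visited entry lies fully inside the chunk.
     */
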
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index c6b4a62276f6..50c38b624f77 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -3270,6 +3270,17 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
sk_common_release(sk);
goto out;
}
+
+ /* smc_clcsock_release() does not wait for smc->clcsock->sk's
+ * destruction; its sk_state might not be TCP_CLOSE after
+ * smc->sk is close()d, and TCP timers can fire later,
+ * which needs a net ref.
+ */
+ sk = smc->clcsock->sk;
+ __netns_tracker_free(net, &sk->ns_tracker, false);
+ sk->sk_net_refcnt = 1;
+ get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
} else {
smc->clcsock = clcsock;
}
diff --git a/net/socket.c b/net/socket.c
index 73e493da4589..a7b4b37d86df 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -957,6 +957,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
}
EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
+#ifdef CONFIG_WIRELESS
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
@@ -972,6 +973,7 @@ void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
}
EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
+#endif
static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
diff --git a/net/sunrpc/auth_gss/gss_krb5_test.c b/net/sunrpc/auth_gss/gss_krb5_test.c
index ce0541e32fc9..95ca783795c5 100644
--- a/net/sunrpc/auth_gss/gss_krb5_test.c
+++ b/net/sunrpc/auth_gss/gss_krb5_test.c
@@ -73,7 +73,6 @@ static void checksum_case(struct kunit *test)
{
const struct gss_krb5_test_param *param = test->param_value;
struct xdr_buf buf = {
- .head[0].iov_base = param->plaintext->data,
.head[0].iov_len = param->plaintext->len,
.len = param->plaintext->len,
};
@@ -99,6 +98,10 @@ static void checksum_case(struct kunit *test)
err = crypto_ahash_setkey(tfm, Kc.data, Kc.len);
KUNIT_ASSERT_EQ(test, err, 0);
+ buf.head[0].iov_base = kunit_kzalloc(test, buf.head[0].iov_len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf.head[0].iov_base);
+ memcpy(buf.head[0].iov_base, param->plaintext->data, buf.head[0].iov_len);
+
checksum.len = gk5e->cksumlength;
checksum.data = kunit_kzalloc(test, checksum.len, GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, checksum.data);
@@ -1327,6 +1330,7 @@ static void rfc6803_encrypt_case(struct kunit *test)
if (!gk5e)
kunit_skip(test, "Encryption type is not available");
+ memset(usage_data, 0, sizeof(usage_data));
usage.data[3] = param->constant;
Ke.len = gk5e->Ke_length;
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 983c5891cb56..4246363cb095 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -416,14 +416,23 @@ static int unix_gid_hash(kuid_t uid)
return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS);
}
-static void unix_gid_put(struct kref *kref)
+static void unix_gid_free(struct rcu_head *rcu)
{
- struct cache_head *item = container_of(kref, struct cache_head, ref);
- struct unix_gid *ug = container_of(item, struct unix_gid, h);
+ struct unix_gid *ug = container_of(rcu, struct unix_gid, rcu);
+ struct cache_head *item = &ug->h;
+
if (test_bit(CACHE_VALID, &item->flags) &&
!test_bit(CACHE_NEGATIVE, &item->flags))
put_group_info(ug->gi);
- kfree_rcu(ug, rcu);
+ kfree(ug);
+}
+
+static void unix_gid_put(struct kref *kref)
+{
+ struct cache_head *item = container_of(kref, struct cache_head, ref);
+ struct unix_gid *ug = container_of(item, struct unix_gid, h);
+
+ call_rcu(&ug->rcu, unix_gid_free);
}
static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew)
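
The unix_gid fix is an instance of a general RCU rule: anything a reader may still dereference must be released from the call_rcu() callback, after the grace period, not from the kref release function. A generic sketch with a toy object (release_payload() is illustrative):

    struct toy_obj {
            struct kref ref;
            struct rcu_head rcu;
            void *payload;          /* readers dereference this under RCU */
    };

    static void toy_obj_free_rcu(struct rcu_head *rcu)
    {
            struct toy_obj *o = container_of(rcu, struct toy_obj, rcu);

            release_payload(o->payload);    /* safe: no readers remain */
            kfree(o);
    }

    static void toy_obj_release(struct kref *ref)
    {
            struct toy_obj *o = container_of(ref, struct toy_obj, ref);

            call_rcu(&o->rcu, toy_obj_free_rcu);
    }
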
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index adcbedc244d6..6cacd70a15ff 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2158,6 +2158,7 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
switch (skst) {
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
+ case TCP_LAST_ACK:
break;
case TCP_ESTABLISHED:
case TCP_CLOSE_WAIT:
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index dde3c870bddd..e4878551f140 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -94,6 +94,11 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
info->op,
info->flags);
+ if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) {
+ WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n");
+ goto out;
+ }
+
return skb;
out:
@@ -1336,6 +1341,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
goto free_pkt;
}
+ if (!skb_set_owner_sk_safe(skb, sk)) {
+ WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n");
+ goto free_pkt;
+ }
+
vsk = vsock_sk(sk);
lock_sock(sk);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index a5375c97f5b0..b370070194fa 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1849,7 +1849,13 @@ static ssize_t vmci_transport_stream_enqueue(
struct msghdr *msg,
size_t len)
{
- return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0);
+ ssize_t err;
+
+ err = vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0);
+ if (err < 0)
+ err = -ENOMEM;
+
+ return err;
}
static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index e3afc0c866f5..5c6360df1f31 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -31,8 +31,7 @@ static int vsock_loopback_send_pkt(struct sk_buff *skb)
struct vsock_loopback *vsock = &the_vsock_loopback;
int len = skb->len;
- skb_queue_tail(&vsock->pkt_queue, skb);
-
+ virtio_vsock_skb_queue_tail(&vsock->pkt_queue, skb);
queue_work(vsock->workqueue, &vsock->pkt_work);
return len;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 2ac58b282b5e..cc1e7f15fa73 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1301,9 +1301,10 @@ static int xsk_mmap(struct file *file, struct socket *sock,
loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
unsigned long size = vma->vm_end - vma->vm_start;
struct xdp_sock *xs = xdp_sk(sock->sk);
+ int state = READ_ONCE(xs->state);
struct xsk_queue *q = NULL;
- if (READ_ONCE(xs->state) != XSK_READY)
+ if (state != XSK_READY && state != XSK_BOUND)
return -EBUSY;
if (offset == XDP_PGOFF_RX_RING) {
@@ -1314,9 +1315,11 @@ static int xsk_mmap(struct file *file, struct socket *sock,
/* Matches the smp_wmb() in XDP_UMEM_REG */
smp_rmb();
if (offset == XDP_UMEM_PGOFF_FILL_RING)
- q = READ_ONCE(xs->fq_tmp);
+ q = state == XSK_READY ? READ_ONCE(xs->fq_tmp) :
+ READ_ONCE(xs->pool->fq);
else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
- q = READ_ONCE(xs->cq_tmp);
+ q = state == XSK_READY ? READ_ONCE(xs->cq_tmp) :
+ READ_ONCE(xs->pool->cq);
}
if (!q)
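
A userspace consequence of the xsk_mmap() change, sketched below: the fill and completion rings can now be (re)mapped after bind(), with the kernel serving them from the shared pool instead of the temporary socket slots. Illustrative only; error handling elided:

    #include <sys/mman.h>
    #include <sys/socket.h>
    #include <linux/if_xdp.h>

    static void *map_fill_ring(int xsk_fd, unsigned int ring_size)
    {
            struct xdp_mmap_offsets off;
            socklen_t optlen = sizeof(off);

            getsockopt(xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);

            /* Legal even after bind() with this patch applied */
            return mmap(NULL, off.fr.desc + ring_size * sizeof(__u64),
                        PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
                        xsk_fd, XDP_UMEM_PGOFF_FILL_RING);
    }
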
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index bfb2a7e50c26..6d40a77fccbe 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -133,16 +133,12 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 chunk, chunk_end;
+ u64 offset = desc->addr & (pool->chunk_size - 1);
- chunk = xp_aligned_extract_addr(pool, desc->addr);
- if (likely(desc->len)) {
- chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len - 1);
- if (chunk != chunk_end)
- return false;
- }
+ if (offset + desc->len > pool->chunk_size)
+ return false;
- if (chunk >= pool->addrs_cnt)
+ if (desc->addr >= pool->addrs_cnt)
return false;
if (desc->options)
@@ -153,15 +149,12 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 addr, base_addr;
-
- base_addr = xp_unaligned_extract_addr(desc->addr);
- addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
if (desc->len > pool->chunk_size)
return false;
- if (base_addr >= pool->addrs_cnt || addr >= pool->addrs_cnt ||
+ if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
return false;
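
A worked example of the simplified aligned check, assuming the usual power-of-two chunk_size of 2048 and a UMEM of addrs_cnt = 16384 bytes:

    /*   addr = 8144, len = 100:
     *     offset = 8144 & 2047 = 2000
     *     offset + len = 2100 > 2048  -> rejected, the descriptor would
     *                                    straddle two chunks
     *   addr = 8144, len = 40:
     *     offset + len = 2040 <= 2048 -> accepted, and addr < 16384
     *                                    keeps it inside the UMEM
     */
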
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 0c38d7175922..2c1427074a3b 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -162,8 +162,8 @@ static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
return ERR_PTR(-EOPNOTSUPP);
}
-static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
- u64 map_flags)
+static long xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct xdp_sock __rcu **map_entry;
@@ -223,7 +223,7 @@ out:
return err;
}
-static int xsk_map_delete_elem(struct bpf_map *map, void *key)
+static long xsk_map_delete_elem(struct bpf_map *map, void *key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct xdp_sock __rcu **map_entry;
@@ -243,7 +243,7 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
return 0;
}
-static int xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags)
+static long xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags)
{
return __bpf_xdp_redirect_map(map, index, flags, 0,
__xsk_map_lookup_elem);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 95f1436bf6a2..bef28c6187eb 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -287,7 +287,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return (is_packet_offload) ? -EINVAL : 0;
}
- if (x->props.flags & XFRM_STATE_ESN &&
+ if (!is_packet_offload && x->props.flags & XFRM_STATE_ESN &&
!dev->xfrmdev_ops->xdo_dev_state_advance_esn) {
NL_SET_ERR_MSG(extack, "Device doesn't support offload with ESN");
xso->dev = NULL;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 436d29640ac2..39fb91ff23d9 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -231,9 +231,6 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
{
int err = -EINVAL;
- if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
- goto out;
-
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out;
@@ -269,8 +266,6 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
{
int err = -EINVAL;
- if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
- goto out;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
@@ -331,22 +326,26 @@ out:
*/
static int
xfrm_inner_mode_encap_remove(struct xfrm_state *x,
- const struct xfrm_mode *inner_mode,
struct sk_buff *skb)
{
- switch (inner_mode->encap) {
+ switch (x->props.mode) {
case XFRM_MODE_BEET:
- if (inner_mode->family == AF_INET)
+ switch (XFRM_MODE_SKB_CB(skb)->protocol) {
+ case IPPROTO_IPIP:
+ case IPPROTO_BEETPH:
return xfrm4_remove_beet_encap(x, skb);
- if (inner_mode->family == AF_INET6)
+ case IPPROTO_IPV6:
return xfrm6_remove_beet_encap(x, skb);
+ }
break;
case XFRM_MODE_TUNNEL:
- if (inner_mode->family == AF_INET)
+ switch (XFRM_MODE_SKB_CB(skb)->protocol) {
+ case IPPROTO_IPIP:
return xfrm4_remove_tunnel_encap(x, skb);
- if (inner_mode->family == AF_INET6)
+ case IPPROTO_IPV6:
return xfrm6_remove_tunnel_encap(x, skb);
break;
+ }
}
WARN_ON_ONCE(1);
@@ -355,9 +354,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x,
static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
{
- const struct xfrm_mode *inner_mode = &x->inner_mode;
-
- switch (x->outer_mode.family) {
+ switch (x->props.family) {
case AF_INET:
xfrm4_extract_header(skb);
break;
@@ -369,17 +366,12 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
return -EAFNOSUPPORT;
}
- if (x->sel.family == AF_UNSPEC) {
- inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
- if (!inner_mode)
- return -EAFNOSUPPORT;
- }
-
- switch (inner_mode->family) {
- case AF_INET:
+ switch (XFRM_MODE_SKB_CB(skb)->protocol) {
+ case IPPROTO_IPIP:
+ case IPPROTO_BEETPH:
skb->protocol = htons(ETH_P_IP);
break;
- case AF_INET6:
+ case IPPROTO_IPV6:
skb->protocol = htons(ETH_P_IPV6);
break;
default:
@@ -387,7 +379,7 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
break;
}
- return xfrm_inner_mode_encap_remove(x, inner_mode, skb);
+ return xfrm_inner_mode_encap_remove(x, skb);
}
/* Remove encapsulation header.
@@ -433,17 +425,16 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
}
static int xfrm_inner_mode_input(struct xfrm_state *x,
- const struct xfrm_mode *inner_mode,
struct sk_buff *skb)
{
- switch (inner_mode->encap) {
+ switch (x->props.mode) {
case XFRM_MODE_BEET:
case XFRM_MODE_TUNNEL:
return xfrm_prepare_input(x, skb);
case XFRM_MODE_TRANSPORT:
- if (inner_mode->family == AF_INET)
+ if (x->props.family == AF_INET)
return xfrm4_transport_input(x, skb);
- if (inner_mode->family == AF_INET6)
+ if (x->props.family == AF_INET6)
return xfrm6_transport_input(x, skb);
break;
case XFRM_MODE_ROUTEOPTIMIZATION:
@@ -461,7 +452,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
{
const struct xfrm_state_afinfo *afinfo;
struct net *net = dev_net(skb->dev);
- const struct xfrm_mode *inner_mode;
int err;
__be32 seq;
__be32 seq_hi;
@@ -491,7 +481,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
- family = x->outer_mode.family;
+ family = x->props.family;
/* An encap_type of -1 indicates async resumption. */
if (encap_type == -1) {
@@ -676,17 +666,7 @@ resume:
XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
- inner_mode = &x->inner_mode;
-
- if (x->sel.family == AF_UNSPEC) {
- inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
- if (inner_mode == NULL) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
- goto drop;
- }
- }
-
- if (xfrm_inner_mode_input(x, inner_mode, skb)) {
+ if (xfrm_inner_mode_input(x, skb)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
goto drop;
}
@@ -701,7 +681,7 @@ resume:
* transport mode so the outer address is identical.
*/
daddr = &x->id.daddr;
- family = x->outer_mode.family;
+ family = x->props.family;
err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
if (err < 0) {
@@ -732,7 +712,7 @@ resume:
err = -EAFNOSUPPORT;
rcu_read_lock();
- afinfo = xfrm_state_afinfo_get_rcu(x->inner_mode.family);
+ afinfo = xfrm_state_afinfo_get_rcu(x->props.family);
if (likely(afinfo))
err = afinfo->transport_finish(skb, xfrm_gro || async);
rcu_read_unlock();
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index ff114d68cc43..369e5de8558f 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -412,7 +412,7 @@ static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
skb->protocol = htons(ETH_P_IP);
- switch (x->outer_mode.encap) {
+ switch (x->props.mode) {
case XFRM_MODE_BEET:
return xfrm4_beet_encap_add(x, skb);
case XFRM_MODE_TUNNEL:
@@ -435,7 +435,7 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
skb->ignore_df = 1;
skb->protocol = htons(ETH_P_IPV6);
- switch (x->outer_mode.encap) {
+ switch (x->props.mode) {
case XFRM_MODE_BEET:
return xfrm6_beet_encap_add(x, skb);
case XFRM_MODE_TUNNEL:
@@ -451,22 +451,22 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
- switch (x->outer_mode.encap) {
+ switch (x->props.mode) {
case XFRM_MODE_BEET:
case XFRM_MODE_TUNNEL:
- if (x->outer_mode.family == AF_INET)
+ if (x->props.family == AF_INET)
return xfrm4_prepare_output(x, skb);
- if (x->outer_mode.family == AF_INET6)
+ if (x->props.family == AF_INET6)
return xfrm6_prepare_output(x, skb);
break;
case XFRM_MODE_TRANSPORT:
- if (x->outer_mode.family == AF_INET)
+ if (x->props.family == AF_INET)
return xfrm4_transport_output(x, skb);
- if (x->outer_mode.family == AF_INET6)
+ if (x->props.family == AF_INET6)
return xfrm6_transport_output(x, skb);
break;
case XFRM_MODE_ROUTEOPTIMIZATION:
- if (x->outer_mode.family == AF_INET6)
+ if (x->props.family == AF_INET6)
return xfrm6_ro_output(x, skb);
WARN_ON_ONCE(1);
break;
@@ -875,21 +875,10 @@ static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
- const struct xfrm_mode *inner_mode;
-
- if (x->sel.family == AF_UNSPEC)
- inner_mode = xfrm_ip2inner_mode(x,
- xfrm_af2proto(skb_dst(skb)->ops->family));
- else
- inner_mode = &x->inner_mode;
-
- if (inner_mode == NULL)
- return -EAFNOSUPPORT;
-
- switch (inner_mode->family) {
- case AF_INET:
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
return xfrm4_extract_output(x, skb);
- case AF_INET6:
+ case htons(ETH_P_IPV6):
return xfrm6_extract_output(x, skb);
}
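
The case labels above work because the kernel's htons() folds to an integer constant expression when its argument is constant. A minimal illustration of the same idiom:

    static int proto_to_family(__be16 proto)
    {
            switch (proto) {
            case htons(ETH_P_IP):           /* constant-folded at build time */
                    return AF_INET;
            case htons(ETH_P_IPV6):
                    return AF_INET6;
            default:
                    return AF_UNSPEC;
            }
    }
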
diff --git a/rust/Makefile b/rust/Makefile
index f88d108fbef0..aef85e9e8eeb 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -262,6 +262,20 @@ BINDGEN_TARGET := $(BINDGEN_TARGET_$(SRCARCH))
# some configurations, with new GCC versions, etc.
bindgen_extra_c_flags = -w --target=$(BINDGEN_TARGET)
+# Auto variable zero-initialization requires an additional special option with
+# clang that is going to be removed sometime in the future (likely in
+# clang-18), so make sure to pass this option only if clang supports it
+# (libclang major version < 16).
+#
+# https://github.com/llvm/llvm-project/issues/44842
+# https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0-rc2/clang/docs/ReleaseNotes.rst#deprecated-compiler-flags
+ifdef CONFIG_INIT_STACK_ALL_ZERO
+libclang_maj_ver=$(shell $(BINDGEN) $(srctree)/scripts/rust_is_available_bindgen_libclang.h 2>&1 | sed -ne 's/.*clang version \([0-9]*\).*/\1/p')
+ifeq ($(shell expr $(libclang_maj_ver) \< 16), 1)
+bindgen_extra_c_flags += -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang
+endif
+endif
+
bindgen_c_flags = $(filter-out $(bindgen_skip_c_flags), $(c_flags)) \
$(bindgen_extra_c_flags)
endif
@@ -283,7 +297,7 @@ quiet_cmd_bindgen = BINDGEN $@
$(bindgen_target_cflags) $(bindgen_target_extra)
$(obj)/bindings/bindings_generated.rs: private bindgen_target_flags = \
- $(shell grep -v '^\#\|^$$' $(srctree)/$(src)/bindgen_parameters)
+ $(shell grep -v '^#\|^$$' $(srctree)/$(src)/bindgen_parameters)
$(obj)/bindings/bindings_generated.rs: $(src)/bindings/bindings_helper.h \
$(src)/bindgen_parameters FORCE
$(call if_changed_dep,bindgen)
diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs
index 30103325696d..8009184bf6d7 100644
--- a/rust/kernel/print.rs
+++ b/rust/kernel/print.rs
@@ -18,7 +18,11 @@ use crate::bindings;
// Called from `vsprintf` with format specifier `%pA`.
#[no_mangle]
-unsafe fn rust_fmt_argument(buf: *mut c_char, end: *mut c_char, ptr: *const c_void) -> *mut c_char {
+unsafe extern "C" fn rust_fmt_argument(
+ buf: *mut c_char,
+ end: *mut c_char,
+ ptr: *const c_void,
+) -> *mut c_char {
use fmt::Write;
// SAFETY: The C contract guarantees that `buf` is valid if it's less than `end`.
let mut w = unsafe { RawFormatter::from_ptrs(buf.cast(), end.cast()) };
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index b771310fa4a4..cd3d2a6cf1fc 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -408,7 +408,7 @@ impl RawFormatter {
/// If `pos` is less than `end`, then the region between `pos` (inclusive) and `end`
/// (exclusive) must be valid for writes for the lifetime of the returned [`RawFormatter`].
pub(crate) unsafe fn from_ptrs(pos: *mut u8, end: *mut u8) -> Self {
- // INVARIANT: The safety requierments guarantee the type invariants.
+ // INVARIANT: The safety requirements guarantee the type invariants.
Self {
beg: pos as _,
pos: pos as _,
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
index 5aefd19cdfa1..944f13fe164a 100644
--- a/samples/bpf/cpustat_kern.c
+++ b/samples/bpf/cpustat_kern.c
@@ -76,8 +76,8 @@ struct {
/*
* The trace events for cpu_idle and cpu_frequency are taken from:
- * /sys/kernel/debug/tracing/events/power/cpu_idle/format
- * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
+ * /sys/kernel/tracing/events/power/cpu_idle/format
+ * /sys/kernel/tracing/events/power/cpu_frequency/format
*
* These two events have the same format, so define one common structure.
*/
diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
index 516fbac28b71..6448b7826107 100644
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -65,7 +65,7 @@ static void Usage(void);
static void read_trace_pipe2(void);
static void do_error(char *msg, bool errno_flag);
-#define DEBUGFS "/sys/kernel/debug/tracing/"
+#define TRACEFS "/sys/kernel/tracing/"
static struct bpf_program *bpf_prog;
static struct bpf_object *obj;
@@ -77,7 +77,7 @@ static void read_trace_pipe2(void)
FILE *outf;
char *outFname = "hbm_out.log";
- trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+ trace_fd = open(TRACEFS "trace_pipe", O_RDONLY, 0);
if (trace_fd < 0) {
printf("Error opening trace_pipe\n");
return;
@@ -315,6 +315,7 @@ static int run_bpf_prog(char *prog, int cg_id)
fout = fopen(fname, "w");
fprintf(fout, "id:%d\n", cg_id);
fprintf(fout, "ERROR: Could not lookup queue_stats\n");
+ fclose(fout);
} else if (stats_flag && qstats.lastPacketTime >
qstats.firstPacketTime) {
long long delta_us = (qstats.lastPacketTime -
diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c
index 9b193231024a..f07474c72525 100644
--- a/samples/bpf/ibumad_kern.c
+++ b/samples/bpf/ibumad_kern.c
@@ -39,8 +39,8 @@ struct {
/* Taken from the current format defined in
* include/trace/events/ib_umad.h
* and
- * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_read/format
- * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_write/format
+ * /sys/kernel/tracing/events/ib_umad/ib_umad_read/format
+ * /sys/kernel/tracing/events/ib_umad/ib_umad_write/format
*/
struct ib_umad_rw_args {
u64 pad;
diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh
index 7078bfcc4f4d..381b2c634784 100755
--- a/samples/bpf/lwt_len_hist.sh
+++ b/samples/bpf/lwt_len_hist.sh
@@ -5,7 +5,7 @@ NS1=lwt_ns1
VETH0=tst_lwt1a
VETH1=tst_lwt1b
BPF_PROG=lwt_len_hist.bpf.o
-TRACE_ROOT=/sys/kernel/debug/tracing
+TRACE_ROOT=/sys/kernel/tracing
function cleanup {
# To reset saved histogram, remove pinned map
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
index eb4d94742e6b..23f12b47e9e5 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime_kern.c
@@ -110,7 +110,7 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta)
}
#if 1
-/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
char prev_comm[TASK_COMM_LEN];
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
index a33d74bd3a4b..1e61f2180470 100644
--- a/samples/bpf/task_fd_query_user.c
+++ b/samples/bpf/task_fd_query_user.c
@@ -235,7 +235,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
struct bpf_link *link;
ssize_t bytes;
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events",
+ snprintf(buf, sizeof(buf), "/sys/kernel/tracing/%s_events",
event_type);
kfd = open(buf, O_WRONLY | O_TRUNC, 0);
CHECK_PERROR_RET(kfd < 0);
@@ -252,7 +252,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
close(kfd);
kfd = -1;
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id",
+ snprintf(buf, sizeof(buf), "/sys/kernel/tracing/events/%ss/%s/id",
event_type, event_alias);
efd = open(buf, O_RDONLY, 0);
CHECK_PERROR_RET(efd < 0);
diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh
index 2e9f5126963b..0bf2d0f6bf4b 100755
--- a/samples/bpf/test_lwt_bpf.sh
+++ b/samples/bpf/test_lwt_bpf.sh
@@ -21,7 +21,7 @@ IP_LOCAL="192.168.99.1"
PROG_SRC="test_lwt_bpf.c"
BPF_PROG="test_lwt_bpf.o"
-TRACE_ROOT=/sys/kernel/debug/tracing
+TRACE_ROOT=/sys/kernel/tracing
CONTEXT_INFO=$(cat ${TRACE_ROOT}/trace_options | grep context)
function lookup_mac()
diff --git a/samples/bpf/test_overhead_tp.bpf.c b/samples/bpf/test_overhead_tp.bpf.c
index 67cab3881969..8b498328e961 100644
--- a/samples/bpf/test_overhead_tp.bpf.c
+++ b/samples/bpf/test_overhead_tp.bpf.c
@@ -7,7 +7,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
-/* from /sys/kernel/debug/tracing/events/task/task_rename/format */
+/* from /sys/kernel/tracing/events/task/task_rename/format */
struct task_rename {
__u64 pad;
__u32 pid;
@@ -21,7 +21,7 @@ int prog(struct task_rename *ctx)
return 0;
}
-/* from /sys/kernel/debug/tracing/events/fib/fib_table_lookup/format */
+/* from /sys/kernel/tracing/events/fib/fib_table_lookup/format */
struct fib_table_lookup {
__u64 pad;
__u32 tb_id;
diff --git a/scripts/Makefile.package b/scripts/Makefile.package
index 61f72eb8d9be..4d90691505b1 100644
--- a/scripts/Makefile.package
+++ b/scripts/Makefile.package
@@ -27,21 +27,6 @@ fi ; \
tar -I $(KGZIP) -c $(RCS_TAR_IGNORE) -f $(2).tar.gz \
--transform 's:^:$(2)/:S' $(TAR_CONTENT) $(3)
-# tarball compression
-# ---------------------------------------------------------------------------
-
-%.tar.gz: %.tar
- $(call cmd,gzip)
-
-%.tar.bz2: %.tar
- $(call cmd,bzip2)
-
-%.tar.xz: %.tar
- $(call cmd,xzmisc)
-
-%.tar.zst: %.tar
- $(call cmd,zstd)
-
# Git
# ---------------------------------------------------------------------------
@@ -57,16 +42,24 @@ check-git:
false; \
fi
+git-config-tar.gz = -c tar.tar.gz.command="$(KGZIP)"
+git-config-tar.bz2 = -c tar.tar.bz2.command="$(KBZIP2)"
+git-config-tar.xz = -c tar.tar.xz.command="$(XZ)"
+git-config-tar.zst = -c tar.tar.zst.command="$(ZSTD)"
+
+quiet_cmd_archive = ARCHIVE $@
+ cmd_archive = git -C $(srctree) $(git-config-tar$(suffix $@)) archive \
+ --output=$$(realpath $@) --prefix=$(basename $@)/ $(archive-args)
+
# Linux source tarball
# ---------------------------------------------------------------------------
-quiet_cmd_archive_linux = ARCHIVE $@
- cmd_archive_linux = \
- git -C $(srctree) archive --output=$$(realpath $@) --prefix=$(basename $@)/ $$(cat $<)
+linux-tarballs := $(addprefix linux, .tar.gz)
-targets += linux.tar
-linux.tar: .tmp_HEAD FORCE
- $(call if_changed,archive_linux)
+targets += $(linux-tarballs)
+$(linux-tarballs): archive-args = $$(cat $<)
+$(linux-tarballs): .tmp_HEAD FORCE
+ $(call if_changed,archive)
# rpm-pkg
# ---------------------------------------------------------------------------
@@ -94,7 +87,7 @@ binrpm-pkg:
$(UTS_MACHINE)-linux -bb $(objtree)/binkernel.spec
quiet_cmd_debianize = GEN $@
- cmd_debianize = $(srctree)/scripts/package/mkdebian
+ cmd_debianize = $(srctree)/scripts/package/mkdebian $(mkdebian-opts)
debian: FORCE
$(call cmd,debianize)
@@ -103,6 +96,7 @@ PHONY += debian-orig
debian-orig: private source = $(shell dpkg-parsechangelog -S Source)
debian-orig: private version = $(shell dpkg-parsechangelog -S Version | sed 's/-[^-]*$$//')
debian-orig: private orig-name = $(source)_$(version).orig.tar.gz
+debian-orig: mkdebian-opts = --need-source
debian-orig: linux.tar.gz debian
$(Q)if [ "$(df --output=target .. 2>/dev/null)" = "$(df --output=target $< 2>/dev/null)" ]; then \
ln -f $< ../$(orig-name); \
@@ -145,10 +139,17 @@ tar-install: FORCE
$(Q)$(MAKE) -f $(srctree)/Makefile
+$(Q)$(srctree)/scripts/package/buildtar $@
+compress-tar.gz = -I "$(KGZIP)"
+compress-tar.bz2 = -I "$(KBZIP2)"
+compress-tar.xz = -I "$(XZ)"
+compress-tar.zst = -I "$(ZSTD)"
+
quiet_cmd_tar = TAR $@
- cmd_tar = cd $<; tar cf ../$@ --owner=root --group=root --sort=name *
+ cmd_tar = cd $<; tar cf ../$@ $(compress-tar$(suffix $@)) --owner=root --group=root --sort=name *
-linux-$(KERNELRELEASE)-$(ARCH).tar: tar-install
+dir-tarballs := $(addprefix linux-$(KERNELRELEASE)-$(ARCH), .tar .tar.gz .tar.bz2 .tar.xz .tar.zst)
+
+$(dir-tarballs): tar-install
$(call cmd,tar)
PHONY += dir-pkg
@@ -180,16 +181,17 @@ quiet_cmd_perf_version_file = GEN $@
.tmp_perf/PERF-VERSION-FILE: .tmp_HEAD $(srctree)/tools/perf/util/PERF-VERSION-GEN | .tmp_perf
$(call cmd,perf_version_file)
-quiet_cmd_archive_perf = ARCHIVE $@
- cmd_archive_perf = \
- git -C $(srctree) archive --output=$$(realpath $@) --prefix=$(basename $@)/ \
- --add-file=$$(realpath $(word 2, $^)) \
+perf-archive-args = --add-file=$$(realpath $(word 2, $^)) \
--add-file=$$(realpath $(word 3, $^)) \
$$(cat $(word 2, $^))^{tree} $$(cat $<)
-targets += perf-$(KERNELVERSION).tar
-perf-$(KERNELVERSION).tar: tools/perf/MANIFEST .tmp_perf/HEAD .tmp_perf/PERF-VERSION-FILE FORCE
- $(call if_changed,archive_perf)
+
+perf-tarballs := $(addprefix perf-$(KERNELVERSION), .tar .tar.gz .tar.bz2 .tar.xz .tar.zst)
+
+targets += $(perf-tarballs)
+$(perf-tarballs): archive-args = $(perf-archive-args)
+$(perf-tarballs): tools/perf/MANIFEST .tmp_perf/HEAD .tmp_perf/PERF-VERSION-FILE FORCE
+ $(call if_changed,archive)
PHONY += perf-tar-src-pkg
perf-tar-src-pkg: perf-$(KERNELVERSION).tar
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 38d51e05c7a2..eaae2ce78381 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -383,7 +383,7 @@ class PrinterRST(Printer):
.. Copyright (C) All BPF authors and contributors from 2014 to present.
.. See git log include/uapi/linux/bpf.h in kernel tree for details.
..
-.. SPDX-License-Identifier: Linux-man-pages-copyleft
+.. SPDX-License-Identifier: Linux-man-pages-copyleft
..
.. Please do not edit this file. It was generated from the documentation
.. located in file include/uapi/linux/bpf.h of the Linux kernel sources
diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py
index ecc7ea9a4dcf..946e250c1b2a 100755
--- a/scripts/generate_rust_analyzer.py
+++ b/scripts/generate_rust_analyzer.py
@@ -104,7 +104,10 @@ def generate_crates(srctree, objtree, sysroot_src):
name = path.name.replace(".rs", "")
# Skip those that are not crate roots.
- if f"{name}.o" not in open(path.parent / "Makefile").read():
+ try:
+ if f"{name}.o" not in open(path.parent / "Makefile").read():
+ continue
+ except FileNotFoundError:
continue
logging.info("Adding %s", name)
diff --git a/scripts/is_rust_module.sh b/scripts/is_rust_module.sh
index 28b3831a7593..464761a7cf7f 100755
--- a/scripts/is_rust_module.sh
+++ b/scripts/is_rust_module.sh
@@ -13,4 +13,4 @@ set -e
#
# In the future, checking for the `.comment` section may be another
# option, see https://github.com/rust-lang/rust/pull/97550.
-${NM} "$*" | grep -qE '^[0-9a-fA-F]+ r _R[^[:space:]]+16___IS_RUST_MODULE[^[:space:]]*$'
+${NM} "$*" | grep -qE '^[0-9a-fA-F]+ [Rr] _R[^[:space:]]+16___IS_RUST_MODULE[^[:space:]]*$'
diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh
index 32620de473ad..902eb429b9db 100755
--- a/scripts/kconfig/merge_config.sh
+++ b/scripts/kconfig/merge_config.sh
@@ -145,7 +145,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do
NEW_VAL=$(grep -w $CFG $MERGE_FILE)
BUILTIN_FLAG=false
if [ "$BUILTIN" = "true" ] && [ "${NEW_VAL#CONFIG_*=}" = "m" ] && [ "${PREV_VAL#CONFIG_*=}" = "y" ]; then
- ${WARNOVVERIDE} Previous value: $PREV_VAL
+ ${WARNOVERRIDE} Previous value: $PREV_VAL
${WARNOVERRIDE} New value: $NEW_VAL
${WARNOVERRIDE} -y passed, will not demote y to m
${WARNOVERRIDE}
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index efff8078e395..9466b6a2abae 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1733,7 +1733,7 @@ static void extract_crcs_for_object(const char *object, struct module *mod)
if (!isdigit(*p))
continue; /* skip this line */
- crc = strtol(p, &p, 0);
+ crc = strtoul(p, &p, 0);
if (*p != '\n')
continue; /* skip this line */
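
The strtol() -> strtoul() switch matters on hosts where long is 32 bits: a
CRC above 0x7fffffff overflows strtol(), which clamps to LONG_MAX and stores
a wrong CRC. A minimal standalone sketch (illustrative only, not modpost
code) of the difference:

	/* Compile for an ILP32 target to see the clamping. */
	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		const char *line = "0xdeadbeef";	/* typical CRC32 value */
		char *end;

		/* strtol: overflows when long is 32 bits, returns LONG_MAX */
		long s = strtol(line, &end, 0);
		/* strtoul: parses the full 32-bit value correctly */
		unsigned long u = strtoul(line, &end, 0);

		printf("strtol: %lx strtoul: %lx\n", s, u);
		return 0;
	}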
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index c5ae57167d7c..7b23f52c70c5 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -162,6 +162,7 @@ install_linux_image_dbg () {
install_kernel_headers () {
pdir=$1
+ version=$2
rm -rf $pdir
@@ -229,7 +230,7 @@ do
linux-libc-dev)
install_libc_headers debian/linux-libc-dev;;
linux-headers-*)
- install_kernel_headers debian/linux-headers;;
+ install_kernel_headers debian/linux-headers ${package#linux-headers-};;
esac
done
diff --git a/scripts/package/gen-diff-patch b/scripts/package/gen-diff-patch
index f842ab50a780..8a98b7bb78a0 100755
--- a/scripts/package/gen-diff-patch
+++ b/scripts/package/gen-diff-patch
@@ -1,44 +1,36 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
-diff_patch="${1}"
-untracked_patch="${2}"
-srctree=$(dirname $0)/../..
+diff_patch=$1
-rm -f ${diff_patch} ${untracked_patch}
+mkdir -p "$(dirname "${diff_patch}")"
-if ! ${srctree}/scripts/check-git; then
- exit
-fi
-
-mkdir -p "$(dirname ${diff_patch})" "$(dirname ${untracked_patch})"
+git -C "${srctree:-.}" diff HEAD > "${diff_patch}"
-git -C "${srctree}" diff HEAD > "${diff_patch}"
-
-if [ ! -s "${diff_patch}" ]; then
- rm -f "${diff_patch}"
+if [ ! -s "${diff_patch}" ] ||
+ [ -z "$(git -C "${srctree:-.}" ls-files --other --exclude-standard | head -n1)" ]; then
exit
fi
-git -C ${srctree} status --porcelain --untracked-files=all |
-while read stat path
-do
- if [ "${stat}" = '??' ]; then
-
- if ! diff -u /dev/null "${srctree}/${path}" > .tmp_diff &&
- ! head -n1 .tmp_diff | grep -q "Binary files"; then
- {
- echo "--- /dev/null"
- echo "+++ linux/$path"
- cat .tmp_diff | tail -n +3
- } >> ${untracked_patch}
- fi
- fi
-done
-
-rm -f .tmp_diff
-
-if [ ! -s "${diff_patch}" ]; then
- rm -f "${diff_patch}"
- exit
-fi
+# The source tarball, which is generated by 'git archive', contains everything
+# you committed in the repository. If you have local diff ('git diff HEAD'),
+# it will go into ${diff_patch}. If untracked files remain, the resulting
+# source package may not be correct.
+#
+# Examples:
+# - You modified a source file to add #include "new-header.h"
+# but forgot to add new-header.h
+# - You modified a Makefile to add 'obj-$(CONFIG_FOO) += new-driver.o'
+# but you forgot to add new-driver.c
+#
+# You need to commit them, or at least stage them by 'git add'.
+#
+# This script does not take care of untracked files because doing so would
+# introduce additional complexity. Instead, print a warning message here if
+# untracked files are found.
+# If all untracked files are just garbage, you can ignore this warning.
+echo >&2 "============================ WARNING ============================"
+echo >&2 "Your working tree has diff from HEAD, and also untracked file(s)."
+echo >&2 "Please make sure you did 'git add' for all new files you need in"
+echo >&2 "the source package."
+echo >&2 "================================================================="
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index e20a2b5be9eb..a4c2c2276223 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -84,7 +84,66 @@ set_debarch() {
fi
}
+# Create debian/source/ if it is a source package build
+gen_source ()
+{
+ mkdir -p debian/source
+
+ echo "3.0 (quilt)" > debian/source/format
+
+ {
+ echo "diff-ignore"
+ echo "extend-diff-ignore = .*"
+ } > debian/source/local-options
+
+ # Add .config as a patch
+ mkdir -p debian/patches
+ {
+ echo "Subject: Add .config"
+ echo "Author: ${maintainer}"
+ echo
+ echo "--- /dev/null"
+ echo "+++ linux/.config"
+ diff -u /dev/null "${KCONFIG_CONFIG}" | tail -n +3
+ } > debian/patches/config.patch
+ echo config.patch > debian/patches/series
+
+ "${srctree}/scripts/package/gen-diff-patch" debian/patches/diff.patch
+ if [ -s debian/patches/diff.patch ]; then
+ sed -i "
+ 1iSubject: Add local diff
+ 1iAuthor: ${maintainer}
+ 1i
+ " debian/patches/diff.patch
+
+ echo diff.patch >> debian/patches/series
+ else
+ rm -f debian/patches/diff.patch
+ fi
+}
+
rm -rf debian
+mkdir debian
+
+email=${DEBEMAIL-$EMAIL}
+
+# use email string directly if it contains <email>
+if echo "${email}" | grep -q '<.*>'; then
+ maintainer=${email}
+else
+ # or construct the maintainer string
+ user=${KBUILD_BUILD_USER-$(id -nu)}
+ name=${DEBFULLNAME-${user}}
+ if [ -z "${email}" ]; then
+ buildhost=${KBUILD_BUILD_HOST-$(hostname -f 2>/dev/null || hostname)}
+ email="${user}@${buildhost}"
+ fi
+ maintainer="${name} <${email}>"
+fi
+
+if [ "$1" = --need-source ]; then
+ gen_source
+fi
# Some variables and settings used throughout the script
version=$KERNELRELEASE
@@ -104,22 +163,6 @@ fi
debarch=
set_debarch
-email=${DEBEMAIL-$EMAIL}
-
-# use email string directly if it contains <email>
-if echo $email | grep -q '<.*>'; then
- maintainer=$email
-else
- # or construct the maintainer string
- user=${KBUILD_BUILD_USER-$(id -nu)}
- name=${DEBFULLNAME-$user}
- if [ -z "$email" ]; then
- buildhost=${KBUILD_BUILD_HOST-$(hostname -f 2>/dev/null || hostname)}
- email="$user@$buildhost"
- fi
- maintainer="$name <$email>"
-fi
-
# Try to determine distribution
if [ -n "$KDEB_CHANGELOG_DIST" ]; then
distribution=$KDEB_CHANGELOG_DIST
@@ -132,34 +175,6 @@ else
echo >&2 "Install lsb-release or set \$KDEB_CHANGELOG_DIST explicitly"
fi
-mkdir -p debian/source/
-echo "3.0 (quilt)" > debian/source/format
-
-{
- echo "diff-ignore"
- echo "extend-diff-ignore = .*"
-} > debian/source/local-options
-
-# Add .config as a patch
-mkdir -p debian/patches
-{
- echo "Subject: Add .config"
- echo "Author: ${maintainer}"
- echo
- echo "--- /dev/null"
- echo "+++ linux/.config"
- diff -u /dev/null "${KCONFIG_CONFIG}" | tail -n +3
-} > debian/patches/config
-echo config > debian/patches/series
-
-$(dirname $0)/gen-diff-patch debian/patches/diff.patch debian/patches/untracked.patch
-if [ -f debian/patches/diff.patch ]; then
- echo diff.patch >> debian/patches/series
-fi
-if [ -f debian/patches/untracked.patch ]; then
- echo untracked.patch >> debian/patches/series
-fi
-
echo $debarch > debian/arch
extra_build_depends=", $(if_enabled_echo CONFIG_UNWINDER_ORC libelf-dev:native)"
extra_build_depends="$extra_build_depends, $(if_enabled_echo CONFIG_SYSTEM_TRUSTED_KEYRING libssl-dev:native)"
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index b7d1dc28a5d6..fc8ad3fbc0a9 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -19,8 +19,7 @@ else
mkdir -p rpmbuild/SOURCES
cp linux.tar.gz rpmbuild/SOURCES
cp "${KCONFIG_CONFIG}" rpmbuild/SOURCES/config
- $(dirname $0)/gen-diff-patch rpmbuild/SOURCES/diff.patch rpmbuild/SOURCES/untracked.patch
- touch rpmbuild/SOURCES/diff.patch rpmbuild/SOURCES/untracked.patch
+ "${srctree}/scripts/package/gen-diff-patch" rpmbuild/SOURCES/diff.patch
fi
if grep -q CONFIG_MODULES=y include/config/auto.conf; then
@@ -56,7 +55,6 @@ sed -e '/^DEL/d' -e 's/^\t*//' <<EOF
$S Source0: linux.tar.gz
$S Source1: config
$S Source2: diff.patch
-$S Source3: untracked.patch
Provides: $PROVIDES
$S BuildRequires: bc binutils bison dwarves
$S BuildRequires: (elfutils-libelf-devel or libelf-devel) flex
@@ -94,12 +92,7 @@ $S$M
$S %prep
$S %setup -q -n linux
$S cp %{SOURCE1} .config
-$S if [ -s %{SOURCE2} ]; then
-$S patch -p1 < %{SOURCE2}
-$S fi
-$S if [ -s %{SOURCE3} ]; then
-$S patch -p1 < %{SOURCE3}
-$S fi
+$S patch -p1 < %{SOURCE2}
$S
$S %build
$S $MAKE %{?_smp_mflags} KERNELRELEASE=$KERNELRELEASE KBUILD_BUILD_VERSION=%{release}
diff --git a/sound/firewire/tascam/tascam-stream.c b/sound/firewire/tascam/tascam-stream.c
index 53e094cc411f..dfe783d01d7d 100644
--- a/sound/firewire/tascam/tascam-stream.c
+++ b/sound/firewire/tascam/tascam-stream.c
@@ -490,7 +490,7 @@ int snd_tscm_stream_start_duplex(struct snd_tscm *tscm, unsigned int rate)
// packet is important for media clock recovery.
err = amdtp_domain_start(&tscm->domain, tx_init_skip_cycles, true, true);
if (err < 0)
- return err;
+ goto error;
if (!amdtp_domain_wait_ready(&tscm->domain, READY_TIMEOUT_MS)) {
err = -ETIMEDOUT;
diff --git a/sound/i2c/cs8427.c b/sound/i2c/cs8427.c
index 65012af6a36e..f58b14b49045 100644
--- a/sound/i2c/cs8427.c
+++ b/sound/i2c/cs8427.c
@@ -561,10 +561,13 @@ int snd_cs8427_iec958_active(struct snd_i2c_device *cs8427, int active)
if (snd_BUG_ON(!cs8427))
return -ENXIO;
chip = cs8427->private_data;
- if (active)
+ if (active) {
memcpy(chip->playback.pcm_status,
chip->playback.def_status, 24);
- chip->playback.pcm_ctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_INACTIVE;
+ chip->playback.pcm_ctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_INACTIVE;
+ } else {
+ chip->playback.pcm_ctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_INACTIVE;
+ }
snd_ctl_notify(cs8427->bus->card,
SNDRV_CTL_EVENT_MASK_VALUE | SNDRV_CTL_EVENT_MASK_INFO,
&chip->playback.pcm_ctl->id);
diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c
index 48af77ae8020..6ec394fb1846 100644
--- a/sound/pci/emu10k1/emupcm.c
+++ b/sound/pci/emu10k1/emupcm.c
@@ -1236,7 +1236,7 @@ static int snd_emu10k1_capture_mic_close(struct snd_pcm_substream *substream)
{
struct snd_emu10k1 *emu = snd_pcm_substream_chip(substream);
- emu->capture_interrupt = NULL;
+ emu->capture_mic_interrupt = NULL;
emu->pcm_capture_mic_substream = NULL;
return 0;
}
@@ -1344,7 +1344,7 @@ static int snd_emu10k1_capture_efx_close(struct snd_pcm_substream *substream)
{
struct snd_emu10k1 *emu = snd_pcm_substream_chip(substream);
- emu->capture_interrupt = NULL;
+ emu->capture_efx_interrupt = NULL;
emu->pcm_capture_efx_substream = NULL;
return 0;
}
@@ -1781,17 +1781,21 @@ int snd_emu10k1_pcm_efx(struct snd_emu10k1 *emu, int device)
struct snd_kcontrol *kctl;
int err;
- err = snd_pcm_new(emu->card, "emu10k1 efx", device, 8, 1, &pcm);
+ err = snd_pcm_new(emu->card, "emu10k1 efx", device, emu->audigy ? 0 : 8, 1, &pcm);
if (err < 0)
return err;
pcm->private_data = emu;
- snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &snd_emu10k1_fx8010_playback_ops);
+ if (!emu->audigy)
+ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &snd_emu10k1_fx8010_playback_ops);
snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &snd_emu10k1_capture_efx_ops);
pcm->info_flags = 0;
- strcpy(pcm->name, "Multichannel Capture/PT Playback");
+ if (emu->audigy)
+ strcpy(pcm->name, "Multichannel Capture");
+ else
+ strcpy(pcm->name, "Multichannel Capture/PT Playback");
emu->pcm_efx = pcm;
/* EFX capture - record the "FXBUS2" channels, by default we connect the EXTINs
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 9ea633fe9339..5c6980394dce 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -81,6 +81,7 @@ struct hdmi_spec_per_pin {
struct delayed_work work;
struct hdmi_pcm *pcm; /* pointer to spec->pcm_rec[n] dynamically*/
int pcm_idx; /* which pcm is attached. -1 means no pcm is attached */
+ int prev_pcm_idx; /* previously assigned pcm index */
int repoll_count;
bool setup; /* the stream has been set up by prepare callback */
bool silent_stream;
@@ -1380,9 +1381,17 @@ static void hdmi_attach_hda_pcm(struct hdmi_spec *spec,
/* pcm already be attached to the pin */
if (per_pin->pcm)
return;
+ /* try the previously used slot at first */
+ idx = per_pin->prev_pcm_idx;
+ if (idx >= 0) {
+ if (!test_bit(idx, &spec->pcm_bitmap))
+ goto found;
+ per_pin->prev_pcm_idx = -1; /* no longer valid, clear it */
+ }
idx = hdmi_find_pcm_slot(spec, per_pin);
if (idx == -EBUSY)
return;
+ found:
per_pin->pcm_idx = idx;
per_pin->pcm = get_hdmi_pcm(spec, idx);
set_bit(idx, &spec->pcm_bitmap);
@@ -1398,6 +1407,7 @@ static void hdmi_detach_hda_pcm(struct hdmi_spec *spec,
return;
idx = per_pin->pcm_idx;
per_pin->pcm_idx = -1;
+ per_pin->prev_pcm_idx = idx; /* remember the previous index */
per_pin->pcm = NULL;
if (idx >= 0 && idx < spec->pcm_used)
clear_bit(idx, &spec->pcm_bitmap);
@@ -1924,6 +1934,7 @@ static int hdmi_add_pin(struct hda_codec *codec, hda_nid_t pin_nid)
per_pin->pcm = NULL;
per_pin->pcm_idx = -1;
+ per_pin->prev_pcm_idx = -1;
per_pin->pin_nid = pin_nid;
per_pin->pin_nid_idx = spec->num_nids;
per_pin->dev_id = i;
@@ -4593,7 +4604,7 @@ HDA_CODEC_ENTRY(0x80862814, "DG1 HDMI", patch_i915_tgl_hdmi),
HDA_CODEC_ENTRY(0x80862815, "Alderlake HDMI", patch_i915_tgl_hdmi),
HDA_CODEC_ENTRY(0x80862816, "Rocketlake HDMI", patch_i915_tgl_hdmi),
HDA_CODEC_ENTRY(0x80862818, "Raptorlake HDMI", patch_i915_tgl_hdmi),
-HDA_CODEC_ENTRY(0x80862819, "DG2 HDMI", patch_i915_adlp_hdmi),
+HDA_CODEC_ENTRY(0x80862819, "DG2 HDMI", patch_i915_tgl_hdmi),
HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi),
HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI", patch_i915_icl_hdmi),
HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_adlp_hdmi),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a2706fd87b14..3b9f077a227f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2624,6 +2624,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
+ SND_PCI_QUIRK(0x1558, 0x3702, "Clevo X370SN[VW]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
@@ -6959,6 +6960,8 @@ enum {
ALC269_FIXUP_DELL_M101Z,
ALC269_FIXUP_SKU_IGNORE,
ALC269_FIXUP_ASUS_G73JW,
+ ALC269_FIXUP_ASUS_N7601ZM_PINS,
+ ALC269_FIXUP_ASUS_N7601ZM,
ALC269_FIXUP_LENOVO_EAPD,
ALC275_FIXUP_SONY_HWEQ,
ALC275_FIXUP_SONY_DISABLE_AAMIX,
@@ -7255,6 +7258,29 @@ static const struct hda_fixup alc269_fixups[] = {
{ }
}
},
+ [ALC269_FIXUP_ASUS_N7601ZM_PINS] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x03A11050 },
+ { 0x1a, 0x03A11C30 },
+ { 0x21, 0x03211420 },
+ { }
+ }
+ },
+ [ALC269_FIXUP_ASUS_N7601ZM] = {
+ .type = HDA_FIXUP_VERBS,
+ .v.verbs = (const struct hda_verb[]) {
+ {0x20, AC_VERB_SET_COEF_INDEX, 0x62},
+ {0x20, AC_VERB_SET_PROC_COEF, 0xa007},
+ {0x20, AC_VERB_SET_COEF_INDEX, 0x10},
+ {0x20, AC_VERB_SET_PROC_COEF, 0x8420},
+ {0x20, AC_VERB_SET_COEF_INDEX, 0x0f},
+ {0x20, AC_VERB_SET_PROC_COEF, 0x7774},
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_ASUS_N7601ZM_PINS,
+ },
[ALC269_FIXUP_LENOVO_EAPD] = {
.type = HDA_FIXUP_VERBS,
.v.verbs = (const struct hda_verb[]) {
@@ -9443,6 +9469,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8b66, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b7a, "HP", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b7d, "HP", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b87, "HP", ALC236_FIXUP_HP_GPIO_LED),
@@ -9464,6 +9491,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1271, "ASUS X430UN", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1290, "ASUS X441SA", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1043, 0x12a3, "Asus N7691ZM", ALC269_FIXUP_ASUS_N7601ZM),
SND_PCI_QUIRK(0x1043, 0x12af, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC),
@@ -9661,6 +9689,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index a794a01a68ca..61258b0aac8d 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1707,6 +1707,7 @@ static const struct snd_pci_quirk stac925x_fixup_tbl[] = {
};
static const struct hda_pintbl ref92hd73xx_pin_configs[] = {
+ // Port A-H
{ 0x0a, 0x02214030 },
{ 0x0b, 0x02a19040 },
{ 0x0c, 0x01a19020 },
@@ -1715,9 +1716,12 @@ static const struct hda_pintbl ref92hd73xx_pin_configs[] = {
{ 0x0f, 0x01014010 },
{ 0x10, 0x01014020 },
{ 0x11, 0x01014030 },
+ // CD in
{ 0x12, 0x02319040 },
+ // Digital Mic ins
{ 0x13, 0x90a000f0 },
{ 0x14, 0x90a000f0 },
+ // Digital outs
{ 0x22, 0x01452050 },
{ 0x23, 0x01452050 },
{}
@@ -1758,6 +1762,7 @@ static const struct hda_pintbl alienware_m17x_pin_configs[] = {
};
static const struct hda_pintbl intel_dg45id_pin_configs[] = {
+ // Analog outputs
{ 0x0a, 0x02214230 },
{ 0x0b, 0x02A19240 },
{ 0x0c, 0x01013214 },
@@ -1765,6 +1770,9 @@ static const struct hda_pintbl intel_dg45id_pin_configs[] = {
{ 0x0e, 0x01A19250 },
{ 0x0f, 0x01011212 },
{ 0x10, 0x01016211 },
+ // Digital output
+ { 0x22, 0x01451380 },
+ { 0x23, 0x40f000f0 },
{}
};
@@ -1955,6 +1963,8 @@ static const struct snd_pci_quirk stac92hd73xx_fixup_tbl[] = {
"DFI LanParty", STAC_92HD73XX_REF),
SND_PCI_QUIRK(PCI_VENDOR_ID_DFI, 0x3101,
"DFI LanParty", STAC_92HD73XX_REF),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x5001,
+ "Intel DP45SG", STAC_92HD73XX_INTEL),
SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x5002,
"Intel DG45ID", STAC_92HD73XX_INTEL),
SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x5003,
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index 4a69ce702360..0acdf0156f07 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -269,6 +269,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "8A43"),
}
},
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+ DMI_MATCH(DMI_BOARD_NAME, "8A22"),
+ }
+ },
{}
};
diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c
index 0068780fe0a7..1c1f211a8e2e 100644
--- a/sound/soc/codecs/da7213.c
+++ b/sound/soc/codecs/da7213.c
@@ -2022,6 +2022,11 @@ static int da7213_i2c_probe(struct i2c_client *i2c)
return ret;
}
+static void da7213_i2c_remove(struct i2c_client *i2c)
+{
+ pm_runtime_disable(&i2c->dev);
+}
+
static int __maybe_unused da7213_runtime_suspend(struct device *dev)
{
struct da7213_priv *da7213 = dev_get_drvdata(dev);
@@ -2065,6 +2070,7 @@ static struct i2c_driver da7213_i2c_driver = {
.pm = &da7213_pm,
},
.probe_new = da7213_i2c_probe,
+ .remove = da7213_i2c_remove,
.id_table = da7213_i2c_id,
};
diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
index ed4f7cdda04f..8b6b76029694 100644
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -436,23 +436,28 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_device *hdev,
return 0;
}
-static int hdac_hdmi_set_tdm_slot(struct snd_soc_dai *dai,
- unsigned int tx_mask, unsigned int rx_mask,
- int slots, int slot_width)
+static int hdac_hdmi_set_stream(struct snd_soc_dai *dai,
+ void *stream, int direction)
{
struct hdac_hdmi_priv *hdmi = snd_soc_dai_get_drvdata(dai);
struct hdac_device *hdev = hdmi->hdev;
struct hdac_hdmi_dai_port_map *dai_map;
struct hdac_hdmi_pcm *pcm;
+ struct hdac_stream *hstream;
- dev_dbg(&hdev->dev, "%s: strm_tag: %d\n", __func__, tx_mask);
+ if (!stream)
+ return -EINVAL;
+
+ hstream = (struct hdac_stream *)stream;
+
+ dev_dbg(&hdev->dev, "%s: strm_tag: %d\n", __func__, hstream->stream_tag);
dai_map = &hdmi->dai_map[dai->id];
pcm = hdac_hdmi_get_pcm_from_cvt(hdmi, dai_map->cvt);
if (pcm)
- pcm->stream_tag = (tx_mask << 4);
+ pcm->stream_tag = (hstream->stream_tag << 4);
return 0;
}
@@ -1544,7 +1549,7 @@ static const struct snd_soc_dai_ops hdmi_dai_ops = {
.startup = hdac_hdmi_pcm_open,
.shutdown = hdac_hdmi_pcm_close,
.hw_params = hdac_hdmi_set_hw_params,
- .set_tdm_slot = hdac_hdmi_set_tdm_slot,
+ .set_stream = hdac_hdmi_set_stream,
};
/*
diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c
index a73a7d7a1c0a..faba4237bd3d 100644
--- a/sound/soc/codecs/lpass-rx-macro.c
+++ b/sound/soc/codecs/lpass-rx-macro.c
@@ -3670,9 +3670,9 @@ static int __maybe_unused rx_macro_runtime_suspend(struct device *dev)
regcache_cache_only(rx->regmap, true);
regcache_mark_dirty(rx->regmap);
- clk_disable_unprepare(rx->mclk);
- clk_disable_unprepare(rx->npl);
clk_disable_unprepare(rx->fsgen);
+ clk_disable_unprepare(rx->npl);
+ clk_disable_unprepare(rx->mclk);
return 0;
}
diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c
index 473d3cd39554..589c490a8c48 100644
--- a/sound/soc/codecs/lpass-tx-macro.c
+++ b/sound/soc/codecs/lpass-tx-macro.c
@@ -2098,9 +2098,9 @@ static int __maybe_unused tx_macro_runtime_suspend(struct device *dev)
regcache_cache_only(tx->regmap, true);
regcache_mark_dirty(tx->regmap);
- clk_disable_unprepare(tx->mclk);
- clk_disable_unprepare(tx->npl);
clk_disable_unprepare(tx->fsgen);
+ clk_disable_unprepare(tx->npl);
+ clk_disable_unprepare(tx->mclk);
return 0;
}
diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c
index ba7480f3831e..3f6f1bdd4e03 100644
--- a/sound/soc/codecs/lpass-wsa-macro.c
+++ b/sound/soc/codecs/lpass-wsa-macro.c
@@ -2506,9 +2506,9 @@ static int __maybe_unused wsa_macro_runtime_suspend(struct device *dev)
regcache_cache_only(wsa->regmap, true);
regcache_mark_dirty(wsa->regmap);
- clk_disable_unprepare(wsa->mclk);
- clk_disable_unprepare(wsa->npl);
clk_disable_unprepare(wsa->fsgen);
+ clk_disable_unprepare(wsa->npl);
+ clk_disable_unprepare(wsa->mclk);
return 0;
}
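
All three lpass macro drivers (rx, tx, wsa) get the same fix: the clocks are
now torn down in the reverse of the order they were enabled, since fsgen is
derived from npl, which is in turn derived from mclk. A hedged sketch of the
general pattern (struct and field layout are hypothetical stand-ins):

	#include <linux/clk.h>

	struct example { struct clk *mclk, *npl, *fsgen; };

	static int example_resume(struct example *p)
	{
		int ret;

		ret = clk_prepare_enable(p->mclk);
		if (ret)
			return ret;
		ret = clk_prepare_enable(p->npl);	/* child of mclk */
		if (ret)
			goto err_mclk;
		ret = clk_prepare_enable(p->fsgen);	/* child of npl */
		if (ret)
			goto err_npl;
		return 0;

	err_npl:
		clk_disable_unprepare(p->npl);
	err_mclk:
		clk_disable_unprepare(p->mclk);
		return ret;
	}

	static void example_suspend(struct example *p)
	{
		/* disable children before their parents */
		clk_disable_unprepare(p->fsgen);
		clk_disable_unprepare(p->npl);
		clk_disable_unprepare(p->mclk);
	}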
diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index 79e0039c79a3..5a12940ef907 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -533,6 +533,18 @@ static int byt_rt5640_aif1_hw_params(struct snd_pcm_substream *substream,
/* Please keep this list alphabetically sorted */
static const struct dmi_system_id byt_rt5640_quirk_table[] = {
+ { /* Acer Iconia One 7 B1-750 */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "VESPA2"),
+ },
+ .driver_data = (void *)(BYT_RT5640_DMIC1_MAP |
+ BYT_RT5640_JD_SRC_JD1_IN4P |
+ BYT_RT5640_OVCD_TH_1500UA |
+ BYT_RT5640_OVCD_SF_0P75 |
+ BYT_RT5640_SSP0_AIF1 |
+ BYT_RT5640_MCLK_EN),
+ },
{ /* Acer Iconia Tab 8 W1-810 */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"),
diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
index d2ed807abde9..767fa89d0870 100644
--- a/sound/soc/intel/boards/sof_sdw.c
+++ b/sound/soc/intel/boards/sof_sdw.c
@@ -213,6 +213,17 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = {
SOF_SDW_PCH_DMIC |
RT711_JD1),
},
+ {
+ /* NUC15 'Rooks County' LAPRC510 and LAPRC710 SKUs */
+ .callback = sof_sdw_quirk_cb,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Intel(R) Client Systems"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "LAPRC"),
+ },
+ .driver_data = (void *)(SOF_SDW_TGL_HDMI |
+ SOF_SDW_PCH_DMIC |
+ RT711_JD2_100K),
+ },
/* TigerLake-SDCA devices */
{
.callback = sof_sdw_quirk_cb,
diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c
index 28dd2046e4ac..d8c80041388a 100644
--- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c
@@ -354,6 +354,20 @@ static const struct snd_soc_acpi_link_adr adl_sdw_rt711_link0_rt1316_link3[] = {
{}
};
+static const struct snd_soc_acpi_link_adr adl_sdw_rt711_link0_rt1316_link2[] = {
+ {
+ .mask = BIT(0),
+ .num_adr = ARRAY_SIZE(rt711_sdca_0_adr),
+ .adr_d = rt711_sdca_0_adr,
+ },
+ {
+ .mask = BIT(2),
+ .num_adr = ARRAY_SIZE(rt1316_2_single_adr),
+ .adr_d = rt1316_2_single_adr,
+ },
+ {}
+};
+
static const struct snd_soc_acpi_adr_device mx8373_2_adr[] = {
{
.adr = 0x000223019F837300ull,
@@ -625,6 +639,12 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_sdw_machines[] = {
.sof_tplg_filename = "sof-adl-rt711-l0-rt1316-l3.tplg",
},
{
+ .link_mask = 0x5, /* 2 active links required */
+ .links = adl_sdw_rt711_link0_rt1316_link2,
+ .drv_name = "sof_sdw",
+ .sof_tplg_filename = "sof-adl-rt711-l0-rt1316-l2.tplg",
+ },
+ {
.link_mask = 0x1, /* link0 required */
.links = adl_rvp,
.drv_name = "sof_sdw",
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 5eb056b942ce..7958c9defd49 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1661,10 +1661,14 @@ static void dpcm_runtime_setup_fe(struct snd_pcm_substream *substream)
struct snd_pcm_hardware *hw = &runtime->hw;
struct snd_soc_dai *dai;
int stream = substream->stream;
+ u64 formats = hw->formats;
int i;
soc_pcm_hw_init(hw);
+ if (formats)
+ hw->formats &= formats;
+
for_each_rtd_cpu_dais(fe, i, dai) {
struct snd_soc_pcm_stream *cpu_stream;
diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index a623707c8ffc..669b99a4f76e 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -1805,6 +1805,14 @@ static int sof_ipc4_route_setup(struct snd_sof_dev *sdev, struct snd_sof_route *
u32 header, extension;
int ret;
+ if (!src_fw_module || !sink_fw_module) {
+ /* The NULL module will print as "(efault)" */
+ dev_err(sdev->dev, "source %s or sink %s widget weren't set up properly\n",
+ src_fw_module->man4_module_entry.name,
+ sink_fw_module->man4_module_entry.name);
+ return -ENODEV;
+ }
+
sroute->src_queue_id = sof_ipc4_get_queue_id(src_widget, sink_widget,
SOF_PIN_TYPE_SOURCE);
if (sroute->src_queue_id < 0) {
diff --git a/sound/soc/sof/ipc4.c b/sound/soc/sof/ipc4.c
index 8ede4b952997..246b56d24a6f 100644
--- a/sound/soc/sof/ipc4.c
+++ b/sound/soc/sof/ipc4.c
@@ -405,6 +405,9 @@ static int sof_ipc4_tx_msg(struct snd_sof_dev *sdev, void *msg_data, size_t msg_
static int sof_ipc4_set_get_data(struct snd_sof_dev *sdev, void *data,
size_t payload_bytes, bool set)
{
+ const struct sof_dsp_power_state target_state = {
+ .state = SOF_DSP_PM_D0,
+ };
size_t payload_limit = sdev->ipc->max_payload_size;
struct sof_ipc4_msg *ipc4_msg = data;
struct sof_ipc4_msg tx = {{ 0 }};
@@ -435,6 +438,11 @@ static int sof_ipc4_set_get_data(struct snd_sof_dev *sdev, void *data,
tx.extension |= SOF_IPC4_MOD_EXT_MSG_FIRST_BLOCK(1);
+ /* ensure the DSP is in D0i0 before sending IPC */
+ ret = snd_sof_dsp_set_power_state(sdev, &target_state);
+ if (ret < 0)
+ return ret;
+
/* Serialise IPC TX */
mutex_lock(&sdev->ipc->tx_mutex);
diff --git a/tools/Makefile b/tools/Makefile
index e497875fc7e3..37e9f6804832 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -39,7 +39,7 @@ help:
@echo ' turbostat - Intel CPU idle stats and freq reporting tool'
@echo ' usb - USB testing tools'
@echo ' virtio - vhost test module'
- @echo ' vm - misc vm tools'
+ @echo ' mm - misc mm tools'
@echo ' wmi - WMI interface examples'
@echo ' x86_energy_perf_policy - Intel energy policy tool'
@echo ''
@@ -69,7 +69,7 @@ acpi: FORCE
cpupower: FORCE
$(call descend,power/$@)
-cgroup counter firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
+cgroup counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
$(call descend,$@)
bpf/%: FORCE
@@ -118,7 +118,7 @@ kvm_stat: FORCE
all: acpi cgroup counter cpupower gpio hv firewire \
perf selftests bootconfig spi turbostat usb \
- virtio vm bpf x86_energy_perf_policy \
+ virtio mm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
pci debugging tracing thermal thermometer thermal-engine
@@ -128,7 +128,7 @@ acpi_install:
cpupower_install:
$(call descend,power/$(@:_install=),install)
-cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
+cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
$(call descend,$(@:_install=),install)
selftests_install:
@@ -158,7 +158,7 @@ kvm_stat_install:
install: acpi_install cgroup_install counter_install cpupower_install gpio_install \
hv_install firewire_install iio_install \
perf_install selftests_install turbostat_install usb_install \
- virtio_install vm_install bpf_install x86_energy_perf_policy_install \
+ virtio_install mm_install bpf_install x86_energy_perf_policy_install \
tmon_install freefall_install objtool_install kvm_stat_install \
wmi_install pci_install debugging_install intel-speed-select_install \
tracing_install thermometer_install thermal-engine_install
@@ -169,7 +169,7 @@ acpi_clean:
cpupower_clean:
$(call descend,power/cpupower,clean)
-cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
+cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
$(call descend,$(@:_clean=),clean)
libapi_clean:
@@ -211,7 +211,7 @@ build_clean:
clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_clean \
perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
- vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
+ mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean \
gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \
intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean
diff --git a/tools/arch/loongarch/include/uapi/asm/bitsperlong.h b/tools/arch/loongarch/include/uapi/asm/bitsperlong.h
index d4e32b3d4843..00b4ba1e5cdf 100644
--- a/tools/arch/loongarch/include/uapi/asm/bitsperlong.h
+++ b/tools/arch/loongarch/include/uapi/asm/bitsperlong.h
@@ -2,7 +2,7 @@
#ifndef __ASM_LOONGARCH_BITSPERLONG_H
#define __ASM_LOONGARCH_BITSPERLONG_H
-#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8)
+#define __BITS_PER_LONG (__SIZEOF_LONG__ * 8)
#include <asm-generic/bitsperlong.h>
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 14de72544995..9443c524bb76 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -28,8 +28,8 @@ PROG COMMANDS
=============
| **bpftool** **prog** { **show** | **list** } [*PROG*]
-| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}]
-| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}]
+| **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }]
+| **bpftool** **prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }]
| **bpftool** **prog pin** *PROG* *FILE*
| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**]
| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
@@ -88,7 +88,7 @@ DESCRIPTION
programs. On such kernels bpftool will automatically emit this
information as well.
- **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }]
+ **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }]
Dump eBPF instructions of the programs from the kernel. By
default, eBPF will be disassembled and printed to standard
output in human-readable format. In this case, **opcodes**
@@ -106,11 +106,10 @@ DESCRIPTION
CFG in DOT format, on standard output.
If the programs have line_info available, the source line will
- be displayed by default. If **linum** is specified,
- the filename, line number and line column will also be
- displayed on top of the source line.
+ be displayed. If **linum** is specified, the filename, line
+ number and line column will also be displayed.
- **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }]
+ **bpftool prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }]
Dump jited image (host machine code) of the program.
If *FILE* is specified image will be written to a file,
@@ -120,9 +119,8 @@ DESCRIPTION
**opcodes** controls if raw opcodes will be printed.
If the prog has line_info available, the source line will
- be displayed by default. If **linum** is specified,
- the filename, line number and line column will also be
- displayed on top of the source line.
+ be displayed. If **linum** is specified, the filename, line
+ number and line column will also be displayed.
**bpftool prog pin** *PROG* *FILE*
Pin program *PROG* as *FILE*.
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 35f26f7c1124..e7234d1a5306 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -255,20 +255,23 @@ _bpftool_map_update_get_name()
_bpftool()
{
- local cur prev words objword
+ local cur prev words objword json=0
_init_completion || return
# Deal with options
if [[ ${words[cword]} == -* ]]; then
local c='--version --json --pretty --bpffs --mapcompat --debug \
- --use-loader --base-btf'
+ --use-loader --base-btf'
COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
return 0
fi
+ if _bpftool_search_list -j --json -p --pretty; then
+ json=1
+ fi
# Deal with simplest keywords
case $prev in
- help|hex|opcodes|visual|linum)
+ help|hex)
return 0
;;
tag)
@@ -366,13 +369,16 @@ _bpftool()
return 0
;;
*)
- _bpftool_once_attr 'file'
- if _bpftool_search_list 'xlated'; then
- COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \
- "$cur" ) )
- else
- COMPREPLY+=( $( compgen -W 'opcodes linum' -- \
- "$cur" ) )
+ # "file" is not compatible with other keywords here
+ if _bpftool_search_list 'file'; then
+ return 0
+ fi
+ if ! _bpftool_search_list 'linum opcodes visual'; then
+ _bpftool_once_attr 'file'
+ fi
+ _bpftool_once_attr 'linum opcodes'
+ if _bpftool_search_list 'xlated' && [[ "$json" == 0 ]]; then
+ _bpftool_once_attr 'visual'
fi
return 0
;;
@@ -502,10 +508,7 @@ _bpftool()
;;
*)
COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
- _bpftool_once_attr 'type'
- _bpftool_once_attr 'dev'
- _bpftool_once_attr 'pinmaps'
- _bpftool_once_attr 'autoattach'
+ _bpftool_once_attr 'type dev pinmaps autoattach'
return 0
;;
esac
@@ -730,16 +733,10 @@ _bpftool()
esac
;;
*)
- _bpftool_once_attr 'type'
- _bpftool_once_attr 'key'
- _bpftool_once_attr 'value'
- _bpftool_once_attr 'entries'
- _bpftool_once_attr 'name'
- _bpftool_once_attr 'flags'
+ _bpftool_once_attr 'type key value entries name flags dev'
if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then
_bpftool_once_attr 'inner_map'
fi
- _bpftool_once_attr 'dev'
return 0
;;
esac
@@ -880,8 +877,7 @@ _bpftool()
return 0
;;
*)
- _bpftool_once_attr 'cpu'
- _bpftool_once_attr 'index'
+ _bpftool_once_attr 'cpu index'
return 0
;;
esac
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index e7f6ec3a8f35..6c5e0e82da22 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -821,3 +821,86 @@ void btf_dump_linfo_json(const struct btf *btf,
BPF_LINE_INFO_LINE_COL(linfo->line_col));
}
}
+
+static void dotlabel_puts(const char *s)
+{
+ for (; *s; ++s) {
+ switch (*s) {
+ case '\\':
+ case '"':
+ case '{':
+ case '}':
+ case '<':
+ case '>':
+ case '|':
+ case ' ':
+ putchar('\\');
+ __fallthrough;
+ default:
+ putchar(*s);
+ }
+ }
+}
+
+static const char *shorten_path(const char *path)
+{
+ const unsigned int MAX_PATH_LEN = 32;
+ size_t len = strlen(path);
+ const char *shortpath;
+
+ if (len <= MAX_PATH_LEN)
+ return path;
+
+ /* Search for last '/' under the MAX_PATH_LEN limit */
+ shortpath = strchr(path + len - MAX_PATH_LEN, '/');
+ if (shortpath) {
+ if (shortpath < path + strlen("..."))
+ /* We removed a very short prefix, e.g. "/w", and we'll
+ * make the path longer by prefixing with the ellipsis.
+ * Not worth it, keep initial path.
+ */
+ return path;
+ return shortpath;
+ }
+
+ /* File base name length is > MAX_PATH_LEN, search for last '/' */
+ shortpath = strrchr(path, '/');
+ if (shortpath)
+ return shortpath;
+
+ return path;
+}
+
+void btf_dump_linfo_dotlabel(const struct btf *btf,
+ const struct bpf_line_info *linfo, bool linum)
+{
+ const char *line = btf__name_by_offset(btf, linfo->line_off);
+
+ if (!line || !strlen(line))
+ return;
+ line = ltrim(line);
+
+ if (linum) {
+ const char *file = btf__name_by_offset(btf, linfo->file_name_off);
+ const char *shortfile;
+
+ /* More forgiving on file because linum option is
+ * expected to provide more info than the already
+ * available src line.
+ */
+ if (!file)
+ shortfile = "";
+ else
+ shortfile = shorten_path(file);
+
+ printf("; [%s", shortfile > file ? "..." : "");
+ dotlabel_puts(shortfile);
+ printf(" line:%u col:%u]\\l\\\n",
+ BPF_LINE_INFO_LINE_NUM(linfo->line_col),
+ BPF_LINE_INFO_LINE_COL(linfo->line_col));
+ }
+
+ printf("; ");
+ dotlabel_puts(line);
+ printf("\\l\\\n");
+}
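
To see what the new shorten_path() helper does to long file names in DOT
labels, here is a standalone copy of its logic plus a small driver
(illustrative only; the example path is assumed):

	#include <stdio.h>
	#include <string.h>

	static const char *shorten_path(const char *path)
	{
		const unsigned int MAX_PATH_LEN = 32;
		size_t len = strlen(path);
		const char *shortpath;

		if (len <= MAX_PATH_LEN)
			return path;
		/* cut at the last '/' that keeps the tail within the limit */
		shortpath = strchr(path + len - MAX_PATH_LEN, '/');
		if (shortpath)
			return shortpath < path + strlen("...") ? path : shortpath;
		shortpath = strrchr(path, '/');
		return shortpath ? shortpath : path;
	}

	int main(void)
	{
		const char *p = "/home/user/linux/kernel/bpf/verifier.c";
		const char *s = shorten_path(p);

		/* prints: .../linux/kernel/bpf/verifier.c */
		printf("%s%s\n", s > p ? "..." : "", s);
		return 0;
	}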
diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
index 1951219a9af7..eec437cca2ea 100644
--- a/tools/bpf/bpftool/cfg.c
+++ b/tools/bpf/bpftool/cfg.c
@@ -380,7 +380,9 @@ static void cfg_destroy(struct cfg *cfg)
}
}
-static void draw_bb_node(struct func_node *func, struct bb_node *bb)
+static void
+draw_bb_node(struct func_node *func, struct bb_node *bb, struct dump_data *dd,
+ bool opcodes, bool linum)
{
const char *shape;
@@ -398,13 +400,10 @@ static void draw_bb_node(struct func_node *func, struct bb_node *bb)
printf("EXIT");
} else {
unsigned int start_idx;
- struct dump_data dd = {};
-
- printf("{");
- kernel_syms_load(&dd);
+ printf("{\\\n");
start_idx = bb->head - func->start;
- dump_xlated_for_graph(&dd, bb->head, bb->tail, start_idx);
- kernel_syms_destroy(&dd);
+ dump_xlated_for_graph(dd, bb->head, bb->tail, start_idx,
+ opcodes, linum);
printf("}");
}
@@ -430,12 +429,14 @@ static void draw_bb_succ_edges(struct func_node *func, struct bb_node *bb)
}
}
-static void func_output_bb_def(struct func_node *func)
+static void
+func_output_bb_def(struct func_node *func, struct dump_data *dd,
+ bool opcodes, bool linum)
{
struct bb_node *bb;
list_for_each_entry(bb, &func->bbs, l) {
- draw_bb_node(func, bb);
+ draw_bb_node(func, bb, dd, opcodes, linum);
}
}
@@ -455,7 +456,8 @@ static void func_output_edges(struct func_node *func)
func_idx, ENTRY_BLOCK_INDEX, func_idx, EXIT_BLOCK_INDEX);
}
-static void cfg_dump(struct cfg *cfg)
+static void
+cfg_dump(struct cfg *cfg, struct dump_data *dd, bool opcodes, bool linum)
{
struct func_node *func;
@@ -463,14 +465,15 @@ static void cfg_dump(struct cfg *cfg)
list_for_each_entry(func, &cfg->funcs, l) {
printf("subgraph \"cluster_%d\" {\n\tstyle=\"dashed\";\n\tcolor=\"black\";\n\tlabel=\"func_%d ()\";\n",
func->idx, func->idx);
- func_output_bb_def(func);
+ func_output_bb_def(func, dd, opcodes, linum);
func_output_edges(func);
printf("}\n");
}
printf("}\n");
}
-void dump_xlated_cfg(void *buf, unsigned int len)
+void dump_xlated_cfg(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes, bool linum)
{
struct bpf_insn *insn = buf;
struct cfg cfg;
@@ -479,7 +482,7 @@ void dump_xlated_cfg(void *buf, unsigned int len)
if (cfg_build(&cfg, insn, len))
return;
- cfg_dump(&cfg);
+ cfg_dump(&cfg, dd, opcodes, linum);
cfg_destroy(&cfg);
}
diff --git a/tools/bpf/bpftool/cfg.h b/tools/bpf/bpftool/cfg.h
index e144257ea6d2..b3793f4e1783 100644
--- a/tools/bpf/bpftool/cfg.h
+++ b/tools/bpf/bpftool/cfg.h
@@ -4,6 +4,9 @@
#ifndef __BPF_TOOL_CFG_H
#define __BPF_TOOL_CFG_H
-void dump_xlated_cfg(void *buf, unsigned int len);
+#include "xlated_dumper.h"
+
+void dump_xlated_cfg(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes, bool linum);
#endif /* __BPF_TOOL_CFG_H */
diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c
index bca5dd0a59e3..be379613d118 100644
--- a/tools/bpf/bpftool/json_writer.c
+++ b/tools/bpf/bpftool/json_writer.c
@@ -75,7 +75,7 @@ static void jsonw_puts(json_writer_t *self, const char *str)
fputs("\\b", self->out);
break;
case '\\':
- fputs("\\n", self->out);
+ fputs("\\\\", self->out);
break;
case '"':
fputs("\\\"", self->out);
diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h
index 8ace65cdb92f..5aaffd3b837b 100644
--- a/tools/bpf/bpftool/json_writer.h
+++ b/tools/bpf/bpftool/json_writer.h
@@ -14,6 +14,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdarg.h>
+#include <stdio.h>
#include <linux/compiler.h>
/* Opaque class structure */
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 0ef373cef4c7..00d11ca6d3f2 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -229,6 +229,8 @@ void btf_dump_linfo_plain(const struct btf *btf,
const char *prefix, bool linum);
void btf_dump_linfo_json(const struct btf *btf,
const struct bpf_line_info *linfo, bool linum);
+void btf_dump_linfo_dotlabel(const struct btf *btf,
+ const struct bpf_line_info *linfo, bool linum);
struct nlattr;
struct ifinfomsg;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index afbe3ec342c8..e5b613a7974c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -840,11 +840,6 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
false))
goto exit_free;
}
- } else if (visual) {
- if (json_output)
- jsonw_null(json_wtr);
- else
- dump_xlated_cfg(buf, member_len);
} else {
kernel_syms_load(&dd);
dd.nr_jited_ksyms = info->nr_jited_ksyms;
@@ -855,11 +850,11 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
dd.prog_linfo = prog_linfo;
if (json_output)
- dump_xlated_json(&dd, buf, member_len, opcodes,
- linum);
+ dump_xlated_json(&dd, buf, member_len, opcodes, linum);
+ else if (visual)
+ dump_xlated_cfg(&dd, buf, member_len, opcodes, linum);
else
- dump_xlated_plain(&dd, buf, member_len, opcodes,
- linum);
+ dump_xlated_plain(&dd, buf, member_len, opcodes, linum);
kernel_syms_destroy(&dd);
}
@@ -910,37 +905,46 @@ static int do_dump(int argc, char **argv)
if (nb_fds < 1)
goto exit_free;
- if (is_prefix(*argv, "file")) {
- NEXT_ARG();
- if (!argc) {
- p_err("expected file path");
- goto exit_close;
- }
- if (nb_fds > 1) {
- p_err("several programs matched");
- goto exit_close;
- }
+ while (argc) {
+ if (is_prefix(*argv, "file")) {
+ NEXT_ARG();
+ if (!argc) {
+ p_err("expected file path");
+ goto exit_close;
+ }
+ if (nb_fds > 1) {
+ p_err("several programs matched");
+ goto exit_close;
+ }
- filepath = *argv;
- NEXT_ARG();
- } else if (is_prefix(*argv, "opcodes")) {
- opcodes = true;
- NEXT_ARG();
- } else if (is_prefix(*argv, "visual")) {
- if (nb_fds > 1) {
- p_err("several programs matched");
+ filepath = *argv;
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "opcodes")) {
+ opcodes = true;
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "visual")) {
+ if (nb_fds > 1) {
+ p_err("several programs matched");
+ goto exit_close;
+ }
+
+ visual = true;
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "linum")) {
+ linum = true;
+ NEXT_ARG();
+ } else {
+ usage();
goto exit_close;
}
-
- visual = true;
- NEXT_ARG();
- } else if (is_prefix(*argv, "linum")) {
- linum = true;
- NEXT_ARG();
}
- if (argc) {
- usage();
+ if (filepath && (opcodes || visual || linum)) {
+ p_err("'file' is not compatible with 'opcodes', 'visual', or 'linum'");
+ goto exit_close;
+ }
+ if (json_output && visual) {
+ p_err("'visual' is not compatible with JSON output");
goto exit_close;
}
@@ -1681,7 +1685,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
}
bpf_program__set_ifindex(pos, ifindex);
- bpf_program__set_type(pos, prog_type);
+ if (bpf_program__type(pos) != prog_type)
+ bpf_program__set_type(pos, prog_type);
bpf_program__set_expected_attach_type(pos, expected_attach_type);
}
@@ -2420,8 +2425,8 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %1$s %2$s { show | list } [PROG]\n"
- " %1$s %2$s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n"
- " %1$s %2$s dump jited PROG [{ file FILE | opcodes | linum }]\n"
+ " %1$s %2$s dump xlated PROG [{ file FILE | [opcodes] [linum] [visual] }]\n"
+ " %1$s %2$s dump jited PROG [{ file FILE | [opcodes] [linum] }]\n"
" %1$s %2$s pin PROG FILE\n"
" %1$s %2$s { load | loadall } OBJ PATH \\\n"
" [type TYPE] [dev NAME] \\\n"
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 6fe3134ae45d..da608e10c843 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -361,7 +361,8 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
}
void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
- unsigned int start_idx)
+ unsigned int start_idx,
+ bool opcodes, bool linum)
{
const struct bpf_insn_cbs cbs = {
.cb_print = print_insn_for_graph,
@@ -369,14 +370,61 @@ void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
.cb_imm = print_imm,
.private_data = dd,
};
+ const struct bpf_prog_linfo *prog_linfo = dd->prog_linfo;
+ const struct bpf_line_info *last_linfo = NULL;
+ struct bpf_func_info *record = dd->func_info;
struct bpf_insn *insn_start = buf_start;
struct bpf_insn *insn_end = buf_end;
struct bpf_insn *cur = insn_start;
+ struct btf *btf = dd->btf;
+ bool double_insn = false;
+ char func_sig[1024];
for (; cur <= insn_end; cur++) {
- printf("% 4d: ", (int)(cur - insn_start + start_idx));
+ unsigned int insn_off;
+
+ if (double_insn) {
+ double_insn = false;
+ continue;
+ }
+ double_insn = cur->code == (BPF_LD | BPF_IMM | BPF_DW);
+
+ insn_off = (unsigned int)(cur - insn_start + start_idx);
+ if (btf && record) {
+ if (record->insn_off == insn_off) {
+ btf_dumper_type_only(btf, record->type_id,
+ func_sig,
+ sizeof(func_sig));
+ if (func_sig[0] != '\0')
+ printf("; %s:\\l\\\n", func_sig);
+ record = (void *)record + dd->finfo_rec_size;
+ }
+ }
+
+ if (prog_linfo) {
+ const struct bpf_line_info *linfo;
+
+ linfo = bpf_prog_linfo__lfind(prog_linfo, insn_off, 0);
+ if (linfo && linfo != last_linfo) {
+ btf_dump_linfo_dotlabel(btf, linfo, linum);
+ last_linfo = linfo;
+ }
+ }
+
+ printf("%d: ", insn_off);
print_bpf_insn(&cbs, cur, true);
+
+ if (opcodes) {
+ printf("\\ \\ \\ \\ ");
+ fprint_hex(stdout, cur, 8, " ");
+ if (double_insn && cur <= insn_end - 1) {
+ printf(" ");
+ fprint_hex(stdout, cur + 1, 8, " ");
+ }
+ printf("\\l\\\n");
+ }
+
if (cur != insn_end)
- printf(" | ");
+ printf("| ");
}
}
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index 54847e174273..9a946377b0e6 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -34,6 +34,7 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
bool opcodes, bool linum);
void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end,
- unsigned int start_index);
+ unsigned int start_index,
+ bool opcodes, bool linum);
#endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 976b194eb775..3823100b7934 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1033,6 +1033,7 @@ enum bpf_attach_type {
BPF_PERF_EVENT,
BPF_TRACE_KPROBE_MULTI,
BPF_LSM_CGROUP,
+ BPF_STRUCT_OPS,
__MAX_BPF_ATTACH_TYPE
};
@@ -1108,7 +1109,7 @@ enum bpf_link_type {
*/
#define BPF_F_STRICT_ALIGNMENT (1U << 0)
-/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the
+/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will allow any alignment whatsoever. On platforms
* with strict alignment requirements for loads ands stores (such
* as sparc and mips) the verifier validates that all loads and
@@ -1266,6 +1267,9 @@ enum {
/* Create a map that is suitable to be an inner map with dynamic max entries */
BPF_F_INNER_MAP = (1U << 12),
+
+/* Create a map that will be registered/unregistered by the backing bpf_link */
+ BPF_F_LINK = (1U << 13),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1403,6 +1407,11 @@ union bpf_attr {
__aligned_u64 fd_array; /* array of FDs */
__aligned_u64 core_relos;
__u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */
+ /* output: actual total log contents size (including terminating zero).
+ * It can be larger than the original log_size (if the log was
+ * truncated), or smaller (if the log buffer wasn't filled completely).
+ */
+ __u32 log_true_size;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1488,6 +1497,11 @@ union bpf_attr {
__u32 btf_size;
__u32 btf_log_size;
__u32 btf_log_level;
+ /* output: actual total log contents size (including terminating zero).
+ * It can be larger than the original log_size (if the log was
+ * truncated), or smaller (if the log buffer wasn't filled completely).
+ */
+ __u32 btf_log_true_size;
};
struct {
@@ -1507,7 +1521,10 @@ union bpf_attr {
} task_fd_query;
struct { /* struct used by BPF_LINK_CREATE command */
- __u32 prog_fd; /* eBPF program to attach */
+ union {
+ __u32 prog_fd; /* eBPF program to attach */
+ __u32 map_fd; /* struct_ops to attach */
+ };
union {
__u32 target_fd; /* object to attach to */
__u32 target_ifindex; /* target ifindex */
@@ -1548,12 +1565,23 @@ union bpf_attr {
struct { /* struct used by BPF_LINK_UPDATE command */
__u32 link_fd; /* link fd */
- /* new program fd to update link with */
- __u32 new_prog_fd;
+ union {
+ /* new program fd to update link with */
+ __u32 new_prog_fd;
+ /* new struct_ops map fd to update link with */
+ __u32 new_map_fd;
+ };
__u32 flags; /* extra flags */
- /* expected link's program fd; is specified only if
- * BPF_F_REPLACE flag is set in flags */
- __u32 old_prog_fd;
+ union {
+ /* expected link's program fd; is specified only if
+ * BPF_F_REPLACE flag is set in flags.
+ */
+ __u32 old_prog_fd;
+ /* expected link's map fd; is specified only
+ * if BPF_F_REPLACE flag is set.
+ */
+ __u32 old_map_fd;
+ };
} link_update;
struct {
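
With these unions in place, BPF_LINK_UPDATE can atomically swap either the
program behind a link or, for struct_ops links, the map behind it. A hedged
raw-syscall sketch of the struct_ops case (the fds are assumed to come from
elsewhere, and the uapi header must already include the unions above):

	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	static int struct_ops_link_update(int link_fd, int new_map_fd,
					  int old_map_fd)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.link_update.link_fd = link_fd;
		attr.link_update.new_map_fd = new_map_fd;
		/* BPF_F_REPLACE: swap only if the link still holds old_map_fd */
		attr.link_update.flags = BPF_F_REPLACE;
		attr.link_update.old_map_fd = old_map_fd;

		return syscall(__NR_bpf, BPF_LINK_UPDATE, &attr, sizeof(attr));
	}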
@@ -1647,17 +1675,17 @@ union bpf_attr {
* Description
* This helper is a "printk()-like" facility for debugging. It
* prints a message defined by format *fmt* (of size *fmt_size*)
- * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * to file *\/sys/kernel/tracing/trace* from TraceFS, if
* available. It can take up to three additional **u64**
* arguments (as an eBPF helper, the total number of arguments is
* limited to five).
*
* Each time the helper is called, it appends a line to the trace.
- * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
- * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
+ * Lines are discarded while *\/sys/kernel/tracing/trace* is
+ * open, use *\/sys/kernel/tracing/trace_pipe* to avoid this.
* The format of the trace is customizable, and the exact output
* one will get depends on the options set in
- * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *\/sys/kernel/tracing/trace_options* (see also the
* *README* file under the same directory). However, it usually
* defaults to something like:
*
@@ -6379,6 +6407,9 @@ struct bpf_link_info {
struct {
__u32 ifindex;
} xdp;
+ struct {
+ __u32 map_id;
+ } struct_ops;
};
} __attribute__((aligned(8)));
@@ -7112,4 +7143,12 @@ enum {
BPF_F_TIMER_ABS = (1ULL << 0),
};
+/* BPF numbers iterator state */
+struct bpf_iter_num {
+ /* opaque iterator state; having __u64 here allows preserving correct
+ * alignment requirements in vmlinux.h, generated from BTF
+ */
+ __u64 __opaque[1];
+} __attribute__((aligned(8)));
+
#endif /* _UAPI__LINUX_BPF_H__ */
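
The open-coded numbers iterator above is driven through the bpf_iter_num_new()/bpf_iter_num_next()/bpf_iter_num_destroy() kfuncs added in the same series. A minimal BPF-side sketch of the intended pattern, assuming the extern declarations below match the kernel's (they would normally come from vmlinux.h):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym;
extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym;
extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;

SEC("raw_tp/sys_enter")
int sum_first_five(const void *ctx)
{
	struct bpf_iter_num it;
	int sum = 0, *v;

	/* iterate the integers in [0, 5); _destroy() must pair with _new() */
	bpf_iter_num_new(&it, 0, 5);
	while ((v = bpf_iter_num_next(&it)))
		sum += *v;
	bpf_iter_num_destroy(&it);
	return sum == 10 ? 0 : 1;
}

char _license[] SEC("license") = "GPL";
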
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index e750b6f5fcc3..128ac723c4ea 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -230,9 +230,9 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt,
int bpf_prog_load(enum bpf_prog_type prog_type,
const char *prog_name, const char *license,
const struct bpf_insn *insns, size_t insn_cnt,
- const struct bpf_prog_load_opts *opts)
+ struct bpf_prog_load_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, fd_array);
+ const size_t attr_sz = offsetofend(union bpf_attr, log_true_size);
void *finfo = NULL, *linfo = NULL;
const char *func_info, *line_info;
__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
@@ -290,10 +290,6 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
if (!!log_buf != !!log_size)
return libbpf_err(-EINVAL);
- if (log_level > (4 | 2 | 1))
- return libbpf_err(-EINVAL);
- if (log_level && !log_buf)
- return libbpf_err(-EINVAL);
func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0);
func_info = OPTS_GET(opts, func_info, NULL);
@@ -316,6 +312,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
}
fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
+ OPTS_SET(opts, log_true_size, attr.log_true_size);
if (fd >= 0)
return fd;
@@ -356,6 +353,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
}
fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
+ OPTS_SET(opts, log_true_size, attr.log_true_size);
if (fd >= 0)
goto done;
}
@@ -370,6 +368,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
attr.log_level = 1;
fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
+ OPTS_SET(opts, log_true_size, attr.log_true_size);
}
done:
/* free() doesn't affect errno, so we don't need to restore it */
@@ -794,11 +793,17 @@ int bpf_link_update(int link_fd, int new_prog_fd,
if (!OPTS_VALID(opts, bpf_link_update_opts))
return libbpf_err(-EINVAL);
+ if (OPTS_GET(opts, old_prog_fd, 0) && OPTS_GET(opts, old_map_fd, 0))
+ return libbpf_err(-EINVAL);
+
memset(&attr, 0, attr_sz);
attr.link_update.link_fd = link_fd;
attr.link_update.new_prog_fd = new_prog_fd;
attr.link_update.flags = OPTS_GET(opts, flags, 0);
- attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+ if (OPTS_GET(opts, old_prog_fd, 0))
+ attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+ else if (OPTS_GET(opts, old_map_fd, 0))
+ attr.link_update.old_map_fd = OPTS_GET(opts, old_map_fd, 0);
ret = sys_bpf(BPF_LINK_UPDATE, &attr, attr_sz);
return libbpf_err_errno(ret);
@@ -1078,9 +1083,9 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
return libbpf_err_errno(fd);
}
-int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts)
+int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level);
+ const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size);
union bpf_attr attr;
char *log_buf;
size_t log_size;
@@ -1123,6 +1128,8 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_loa
attr.btf_log_level = 1;
fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
}
+
+ OPTS_SET(opts, log_true_size, attr.btf_log_true_size);
return libbpf_err_errno(fd);
}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index f0f786373238..a2c091389b18 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -96,13 +96,20 @@ struct bpf_prog_load_opts {
__u32 log_level;
__u32 log_size;
char *log_buf;
+ /* output: actual total log contents size (including terminating zero).
+ * It can be either larger than the original log_size (if the log was
+ * truncated) or smaller (if the log buffer wasn't filled completely).
+ * If the kernel doesn't support this feature, log_true_size is left unchanged.
+ */
+ __u32 log_true_size;
+ size_t :0;
};
-#define bpf_prog_load_opts__last_field log_buf
+#define bpf_prog_load_opts__last_field log_true_size
LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
const char *prog_name, const char *license,
const struct bpf_insn *insns, size_t insn_cnt,
- const struct bpf_prog_load_opts *opts);
+ struct bpf_prog_load_opts *opts);
/* Flags to direct loading requirements */
#define MAPS_RELAX_COMPAT 0x01
@@ -117,11 +124,18 @@ struct bpf_btf_load_opts {
char *log_buf;
__u32 log_level;
__u32 log_size;
+ /* output: actual total log contents size (including terminating zero).
+ * It can be either larger than the original log_size (if the log was
+ * truncated) or smaller (if the log buffer wasn't filled completely).
+ * If the kernel doesn't support this feature, log_true_size is left unchanged.
+ */
+ __u32 log_true_size;
+ size_t :0;
};
-#define bpf_btf_load_opts__last_field log_size
+#define bpf_btf_load_opts__last_field log_true_size
LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size,
- const struct bpf_btf_load_opts *opts);
+ struct bpf_btf_load_opts *opts);
LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
__u64 flags);
@@ -336,8 +350,9 @@ struct bpf_link_update_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 flags; /* extra flags */
__u32 old_prog_fd; /* expected old program FD */
+ __u32 old_map_fd; /* expected old map FD */
};
-#define bpf_link_update_opts__last_field old_prog_fd
+#define bpf_link_update_opts__last_field old_map_fd
LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts);
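
Taken together, the log_true_size plumbing lets callers size the verifier log exactly: load once with a small buffer, read back opts.log_true_size, and retry with a buffer of exactly that size. A rough userspace sketch, assuming insns/insn_cnt hold a valid program and with most error handling trimmed:

#include <errno.h>
#include <stdlib.h>
#include <bpf/bpf.h>

int load_with_full_log(const struct bpf_insn *insns, size_t insn_cnt)
{
	char tiny[64];
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.log_buf = tiny,
		.log_size = sizeof(tiny),
		.log_level = 1,
	);
	int fd;

	fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "probe", "GPL",
			   insns, insn_cnt, &opts);
	if (fd < 0 && opts.log_true_size > sizeof(tiny)) {
		/* the kernel reported how many bytes the full log needs */
		char *buf = malloc(opts.log_true_size);

		if (!buf)
			return -ENOMEM;
		opts.log_buf = buf;
		opts.log_size = opts.log_true_size;
		fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "probe", "GPL",
				   insns, insn_cnt, &opts);
		/* buf now holds the complete, untruncated verifier log */
		free(buf);
	}
	return fd;
}
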
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
index 223308931d55..fdf44403ff36 100644
--- a/tools/lib/bpf/bpf_gen_internal.h
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -11,6 +11,7 @@ struct ksym_relo_desc {
int insn_idx;
bool is_weak;
bool is_typeless;
+ bool is_ld64;
};
struct ksym_desc {
@@ -24,6 +25,7 @@ struct ksym_desc {
bool typeless;
};
int insn;
+ bool is_ld64;
};
struct bpf_gen {
@@ -65,7 +67,7 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u
void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);
void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);
void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
- bool is_typeless, int kind, int insn_idx);
+ bool is_typeless, bool is_ld64, int kind, int insn_idx);
void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo);
void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx);
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 7d12d3e620cc..e7e1a8acc299 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -177,6 +177,11 @@ enum libbpf_tristate {
#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
#define __kptr __attribute__((btf_type_tag("kptr")))
+#define bpf_ksym_exists(sym) ({ \
+ _Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak"); \
+ !!sym; \
+})
+
#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
#endif
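
bpf_ksym_exists() turns a __weak ksym or kfunc into a load-time feature probe: the check compiles to a test of the resolved address, so the verifier can dead-code-eliminate the branch on kernels that lack the symbol. A sketch, using the selftest module's kfunc as a stand-in for any optional kfunc:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* must be __weak, or the _Static_assert in bpf_ksym_exists() fires */
extern void bpf_testmod_test_mod_kfunc(int i) __ksym __weak;

SEC("tc")
int use_if_present(struct __sk_buff *skb)
{
	if (bpf_ksym_exists(bpf_testmod_test_mod_kfunc))
		bpf_testmod_test_mod_kfunc(42);
	return 0;
}

char _license[] SEC("license") = "GPL";
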
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 23f5c46708f8..83e8e3bfd8ff 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -560,7 +560,7 @@ static void emit_find_attach_target(struct bpf_gen *gen)
}
void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
- bool is_typeless, int kind, int insn_idx)
+ bool is_typeless, bool is_ld64, int kind, int insn_idx)
{
struct ksym_relo_desc *relo;
@@ -574,6 +574,7 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
relo->name = name;
relo->is_weak = is_weak;
relo->is_typeless = is_typeless;
+ relo->is_ld64 = is_ld64;
relo->kind = kind;
relo->insn_idx = insn_idx;
gen->relo_cnt++;
@@ -586,9 +587,11 @@ static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_des
int i;
for (i = 0; i < gen->nr_ksyms; i++) {
- if (!strcmp(gen->ksyms[i].name, relo->name)) {
- gen->ksyms[i].ref++;
- return &gen->ksyms[i];
+ kdesc = &gen->ksyms[i];
+ if (kdesc->kind == relo->kind && kdesc->is_ld64 == relo->is_ld64 &&
+ !strcmp(kdesc->name, relo->name)) {
+ kdesc->ref++;
+ return kdesc;
}
}
kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc));
@@ -603,6 +606,7 @@ static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_des
kdesc->ref = 1;
kdesc->off = 0;
kdesc->insn = 0;
+ kdesc->is_ld64 = relo->is_ld64;
return kdesc;
}
@@ -804,11 +808,13 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo,
return;
/* try to copy from existing ldimm64 insn */
if (kdesc->ref > 1) {
- move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
- kdesc->insn + offsetof(struct bpf_insn, imm));
move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4,
kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm));
- /* jump over src_reg adjustment if imm is not 0, reuse BPF_REG_0 from move_blob2blob */
+ move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
+ kdesc->insn + offsetof(struct bpf_insn, imm));
+ /* jump over src_reg adjustment if imm (btf_id) is not 0, reuse BPF_REG_0 from move_blob2blob
+ * If btf_id is zero, clear BPF_PSEUDO_BTF_ID flag in src_reg of ld_imm64 insn
+ */
emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3));
goto clear_src_reg;
}
@@ -831,7 +837,7 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo,
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7,
sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)));
/* skip src_reg adjustment */
- emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3));
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 3));
clear_src_reg:
/* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */
reg_mask = src_reg_mask();
@@ -862,23 +868,17 @@ static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn
{
int insn;
- pr_debug("gen: emit_relo (%d): %s at %d\n", relo->kind, relo->name, relo->insn_idx);
+ pr_debug("gen: emit_relo (%d): %s at %d %s\n",
+ relo->kind, relo->name, relo->insn_idx, relo->is_ld64 ? "ld64" : "call");
insn = insns + sizeof(struct bpf_insn) * relo->insn_idx;
emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn));
- switch (relo->kind) {
- case BTF_KIND_VAR:
+ if (relo->is_ld64) {
if (relo->is_typeless)
emit_relo_ksym_typeless(gen, relo, insn);
else
emit_relo_ksym_btf(gen, relo, insn);
- break;
- case BTF_KIND_FUNC:
+ } else {
emit_relo_kfunc_btf(gen, relo, insn);
- break;
- default:
- pr_warn("Unknown relocation kind '%d'\n", relo->kind);
- gen->error = -EDOM;
- return;
}
}
@@ -901,18 +901,20 @@ static void cleanup_core_relo(struct bpf_gen *gen)
static void cleanup_relos(struct bpf_gen *gen, int insns)
{
+ struct ksym_desc *kdesc;
int i, insn;
for (i = 0; i < gen->nr_ksyms; i++) {
+ kdesc = &gen->ksyms[i];
/* only close fds for typed ksyms and kfuncs */
- if (gen->ksyms[i].kind == BTF_KIND_VAR && !gen->ksyms[i].typeless) {
+ if (kdesc->is_ld64 && !kdesc->typeless) {
/* close fd recorded in insn[insn_idx + 1].imm */
- insn = gen->ksyms[i].insn;
+ insn = kdesc->insn;
insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm);
emit_sys_close_blob(gen, insn);
- } else if (gen->ksyms[i].kind == BTF_KIND_FUNC) {
- emit_sys_close_blob(gen, blob_fd_array_off(gen, gen->ksyms[i].off));
- if (gen->ksyms[i].off < MAX_FD_ARRAY_SZ)
+ } else if (!kdesc->is_ld64) {
+ emit_sys_close_blob(gen, blob_fd_array_off(gen, kdesc->off));
+ if (kdesc->off < MAX_FD_ARRAY_SZ)
gen->nr_fd_array--;
}
}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a557718401e4..49cd304ae3bc 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -116,6 +116,7 @@ static const char * const attach_type_name[] = {
[BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
[BPF_PERF_EVENT] = "perf_event",
[BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
+ [BPF_STRUCT_OPS] = "struct_ops",
};
static const char * const link_type_name[] = {
@@ -215,9 +216,10 @@ static libbpf_print_fn_t __libbpf_pr = __base_pr;
libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
- libbpf_print_fn_t old_print_fn = __libbpf_pr;
+ libbpf_print_fn_t old_print_fn;
+
+ old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);
- __libbpf_pr = fn;
return old_print_fn;
}
@@ -226,8 +228,10 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
va_list args;
int old_errno;
+ libbpf_print_fn_t print_fn;
- if (!__libbpf_pr)
+ print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
+ if (!print_fn)
return;
old_errno = errno;
@@ -315,8 +319,8 @@ enum reloc_type {
RELO_LD64,
RELO_CALL,
RELO_DATA,
- RELO_EXTERN_VAR,
- RELO_EXTERN_FUNC,
+ RELO_EXTERN_LD64,
+ RELO_EXTERN_CALL,
RELO_SUBPROG_ADDR,
RELO_CORE,
};
@@ -467,6 +471,7 @@ struct bpf_struct_ops {
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
+#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
enum libbpf_map_type {
LIBBPF_MAP_UNSPEC,
@@ -596,6 +601,7 @@ struct elf_state {
Elf64_Ehdr *ehdr;
Elf_Data *symbols;
Elf_Data *st_ops_data;
+ Elf_Data *st_ops_link_data;
size_t shstrndx; /* section index for section name strings */
size_t strtabidx;
struct elf_sec_desc *secs;
@@ -605,6 +611,7 @@ struct elf_state {
int text_shndx;
int symbols_shndx;
int st_ops_shndx;
+ int st_ops_link_shndx;
};
struct usdt_manager;
@@ -1118,7 +1125,8 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
return 0;
}
-static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
+static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
+ int shndx, Elf_Data *data, __u32 map_flags)
{
const struct btf_type *type, *datasec;
const struct btf_var_secinfo *vsi;
@@ -1129,15 +1137,15 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
struct bpf_map *map;
__u32 i;
- if (obj->efile.st_ops_shndx == -1)
+ if (shndx == -1)
return 0;
btf = obj->btf;
- datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
+ datasec_id = btf__find_by_name_kind(btf, sec_name,
BTF_KIND_DATASEC);
if (datasec_id < 0) {
pr_warn("struct_ops init: DATASEC %s not found\n",
- STRUCT_OPS_SEC);
+ sec_name);
return -EINVAL;
}
@@ -1150,7 +1158,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
type_id = btf__resolve_type(obj->btf, vsi->type);
if (type_id < 0) {
pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
- vsi->type, STRUCT_OPS_SEC);
+ vsi->type, sec_name);
return -EINVAL;
}
@@ -1169,7 +1177,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
if (IS_ERR(map))
return PTR_ERR(map);
- map->sec_idx = obj->efile.st_ops_shndx;
+ map->sec_idx = shndx;
map->sec_offset = vsi->offset;
map->name = strdup(var_name);
if (!map->name)
@@ -1179,6 +1187,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
map->def.key_size = sizeof(int);
map->def.value_size = type->size;
map->def.max_entries = 1;
+ map->def.map_flags = map_flags;
map->st_ops = calloc(1, sizeof(*map->st_ops));
if (!map->st_ops)
@@ -1191,14 +1200,14 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
return -ENOMEM;
- if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
+ if (vsi->offset + type->size > data->d_size) {
pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
- var_name, STRUCT_OPS_SEC);
+ var_name, sec_name);
return -EINVAL;
}
memcpy(st_ops->data,
- obj->efile.st_ops_data->d_buf + vsi->offset,
+ data->d_buf + vsi->offset,
type->size);
st_ops->tname = tname;
st_ops->type = type;
@@ -1211,6 +1220,19 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
return 0;
}
+static int bpf_object_init_struct_ops(struct bpf_object *obj)
+{
+ int err;
+
+ err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx,
+ obj->efile.st_ops_data, 0);
+ err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC,
+ obj->efile.st_ops_link_shndx,
+ obj->efile.st_ops_link_data,
+ BPF_F_LINK);
+ return err;
+}
+
static struct bpf_object *bpf_object__new(const char *path,
const void *obj_buf,
size_t obj_buf_sz,
@@ -1247,6 +1269,7 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->efile.obj_buf_sz = obj_buf_sz;
obj->efile.btf_maps_shndx = -1;
obj->efile.st_ops_shndx = -1;
+ obj->efile.st_ops_link_shndx = -1;
obj->kconfig_map_idx = -1;
obj->kern_version = get_kernel_version();
@@ -1264,6 +1287,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
obj->efile.elf = NULL;
obj->efile.symbols = NULL;
obj->efile.st_ops_data = NULL;
+ obj->efile.st_ops_link_data = NULL;
zfree(&obj->efile.secs);
obj->efile.sec_cnt = 0;
@@ -2618,7 +2642,7 @@ static int bpf_object__init_maps(struct bpf_object *obj,
err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
err = err ?: bpf_object__init_global_data_maps(obj);
err = err ?: bpf_object__init_kconfig_map(obj);
- err = err ?: bpf_object__init_struct_ops_maps(obj);
+ err = err ?: bpf_object_init_struct_ops(obj);
return err;
}
@@ -2752,12 +2776,13 @@ static bool libbpf_needs_btf(const struct bpf_object *obj)
{
return obj->efile.btf_maps_shndx >= 0 ||
obj->efile.st_ops_shndx >= 0 ||
+ obj->efile.st_ops_link_shndx >= 0 ||
obj->nr_extern > 0;
}
static bool kernel_needs_btf(const struct bpf_object *obj)
{
- return obj->efile.st_ops_shndx >= 0;
+ return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0;
}
static int bpf_object__init_btf(struct bpf_object *obj,
@@ -3450,6 +3475,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
} else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
obj->efile.st_ops_data = data;
obj->efile.st_ops_shndx = idx;
+ } else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) {
+ obj->efile.st_ops_link_data = data;
+ obj->efile.st_ops_link_shndx = idx;
} else {
pr_info("elf: skipping unrecognized data section(%d) %s\n",
idx, name);
@@ -3464,6 +3492,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
/* Only do relo for section with exec instructions */
if (!section_have_execinstr(obj, targ_sec_idx) &&
strcmp(name, ".rel" STRUCT_OPS_SEC) &&
+ strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
strcmp(name, ".rel" MAPS_ELF_SEC)) {
pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
idx, name, targ_sec_idx,
@@ -4009,9 +4038,9 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
prog->name, i, ext->name, ext->sym_idx, insn_idx);
if (insn->code == (BPF_JMP | BPF_CALL))
- reloc_desc->type = RELO_EXTERN_FUNC;
+ reloc_desc->type = RELO_EXTERN_CALL;
else
- reloc_desc->type = RELO_EXTERN_VAR;
+ reloc_desc->type = RELO_EXTERN_LD64;
reloc_desc->insn_idx = insn_idx;
reloc_desc->sym_off = i; /* sym_off stores extern index */
return 0;
@@ -5855,7 +5884,7 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
relo->map_idx, map);
}
break;
- case RELO_EXTERN_VAR:
+ case RELO_EXTERN_LD64:
ext = &obj->externs[relo->sym_off];
if (ext->type == EXT_KCFG) {
if (obj->gen_loader) {
@@ -5877,7 +5906,7 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
}
}
break;
- case RELO_EXTERN_FUNC:
+ case RELO_EXTERN_CALL:
ext = &obj->externs[relo->sym_off];
insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
if (ext->is_set) {
@@ -6115,7 +6144,7 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
continue;
relo = find_prog_insn_relo(prog, insn_idx);
- if (relo && relo->type == RELO_EXTERN_FUNC)
+ if (relo && relo->type == RELO_EXTERN_CALL)
/* kfunc relocations will be handled later
* in bpf_object__relocate_data()
*/
@@ -6610,7 +6639,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
return -LIBBPF_ERRNO__INTERNAL;
}
- if (idx == obj->efile.st_ops_shndx)
+ if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx)
err = bpf_object__collect_st_ops_relos(obj, shdr, data);
else if (idx == obj->efile.btf_maps_shndx)
err = bpf_object__collect_map_relos(obj, shdr, data);
@@ -7070,18 +7099,21 @@ static int bpf_program_record_relos(struct bpf_program *prog)
for (i = 0; i < prog->nr_reloc; i++) {
struct reloc_desc *relo = &prog->reloc_desc[i];
struct extern_desc *ext = &obj->externs[relo->sym_off];
+ int kind;
switch (relo->type) {
- case RELO_EXTERN_VAR:
+ case RELO_EXTERN_LD64:
if (ext->type != EXT_KSYM)
continue;
+ kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
+ BTF_KIND_VAR : BTF_KIND_FUNC;
bpf_gen__record_extern(obj->gen_loader, ext->name,
ext->is_weak, !ext->ksym.type_id,
- BTF_KIND_VAR, relo->insn_idx);
+ true, kind, relo->insn_idx);
break;
- case RELO_EXTERN_FUNC:
+ case RELO_EXTERN_CALL:
bpf_gen__record_extern(obj->gen_loader, ext->name,
- ext->is_weak, false, BTF_KIND_FUNC,
+ ext->is_weak, false, false, BTF_KIND_FUNC,
relo->insn_idx);
break;
case RELO_CORE: {
@@ -7533,6 +7565,12 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
ext->is_set = true;
ext->ksym.kernel_btf_id = kfunc_id;
ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
+ /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
+ * populates FD into ld_imm64 insn when it's used to point to kfunc.
+ * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
+ * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
+ */
+ ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n",
ext->name, kfunc_id);
@@ -7677,6 +7715,37 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
return 0;
}
+static void bpf_map_prepare_vdata(const struct bpf_map *map)
+{
+ struct bpf_struct_ops *st_ops;
+ __u32 i;
+
+ st_ops = map->st_ops;
+ for (i = 0; i < btf_vlen(st_ops->type); i++) {
+ struct bpf_program *prog = st_ops->progs[i];
+ void *kern_data;
+ int prog_fd;
+
+ if (!prog)
+ continue;
+
+ prog_fd = bpf_program__fd(prog);
+ kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
+ *(unsigned long *)kern_data = prog_fd;
+ }
+}
+
+static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
+{
+ int i;
+
+ for (i = 0; i < obj->nr_maps; i++)
+ if (bpf_map__is_struct_ops(&obj->maps[i]))
+ bpf_map_prepare_vdata(&obj->maps[i]);
+
+ return 0;
+}
+
static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
{
int err, i;
@@ -7702,6 +7771,7 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
err = err ? : bpf_object__load_progs(obj, extra_log_level);
err = err ? : bpf_object_init_prog_arrays(obj);
+ err = err ? : bpf_object_prepare_struct_ops(obj);
if (obj->gen_loader) {
/* reset FDs */
@@ -8398,6 +8468,7 @@ int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
return libbpf_err(-EBUSY);
prog->type = type;
+ prog->sec_def = NULL;
return 0;
}
@@ -8811,6 +8882,7 @@ const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
}
static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
+ int sec_idx,
size_t offset)
{
struct bpf_map *map;
@@ -8820,7 +8892,8 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
map = &obj->maps[i];
if (!bpf_map__is_struct_ops(map))
continue;
- if (map->sec_offset <= offset &&
+ if (map->sec_idx == sec_idx &&
+ map->sec_offset <= offset &&
offset - map->sec_offset < map->def.value_size)
return map;
}
@@ -8862,7 +8935,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
}
name = elf_sym_str(obj, sym->st_name) ?: "<?>";
- map = find_struct_ops_map_by_offset(obj, rel->r_offset);
+ map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
if (!map) {
pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
(size_t)rel->r_offset);
@@ -8929,8 +9002,9 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
}
/* struct_ops BPF prog can be re-used between multiple
- * .struct_ops as long as it's the same struct_ops struct
- * definition and the same function pointer field
+ * .struct_ops & .struct_ops.link as long as it's the
+ * same struct_ops struct definition and the same
+ * function pointer field
*/
if (prog->attach_btf_id != st_ops->type_id ||
prog->expected_attach_type != member_idx) {
@@ -9912,16 +9986,20 @@ static int append_to_file(const char *file, const char *fmt, ...)
{
int fd, n, err = 0;
va_list ap;
+ char buf[1024];
+
+ va_start(ap, fmt);
+ n = vsnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ if (n < 0 || n >= sizeof(buf))
+ return -EINVAL;
fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
if (fd < 0)
return -errno;
- va_start(ap, fmt);
- n = vdprintf(fd, fmt, ap);
- va_end(ap);
-
- if (n < 0)
+ if (write(fd, buf, n) < 0)
err = -errno;
close(fd);
@@ -11566,22 +11644,30 @@ struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
return link;
}
+struct bpf_link_struct_ops {
+ struct bpf_link link;
+ int map_fd;
+};
+
static int bpf_link__detach_struct_ops(struct bpf_link *link)
{
+ struct bpf_link_struct_ops *st_link;
__u32 zero = 0;
- if (bpf_map_delete_elem(link->fd, &zero))
- return -errno;
+ st_link = container_of(link, struct bpf_link_struct_ops, link);
- return 0;
+ if (st_link->map_fd < 0)
+ /* w/o a real link */
+ return bpf_map_delete_elem(link->fd, &zero);
+
+ return close(link->fd);
}
struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
{
- struct bpf_struct_ops *st_ops;
- struct bpf_link *link;
- __u32 i, zero = 0;
- int err;
+ struct bpf_link_struct_ops *link;
+ __u32 zero = 0;
+ int err, fd;
if (!bpf_map__is_struct_ops(map) || map->fd == -1)
return libbpf_err_ptr(-EINVAL);
@@ -11590,31 +11676,72 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
if (!link)
return libbpf_err_ptr(-EINVAL);
- st_ops = map->st_ops;
- for (i = 0; i < btf_vlen(st_ops->type); i++) {
- struct bpf_program *prog = st_ops->progs[i];
- void *kern_data;
- int prog_fd;
+ /* kern_vdata should be prepared during the loading phase. */
+ err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
+ /* It can be EBUSY if the map has been used to create or
+ * update a link before. We don't allow updating the value of
+ * a struct_ops once it is set. That ensures the value never
+ * changes, so it is safe to skip EBUSY.
+ */
+ if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
+ free(link);
+ return libbpf_err_ptr(err);
+ }
- if (!prog)
- continue;
+ link->link.detach = bpf_link__detach_struct_ops;
- prog_fd = bpf_program__fd(prog);
- kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
- *(unsigned long *)kern_data = prog_fd;
+ if (!(map->def.map_flags & BPF_F_LINK)) {
+ /* w/o a real link */
+ link->link.fd = map->fd;
+ link->map_fd = -1;
+ return &link->link;
}
- err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
- if (err) {
- err = -errno;
+ fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
+ if (fd < 0) {
free(link);
- return libbpf_err_ptr(err);
+ return libbpf_err_ptr(fd);
}
- link->detach = bpf_link__detach_struct_ops;
- link->fd = map->fd;
+ link->link.fd = fd;
+ link->map_fd = map->fd;
- return link;
+ return &link->link;
+}
+
+/*
+ * Swap the struct_ops map backing a link with a new struct_ops map.
+ */
+int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
+{
+ struct bpf_link_struct_ops *st_ops_link;
+ __u32 zero = 0;
+ int err;
+
+ if (!bpf_map__is_struct_ops(map) || map->fd < 0)
+ return -EINVAL;
+
+ st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
+ /* Ensure the type of a link is correct */
+ if (st_ops_link->map_fd < 0)
+ return -EINVAL;
+
+ err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
+ /* It can be EBUSY if the map has been used to create or
+ * update a link before. We don't allow updating the value of
+ * a struct_ops once it is set. That ensures the value never
+ * changes, so it is safe to skip EBUSY.
+ */
+ if (err && err != -EBUSY)
+ return err;
+
+ err = bpf_link_update(link->fd, map->fd, NULL);
+ if (err < 0)
+ return err;
+
+ st_ops_link->map_fd = map->fd;
+
+ return 0;
}
typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
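
Putting the libbpf pieces together: a struct_ops map defined in the new ".struct_ops.link" section is created with BPF_F_LINK, attaches through a real bpf_link, and can later be swapped via bpf_link__update_map(). A hedged userspace sketch; the skeleton type and the map names ca_v1/ca_v2 are hypothetical placeholders:

#include <errno.h>
#include "tcp_ca_kern.skel.h" /* hypothetical skeleton */

static int run_and_switch(struct tcp_ca_kern *skel)
{
	struct bpf_link *link;

	link = bpf_map__attach_struct_ops(skel->maps.ca_v1);
	if (!link)
		return -errno;

	/* ... run with ca_v1 registered ... */

	/* atomically replace the struct_ops behind the same link */
	if (bpf_link__update_map(link, skel->maps.ca_v2)) {
		bpf_link__destroy(link);
		return -1;
	}

	bpf_link__destroy(link); /* a real link, so destroying unregisters */
	return 0;
}
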
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index db4992a036f8..0b7362397ea3 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -101,6 +101,8 @@ typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level,
* be used for libbpf warnings and informational messages.
* @param fn The log print function. If NULL, libbpf won't print anything.
* @return Pointer to old print function.
+ *
+ * This function is thread-safe.
*/
LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn);
@@ -719,6 +721,7 @@ bpf_program__attach_freplace(const struct bpf_program *prog,
struct bpf_map;
LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map);
+LIBBPF_API int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map);
struct bpf_iter_attach_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
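
Since the swap is now an atomic exchange, installing or restoring a print callback is safe from any thread. A minimal usage sketch:

#include <stdarg.h>
#include <stdio.h>
#include <bpf/libbpf.h>

static int quiet_printer(enum libbpf_print_level level,
			 const char *fmt, va_list args)
{
	if (level == LIBBPF_DEBUG)
		return 0;		/* drop debug-level chatter */
	return vfprintf(stderr, fmt, args);
}

void enable_quiet_logs(void)
{
	libbpf_print_fn_t old = libbpf_set_print(quiet_printer);

	(void)old;	/* keep the old fn if it must be restored later */
}
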
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 50dde1f6521e..a5aa3a383d69 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -386,6 +386,7 @@ LIBBPF_1.1.0 {
LIBBPF_1.2.0 {
global:
bpf_btf_get_info_by_fd;
+ bpf_link__update_map;
bpf_link_get_info_by_fd;
bpf_map_get_info_by_fd;
bpf_prog_get_info_by_fd;
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index d7069780984a..5ced96d99f8c 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -1115,7 +1115,19 @@ static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src
if (src->shdr->sh_type != SHT_NOBITS) {
tmp = realloc(dst->raw_data, dst_final_sz);
- if (!tmp)
+ /* If dst_align_sz == 0, realloc() behaves in a special way:
+ * 1. When dst->raw_data is NULL it returns:
+ * "either NULL or a pointer suitable to be passed to free()" [1].
+ * 2. When dst->raw_data is non-NULL it frees dst->raw_data and returns NULL,
+ * thus invalidating any "pointer suitable to be passed to free()" obtained
+ * at step (1).
+ *
+ * The dst_align_sz > 0 check avoids error exit after (2), otherwise
+ * dst->raw_data would be freed again in bpf_linker__free().
+ *
+ * [1] man 3 realloc
+ */
+ if (!tmp && dst_align_sz > 0)
return -ENOMEM;
dst->raw_data = tmp;
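
The corner case the new comment guards against is easy to reproduce in isolation. A standalone illustration, assuming glibc-like behavior as documented in man 3 realloc (size 0 frees the old block and may return NULL):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	void *p = malloc(16);
	void *tmp = realloc(p, 0);	/* frees p, may return NULL */

	if (!tmp)
		printf("NULL from realloc(p, 0) is not an OOM error\n");
	free(tmp);			/* free(NULL) is a no-op */
	return 0;
}
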
diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c
index f561aa07438f..3f26d629b2b4 100644
--- a/tools/lib/bpf/zip.c
+++ b/tools/lib/bpf/zip.c
@@ -16,6 +16,10 @@
#include "libbpf_internal.h"
#include "zip.h"
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
+
/* Specification of ZIP file format can be found here:
* https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
* For a high level overview of the structure of a ZIP file see
@@ -119,6 +123,8 @@ struct local_file_header {
__u16 extra_field_length;
} __attribute__((packed));
+#pragma GCC diagnostic pop
+
struct zip_archive {
void *data;
__u32 size;
diff --git a/tools/mm/page_owner_sort.c b/tools/mm/page_owner_sort.c
index 7c2ac124cdc8..99798894b879 100644
--- a/tools/mm/page_owner_sort.c
+++ b/tools/mm/page_owner_sort.c
@@ -857,7 +857,7 @@ int main(int argc, char **argv)
if (cull & CULL_PID || filter & FILTER_PID)
fprintf(fout, ", PID %d", list[i].pid);
if (cull & CULL_TGID || filter & FILTER_TGID)
- fprintf(fout, ", TGID %d", list[i].pid);
+ fprintf(fout, ", TGID %d", list[i].tgid);
if (cull & CULL_COMM || filter & FILTER_COMM)
fprintf(fout, ", task_comm_name: %s", list[i].comm);
if (cull & CULL_ALLOCATOR) {
diff --git a/tools/net/ynl/ethtool b/tools/net/ynl/ethtool.py
index 5fb1d670693a..6c9f7e31250c 100755
--- a/tools/net/ynl/ethtool
+++ b/tools/net/ynl/ethtool.py
@@ -152,8 +152,8 @@ def main():
global args
args = parser.parse_args()
- spec = '/usr/local/google/home/sdf/src/linux/Documentation/netlink/specs/ethtool.yaml'
- schema = '/usr/local/google/home/sdf/src/linux/Documentation/netlink/genetlink-legacy.yaml'
+ spec = '../../../Documentation/netlink/specs/ethtool.yaml'
+ schema = '../../../Documentation/netlink/genetlink-legacy.yaml'
ynl = YnlFamily(spec, schema)
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index 7690e0b0cb3f..aa77bcae4807 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -358,7 +358,10 @@ class YnlFamily(SpecFamily):
bound_f = functools.partial(self._op, op_name)
setattr(self, op.ident_name, bound_f)
- self.family = GenlFamily(self.yaml['name'])
+ try:
+ self.family = GenlFamily(self.yaml['name'])
+ except KeyError:
+ raise Exception(f"Family '{self.yaml['name']}' not supported by the kernel")
def ntf_subscribe(self, mcast_name):
if mcast_name not in self.family.genl_family['mcast']:
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 958ee9bdb316..4c89ff333f6f 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -108,6 +108,7 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, mn->slot[1] != NULL);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mas.node = MAS_START;
mas_nomem(&mas, GFP_KERNEL);
@@ -160,6 +161,7 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, mas_allocated(&mas) != i);
MT_BUG_ON(mt, !mn);
MT_BUG_ON(mt, not_empty(mn));
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
@@ -192,6 +194,7 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != i - 1);
MT_BUG_ON(mt, !mn);
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
@@ -210,6 +213,7 @@ static noinline void check_new_node(struct maple_tree *mt)
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != j - 1);
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@@ -233,6 +237,7 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, mas_allocated(&mas) != i - j);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1);
}
@@ -269,6 +274,7 @@ static noinline void check_new_node(struct maple_tree *mt)
mn = mas_pop_node(&mas); /* get the next node. */
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, not_empty(mn));
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@@ -294,6 +300,7 @@ static noinline void check_new_node(struct maple_tree *mt)
mn = mas_pop_node(&mas2); /* get the next node. */
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, not_empty(mn));
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
MT_BUG_ON(mt, mas_allocated(&mas2) != 0);
@@ -334,10 +341,12 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
for (i = 1; i <= MAPLE_ALLOC_SLOTS + 1; i++) {
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@@ -375,6 +384,7 @@ static noinline void check_new_node(struct maple_tree *mt)
mas_node_count(&mas, i); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mn = mas_pop_node(&mas); /* get the next node. */
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mas_destroy(&mas);
@@ -382,10 +392,13 @@ static noinline void check_new_node(struct maple_tree *mt)
mas_node_count(&mas, i); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mn = mas_pop_node(&mas); /* get the next node. */
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mn = mas_pop_node(&mas); /* get the next node. */
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mn = mas_pop_node(&mas); /* get the next node. */
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mas_destroy(&mas);
}
@@ -35369,6 +35382,7 @@ static noinline void check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, allocated != 1 + height * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
mas_destroy(&mas);
@@ -35386,6 +35400,7 @@ static noinline void check_prealloc(struct maple_tree *mt)
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
+ mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
@@ -35756,6 +35771,7 @@ void farmer_tests(void)
tree.ma_root = mt_mk_node(node, maple_leaf_64);
mt_dump(&tree);
+ node->parent = ma_parent_ptr(node);
ma_free_rcu(node);
/* Check things that will make lockdep angry */
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 99cc33c51eaa..0a6837f97c32 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -44,6 +44,7 @@ lookup_key # test_lookup_key__attach unexp
lru_bug # lru_bug__attach unexpected error: -524 (errno 524)
modify_return # modify_return__attach failed unexpected error: -524 (errno 524)
module_attach # skel_attach skeleton attach failed: -524
+module_fentry_shadow # bpf_link_create unexpected bpf_link_create: actual -524 < expected 0
mptcp/base # run_test mptcp unexpected error: -524 (errno 524)
netcnt # packets unexpected packets: actual 10001 != expected 10000
rcu_read_lock # failed to attach: ERROR: strerror_r(-524)=22
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index a02a085e7f32..c7463f3ec3c0 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -8,8 +8,10 @@ dynptr/test_dynptr_skb_data
dynptr/test_skb_readonly
fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline)
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
+iters/testmod_seq* # s390x doesn't support kfuncs in modules yet
kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95
kprobe_multi_test # relies on fentry
+ksyms_btf/weak_ksyms* # test_ksyms_weak__open_and_load unexpected error: -22 (kfunc)
ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?)
ksyms_module_libbpf # JIT does not support calling kernel function (kfunc)
ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 16f404aa1b23..c49e5403ad0e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -36,7 +36,7 @@ endif
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
- test_verifier_log test_dev_cgroup \
+ test_dev_cgroup \
test_sock test_sockmap get_cgroup_id_user \
test_cgroup_storage \
test_tcpnotify_user test_sysctl \
@@ -201,7 +201,7 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
$< -o $@ \
$(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)
-$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
+$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
$(call msg,MOD,,$@)
$(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation
$(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod
@@ -231,9 +231,11 @@ TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL)
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
-CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o
TESTING_HELPERS := $(OUTPUT)/testing_helpers.o
+CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o
+UNPRIV_HELPERS := $(OUTPUT)/unpriv_helpers.o
TRACE_HELPERS := $(OUTPUT)/trace_helpers.o
+JSON_WRITER := $(OUTPUT)/json_writer.o
CAP_HELPERS := $(OUTPUT)/cap_helpers.o
$(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS)
@@ -251,7 +253,7 @@ $(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS)
$(OUTPUT)/xdping: $(TESTING_HELPERS)
$(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS)
$(OUTPUT)/test_maps: $(TESTING_HELPERS)
-$(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS)
+$(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(UNPRIV_HELPERS)
$(OUTPUT)/xsk.o: $(BPFOBJ)
BPFTOOL ?= $(DEFAULT_BPFTOOL)
@@ -352,12 +354,12 @@ CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
endif
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
-BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
+BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
-I$(abspath $(OUTPUT)/../usr/include)
CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
- -Wno-compare-distinct-pointer-types -Wuninitialized
+ -Wno-compare-distinct-pointer-types
$(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline
@@ -559,7 +561,9 @@ TRUNNER_BPF_PROGS_DIR := progs
TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
network_helpers.c testing_helpers.c \
btf_helpers.c flow_dissector_load.h \
- cap_helpers.c test_loader.c xsk.c disasm.c
+ cap_helpers.c test_loader.c xsk.c disasm.c \
+ json_writer.c unpriv_helpers.c
+
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
$(OUTPUT)/liburandom_read.so \
$(OUTPUT)/xdp_synproxy \
@@ -608,7 +612,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
-$(OUTPUT)/xskxceiver: xskxceiver.c $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
+$(OUTPUT)/xskxceiver: xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
@@ -639,6 +643,7 @@ $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
$(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
$(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h
+$(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.skel.h
$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
@@ -656,6 +661,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_local_storage.o \
$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \
$(OUTPUT)/bench_bpf_hashmap_lookup.o \
+ $(OUTPUT)/bench_local_storage_create.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/bpf/autoconf_helper.h b/tools/testing/selftests/bpf/autoconf_helper.h
new file mode 100644
index 000000000000..5b243b9cdf8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/autoconf_helper.h
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#ifdef HAVE_GENHDR
+# include "autoconf.h"
+#else
+# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__)
+# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
+# endif
+#endif
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 0b2a53bb8460..d9c080ac1796 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -278,6 +278,7 @@ extern struct argp bench_local_storage_argp;
extern struct argp bench_local_storage_rcu_tasks_trace_argp;
extern struct argp bench_strncmp_argp;
extern struct argp bench_hashmap_lookup_argp;
+extern struct argp bench_local_storage_create_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -288,6 +289,7 @@ static const struct argp_child bench_parsers[] = {
{ &bench_local_storage_rcu_tasks_trace_argp, 0,
"local_storage RCU Tasks Trace slowdown benchmark", 0 },
{ &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
+ { &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 },
{},
};
@@ -515,6 +517,7 @@ extern const struct bench bench_local_storage_cache_interleaved_get;
extern const struct bench bench_local_storage_cache_hashmap_control;
extern const struct bench bench_local_storage_tasks_trace;
extern const struct bench bench_bpf_hashmap_lookup;
+extern const struct bench bench_local_storage_create;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -555,6 +558,7 @@ static const struct bench *benchs[] = {
&bench_local_storage_cache_hashmap_control,
&bench_local_storage_tasks_trace,
&bench_bpf_hashmap_lookup,
+ &bench_local_storage_create,
};
static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
new file mode 100644
index 000000000000..cff703f90e95
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <pthread.h>
+#include <argp.h>
+
+#include "bench.h"
+#include "bench_local_storage_create.skel.h"
+
+struct thread {
+ int *fds;
+ pthread_t *pthds;
+ int *pthd_results;
+};
+
+static struct bench_local_storage_create *skel;
+static struct thread *threads;
+static long create_owner_errs;
+static int storage_type = BPF_MAP_TYPE_SK_STORAGE;
+static int batch_sz = 32;
+
+enum {
+ ARG_BATCH_SZ = 9000,
+ ARG_STORAGE_TYPE = 9001,
+};
+
+static const struct argp_option opts[] = {
+ { "batch-size", ARG_BATCH_SZ, "BATCH_SIZE", 0,
+ "The number of storage creations in each batch" },
+ { "storage-type", ARG_STORAGE_TYPE, "STORAGE_TYPE", 0,
+ "The type of local storage to test (socket or task)" },
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ int ret;
+
+ switch (key) {
+ case ARG_BATCH_SZ:
+ ret = atoi(arg);
+ if (ret < 1) {
+ fprintf(stderr, "invalid batch-size\n");
+ argp_usage(state);
+ }
+ batch_sz = ret;
+ break;
+ case ARG_STORAGE_TYPE:
+ if (!strcmp(arg, "task")) {
+ storage_type = BPF_MAP_TYPE_TASK_STORAGE;
+ } else if (!strcmp(arg, "socket")) {
+ storage_type = BPF_MAP_TYPE_SK_STORAGE;
+ } else {
+ fprintf(stderr, "invalid storage-type (socket or task)\n");
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_local_storage_create_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+static void validate(void)
+{
+ if (env.consumer_cnt > 1) {
+ fprintf(stderr,
+ "local-storage-create benchmark does not need consumer\n");
+ exit(1);
+ }
+}
+
+static void setup(void)
+{
+ int i;
+
+ skel = bench_local_storage_create__open_and_load();
+ if (!skel) {
+ fprintf(stderr, "error loading skel\n");
+ exit(1);
+ }
+
+ skel->bss->bench_pid = getpid();
+ if (storage_type == BPF_MAP_TYPE_SK_STORAGE) {
+ if (!bpf_program__attach(skel->progs.socket_post_create)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ exit(1);
+ }
+ } else {
+ if (!bpf_program__attach(skel->progs.sched_process_fork)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ exit(1);
+ }
+ }
+
+ if (!bpf_program__attach(skel->progs.kmalloc)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ exit(1);
+ }
+
+ threads = calloc(env.producer_cnt, sizeof(*threads));
+
+ if (!threads) {
+ fprintf(stderr, "cannot alloc thread_res\n");
+ exit(1);
+ }
+
+ for (i = 0; i < env.producer_cnt; i++) {
+ struct thread *t = &threads[i];
+
+ if (storage_type == BPF_MAP_TYPE_SK_STORAGE) {
+ t->fds = malloc(batch_sz * sizeof(*t->fds));
+ if (!t->fds) {
+ fprintf(stderr, "cannot alloc t->fds\n");
+ exit(1);
+ }
+ } else {
+ t->pthds = malloc(batch_sz * sizeof(*t->pthds));
+ if (!t->pthds) {
+ fprintf(stderr, "cannot alloc t->pthds\n");
+ exit(1);
+ }
+ t->pthd_results = malloc(batch_sz * sizeof(*t->pthd_results));
+ if (!t->pthd_results) {
+ fprintf(stderr, "cannot alloc t->pthd_results\n");
+ exit(1);
+ }
+ }
+ }
+}
+
+static void measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&skel->bss->create_cnts, 0);
+ res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0);
+}
+
+static void *consumer(void *input)
+{
+ return NULL;
+}
+
+static void *sk_producer(void *input)
+{
+ struct thread *t = &threads[(long)(input)];
+ int *fds = t->fds;
+ int i;
+
+ while (true) {
+ for (i = 0; i < batch_sz; i++) {
+ fds[i] = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (fds[i] == -1)
+ atomic_inc(&create_owner_errs);
+ }
+
+ for (i = 0; i < batch_sz; i++) {
+ if (fds[i] != -1)
+ close(fds[i]);
+ }
+ }
+
+ return NULL;
+}
+
+static void *thread_func(void *arg)
+{
+ return NULL;
+}
+
+static void *task_producer(void *input)
+{
+ struct thread *t = &threads[(long)(input)];
+ pthread_t *pthds = t->pthds;
+ int *pthd_results = t->pthd_results;
+ int i;
+
+ while (true) {
+ for (i = 0; i < batch_sz; i++) {
+ pthd_results[i] = pthread_create(&pthds[i], NULL, thread_func, NULL);
+ if (pthd_results[i])
+ atomic_inc(&create_owner_errs);
+ }
+
+ for (i = 0; i < batch_sz; i++) {
+ if (!pthd_results[i])
+ pthread_join(pthds[i], NULL);
+ }
+ }
+
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ if (storage_type == BPF_MAP_TYPE_SK_STORAGE)
+ return sk_producer(input);
+ else
+ return task_producer(input);
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double creates_per_sec, kmallocs_per_create;
+
+ creates_per_sec = res->hits / 1000.0 / (delta_ns / 1000000000.0);
+ kmallocs_per_create = (double)res->drops / res->hits;
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
+ printf("creates %8.3lfk/s (%7.3lfk/prod), ",
+ creates_per_sec, creates_per_sec / env.producer_cnt);
+ printf("%3.2lf kmallocs/create\n", kmallocs_per_create);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+ double creates_mean = 0.0, creates_stddev = 0.0;
+ long total_creates = 0, total_kmallocs = 0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ creates_mean += res[i].hits / 1000.0 / (0.0 + res_cnt);
+ total_creates += res[i].hits;
+ total_kmallocs += res[i].drops;
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ creates_stddev += (creates_mean - res[i].hits / 1000.0) *
+ (creates_mean - res[i].hits / 1000.0) /
+ (res_cnt - 1.0);
+ creates_stddev = sqrt(creates_stddev);
+ }
+ printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), ",
+ creates_mean, creates_stddev, creates_mean / env.producer_cnt);
+ printf("%4.2lf kmallocs/create\n", (double)total_kmallocs / total_creates);
+ if (create_owner_errs || skel->bss->create_errs)
+ printf("%s() errors %ld create_errs %ld\n",
+ storage_type == BPF_MAP_TYPE_SK_STORAGE ?
+ "socket" : "pthread_create",
+ create_owner_errs,
+ skel->bss->create_errs);
+}
+
+/* Benchmark performance of creating bpf local storage */
+const struct bench bench_local_storage_create = {
+ .name = "local-storage-create",
+ .argp = &bench_local_storage_create_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = report_progress,
+ .report_final = report_final,
+};
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 46500636d8cd..fe847ebfb731 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -28,6 +28,10 @@ struct bpf_testmod_struct_arg_2 {
long b;
};
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in bpf_testmod.ko BTF");
+
noinline int
bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) {
bpf_testmod_test_struct_arg_result = a.a + a.b + b + c;
@@ -65,6 +69,34 @@ bpf_testmod_test_mod_kfunc(int i)
*(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i;
}
+__bpf_kfunc int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt)
+{
+ if (cnt < 0) {
+ it->cnt = 0;
+ return -EINVAL;
+ }
+
+ it->value = value;
+ it->cnt = cnt;
+
+ return 0;
+}
+
+__bpf_kfunc s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq* it)
+{
+ if (it->cnt <= 0)
+ return NULL;
+
+ it->cnt--;
+
+ return &it->value;
+}
+
+__bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it)
+{
+ it->cnt = 0;
+}
+
struct bpf_testmod_btf_type_tag_1 {
int a;
};
@@ -102,7 +134,11 @@ bpf_testmod_test_btf_type_tag_percpu_2(struct bpf_testmod_btf_type_tag_3 *arg) {
noinline int bpf_testmod_loop_test(int n)
{
- int i, sum = 0;
+ /* Make sum volatile, so smart compilers, such as clang, will not
+ * optimize the code by removing the loop.
+ */
+ volatile int sum = 0;
+ int i;
/* the primary goal of this test is to test LBR. Create a lot of
* branches in the function, so we can catch it easily.
@@ -143,6 +179,8 @@ noinline int bpf_testmod_fentry_test3(char a, int b, u64 c)
return a + b + c;
}
+__diag_pop();
+
int bpf_testmod_fentry_ok;
noinline ssize_t
@@ -220,6 +258,17 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
.write = bpf_testmod_test_write,
};
+BTF_SET8_START(bpf_testmod_common_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW)
+BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY)
+BTF_SET8_END(bpf_testmod_common_kfunc_ids)
+
+static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_testmod_common_kfunc_ids,
+};
+
BTF_SET8_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
BTF_SET8_END(bpf_testmod_check_kfunc_ids)
@@ -229,13 +278,20 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
.set = &bpf_testmod_check_kfunc_ids,
};
+noinline int bpf_fentry_shadow_test(int a)
+{
+ return a + 2;
+}
+EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test);
+
extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
{
int ret;
- ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
if (ret < 0)
return ret;
if (bpf_fentry_test1(0) < 0)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
index 0d71e2607832..f32793efe095 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
@@ -22,4 +22,10 @@ struct bpf_testmod_test_writable_ctx {
int val;
};
+/* BPF iter that returns *value* *n* times in a row */
+struct bpf_iter_testmod_seq {
+ s64 value;
+ int cnt;
+};
+
#endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index 156743cf5870..aefd83ebdcd7 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -86,8 +86,13 @@ int main(int argc, char **argv)
pid = getpid();
bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
- snprintf(buf, sizeof(buf),
- "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0) {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/tracing/events/%s/id", probe_name);
+ } else {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ }
efd = open(buf, O_RDONLY, 0);
if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
goto close_prog;
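The tracefs-first, debugfs-fallback probe above recurs in several tests in this series (kprobe_multi_test, task_fd_query_tp, tp_attach_query, trace_printk, trace_vprintk). Factored out, the pattern amounts to the sketch below; the helper name is hypothetical and not something this patch introduces.

	#include <stdio.h>
	#include <unistd.h>

	static void tracefs_event_id_path(char *buf, size_t len, const char *probe)
	{
		/* prefer the dedicated tracefs mount; fall back to legacy debugfs */
		const char *base = access("/sys/kernel/tracing/trace", F_OK) == 0 ?
				   "/sys/kernel/tracing" : "/sys/kernel/debug/tracing";

		snprintf(buf, len, "%s/events/%s/id", base, probe);
	}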
diff --git a/tools/testing/selftests/bpf/json_writer.c b/tools/testing/selftests/bpf/json_writer.c
new file mode 120000
index 000000000000..5effa31e2f39
--- /dev/null
+++ b/tools/testing/selftests/bpf/json_writer.c
@@ -0,0 +1 @@
+../../../bpf/bpftool/json_writer.c \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/json_writer.h b/tools/testing/selftests/bpf/json_writer.h
new file mode 120000
index 000000000000..e0a264c26752
--- /dev/null
+++ b/tools/testing/selftests/bpf/json_writer.h
@@ -0,0 +1 @@
+../../../bpf/bpftool/json_writer.h \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 01de33191226..596caa176582 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -95,7 +95,7 @@ static int __start_server(int type, int protocol, const struct sockaddr *addr,
if (reuseport &&
setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
log_err("Failed to set SO_REUSEPORT");
- return -1;
+ goto error_close;
}
if (bind(fd, addr, addrlen) < 0) {
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index c94fa8d6c4f6..b92770592563 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -575,14 +575,14 @@ static struct bpf_align_test tests[] = {
/* New unknown value in R7 is (4n), >= 76 */
{14, "R7_w=scalar(umin=76,umax=1096,var_off=(0x0; 0x7fc))"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5_w=pkt(id=3,off=0,r=0,umin=2,umax=1082,var_off=(0x2; 0xfffffffc)"},
+ {16, "R5_w=pkt(id=3,off=0,r=0,umin=2,umax=1082,var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=3,off=0,r=4,umin=2,umax=1082,var_off=(0x2; 0xfffffffc)"},
+ {20, "R5=pkt(id=3,off=0,r=4,umin=2,umax=1082,var_off=(0x2; 0x7fc)"},
},
},
};
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index e980188d4124..a53c254c6058 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -8,6 +8,7 @@
#include "bpf_dctcp.skel.h"
#include "bpf_cubic.skel.h"
#include "bpf_tcp_nogpl.skel.h"
+#include "tcp_ca_update.skel.h"
#include "bpf_dctcp_release.skel.h"
#include "tcp_ca_write_sk_pacing.skel.h"
#include "tcp_ca_incompl_cong_ops.skel.h"
@@ -381,6 +382,155 @@ static void test_unsupp_cong_op(void)
libbpf_set_print(old_print_fn);
}
+static void test_update_ca(void)
+{
+ struct tcp_ca_update *skel;
+ struct bpf_link *link;
+ int saved_ca1_cnt;
+ int err;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops");
+
+ do_test("tcp_ca_update", NULL);
+ saved_ca1_cnt = skel->bss->ca1_cnt;
+ ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
+
+ err = bpf_link__update_map(link, skel->maps.ca_update_2);
+ ASSERT_OK(err, "update_map");
+
+ do_test("tcp_ca_update", NULL);
+ ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
+ ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");
+
+ bpf_link__destroy(link);
+ tcp_ca_update__destroy(skel);
+}
+
+static void test_update_wrong(void)
+{
+ struct tcp_ca_update *skel;
+ struct bpf_link *link;
+ int saved_ca1_cnt;
+ int err;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops");
+
+ do_test("tcp_ca_update", NULL);
+ saved_ca1_cnt = skel->bss->ca1_cnt;
+ ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
+
+ err = bpf_link__update_map(link, skel->maps.ca_wrong);
+ ASSERT_ERR(err, "update_map");
+
+ do_test("tcp_ca_update", NULL);
+ ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
+
+ bpf_link__destroy(link);
+ tcp_ca_update__destroy(skel);
+}
+
+static void test_mixed_links(void)
+{
+ struct tcp_ca_update *skel;
+ struct bpf_link *link, *link_nl;
+ int err;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link_nl = bpf_map__attach_struct_ops(skel->maps.ca_no_link);
+ ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl");
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops");
+
+ do_test("tcp_ca_update", NULL);
+ ASSERT_GT(skel->bss->ca1_cnt, 0, "ca1_ca1_cnt");
+
+ err = bpf_link__update_map(link, skel->maps.ca_no_link);
+ ASSERT_ERR(err, "update_map");
+
+ bpf_link__destroy(link);
+ bpf_link__destroy(link_nl);
+ tcp_ca_update__destroy(skel);
+}
+
+static void test_multi_links(void)
+{
+ struct tcp_ca_update *skel;
+ struct bpf_link *link;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops_1st");
+ bpf_link__destroy(link);
+
+	/* A map can be used to create links multiple times. */
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops_2nd");
+ bpf_link__destroy(link);
+
+ tcp_ca_update__destroy(skel);
+}
+
+static void test_link_replace(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts);
+ struct tcp_ca_update *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops_1st");
+ bpf_link__destroy(link);
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_2);
+ ASSERT_OK_PTR(link, "attach_struct_ops_2nd");
+
+	/* BPF_F_REPLACE with a wrong old map fd. It should fail!
+	 *
+	 * With BPF_F_REPLACE, the link should be updated only if the
+	 * old map fd given here matches the map backing the link.
+	 */
+ opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_1);
+ opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(link),
+ bpf_map__fd(skel->maps.ca_update_1),
+ &opts);
+ ASSERT_ERR(err, "bpf_link_update_fail");
+
+	/* BPF_F_REPLACE with the correct old map fd. It should succeed! */
+ opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_2);
+ err = bpf_link_update(bpf_link__fd(link),
+ bpf_map__fd(skel->maps.ca_update_1),
+ &opts);
+ ASSERT_OK(err, "bpf_link_update_success");
+
+ bpf_link__destroy(link);
+
+ tcp_ca_update__destroy(skel);
+}
+
void test_bpf_tcp_ca(void)
{
if (test__start_subtest("dctcp"))
@@ -399,4 +549,14 @@ void test_bpf_tcp_ca(void)
test_incompl_cong_ops();
if (test__start_subtest("unsupp_cong_op"))
test_unsupp_cong_op();
+ if (test__start_subtest("update_ca"))
+ test_update_ca();
+ if (test__start_subtest("update_wrong"))
+ test_update_wrong();
+ if (test__start_subtest("mixed_links"))
+ test_mixed_links();
+ if (test__start_subtest("multi_links"))
+ test_multi_links();
+ if (test__start_subtest("link_replace"))
+ test_link_replace();
}
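For bpf_map__attach_struct_ops() to yield a real BPF link that bpf_link__update_map() can later swap, the struct_ops map has to be placed in the ".struct_ops.link" ELF section. Below is a hedged sketch of the shape tcp_ca_update.bpf.c plausibly has; callback bodies and details beyond the counters asserted on above are assumptions, not the actual selftest source.

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char _license[] SEC("license") = "GPL";

	int ca1_cnt = 0;
	int ca2_cnt = 0;

	/* mandatory congestion-control callbacks, shared by both ops below */
	SEC("struct_ops/ca_ssthresh")
	__u32 BPF_PROG(ca_ssthresh, struct sock *sk)
	{
		return 10;
	}

	SEC("struct_ops/ca_undo_cwnd")
	__u32 BPF_PROG(ca_undo_cwnd, struct sock *sk)
	{
		return 10;
	}

	SEC("struct_ops/ca1_cong_avoid")
	void BPF_PROG(ca1_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
	{
		ca1_cnt++;
	}

	SEC("struct_ops/ca2_cong_avoid")
	void BPF_PROG(ca2_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
	{
		ca2_cnt++;
	}

	/* maps in ".struct_ops.link" can back a BPF link and be swapped via
	 * bpf_link__update_map(); plain ".struct_ops" maps cannot, which is
	 * what the mixed_links subtest relies on.
	 */
	SEC(".struct_ops.link")
	struct tcp_congestion_ops ca_update_1 = {
		.ssthresh	= (void *)ca_ssthresh,
		.undo_cwnd	= (void *)ca_undo_cwnd,
		.cong_avoid	= (void *)ca1_cong_avoid,
		.name		= "tcp_ca_update",
	};

	SEC(".struct_ops.link")
	struct tcp_congestion_ops ca_update_2 = {
		.ssthresh	= (void *)ca_ssthresh,
		.undo_cwnd	= (void *)ca_undo_cwnd,
		.cong_avoid	= (void *)ca2_cong_avoid,
		.name		= "tcp_ca_update",
	};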
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index 5ca252823294..731c343897d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -144,6 +144,12 @@ void test_verif_scale_pyperf600_nounroll()
scale_test("pyperf600_nounroll.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}
+void test_verif_scale_pyperf600_iter()
+{
+ /* open-coded BPF iterator version */
+ scale_test("pyperf600_iter.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
void test_verif_scale_loop1()
{
scale_test("loop1.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
index 621c57222191..63ee892bc757 100644
--- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -56,8 +56,9 @@ static bool assert_storage_noexist(struct bpf_map *map, const void *key)
static bool connect_send(const char *cgroup_path)
{
- bool res = true;
int server_fd = -1, client_fd = -1;
+ char message[] = "message";
+ bool res = true;
if (join_cgroup(cgroup_path))
goto out_clean;
@@ -70,7 +71,10 @@ static bool connect_send(const char *cgroup_path)
if (client_fd < 0)
goto out_clean;
- if (send(client_fd, "message", strlen("message"), 0) < 0)
+ if (send(client_fd, &message, sizeof(message), 0) < 0)
+ goto out_clean;
+
+ if (read(server_fd, &message, sizeof(message)) < 0)
goto out_clean;
res = false;
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
index 5fbe457c4ebe..cdf4acc18e4c 100644
--- a/tools/testing/selftests/bpf/prog_tests/cpumask.c
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -16,7 +16,7 @@ static const char * const cpumask_success_testcases[] = {
"test_copy_any_anyand",
"test_insert_leave",
"test_insert_remove_release",
- "test_insert_kptr_get_release",
+ "test_global_mask_rcu",
};
static void verify_success(const char *prog_name)
diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
index d5fe3d4b936c..4951aa978f33 100644
--- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
+++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
@@ -68,17 +68,17 @@ static struct test_case test_cases[] = {
#if defined(__x86_64__) || defined(__aarch64__)
{
N(SCHED_CLS, struct __sk_buff, tstamp),
- .read = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);"
- "w11 &= 160;"
- "if w11 != 0xa0 goto pc+2;"
+ .read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
+ "w11 &= 3;"
+ "if w11 != 0x3 goto pc+2;"
"$dst = 0;"
"goto pc+1;"
"$dst = *(u64 *)($ctx + sk_buff::tstamp);",
- .write = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);"
- "if w11 & 0x80 goto pc+1;"
+ .write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
+ "if w11 & 0x2 goto pc+1;"
"goto pc+2;"
- "w11 &= -33;"
- "*(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset) = r11;"
+ "w11 &= -2;"
+ "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;"
"*(u64 *)($ctx + sk_buff::tstamp) = $src;",
},
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index 429393caf612..a1e712105811 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -54,11 +54,19 @@ static int setup_netns(void)
SYS(fail, "ip link add veth1 type veth peer name veth2");
SYS(fail, "ip link set dev veth1 up");
+ err = write_sysctl("/proc/sys/net/ipv4/neigh/veth1/gc_stale_time", "900");
+ if (!ASSERT_OK(err, "write_sysctl(net.ipv4.neigh.veth1.gc_stale_time)"))
+ goto fail;
+
+ err = write_sysctl("/proc/sys/net/ipv6/neigh/veth1/gc_stale_time", "900");
+ if (!ASSERT_OK(err, "write_sysctl(net.ipv6.neigh.veth1.gc_stale_time)"))
+ goto fail;
+
SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR);
SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR);
SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC);
- SYS(fail, "ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR);
+ SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR);
SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
@@ -158,7 +166,7 @@ void test_fib_lookup(void)
if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
continue;
- ASSERT_EQ(tests[i].expected_ret, skel->bss->fib_lookup_ret,
+ ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret,
"fib_lookup_ret");
ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac));
diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
index 3948da12a528..0394a1156d99 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
@@ -37,8 +37,8 @@ static int create_perf_events(void)
/* create perf event */
attr.size = sizeof(attr);
- attr.type = PERF_TYPE_RAW;
- attr.config = 0x1b00;
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
attr.branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL |
PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
index 5308de1ed478..2715c68301f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -65,6 +65,7 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain");
+ bpf_link__destroy(skel->links.oncpu);
close(pmu_fd);
/* add exclude_callchain_kernel, attach should fail */
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c
new file mode 100644
index 000000000000..10804ae5ae97
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+
+#include "iters.skel.h"
+#include "iters_state_safety.skel.h"
+#include "iters_looping.skel.h"
+#include "iters_num.skel.h"
+#include "iters_testmod_seq.skel.h"
+
+static void subtest_num_iters(void)
+{
+ struct iters_num *skel;
+ int err;
+
+ skel = iters_num__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = iters_num__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ usleep(1);
+ iters_num__detach(skel);
+
+#define VALIDATE_CASE(case_name) \
+ ASSERT_EQ(skel->bss->res_##case_name, \
+ skel->rodata->exp_##case_name, \
+ #case_name)
+
+ VALIDATE_CASE(empty_zero);
+ VALIDATE_CASE(empty_int_min);
+ VALIDATE_CASE(empty_int_max);
+ VALIDATE_CASE(empty_minus_one);
+
+ VALIDATE_CASE(simple_sum);
+ VALIDATE_CASE(neg_sum);
+ VALIDATE_CASE(very_neg_sum);
+ VALIDATE_CASE(neg_pos_sum);
+
+ VALIDATE_CASE(invalid_range);
+ VALIDATE_CASE(max_range);
+ VALIDATE_CASE(e2big_range);
+
+ VALIDATE_CASE(succ_elem_cnt);
+ VALIDATE_CASE(overfetched_elem_cnt);
+ VALIDATE_CASE(fail_elem_cnt);
+
+#undef VALIDATE_CASE
+
+cleanup:
+ iters_num__destroy(skel);
+}
+
+static void subtest_testmod_seq_iters(void)
+{
+ struct iters_testmod_seq *skel;
+ int err;
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ skel = iters_testmod_seq__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = iters_testmod_seq__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ usleep(1);
+ iters_testmod_seq__detach(skel);
+
+#define VALIDATE_CASE(case_name) \
+ ASSERT_EQ(skel->bss->res_##case_name, \
+ skel->rodata->exp_##case_name, \
+ #case_name)
+
+ VALIDATE_CASE(empty);
+ VALIDATE_CASE(full);
+ VALIDATE_CASE(truncated);
+
+#undef VALIDATE_CASE
+
+cleanup:
+ iters_testmod_seq__destroy(skel);
+}
+
+void test_iters(void)
+{
+ RUN_TESTS(iters_state_safety);
+ RUN_TESTS(iters_looping);
+ RUN_TESTS(iters);
+
+ if (env.has_testmod)
+ RUN_TESTS(iters_testmod_seq);
+
+ if (test__start_subtest("num"))
+ subtest_num_iters();
+ if (test__start_subtest("testmod_seq"))
+ subtest_testmod_seq_iters();
+}
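VALIDATE_CASE pairs a writable res_<case> global in .bss with a read-only exp_<case> twin in .rodata, both defined in the BPF object. A hedged sketch of what one iters_num.bpf.c case presumably looks like, using the bpf_for() open-coded-iterator macro from progs/bpf_misc.h; the concrete case body is an assumption.

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>
	#include "bpf_misc.h"

	const volatile __s64 exp_simple_sum = 0 + 1 + 2 + 3 + 4;
	__s64 res_simple_sum;

	SEC("raw_tp/sys_enter")
	int num_simple_sum(const void *ctx)
	{
		int i;

		res_simple_sum = 0;
		bpf_for(i, 0, 5)	/* iterates i = 0..4 */
			res_simple_sum += i;
		return 0;
	}

	char _license[] SEC("license") = "GPL";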
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 113dba349a57..2173c4bb555e 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -338,7 +338,12 @@ static int get_syms(char ***symsp, size_t *cntp, bool kernel)
* Filtering out duplicates by using hashmap__add, which won't
 * add an existing entry.
*/
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+
if (!f)
return -EINVAL;
@@ -376,8 +381,10 @@ static int get_syms(char ***symsp, size_t *cntp, bool kernel)
continue;
err = hashmap__add(map, name, 0);
- if (err == -EEXIST)
+ if (err == -EEXIST) {
+ err = 0;
continue;
+ }
if (err)
goto error;
diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
new file mode 100644
index 000000000000..76f1da877f81
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "local_kptr_stash.skel.h"
+
+static void test_local_kptr_stash_simple(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_rb_nodes), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_nodes run");
+ ASSERT_OK(opts.retval, "local_kptr_stash_add_nodes retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
+static void test_local_kptr_stash_unstash(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_rb_nodes), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_nodes run");
+ ASSERT_OK(opts.retval, "local_kptr_stash_add_nodes retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.unstash_rb_node), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_nodes run");
+ ASSERT_EQ(opts.retval, 42, "local_kptr_stash_add_nodes retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
+void test_local_kptr_stash_success(void)
+{
+ if (test__start_subtest("local_kptr_stash_simple"))
+ test_local_kptr_stash_simple();
+ if (test__start_subtest("local_kptr_stash_unstash"))
+ test_local_kptr_stash_unstash();
+}
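The two programs driven here exercise stashing a locally allocated object in a map: bpf_obj_new() allocates the object and bpf_kptr_xchg() parks it in (or retrieves it from) a __kptr field of a map value. The sketch below illustrates the pattern under stated assumptions; type and map names are illustrative, not the selftest source.

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>
	#include "bpf_experimental.h"	/* bpf_obj_new()/bpf_obj_drop() wrappers */

	struct node_data {
		int key;
		int data;
	};

	struct map_value {
		struct node_data __kptr *node;
	};

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 1);
		__type(key, int);
		__type(value, struct map_value);
	} some_nodes SEC(".maps");

	SEC("tc")
	int stash_node(void *ctx)
	{
		struct node_data *n;
		struct map_value *v;
		int key = 0;

		v = bpf_map_lookup_elem(&some_nodes, &key);
		if (!v)
			return 1;
		n = bpf_obj_new(typeof(*n));
		if (!n)
			return 2;
		/* stash n; drop whatever (if anything) was stashed before */
		n = bpf_kptr_xchg(&v->node, n);
		if (n)
			bpf_obj_drop(n);
		return 0;
	}

	char _license[] SEC("license") = "GPL";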
diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
index 239e1c5753b0..bc27170bdeb0 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
@@ -24,6 +24,7 @@ static void bad_core_relo(size_t log_buf_size, enum trunc_type trunc_type)
bpf_program__set_autoload(skel->progs.bad_relo, true);
memset(log_buf, 0, sizeof(log_buf));
bpf_program__set_log_buf(skel->progs.bad_relo, log_buf, log_buf_size ?: sizeof(log_buf));
+ bpf_program__set_log_level(skel->progs.bad_relo, 1 | 8); /* BPF_LOG_FIXED to force truncation */
err = test_log_fixup__load(skel);
if (!ASSERT_ERR(err, "load_fail"))
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ops.c b/tools/testing/selftests/bpf/prog_tests/map_ops.c
new file mode 100644
index 000000000000..be5e42a413b4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_ops.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "test_map_ops.skel.h"
+#include "test_progs.h"
+
+static void map_update(void)
+{
+ (void)syscall(__NR_getpid);
+}
+
+static void map_delete(void)
+{
+ (void)syscall(__NR_getppid);
+}
+
+static void map_push(void)
+{
+ (void)syscall(__NR_getuid);
+}
+
+static void map_pop(void)
+{
+ (void)syscall(__NR_geteuid);
+}
+
+static void map_peek(void)
+{
+ (void)syscall(__NR_getgid);
+}
+
+static void map_for_each_pass(void)
+{
+ (void)syscall(__NR_gettid);
+}
+
+static void map_for_each_fail(void)
+{
+ (void)syscall(__NR_getpgid);
+}
+
+static int setup(struct test_map_ops **skel)
+{
+ int err = 0;
+
+ if (!skel)
+ return -1;
+
+ *skel = test_map_ops__open();
+ if (!ASSERT_OK_PTR(*skel, "test_map_ops__open"))
+ return -1;
+
+ (*skel)->rodata->pid = getpid();
+
+ err = test_map_ops__load(*skel);
+ if (!ASSERT_OK(err, "test_map_ops__load"))
+ return err;
+
+ err = test_map_ops__attach(*skel);
+ if (!ASSERT_OK(err, "test_map_ops__attach"))
+ return err;
+
+ return err;
+}
+
+static void teardown(struct test_map_ops **skel)
+{
+ if (skel && *skel)
+ test_map_ops__destroy(*skel);
+}
+
+static void map_ops_update_delete_subtest(void)
+{
+ struct test_map_ops *skel;
+
+ if (setup(&skel))
+ goto teardown;
+
+ map_update();
+ ASSERT_OK(skel->bss->err, "map_update_initial");
+
+ map_update();
+ ASSERT_LT(skel->bss->err, 0, "map_update_existing");
+ ASSERT_EQ(skel->bss->err, -EEXIST, "map_update_existing");
+
+ map_delete();
+ ASSERT_OK(skel->bss->err, "map_delete_existing");
+
+ map_delete();
+ ASSERT_LT(skel->bss->err, 0, "map_delete_non_existing");
+ ASSERT_EQ(skel->bss->err, -ENOENT, "map_delete_non_existing");
+
+teardown:
+ teardown(&skel);
+}
+
+static void map_ops_push_peek_pop_subtest(void)
+{
+ struct test_map_ops *skel;
+
+ if (setup(&skel))
+ goto teardown;
+
+ map_push();
+ ASSERT_OK(skel->bss->err, "map_push_initial");
+
+ map_push();
+ ASSERT_LT(skel->bss->err, 0, "map_push_when_full");
+ ASSERT_EQ(skel->bss->err, -E2BIG, "map_push_when_full");
+
+ map_peek();
+ ASSERT_OK(skel->bss->err, "map_peek");
+
+ map_pop();
+ ASSERT_OK(skel->bss->err, "map_pop");
+
+ map_peek();
+ ASSERT_LT(skel->bss->err, 0, "map_peek_when_empty");
+ ASSERT_EQ(skel->bss->err, -ENOENT, "map_peek_when_empty");
+
+ map_pop();
+ ASSERT_LT(skel->bss->err, 0, "map_pop_when_empty");
+ ASSERT_EQ(skel->bss->err, -ENOENT, "map_pop_when_empty");
+
+teardown:
+ teardown(&skel);
+}
+
+static void map_ops_for_each_subtest(void)
+{
+ struct test_map_ops *skel;
+
+ if (setup(&skel))
+ goto teardown;
+
+ map_for_each_pass();
+ /* expect to iterate over 1 element */
+ ASSERT_EQ(skel->bss->err, 1, "map_for_each_no_flags");
+
+ map_for_each_fail();
+ ASSERT_LT(skel->bss->err, 0, "map_for_each_with_flags");
+ ASSERT_EQ(skel->bss->err, -EINVAL, "map_for_each_with_flags");
+
+teardown:
+ teardown(&skel);
+}
+
+void test_map_ops(void)
+{
+ if (test__start_subtest("map_ops_update_delete"))
+ map_ops_update_delete_subtest();
+
+ if (test__start_subtest("map_ops_push_peek_pop"))
+ map_ops_push_peek_pop_subtest();
+
+ if (test__start_subtest("map_ops_for_each"))
+ map_ops_for_each_subtest();
+}
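Each helper above issues a distinct, otherwise pointless syscall purely as a trigger: test_map_ops.bpf.c presumably attaches one program per syscall tracepoint, performs a single map operation, and reports the result through the err global asserted on here. A hedged sketch of one such trigger program; the map definition and globals are assumptions.

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>

	struct {
		__uint(type, BPF_MAP_TYPE_HASH);
		__uint(max_entries, 1);
		__type(key, int);
		__type(value, int);
	} hash_map SEC(".maps");

	const volatile pid_t pid;	/* set from userspace before load */
	long err;

	SEC("tp/syscalls/sys_enter_getpid")
	int map_update(void *ctx)
	{
		int key = 0, val = 1;

		if ((bpf_get_current_pid_tgid() >> 32) != pid)
			return 0;
		/* first call succeeds, second returns -EEXIST, as asserted above */
		err = bpf_map_update_elem(&hash_map, &key, &val, BPF_NOEXIST);
		return 0;
	}

	char _license[] SEC("license") = "GPL";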
diff --git a/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c
new file mode 100644
index 000000000000..c7636e18b1eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Red Hat */
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "bpf/libbpf_internal.h"
+#include "cgroup_helpers.h"
+
+static const char *module_name = "bpf_testmod";
+static const char *symbol_name = "bpf_fentry_shadow_test";
+
+static int get_bpf_testmod_btf_fd(void)
+{
+ struct bpf_btf_info info;
+ char name[64];
+ __u32 id = 0, len;
+ int err, fd;
+
+ while (true) {
+ err = bpf_btf_get_next_id(id, &id);
+ if (err) {
+ log_err("failed to iterate BTF objects");
+ return err;
+ }
+
+ fd = bpf_btf_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue; /* expected race: BTF was unloaded */
+ err = -errno;
+ log_err("failed to get FD for BTF object #%d", id);
+ return err;
+ }
+
+ len = sizeof(info);
+ memset(&info, 0, sizeof(info));
+ info.name = ptr_to_u64(name);
+ info.name_len = sizeof(name);
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ err = -errno;
+ log_err("failed to get info for BTF object #%d", id);
+ close(fd);
+ return err;
+ }
+
+ if (strcmp(name, module_name) == 0)
+ return fd;
+
+ close(fd);
+ }
+ return -ENOENT;
+}
+
+void test_module_fentry_shadow(void)
+{
+ struct btf *vmlinux_btf = NULL, *mod_btf = NULL;
+ int err, i;
+ int btf_fd[2] = {};
+ int prog_fd[2] = {};
+ int link_fd[2] = {};
+ __s32 btf_id[2] = {};
+
+ LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ );
+
+ const struct bpf_insn trace_program[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "load_vmlinux_btf"))
+ return;
+
+ btf_fd[1] = get_bpf_testmod_btf_fd();
+ if (!ASSERT_GE(btf_fd[1], 0, "get_bpf_testmod_btf_fd"))
+ goto out;
+
+ mod_btf = btf_get_from_fd(btf_fd[1], vmlinux_btf);
+ if (!ASSERT_OK_PTR(mod_btf, "btf_get_from_fd"))
+ goto out;
+
+ btf_id[0] = btf__find_by_name_kind(vmlinux_btf, symbol_name, BTF_KIND_FUNC);
+ if (!ASSERT_GT(btf_id[0], 0, "btf_find_by_name"))
+ goto out;
+
+ btf_id[1] = btf__find_by_name_kind(mod_btf, symbol_name, BTF_KIND_FUNC);
+ if (!ASSERT_GT(btf_id[1], 0, "btf_find_by_name"))
+ goto out;
+
+ for (i = 0; i < 2; i++) {
+ load_opts.attach_btf_id = btf_id[i];
+ load_opts.attach_btf_obj_fd = btf_fd[i];
+ prog_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL",
+ trace_program,
+ sizeof(trace_program) / sizeof(struct bpf_insn),
+ &load_opts);
+ if (!ASSERT_GE(prog_fd[i], 0, "bpf_prog_load"))
+ goto out;
+
+ /* If the verifier incorrectly resolves addresses of the
+ * shadowed functions and uses the same address for both the
+ * vmlinux and the bpf_testmod functions, this will fail on
+ * attempting to create two trampolines for the same address,
+ * which is forbidden.
+ */
+ link_fd[i] = bpf_link_create(prog_fd[i], 0, BPF_TRACE_FENTRY, NULL);
+ if (!ASSERT_GE(link_fd[i], 0, "bpf_link_create"))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(prog_fd[0], NULL);
+ ASSERT_OK(err, "running test");
+
+out:
+ btf__free(vmlinux_btf);
+ btf__free(mod_btf);
+ for (i = 0; i < 2; i++) {
+ if (btf_fd[i])
+ close(btf_fd[i]);
+ if (prog_fd[i] > 0)
+ close(prog_fd[i]);
+ if (link_fd[i] > 0)
+ close(link_fd[i]);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
index 33144c9432ae..f4aad35afae1 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
@@ -63,7 +63,8 @@ void test_perf_event_stackmap(void)
PERF_SAMPLE_BRANCH_NO_FLAGS |
PERF_SAMPLE_BRANCH_NO_CYCLES |
PERF_SAMPLE_BRANCH_CALL_STACK,
- .sample_period = 5000,
+ .freq = 1,
+ .sample_freq = read_perf_max_sample_freq(),
.size = sizeof(struct perf_event_attr),
};
struct perf_event_stackmap *skel;
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index d63a20fbed33..b15b343ebb6b 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -64,8 +64,12 @@ static void test_send_signal_common(struct perf_event_attr *attr,
ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
/* wait a little for signal handler */
- for (int i = 0; i < 1000000000 && !sigusr1_received; i++)
+ for (int i = 0; i < 1000000000 && !sigusr1_received; i++) {
j /= i + j + 1;
+ if (!attr)
+ /* trigger the nanosleep tracepoint program. */
+ usleep(1);
+ }
buf[0] = sigusr1_received ? '2' : '0';
ASSERT_EQ(sigusr1_received, 1, "sigusr1_received");
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index 9ad09a6c538a..b7ba5cd47d96 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -7,13 +7,12 @@ void test_stacktrace_build_id(void)
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
struct test_stacktrace_build_id *skel;
- int err, stack_trace_len;
+ int err, stack_trace_len, build_id_size;
__u32 key, prev_key, val, duration = 0;
- char buf[256];
- int i, j;
+ char buf[BPF_BUILD_ID_SIZE];
struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
int build_id_matches = 0;
- int retry = 1;
+ int i, retry = 1;
retry:
skel = test_stacktrace_build_id__open_and_load();
@@ -52,9 +51,10 @@ retry:
"err %d errno %d\n", err, errno))
goto cleanup;
- err = extract_build_id(buf, 256);
+ build_id_size = read_build_id("urandom_read", buf, sizeof(buf));
+ err = build_id_size < 0 ? build_id_size : 0;
- if (CHECK(err, "get build_id with readelf",
+ if (CHECK(err, "read_build_id",
"err %d errno %d\n", err, errno))
goto cleanup;
@@ -64,8 +64,6 @@ retry:
goto cleanup;
do {
- char build_id[64];
-
err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
if (CHECK(err, "lookup_elem from stackmap",
"err %d, errno %d\n", err, errno))
@@ -73,10 +71,7 @@ retry:
for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
id_offs[i].offset != 0) {
- for (j = 0; j < 20; ++j)
- sprintf(build_id + 2 * j, "%02x",
- id_offs[i].build_id[j] & 0xff);
- if (strstr(buf, build_id) != NULL)
+ if (memcmp(buf, id_offs[i].build_id, build_id_size) == 0)
build_id_matches = 1;
}
prev_key = key;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index f4ea1a215ce4..5db9eec24b5b 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -2,21 +2,6 @@
#include <test_progs.h>
#include "test_stacktrace_build_id.skel.h"
-static __u64 read_perf_max_sample_freq(void)
-{
- __u64 sample_freq = 5000; /* fallback to 5000 on error */
- FILE *f;
- __u32 duration = 0;
-
- f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
- if (f == NULL)
- return sample_freq;
- CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate",
- "return default value: 5000,err %d\n", -errno);
- fclose(f);
- return sample_freq;
-}
-
void test_stacktrace_build_id_nmi(void)
{
int control_map_fd, stackid_hmap_fd, stackmap_fd;
@@ -28,11 +13,10 @@ void test_stacktrace_build_id_nmi(void)
.config = PERF_COUNT_HW_CPU_CYCLES,
};
__u32 key, prev_key, val, duration = 0;
- char buf[256];
- int i, j;
+ char buf[BPF_BUILD_ID_SIZE];
struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
- int build_id_matches = 0;
- int retry = 1;
+ int build_id_matches = 0, build_id_size;
+ int i, retry = 1;
attr.sample_freq = read_perf_max_sample_freq();
@@ -94,7 +78,8 @@ retry:
"err %d errno %d\n", err, errno))
goto cleanup;
- err = extract_build_id(buf, 256);
+ build_id_size = read_build_id("urandom_read", buf, sizeof(buf));
+ err = build_id_size < 0 ? build_id_size : 0;
if (CHECK(err, "get build_id with readelf",
"err %d errno %d\n", err, errno))
@@ -106,8 +91,6 @@ retry:
goto cleanup;
do {
- char build_id[64];
-
err = bpf_map__lookup_elem(skel->maps.stackmap, &key, sizeof(key),
id_offs, sizeof(id_offs), 0);
if (CHECK(err, "lookup_elem from stackmap",
@@ -116,10 +99,7 @@ retry:
for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
id_offs[i].offset != 0) {
- for (j = 0; j < 20; ++j)
- sprintf(build_id + 2 * j, "%02x",
- id_offs[i].build_id[j] & 0xff);
- if (strstr(buf, build_id) != NULL)
+ if (memcmp(buf, id_offs[i].build_id, build_id_size) == 0)
build_id_matches = 1;
}
prev_key = key;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
index c717741bf8b6..c91eda624657 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
@@ -17,8 +17,13 @@ static void test_task_fd_query_tp_core(const char *probe_name,
if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno))
goto close_prog;
- snprintf(buf, sizeof(buf),
- "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0) {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/tracing/events/%s/id", probe_name);
+ } else {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ }
efd = open(buf, O_RDONLY, 0);
if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
index f79fa5bc9a8d..740d5f644b40 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
@@ -73,11 +73,12 @@ static const char * const success_tests[] = {
"test_task_acquire_release_current",
"test_task_acquire_leave_in_map",
"test_task_xchg_release",
- "test_task_get_release",
+ "test_task_map_acquire_release",
"test_task_current_acquire_release",
"test_task_from_pid_arg",
"test_task_from_pid_current",
"test_task_from_pid_invalid",
+ "task_kfunc_acquire_trusted_walked",
};
void test_task_kfunc(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index 5cf85d0f9827..13bcaeb028b8 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -151,7 +151,7 @@ static int check_hdr_opt(const struct bpf_test_option *exp,
const struct bpf_test_option *act,
const char *hdr_desc)
{
- if (!ASSERT_OK(memcmp(exp, act, sizeof(*exp)), hdr_desc)) {
+ if (!ASSERT_EQ(memcmp(exp, act, sizeof(*exp)), 0, hdr_desc)) {
print_option(exp, "expected: ");
print_option(act, " actual: ");
return -1;
@@ -169,7 +169,7 @@ static int check_hdr_stg(const struct hdr_stg *exp, int fd,
"map_lookup(hdr_stg_map_fd)"))
return -1;
- if (!ASSERT_OK(memcmp(exp, &act, sizeof(*exp)), stg_desc)) {
+ if (!ASSERT_EQ(memcmp(exp, &act, sizeof(*exp)), 0, stg_desc)) {
print_hdr_stg(exp, "expected: ");
print_hdr_stg(&act, " actual: ");
return -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index 9c77cd6b1eaf..bcf2e1905ed7 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -13,8 +13,6 @@
#include "network_helpers.h"
#include "task_local_storage_helpers.h"
-static unsigned int duration;
-
#define TEST_STORAGE_VALUE 0xbeefdead
struct storage {
@@ -25,7 +23,7 @@ struct storage {
/* Fork and exec the provided rm binary and return the exit code of the
* forked process and its pid.
*/
-static int run_self_unlink(int *monitored_pid, const char *rm_path)
+static int run_self_unlink(struct local_storage *skel, const char *rm_path)
{
int child_pid, child_status, ret;
int null_fd;
@@ -37,7 +35,7 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path)
dup2(null_fd, STDERR_FILENO);
close(null_fd);
- *monitored_pid = getpid();
+ skel->bss->monitored_pid = getpid();
/* Use the copied /usr/bin/rm to delete itself
* /tmp/copy_of_rm /tmp/copy_of_rm.
*/
@@ -46,6 +44,7 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path)
exit(errno);
} else if (child_pid > 0) {
waitpid(child_pid, &child_status, 0);
+ ASSERT_EQ(skel->data->task_storage_result, 0, "task_storage_result");
return WEXITSTATUS(child_status);
}
@@ -60,36 +59,30 @@ static bool check_syscall_operations(int map_fd, int obj_fd)
/* Looking up an existing element should fail initially */
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
- if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
- "err:%d errno:%d\n", err, errno))
+ if (!ASSERT_EQ(err, -ENOENT, "bpf_map_lookup_elem"))
return false;
/* Create a new element */
err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST);
- if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err,
- errno))
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
return false;
/* Lookup the newly created element */
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
- if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err,
- errno))
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
return false;
/* Check the value of the newly created element */
- if (CHECK(lookup_val.value != val.value, "bpf_map_lookup_elem",
- "value got = %x errno:%d", lookup_val.value, val.value))
+ if (!ASSERT_EQ(lookup_val.value, val.value, "bpf_map_lookup_elem"))
return false;
err = bpf_map_delete_elem(map_fd, &obj_fd);
- if (CHECK(err, "bpf_map_delete_elem()", "err:%d errno:%d\n", err,
- errno))
+ if (!ASSERT_OK(err, "bpf_map_delete_elem()"))
return false;
/* The lookup should fail, now that the element has been deleted */
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
- if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
- "err:%d errno:%d\n", err, errno))
+ if (!ASSERT_EQ(err, -ENOENT, "bpf_map_lookup_elem"))
return false;
return true;
@@ -104,35 +97,32 @@ void test_test_local_storage(void)
char cmd[256];
skel = local_storage__open_and_load();
- if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
goto close_prog;
err = local_storage__attach(skel);
- if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "attach"))
goto close_prog;
task_fd = sys_pidfd_open(getpid(), 0);
- if (CHECK(task_fd < 0, "pidfd_open",
- "failed to get pidfd err:%d, errno:%d", task_fd, errno))
+ if (!ASSERT_GE(task_fd, 0, "pidfd_open"))
goto close_prog;
if (!check_syscall_operations(bpf_map__fd(skel->maps.task_storage_map),
task_fd))
goto close_prog;
- if (CHECK(!mkdtemp(tmp_dir_path), "mkdtemp",
- "unable to create tmpdir: %d\n", errno))
+ if (!ASSERT_OK_PTR(mkdtemp(tmp_dir_path), "mkdtemp"))
goto close_prog;
snprintf(tmp_exec_path, sizeof(tmp_exec_path), "%s/copy_of_rm",
tmp_dir_path);
snprintf(cmd, sizeof(cmd), "cp /bin/rm %s", tmp_exec_path);
- if (CHECK_FAIL(system(cmd)))
+ if (!ASSERT_OK(system(cmd), "system(cp)"))
goto close_prog_rmdir;
rm_fd = open(tmp_exec_path, O_RDONLY);
- if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d",
- tmp_exec_path, rm_fd, errno))
+ if (!ASSERT_GE(rm_fd, 0, "open(tmp_exec_path)"))
goto close_prog_rmdir;
if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map),
@@ -144,8 +134,8 @@ void test_test_local_storage(void)
* unlink its executable. This operation should be denied by the loaded
* LSM program.
*/
- err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path);
- if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err))
+ err = run_self_unlink(skel, tmp_exec_path);
+ if (!ASSERT_EQ(err, EPERM, "run_self_unlink"))
goto close_prog_rmdir;
/* Set the process being monitored to be the current process */
@@ -156,18 +146,16 @@ void test_test_local_storage(void)
*/
snprintf(cmd, sizeof(cmd), "mv %s/copy_of_rm %s/check_null_ptr",
tmp_dir_path, tmp_dir_path);
- if (CHECK_FAIL(system(cmd)))
+ if (!ASSERT_OK(system(cmd), "system(mv)"))
goto close_prog_rmdir;
- CHECK(skel->data->inode_storage_result != 0, "inode_storage_result",
- "inode_local_storage not set\n");
+ ASSERT_EQ(skel->data->inode_storage_result, 0, "inode_storage_result");
serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
- if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
+ if (!ASSERT_GE(serv_sk, 0, "start_server"))
goto close_prog_rmdir;
- CHECK(skel->data->sk_storage_result != 0, "sk_storage_result",
- "sk_local_storage not set\n");
+ ASSERT_EQ(skel->data->sk_storage_result, 0, "sk_storage_result");
if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map),
serv_sk))
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index 47f1d482fe39..d149ab98798d 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -89,6 +89,9 @@
#define IP6VXLAN_TUNL_DEV0 "ip6vxlan00"
#define IP6VXLAN_TUNL_DEV1 "ip6vxlan11"
+#define IPIP_TUNL_DEV0 "ipip00"
+#define IPIP_TUNL_DEV1 "ipip11"
+
#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
static int config_device(void)
@@ -188,6 +191,79 @@ static void delete_ip6vxlan_tunnel(void)
SYS_NOFAIL("ip link delete dev %s", IP6VXLAN_TUNL_DEV1);
}
+enum ipip_encap {
+ NONE = 0,
+ FOU = 1,
+ GUE = 2,
+};
+
+static int set_ipip_encap(const char *ipproto, const char *type)
+{
+ SYS(fail, "ip -n at_ns0 fou add port 5555 %s", ipproto);
+ SYS(fail, "ip -n at_ns0 link set dev %s type ipip encap %s",
+ IPIP_TUNL_DEV0, type);
+ SYS(fail, "ip -n at_ns0 link set dev %s type ipip encap-dport 5555",
+ IPIP_TUNL_DEV0);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int add_ipip_tunnel(enum ipip_encap encap)
+{
+ int err;
+ const char *ipproto, *type;
+
+ switch (encap) {
+ case FOU:
+ ipproto = "ipproto 4";
+ type = "fou";
+ break;
+ case GUE:
+ ipproto = "gue";
+ type = ipproto;
+ break;
+ default:
+ ipproto = NULL;
+ type = ipproto;
+ }
+
+ /* at_ns0 namespace */
+ SYS(fail, "ip -n at_ns0 link add dev %s type ipip local %s remote %s",
+ IPIP_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ if (type && ipproto) {
+ err = set_ipip_encap(ipproto, type);
+ if (!ASSERT_OK(err, "set_ipip_encap"))
+ goto fail;
+ }
+
+ SYS(fail, "ip -n at_ns0 link set dev %s up", IPIP_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24",
+ IPIP_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
+
+ /* root namespace */
+ if (type && ipproto)
+ SYS(fail, "ip fou add port 5555 %s", ipproto);
+ SYS(fail, "ip link add dev %s type ipip external", IPIP_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s up", IPIP_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/24", IPIP_TUNL_DEV1,
+ IP4_ADDR_TUNL_DEV1);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void delete_ipip_tunnel(void)
+{
+ SYS_NOFAIL("ip -n at_ns0 link delete dev %s", IPIP_TUNL_DEV0);
+ SYS_NOFAIL("ip -n at_ns0 fou del port 5555 2> /dev/null");
+ SYS_NOFAIL("ip link delete dev %s", IPIP_TUNL_DEV1);
+ SYS_NOFAIL("ip fou del port 5555 2> /dev/null");
+}
+
static int test_ping(int family, const char *addr)
{
SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
@@ -386,10 +462,80 @@ done:
test_tunnel_kern__destroy(skel);
}
-#define RUN_TEST(name) \
+static void test_ipip_tunnel(enum ipip_encap encap)
+{
+ struct test_tunnel_kern *skel = NULL;
+ struct nstoken *nstoken;
+ int set_src_prog_fd, get_src_prog_fd;
+ int ifindex = -1;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+ .attach_point = BPF_TC_INGRESS);
+
+ /* add ipip tunnel */
+ err = add_ipip_tunnel(encap);
+ if (!ASSERT_OK(err, "add_ipip_tunnel"))
+ goto done;
+
+ /* load and attach bpf prog to tunnel dev tc hook point */
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ goto done;
+ ifindex = if_nametoindex(IPIP_TUNL_DEV1);
+ if (!ASSERT_NEQ(ifindex, 0, "ipip11 ifindex"))
+ goto done;
+ tc_hook.ifindex = ifindex;
+
+ switch (encap) {
+ case FOU:
+ get_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_encap_get_tunnel);
+ set_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_fou_set_tunnel);
+ break;
+ case GUE:
+ get_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_encap_get_tunnel);
+ set_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_gue_set_tunnel);
+ break;
+ default:
+ get_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_get_tunnel);
+ set_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_set_tunnel);
+ }
+
+ if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ goto done;
+
+ /* ping from root namespace test */
+ err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+ if (!ASSERT_OK(err, "test_ping"))
+ goto done;
+
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
+ if (!ASSERT_OK(err, "test_ping"))
+ goto done;
+ close_netns(nstoken);
+
+done:
+ /* delete ipip tunnel */
+ delete_ipip_tunnel();
+ if (skel)
+ test_tunnel_kern__destroy(skel);
+}
+
+#define RUN_TEST(name, ...) \
({ \
if (test__start_subtest(#name)) { \
- test_ ## name(); \
+ test_ ## name(__VA_ARGS__); \
} \
})
@@ -400,6 +546,9 @@ static void *test_tunnel_run_tests(void *arg)
RUN_TEST(vxlan_tunnel);
RUN_TEST(ip6vxlan_tunnel);
+ RUN_TEST(ipip_tunnel, NONE);
+ RUN_TEST(ipip_tunnel, FOU);
+ RUN_TEST(ipip_tunnel, GUE);
cleanup();
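The "external" ipip device created in the root namespace is a collect_md tunnel, so it relies on the attached TC program to supply the endpoint per packet. A hedged sketch of the set-tunnel half follows; the remote address constant is illustrative.

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>

	#define TC_ACT_OK	0
	#define TC_ACT_SHOT	2

	SEC("tc")
	int ipip_set_tunnel_sketch(struct __sk_buff *skb)
	{
		struct bpf_tunnel_key key = {};

		/* remote_ipv4 is in host byte order, e.g. 172.16.1.100 */
		key.remote_ipv4 = 0xac100164;

		if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0))
			return TC_ACT_SHOT;
		return TC_ACT_OK;
	}

	char _license[] SEC("license") = "GPL";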
diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
index 770fcc3bb1ba..655d69f0ff0b 100644
--- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
+++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
@@ -16,8 +16,13 @@ void serial_test_tp_attach_query(void)
for (i = 0; i < num_progs; i++)
obj[i] = NULL;
- snprintf(buf, sizeof(buf),
- "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0) {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/tracing/events/sched/sched_switch/id");
+ } else {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ }
efd = open(buf, O_RDONLY, 0);
if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
index cade7f12315f..7b9124d506a5 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -5,7 +5,8 @@
#include "trace_printk.lskel.h"
-#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe"
+#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
+#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
#define SEARCHMSG "testing,testing"
void serial_test_trace_printk(void)
@@ -34,8 +35,11 @@ void serial_test_trace_printk(void)
if (!ASSERT_OK(err, "trace_printk__attach"))
goto cleanup;
- fp = fopen(TRACEBUF, "r");
- if (!ASSERT_OK_PTR(fp, "fopen(TRACEBUF)"))
+ if (access(TRACEFS_PIPE, F_OK) == 0)
+ fp = fopen(TRACEFS_PIPE, "r");
+ else
+ fp = fopen(DEBUGFS_PIPE, "r");
+ if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)"))
goto cleanup;
/* We do not want to wait forever if this test fails... */
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
index 7a4e313e8558..44ea2fd88f4c 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
@@ -5,7 +5,8 @@
#include "trace_vprintk.lskel.h"
-#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe"
+#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
+#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
#define SEARCHMSG "1,2,3,4,5,6,7,8,9,10"
void serial_test_trace_vprintk(void)
@@ -27,8 +28,11 @@ void serial_test_trace_vprintk(void)
if (!ASSERT_OK(err, "trace_vprintk__attach"))
goto cleanup;
- fp = fopen(TRACEBUF, "r");
- if (!ASSERT_OK_PTR(fp, "fopen(TRACEBUF)"))
+ if (access(TRACEFS_PIPE, F_OK) == 0)
+ fp = fopen(TRACEFS_PIPE, "r");
+ else
+ fp = fopen(DEBUGFS_PIPE, "r");
+ if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)"))
goto cleanup;
/* We do not want to wait forever if this test fails... */
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
index 6558c857e620..d5b3377aa33c 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
@@ -3,7 +3,6 @@
#include <test_progs.h>
#include "test_uprobe_autoattach.skel.h"
-#include "progs/bpf_misc.h"
/* uprobe attach point */
static noinline int autoattach_trigger_func(int arg1, int arg2, int arg3,
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
new file mode 100644
index 000000000000..73dff693d411
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+
+#include "cap_helpers.h"
+#include "verifier_and.skel.h"
+#include "verifier_array_access.skel.h"
+#include "verifier_basic_stack.skel.h"
+#include "verifier_bounds_deduction.skel.h"
+#include "verifier_bounds_deduction_non_const.skel.h"
+#include "verifier_bounds_mix_sign_unsign.skel.h"
+#include "verifier_cfg.skel.h"
+#include "verifier_cgroup_inv_retcode.skel.h"
+#include "verifier_cgroup_skb.skel.h"
+#include "verifier_cgroup_storage.skel.h"
+#include "verifier_const_or.skel.h"
+#include "verifier_ctx_sk_msg.skel.h"
+#include "verifier_direct_stack_access_wraparound.skel.h"
+#include "verifier_div0.skel.h"
+#include "verifier_div_overflow.skel.h"
+#include "verifier_helper_access_var_len.skel.h"
+#include "verifier_helper_packet_access.skel.h"
+#include "verifier_helper_restricted.skel.h"
+#include "verifier_helper_value_access.skel.h"
+#include "verifier_int_ptr.skel.h"
+#include "verifier_ld_ind.skel.h"
+#include "verifier_leak_ptr.skel.h"
+#include "verifier_map_ptr.skel.h"
+#include "verifier_map_ret_val.skel.h"
+#include "verifier_masking.skel.h"
+#include "verifier_meta_access.skel.h"
+#include "verifier_raw_stack.skel.h"
+#include "verifier_raw_tp_writable.skel.h"
+#include "verifier_ringbuf.skel.h"
+#include "verifier_spill_fill.skel.h"
+#include "verifier_stack_ptr.skel.h"
+#include "verifier_uninit.skel.h"
+#include "verifier_value_adj_spill.skel.h"
+#include "verifier_value.skel.h"
+#include "verifier_value_or_null.skel.h"
+#include "verifier_var_off.skel.h"
+#include "verifier_xadd.skel.h"
+#include "verifier_xdp.skel.h"
+#include "verifier_xdp_direct_packet_access.skel.h"
+
+__maybe_unused
+static void run_tests_aux(const char *skel_name, skel_elf_bytes_fn elf_bytes_factory)
+{
+ struct test_loader tester = {};
+ __u64 old_caps;
+ int err;
+
+ /* test_verifier tests are executed w/o CAP_SYS_ADMIN, do the same here */
+ err = cap_disable_effective(1ULL << CAP_SYS_ADMIN, &old_caps);
+ if (err) {
+ PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(err));
+ return;
+ }
+
+ test_loader__run_subtests(&tester, skel_name, elf_bytes_factory);
+ test_loader_fini(&tester);
+
+ err = cap_enable_effective(old_caps, NULL);
+ if (err)
+ PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(err));
+}
+
+#define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes)
+
+void test_verifier_and(void) { RUN(verifier_and); }
+void test_verifier_array_access(void) { RUN(verifier_array_access); }
+void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
+void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); }
+void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); }
+void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); }
+void test_verifier_cfg(void) { RUN(verifier_cfg); }
+void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); }
+void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); }
+void test_verifier_cgroup_storage(void) { RUN(verifier_cgroup_storage); }
+void test_verifier_const_or(void) { RUN(verifier_const_or); }
+void test_verifier_ctx_sk_msg(void) { RUN(verifier_ctx_sk_msg); }
+void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); }
+void test_verifier_div0(void) { RUN(verifier_div0); }
+void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); }
+void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); }
+void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); }
+void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }
+void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); }
+void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
+void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
+void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
+void test_verifier_map_ptr(void) { RUN(verifier_map_ptr); }
+void test_verifier_map_ret_val(void) { RUN(verifier_map_ret_val); }
+void test_verifier_masking(void) { RUN(verifier_masking); }
+void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
+void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); }
+void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); }
+void test_verifier_ringbuf(void) { RUN(verifier_ringbuf); }
+void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); }
+void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
+void test_verifier_uninit(void) { RUN(verifier_uninit); }
+void test_verifier_value_adj_spill(void) { RUN(verifier_value_adj_spill); }
+void test_verifier_value(void) { RUN(verifier_value); }
+void test_verifier_value_or_null(void) { RUN(verifier_value_or_null); }
+void test_verifier_var_off(void) { RUN(verifier_var_off); }
+void test_verifier_xadd(void) { RUN(verifier_xadd); }
+void test_verifier_xdp(void) { RUN(verifier_xdp); }
+void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); }
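Each RUN() above points at a verifier_*.bpf.c object consumed by test_loader, where expectations live inline as annotations from progs/bpf_misc.h instead of the C arrays used by the old test_verifier runner. A hedged sketch of the shape of one such case; the exact program and message string are illustrative.

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>
	#include "bpf_misc.h"

	SEC("socket")
	__description("read of uninitialized stack is rejected")
	__failure __msg("invalid read from stack")
	__naked void uninit_stack_read(void)
	{
		asm volatile ("					\
		r2 = *(u64 *)(r10 - 8);				\
		r0 = 0;						\
		exit;						\
	"	::: __clobber_all);
	}

	char _license[] SEC("license") = "GPL";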
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c
new file mode 100644
index 000000000000..8337c6bc5b95
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_log_buf.skel.h"
+
+static bool check_prog_load(int prog_fd, bool expect_err, const char *tag)
+{
+ if (expect_err) {
+ if (!ASSERT_LT(prog_fd, 0, tag)) {
+ close(prog_fd);
+ return false;
+ }
+ } else /* !expect_err */ {
+ if (!ASSERT_GT(prog_fd, 0, tag))
+ return false;
+ }
+ if (prog_fd >= 0)
+ close(prog_fd);
+ return true;
+}
+
+static struct {
+ /* strategically placed before others to avoid accidental modification by kernel */
+ char filler[1024];
+ char buf[1024];
+ /* strategically placed after buf[] to catch more accidental corruptions */
+ char reference[1024];
+} logs;
+static const struct bpf_insn *insns;
+static size_t insn_cnt;
+
+static int load_prog(struct bpf_prog_load_opts *opts, bool expect_load_error)
+{
+ int prog_fd;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "log_prog",
+ "GPL", insns, insn_cnt, opts);
+ check_prog_load(prog_fd, expect_load_error, "prog_load");
+
+ return prog_fd;
+}
+
+static void verif_log_subtest(const char *name, bool expect_load_error, int log_level)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ char *exp_log, prog_name[16], op_name[32];
+ struct test_log_buf *skel;
+ struct bpf_program *prog;
+ size_t fixed_log_sz;
+ __u32 log_true_sz_fixed, log_true_sz_rolling;
+ int i, mode, err, prog_fd, res;
+
+ skel = test_log_buf__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ if (strcmp(bpf_program__name(prog), name) == 0)
+ bpf_program__set_autoload(prog, true);
+ else
+ bpf_program__set_autoload(prog, false);
+ }
+
+ err = test_log_buf__load(skel);
+ if (!expect_load_error && !ASSERT_OK(err, "unexpected_load_failure"))
+ goto cleanup;
+ if (expect_load_error && !ASSERT_ERR(err, "unexpected_load_success"))
+ goto cleanup;
+
+ insns = bpf_program__insns(skel->progs.good_prog);
+ insn_cnt = bpf_program__insn_cnt(skel->progs.good_prog);
+
+ opts.log_buf = logs.reference;
+ opts.log_size = sizeof(logs.reference);
+ opts.log_level = log_level | 8 /* BPF_LOG_FIXED */;
+ load_prog(&opts, expect_load_error);
+
+ fixed_log_sz = strlen(logs.reference) + 1;
+ if (!ASSERT_GT(fixed_log_sz, 50, "fixed_log_sz"))
+ goto cleanup;
+ memset(logs.reference + fixed_log_sz, 0, sizeof(logs.reference) - fixed_log_sz);
+
+ /* validate that BPF_LOG_FIXED works the way the verifier log used to,
+ * that is: we get -ENOSPC and the beginning of the full verifier log.
+ * This only works for log_level 2, and for log_level 1 with a failed
+ * program. For log_level 2 we never reset the log at all. For log_level 1
+ * with a failed program we never reach the verification stats output.
+ * With log_level 1 and a successful program, the final output would be
+ * just the verifier stats; but if the provided log buf is too short, the
+ * kernel NULLs out log->ubuf and stops emitting further log, so we would
+ * never see predictable verifier stats.
+ * Long story short, we run the following -ENOSPC test only for
+ * predictable combinations.
+ */
+ if (log_level >= 2 || expect_load_error) {
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level | 8; /* fixed-length log */
+ opts.log_size = 25;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "log_fixed25",
+ "GPL", insns, insn_cnt, &opts);
+ if (!ASSERT_EQ(prog_fd, -ENOSPC, "unexpected_log_fixed_prog_load_result")) {
+ if (prog_fd >= 0)
+ close(prog_fd);
+ goto cleanup;
+ }
+ if (!ASSERT_EQ(strlen(logs.buf), 24, "log_fixed_25"))
+ goto cleanup;
+ if (!ASSERT_STRNEQ(logs.buf, logs.reference, 24, "log_fixed_contents_25"))
+ goto cleanup;
+ }
+
+ /* validate rolling verifier log logic: try all variations of log buf
+ * length to force various truncation scenarios
+ */
+ opts.log_buf = logs.buf;
+
+ /* rotating mode, then fixed mode */
+ for (mode = 1; mode >= 0; mode--) {
+ /* prefill logs.buf with 'A's to detect any write beyond allowed length */
+ memset(logs.filler, 'A', sizeof(logs.filler));
+ logs.filler[sizeof(logs.filler) - 1] = '\0';
+ memset(logs.buf, 'A', sizeof(logs.buf));
+ logs.buf[sizeof(logs.buf) - 1] = '\0';
+
+ for (i = 1; i < fixed_log_sz; i++) {
+ opts.log_size = i;
+ opts.log_level = log_level | (mode ? 0 : 8 /* BPF_LOG_FIXED */);
+
+ snprintf(prog_name, sizeof(prog_name),
+ "log_%s_%d", mode ? "roll" : "fixed", i);
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, prog_name,
+ "GPL", insns, insn_cnt, &opts);
+
+ snprintf(op_name, sizeof(op_name),
+ "log_%s_prog_load_%d", mode ? "roll" : "fixed", i);
+ if (!ASSERT_EQ(prog_fd, -ENOSPC, op_name)) {
+ if (prog_fd >= 0)
+ close(prog_fd);
+ goto cleanup;
+ }
+
+ snprintf(op_name, sizeof(op_name),
+ "log_%s_strlen_%d", mode ? "roll" : "fixed", i);
+ ASSERT_EQ(strlen(logs.buf), i - 1, op_name);
+
+ if (mode)
+ exp_log = logs.reference + fixed_log_sz - i;
+ else
+ exp_log = logs.reference;
+
+ snprintf(op_name, sizeof(op_name),
+ "log_%s_contents_%d", mode ? "roll" : "fixed", i);
+ if (!ASSERT_STRNEQ(logs.buf, exp_log, i - 1, op_name)) {
+ printf("CMP:%d\nS1:'%s'\nS2:'%s'\n",
+ strncmp(logs.buf, exp_log, i - 1),
+ logs.buf, exp_log);
+ goto cleanup;
+ }
+
+ /* check that unused portions of logs.buf are not overwritten */
+ snprintf(op_name, sizeof(op_name),
+ "log_%s_unused_%d", mode ? "roll" : "fixed", i);
+ if (!ASSERT_STREQ(logs.buf + i, logs.filler + i, op_name)) {
+ printf("CMP:%d\nS1:'%s'\nS2:'%s'\n",
+ strcmp(logs.buf + i, logs.filler + i),
+ logs.buf + i, logs.filler + i);
+ goto cleanup;
+ }
+ }
+ }
+
+ /* (FIXED) get actual log size */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level | 8; /* BPF_LOG_FIXED */
+ opts.log_size = sizeof(logs.buf);
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_fixed");
+
+ log_true_sz_fixed = opts.log_true_size;
+ ASSERT_GT(log_true_sz_fixed, 0, "log_true_sz_fixed");
+
+ /* (FIXED, NULL) get actual log size */
+ opts.log_buf = NULL;
+ opts.log_level = log_level | 8; /* BPF_LOG_FIXED */
+ opts.log_size = 0;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_fixed_null");
+ ASSERT_EQ(opts.log_true_size, log_true_sz_fixed, "log_sz_fixed_null_eq");
+
+ /* (ROLLING) get actual log size */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level;
+ opts.log_size = sizeof(logs.buf);
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_rolling");
+
+ log_true_sz_rolling = opts.log_true_size;
+ ASSERT_EQ(log_true_sz_rolling, log_true_sz_fixed, "log_true_sz_eq");
+
+ /* (ROLLING, NULL) get actual log size */
+ opts.log_buf = NULL;
+ opts.log_level = log_level;
+ opts.log_size = 0;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_rolling_null");
+ ASSERT_EQ(opts.log_true_size, log_true_sz_rolling, "log_true_sz_null_eq");
+
+ /* (FIXED) expect -ENOSPC for one byte short log */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level | 8; /* BPF_LOG_FIXED */
+ opts.log_size = log_true_sz_fixed - 1;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, true /* should fail */);
+ ASSERT_EQ(res, -ENOSPC, "prog_load_res_too_short_fixed");
+
+ /* (FIXED) expect *not* -ENOSPC with exact log_true_size buffer */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level | 8; /* BPF_LOG_FIXED */
+ opts.log_size = log_true_sz_fixed;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_just_right_fixed");
+
+ /* (ROLLING) expect -ENOSPC for one byte short log */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level;
+ opts.log_size = log_true_sz_rolling - 1;
+ res = load_prog(&opts, true /* should fail */);
+ ASSERT_EQ(res, -ENOSPC, "prog_load_res_too_short_rolling");
+
+ /* (ROLLING) expect *not* -ENOSPC with exact log_true_size buffer */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level;
+ opts.log_size = log_true_sz_rolling;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_just_right_rolling");
+
+cleanup:
+ test_log_buf__destroy(skel);
+}
+
+static const void *btf_data;
+static u32 btf_data_sz;
+
+static int load_btf(struct bpf_btf_load_opts *opts, bool expect_err)
+{
+ int fd;
+
+ fd = bpf_btf_load(btf_data, btf_data_sz, opts);
+ if (fd >= 0)
+ close(fd);
+ if (expect_err)
+ ASSERT_LT(fd, 0, "btf_load_failure");
+ else /* !expect_err */
+ ASSERT_GT(fd, 0, "btf_load_success");
+ return fd;
+}
+
+static void verif_btf_log_subtest(bool bad_btf)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, opts);
+ struct btf *btf;
+ struct btf_type *t;
+ char *exp_log, op_name[32];
+ size_t fixed_log_sz;
+ __u32 log_true_sz_fixed, log_true_sz_rolling;
+ int i, res;
+
+ /* prepare simple BTF contents */
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "btf_new_empty"))
+ return;
+ res = btf__add_int(btf, "whatever", 4, 0);
+ if (!ASSERT_GT(res, 0, "btf_add_int_id"))
+ goto cleanup;
+ if (bad_btf) {
+ /* btf__add_int() doesn't allow bad value of size, so we'll just
+ * force-cast btf_type pointer and manually override size to invalid
+ * 3 if we need to simulate failure
+ */
+ t = (void *)btf__type_by_id(btf, res);
+ if (!ASSERT_OK_PTR(t, "int_btf_type"))
+ goto cleanup;
+ t->size = 3;
+ }
+
+ btf_data = btf__raw_data(btf, &btf_data_sz);
+ if (!ASSERT_OK_PTR(btf_data, "btf_data"))
+ goto cleanup;
+
+ load_btf(&opts, bad_btf);
+
+ opts.log_buf = logs.reference;
+ opts.log_size = sizeof(logs.reference);
+ opts.log_level = 1 | 8 /* BPF_LOG_FIXED */;
+ load_btf(&opts, bad_btf);
+
+ fixed_log_sz = strlen(logs.reference) + 1;
+ if (!ASSERT_GT(fixed_log_sz, 50, "fixed_log_sz"))
+ goto cleanup;
+ memset(logs.reference + fixed_log_sz, 0, sizeof(logs.reference) - fixed_log_sz);
+
+ /* validate BPF_LOG_FIXED truncation works as verifier log used to work */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1 | 8; /* fixed-length log */
+ opts.log_size = 25;
+ res = load_btf(&opts, true);
+ ASSERT_EQ(res, -ENOSPC, "half_log_fd");
+ ASSERT_EQ(strlen(logs.buf), 24, "log_fixed_25");
+ ASSERT_STRNEQ(logs.buf, logs.reference, 24, "log_fixed_contents_25");
+
+ /* validate rolling verifier log logic: try all variations of log buf
+ * length to force various truncation scenarios
+ */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1; /* rolling log */
+
+ /* prefill logs.buf with 'A's to detect any write beyond allowed length */
+ memset(logs.filler, 'A', sizeof(logs.filler));
+ logs.filler[sizeof(logs.filler) - 1] = '\0';
+ memset(logs.buf, 'A', sizeof(logs.buf));
+ logs.buf[sizeof(logs.buf) - 1] = '\0';
+
+ for (i = 1; i < fixed_log_sz; i++) {
+ opts.log_size = i;
+
+ snprintf(op_name, sizeof(op_name), "log_roll_btf_load_%d", i);
+ res = load_btf(&opts, true);
+ if (!ASSERT_EQ(res, -ENOSPC, op_name))
+ goto cleanup;
+
+ exp_log = logs.reference + fixed_log_sz - i;
+ snprintf(op_name, sizeof(op_name), "log_roll_contents_%d", i);
+ if (!ASSERT_STREQ(logs.buf, exp_log, op_name)) {
+ printf("CMP:%d\nS1:'%s'\nS2:'%s'\n",
+ strcmp(logs.buf, exp_log),
+ logs.buf, exp_log);
+ goto cleanup;
+ }
+
+ /* check that unused portions of logs.buf are not overwritten */
+ snprintf(op_name, sizeof(op_name), "log_roll_unused_tail_%d", i);
+ if (!ASSERT_STREQ(logs.buf + i, logs.filler + i, op_name)) {
+ printf("CMP:%d\nS1:'%s'\nS2:'%s'\n",
+ strcmp(logs.buf + i, logs.filler + i),
+ logs.buf + i, logs.filler + i);
+ goto cleanup;
+ }
+ }
+
+ /* (FIXED) get actual log size */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1 | 8; /* BPF_LOG_FIXED */
+ opts.log_size = sizeof(logs.buf);
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_fixed");
+
+ log_true_sz_fixed = opts.log_true_size;
+ ASSERT_GT(log_true_sz_fixed, 0, "log_true_sz_fixed");
+
+ /* (FIXED, NULL) get actual log size */
+ opts.log_buf = NULL;
+ opts.log_level = 1 | 8; /* BPF_LOG_FIXED */
+ opts.log_size = 0;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_fixed_null");
+ ASSERT_EQ(opts.log_true_size, log_true_sz_fixed, "log_sz_fixed_null_eq");
+
+ /* (ROLLING) get actual log size */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1;
+ opts.log_size = sizeof(logs.buf);
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_rolling");
+
+ log_true_sz_rolling = opts.log_true_size;
+ ASSERT_EQ(log_true_sz_rolling, log_true_sz_fixed, "log_true_sz_eq");
+
+ /* (ROLLING, NULL) get actual log size */
+ opts.log_buf = NULL;
+ opts.log_level = 1;
+ opts.log_size = 0;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_rolling_null");
+ ASSERT_EQ(opts.log_true_size, log_true_sz_rolling, "log_true_sz_null_eq");
+
+ /* (FIXED) expect -ENOSPC for one byte short log */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1 | 8; /* BPF_LOG_FIXED */
+ opts.log_size = log_true_sz_fixed - 1;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, true);
+ ASSERT_EQ(res, -ENOSPC, "btf_load_res_too_short_fixed");
+
+ /* (FIXED) expect *not* -ENOSPC with exact log_true_size buffer */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1 | 8; /* BPF_LOG_FIXED */
+ opts.log_size = log_true_sz_fixed;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_just_right_fixed");
+
+ /* (ROLLING) expect -ENOSPC for one byte short log */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1;
+ opts.log_size = log_true_sz_rolling - 1;
+ res = load_btf(&opts, true);
+ ASSERT_EQ(res, -ENOSPC, "btf_load_res_too_short_rolling");
+
+ /* (ROLLING) expect *not* -ENOSPC with exact log_true_size buffer */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1;
+ opts.log_size = log_true_sz_rolling;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_just_right_rolling");
+
+cleanup:
+ btf__free(btf);
+}
+
+void test_verifier_log(void)
+{
+ if (test__start_subtest("good_prog-level1"))
+ verif_log_subtest("good_prog", false, 1);
+ if (test__start_subtest("good_prog-level2"))
+ verif_log_subtest("good_prog", false, 2);
+ if (test__start_subtest("bad_prog-level1"))
+ verif_log_subtest("bad_prog", true, 1);
+ if (test__start_subtest("bad_prog-level2"))
+ verif_log_subtest("bad_prog", true, 2);
+ if (test__start_subtest("bad_btf"))
+ verif_btf_log_subtest(true /* bad btf */);
+ if (test__start_subtest("good_btf"))
+ verif_btf_log_subtest(false /* !bad btf */);
+}
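
The subtests above exercise the new log_true_size out-parameter end to end. A
minimal userspace sketch of the intended two-pass usage, assuming a libbpf
recent enough to expose log_true_size in bpf_prog_load_opts (v1.2+) and a
kernel that accepts a NULL log_buf for sizing; all names are illustrative:

	#include <stdlib.h>
	#include <unistd.h>
	#include <bpf/bpf.h>

	/* Two-pass load: discover the exact log size first, then retry with
	 * a buffer of exactly that size so -ENOSPC cannot happen.
	 */
	static char *load_with_exact_log(const struct bpf_insn *insns, size_t insn_cnt)
	{
		LIBBPF_OPTS(bpf_prog_load_opts, opts, .log_level = 1);
		char *log;
		int fd;

		/* pass 1: no buffer; the kernel still reports the needed size */
		fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "sized_log", "GPL",
				   insns, insn_cnt, &opts);
		if (fd >= 0)
			close(fd);
		if (opts.log_true_size == 0)
			return NULL; /* kernel predates log_true_size */

		/* pass 2: buffer of exactly log_true_size bytes */
		log = malloc(opts.log_true_size);
		if (!log)
			return NULL;
		opts.log_buf = log;
		opts.log_size = opts.log_true_size;
		fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "sized_log", "GPL",
				   insns, insn_cnt, &opts);
		if (fd >= 0)
			close(fd);
		return log; /* caller frees; holds the complete verifier log */
	}
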
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 662b6c6c5ed7..c94eb63b7b77 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -87,12 +87,12 @@ static void test_max_pkt_size(int fd)
void test_xdp_do_redirect(void)
{
int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
- char data[sizeof(pkt_udp) + sizeof(__u32)];
+ char data[sizeof(pkt_udp) + sizeof(__u64)];
struct test_xdp_do_redirect *skel = NULL;
struct nstoken *nstoken = NULL;
struct bpf_link *link;
LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
- struct xdp_md ctx_in = { .data = sizeof(__u32),
+ struct xdp_md ctx_in = { .data = sizeof(__u64),
.data_end = sizeof(data) };
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
@@ -106,8 +106,9 @@ void test_xdp_do_redirect(void)
DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
.attach_point = BPF_TC_INGRESS);
- memcpy(&data[sizeof(__u32)], &pkt_udp, sizeof(pkt_udp));
+ memcpy(&data[sizeof(__u64)], &pkt_udp, sizeof(pkt_udp));
*((__u32 *)data) = 0x42; /* metadata test value */
+ *((__u32 *)data + 1) = 0; /* zero the rest of the 8-byte metadata area */
skel = test_xdp_do_redirect__open();
if (!ASSERT_OK_PTR(skel, "skel"))
@@ -159,8 +160,7 @@ void test_xdp_do_redirect(void)
if (!ASSERT_EQ(query_opts.feature_flags,
NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
- NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
- NETDEV_XDP_ACT_NDO_XMIT_SG,
+ NETDEV_XDP_ACT_RX_SG,
"veth_src query_opts.feature_flags"))
goto out;
@@ -170,9 +170,34 @@ void test_xdp_do_redirect(void)
if (!ASSERT_EQ(query_opts.feature_flags,
NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG,
+ "veth_dst query_opts.feature_flags"))
+ goto out;
+
+ /* Enable GRO */
+ SYS("ethtool -K veth_src gro on");
+ SYS("ethtool -K veth_dst gro on");
+
+ err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "veth_src bpf_xdp_query gro on"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
NETDEV_XDP_ACT_NDO_XMIT_SG,
- "veth_dst query_opts.feature_flags"))
+ "veth_src query_opts.feature_flags gro on"))
+ goto out;
+
+ err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "veth_dst bpf_xdp_query gro on"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
+ NETDEV_XDP_ACT_NDO_XMIT_SG,
+ "veth_dst query_opts.feature_flags gro on"))
goto out;
memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
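
The reworked assertions encode that veth advertises NETDEV_XDP_ACT_NDO_XMIT
and _NDO_XMIT_SG only once GRO is enabled. A hedged sketch of the same
feature query done standalone (bpf_xdp_query() with feature_flags needs
libbpf v1.2+ on a 6.3+ kernel; the helper name is illustrative):

	#include <errno.h>
	#include <net/if.h>
	#include <linux/if_link.h>
	#include <linux/netdev.h>
	#include <bpf/libbpf.h>

	/* 1 if the device can ndo_xmit redirected XDP frames, 0 if not,
	 * negative errno on failure */
	static int xdp_can_ndo_xmit(const char *ifname)
	{
		LIBBPF_OPTS(bpf_xdp_query_opts, opts);
		int ifindex = if_nametoindex(ifname);
		int err;

		if (!ifindex)
			return -ENODEV;
		err = bpf_xdp_query(ifindex, XDP_FLAGS_DRV_MODE, &opts);
		if (err)
			return err;
		return !!(opts.feature_flags & NETDEV_XDP_ACT_NDO_XMIT);
	}
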
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index 490e851dc27d..626c461fa34d 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -268,6 +268,8 @@ static int verify_xsk_metadata(struct xsk *xsk)
if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
return -1;
+ ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type");
+
xsk_ring_cons__release(&xsk->rx, 1);
refill_rx(xsk, comp_addr);
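
The added assertion pins rx_hash_type to 0 for this plain UDP flow. For
context, a sketch of where such metadata lives on the AF_XDP consumer side:
the XDP program reserves meta space in front of the frame, so user space
reads it just below the packet start. The struct here is modeled on the
selftests' xdp_metadata.h and is shown for illustration only:

	#include <stdio.h>
	#include <linux/types.h>
	#include "xsk.h"	/* the selftests' local AF_XDP helpers */

	struct xdp_meta {	/* modeled on xdp_metadata.h; illustrative */
		__u64 rx_timestamp;
		__u32 rx_hash;
		__u32 rx_hash_type;
	};

	static void dump_meta(void *umem_area, __u64 addr)
	{
		/* metadata sits immediately before the packet payload */
		void *data = xsk_umem__get_data(umem_area, addr);
		struct xdp_meta *meta = data - sizeof(*meta);

		printf("hash=0x%x type=%u ts=%llu\n", meta->rx_hash,
		       meta->rx_hash_type,
		       (unsigned long long)meta->rx_timestamp);
	}
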
diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
new file mode 100644
index 000000000000..e4bfbba6c193
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+long create_errs = 0;
+long create_cnts = 0;
+long kmalloc_cnts = 0;
+__u32 bench_pid = 0;
+
+struct storage {
+ __u8 data[64];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct storage);
+} sk_storage_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct storage);
+} task_storage_map SEC(".maps");
+
+SEC("raw_tp/kmalloc")
+int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
+ size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags,
+ int node)
+{
+ __sync_fetch_and_add(&kmalloc_cnts, 1);
+
+ return 0;
+}
+
+SEC("tp_btf/sched_process_fork")
+int BPF_PROG(sched_process_fork, struct task_struct *parent, struct task_struct *child)
+{
+ struct storage *stg;
+
+ if (parent->tgid != bench_pid)
+ return 0;
+
+ stg = bpf_task_storage_get(&task_storage_map, child, NULL,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (stg)
+ __sync_fetch_and_add(&create_cnts, 1);
+ else
+ __sync_fetch_and_add(&create_errs, 1);
+
+ return 0;
+}
+
+SEC("lsm.s/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
+ int protocol, int kern)
+{
+ struct storage *stg;
+ __u32 pid;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != bench_pid)
+ return 0;
+
+ stg = bpf_sk_storage_get(&sk_storage_map, sock->sk, NULL,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+
+ if (stg)
+ __sync_fetch_and_add(&create_cnts, 1);
+ else
+ __sync_fetch_and_add(&create_errs, 1);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
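
A sketch of the userspace side that would drive this benchmark; the skeleton
names follow bpftool gen conventions, and the real harness lives in
benchs/bench_local_storage_create.c rather than in this standalone form:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include "bench_local_storage_create.skel.h"

	int main(void)
	{
		struct bench_local_storage_create *skel;
		int i;

		skel = bench_local_storage_create__open_and_load();
		if (!skel)
			return 1;
		skel->bss->bench_pid = getpid();
		if (bench_local_storage_create__attach(skel)) {
			bench_local_storage_create__destroy(skel);
			return 1;
		}

		/* each socket(2) fires the lsm.s/socket_post_create program,
		 * creating one sk-local storage entry */
		for (i = 0; i < 10000; i++)
			close(socket(AF_INET, SOCK_DGRAM, 0));

		printf("creates=%ld errs=%ld kmallocs=%ld\n",
		       skel->bss->create_cnts, skel->bss->create_errs,
		       skel->bss->kmalloc_cnts);
		bench_local_storage_create__destroy(skel);
		return 0;
	}
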
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
index 9ba14c37bbcc..5ddcc46fd886 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
@@ -33,7 +33,6 @@ int dump_ksym(struct bpf_iter__ksym *ctx)
__u32 seq_num = ctx->meta->seq_num;
unsigned long value;
char type;
- int ret;
if (!iter)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
index b77adfd55d73..ec7f91850dec 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
@@ -42,7 +42,6 @@ int change_tcp_cc(struct bpf_iter__tcp *ctx)
char cur_cc[TCP_CA_NAME_MAX];
struct tcp_sock *tp;
struct sock *sk;
- int ret;
if (!bpf_tcp_sk(ctx->sk_common))
return 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c
index de1fc82d2710..1d194455b109 100644
--- a/tools/testing/selftests/bpf/progs/bpf_loop.c
+++ b/tools/testing/selftests/bpf/progs/bpf_loop.c
@@ -138,8 +138,6 @@ static int callback_set_0f(int i, void *ctx)
SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int prog_non_constant_callback(void *ctx)
{
- struct callback_ctx data = {};
-
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index f704885aa534..6e3b4903c541 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -5,12 +5,42 @@
/* This set of attributes controls behavior of the
* test_loader.c:test_loader__run_subtests().
*
+ * The test_loader sequentially loads each program in a skeleton.
+ * Programs could be loaded in privileged and unprivileged modes.
+ * - __success, __failure, __msg imply privileged mode;
+ * - __success_unpriv, __failure_unpriv, __msg_unpriv imply
+ * unprivileged mode.
+ * If a combination of privileged and unprivileged attributes is present,
+ * both modes are used. If none are present, privileged mode is implied.
+ *
+ * See test_loader.c:drop_capabilities() for exact set of capabilities
+ * that differ between privileged and unprivileged modes.
+ *
+ * For test filtering purposes the name of the program loaded in
+ * unprivileged mode is derived from the usual program name by adding
+ * `@unpriv' suffix.
+ *
* __msg Message expected to be found in the verifier log.
* Multiple __msg attributes could be specified.
+ * __msg_unpriv Same as __msg but for unprivileged mode.
*
* __success Expect program load success in privileged mode.
+ * __success_unpriv Expect program load success in unprivileged mode.
*
* __failure Expect program load failure in privileged mode.
+ * __failure_unpriv Expect program load failure in unprivileged mode.
+ *
+ * __retval Execute the program using BPF_PROG_TEST_RUN command,
+ * expect return value to match passed parameter:
+ * - a decimal number
+ * - a hexadecimal number, when it starts with 0x
+ * - literal INT_MIN
+ * - literal POINTER_VALUE (see definition below)
+ * - literal TEST_DATA_LEN (see definition below)
+ * __retval_unpriv Same, but load program in unprivileged mode.
+ *
+ * __description Text to be used instead of a program name for display
+ * and filtering purposes.
*
* __log_level Log level to use for the program, numeric value expected.
*
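
Taken together, these attributes let a single BPF C file declare its own
pass/fail expectations for both modes. A short illustrative example (program
names and expected messages invented for the sketch):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>
	#include "bpf_misc.h"

	SEC("socket")
	__description("example: unchecked packet read is rejected")
	__failure __msg("invalid access to packet")
	__failure_unpriv __msg_unpriv("invalid access to packet")
	int pkt_oob_read(struct __sk_buff *skb)
	{
		void *data = (void *)(long)skb->data;

		/* no bounds check against data_end: must be rejected */
		return *(char *)data;
	}

	SEC("socket")
	__description("example: constant return value")
	__success __success_unpriv __retval(42)
	int ret_forty_two(struct __sk_buff *skb)
	{
		return 42;
	}

	char _license[] SEC("license") = "GPL";
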
@@ -27,15 +57,28 @@
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg)))
#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
+#define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc)))
+#define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" msg)))
+#define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv")))
+#define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv")))
#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag)))
+#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="#val)))
+#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val)))
/* Convenience macro for use with 'asm volatile' blocks */
#define __naked __attribute__((naked))
#define __clobber_all "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "memory"
#define __clobber_common "r0", "r1", "r2", "r3", "r4", "r5", "memory"
#define __imm(name) [name]"i"(name)
+#define __imm_const(name, expr) [name]"i"(expr)
#define __imm_addr(name) [name]"i"(&name)
+#define __imm_ptr(name) [name]"p"(&name)
+#define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
+
+/* Magic constants used with __retval() */
+#define POINTER_VALUE 0xcafe4all
+#define TEST_DATA_LEN 64
#if defined(__TARGET_ARCH_x86)
#define SYSCALL_WRAPPER 1
@@ -75,5 +118,110 @@
#define FUNC_REG_ARG_CNT 5
#endif
+/* make it look to compiler like value is read and written */
+#define __sink(expr) asm volatile("" : "+g"(expr))
+
+struct bpf_iter_num;
+
+extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym;
+extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym;
+extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;
+
+#ifndef bpf_for_each
+/* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for
+ * using BPF open-coded iterators without having to write mundane explicit
+ * low-level loop logic. Instead, it provides for()-like generic construct
+ * that can be used pretty naturally. E.g., for some hypothetical cgroup
+ * iterator, you'd write:
+ *
+ * struct cgroup *cg, *parent_cg = <...>;
+ *
+ * bpf_for_each(cgroup, cg, parent_cg, CG_ITER_CHILDREN) {
+ * bpf_printk("Child cgroup id = %d", cg->cgroup_id);
+ * if (cg->cgroup_id == 123)
+ * break;
+ * }
+ *
+ * I.e., it looks almost like high-level for each loop in other languages,
+ * supports continue/break, and is verifiable by BPF verifier.
+ *
+ * For iterating integers, the difference between bpf_for_each(num, i, N, M)
+ * and bpf_for(i, N, M) is that bpf_for() provides an additional proof to the
+ * verifier that i is in the [N, M) range, while in the bpf_for_each() case i
+ * is an `int *`, not just an `int`. So for integers bpf_for() is more
+ * convenient. (A usage sketch for these macros follows this file's diff.)
+ *
+ * Note: this macro relies on the C99 feature of declaring variables inside
+ * a for() loop, bound to the loop's lifetime. It also utilizes the GCC
+ * extension __attribute__((cleanup(<func>))), supported by both GCC and
+ * Clang.
+ */
+#define bpf_for_each(type, cur, args...) for ( \
+ /* initialize and define destructor */ \
+ struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */ \
+ cleanup(bpf_iter_##type##_destroy))), \
+ /* ___p pointer is just to call bpf_iter_##type##_new() *once* to init ___it */ \
+ *___p __attribute__((unused)) = ( \
+ bpf_iter_##type##_new(&___it, ##args), \
+ /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \
+ /* for bpf_iter_##type##_destroy() when used from cleanup() attribute */ \
+ (void)bpf_iter_##type##_destroy, (void *)0); \
+ /* iteration and termination check */ \
+ (((cur) = bpf_iter_##type##_next(&___it))); \
+)
+#endif /* bpf_for_each */
+
+#ifndef bpf_for
+/* bpf_for(i, start, end) implements a for()-like looping construct that sets
+ * provided integer variable *i* to values starting from *start* through,
+ * but not including, *end*. It also proves to BPF verifier that *i* belongs
+ * to range [start, end), so this can be used for accessing arrays without
+ * extra checks.
+ *
+ * Note: *start* and *end* are assumed to be expressions with no side effects
+ * and whose values do not change throughout bpf_for() loop execution. They do
+ * not have to be statically known or constant, though.
+ *
+ * Note: similarly to bpf_for_each(), it relies on the C99 feature of
+ * declaring for() loop bound variables and on the cleanup attribute,
+ * supported by GCC and Clang.
+ */
+#define bpf_for(i, start, end) for ( \
+ /* initialize and define destructor */ \
+ struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \
+ cleanup(bpf_iter_num_destroy))), \
+ /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \
+ *___p __attribute__((unused)) = ( \
+ bpf_iter_num_new(&___it, (start), (end)), \
+ /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \
+ /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \
+ (void)bpf_iter_num_destroy, (void *)0); \
+ ({ \
+ /* iteration step */ \
+ int *___t = bpf_iter_num_next(&___it); \
+ /* termination and bounds check */ \
+ (___t && ((i) = *___t, (i) >= (start) && (i) < (end))); \
+ }); \
+)
+#endif /* bpf_for */
+
+#ifndef bpf_repeat
+/* bpf_repeat(N) performs N iterations without exposing iteration number
+ *
+ * Note: similarly to bpf_for_each(), it relies on the C99 feature of
+ * declaring for() loop bound variables and on the cleanup attribute,
+ * supported by GCC and Clang.
+ */
+#define bpf_repeat(N) for ( \
+ /* initialize and define destructor */ \
+ struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \
+ cleanup(bpf_iter_num_destroy))), \
+ /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \
+ *___p __attribute__((unused)) = ( \
+ bpf_iter_num_new(&___it, 0, (N)), \
+ /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \
+ /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \
+ (void)bpf_iter_num_destroy, (void *)0); \
+ bpf_iter_num_next(&___it); \
+ /* nothing here */ \
+)
+#endif /* bpf_repeat */
#endif
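
The usage sketch referenced in the bpf_for_each() comment above: a minimal
program combining all three looping macros (array and program names are
illustrative):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>
	#include "bpf_misc.h"

	int vals[64];
	long total;

	SEC("raw_tp")
	int sum_vals(const void *ctx)
	{
		int i, *n;

		total = 0;

		/* bpf_for() proves 0 <= i < 64, so vals[i] needs no check */
		bpf_for(i, 0, 64)
			total += vals[i];

		/* bpf_for_each() yields an `int *` instead of a proven range */
		bpf_for_each(num, n, 0, 64)
			total += *n;

		/* fixed repetition with no exposed counter */
		bpf_repeat(3)
			total += 1;

		return 0;
	}

	char _license[] SEC("license") = "GPL";
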
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
index ce96b33e38d6..50f95ec61165 100644
--- a/tools/testing/selftests/bpf/progs/cb_refs.c
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -52,7 +52,6 @@ int leak_prog(void *ctx)
{
struct prog_test_ref_kfunc *p;
struct map_value *v;
- unsigned long sl;
v = bpf_map_lookup_elem(&array_map, &(int){0});
if (!v)
diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
index 88638315c582..ac86a8a61605 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
@@ -66,7 +66,6 @@ static inline int is_allowed_peer_cg(struct __sk_buff *skb,
SEC("cgroup_skb/ingress")
int ingress_lookup(struct __sk_buff *skb)
{
- __u32 serv_port_key = 0;
struct ipv6hdr ip6h;
struct tcphdr tcph;
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
index d0b7cd0d09d7..22914a70db54 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
@@ -21,10 +21,11 @@ struct hash_map {
} __cgrps_kfunc_map SEC(".maps");
struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
-struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym;
void bpf_cgroup_release(struct cgroup *p) __ksym;
struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp)
{
@@ -61,6 +62,11 @@ static inline int cgrps_kfunc_map_insert(struct cgroup *cgrp)
}
acquired = bpf_cgroup_acquire(cgrp);
+ if (!acquired) {
+ bpf_map_delete_elem(&__cgrps_kfunc_map, &id);
+ return -ENOENT;
+ }
+
old = bpf_kptr_xchg(&v->cgrp, acquired);
if (old) {
bpf_cgroup_release(old);
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
index b42291ed9586..0fa564a5cc5b 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
@@ -41,6 +41,23 @@ int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path
/* Can't invoke bpf_cgroup_acquire() on an untrusted pointer. */
acquired = bpf_cgroup_acquire(v->cgrp);
+ if (acquired)
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(cgrp_kfunc_acquire_no_null_check, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+ /*
+ * Can't invoke bpf_cgroup_release() without checking the return value
+ * of bpf_cgroup_acquire().
+ */
bpf_cgroup_release(acquired);
return 0;
@@ -54,7 +71,8 @@ int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path)
/* Can't invoke bpf_cgroup_acquire() on a random frame pointer. */
acquired = bpf_cgroup_acquire((struct cgroup *)&stack_cgrp);
- bpf_cgroup_release(acquired);
+ if (acquired)
+ bpf_cgroup_release(acquired);
return 0;
}
@@ -67,7 +85,8 @@ int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp)
/* Can't acquire an untrusted struct cgroup * pointer. */
acquired = bpf_cgroup_acquire(cgrp);
- bpf_cgroup_release(acquired);
+ if (acquired)
+ bpf_cgroup_release(acquired);
return 0;
}
@@ -80,7 +99,8 @@ int BPF_PROG(cgrp_kfunc_acquire_trusted_walked, struct cgroup *cgrp, const char
/* Can't invoke bpf_cgroup_acquire() on a pointer obtained from walking a trusted cgroup. */
acquired = bpf_cgroup_acquire(cgrp->old_dom_cgrp);
- bpf_cgroup_release(acquired);
+ if (acquired)
+ bpf_cgroup_release(acquired);
return 0;
}
@@ -93,9 +113,8 @@ int BPF_PROG(cgrp_kfunc_acquire_null, struct cgroup *cgrp, const char *path)
/* Can't invoke bpf_cgroup_acquire() on a NULL pointer. */
acquired = bpf_cgroup_acquire(NULL);
- if (!acquired)
- return 0;
- bpf_cgroup_release(acquired);
+ if (acquired)
+ bpf_cgroup_release(acquired);
return 0;
}
@@ -109,57 +128,7 @@ int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *pat
acquired = bpf_cgroup_acquire(cgrp);
/* Acquired cgroup is never released. */
-
- return 0;
-}
-
-SEC("tp_btf/cgroup_mkdir")
-__failure __msg("arg#0 expected pointer to map value")
-int BPF_PROG(cgrp_kfunc_get_non_kptr_param, struct cgroup *cgrp, const char *path)
-{
- struct cgroup *kptr;
-
- /* Cannot use bpf_cgroup_kptr_get() on a non-kptr, even on a valid cgroup. */
- kptr = bpf_cgroup_kptr_get(&cgrp);
- if (!kptr)
- return 0;
-
- bpf_cgroup_release(kptr);
-
- return 0;
-}
-
-SEC("tp_btf/cgroup_mkdir")
-__failure __msg("arg#0 expected pointer to map value")
-int BPF_PROG(cgrp_kfunc_get_non_kptr_acquired, struct cgroup *cgrp, const char *path)
-{
- struct cgroup *kptr, *acquired;
-
- acquired = bpf_cgroup_acquire(cgrp);
-
- /* Cannot use bpf_cgroup_kptr_get() on a non-map-value, even if the kptr was acquired. */
- kptr = bpf_cgroup_kptr_get(&acquired);
- bpf_cgroup_release(acquired);
- if (!kptr)
- return 0;
-
- bpf_cgroup_release(kptr);
-
- return 0;
-}
-
-SEC("tp_btf/cgroup_mkdir")
-__failure __msg("arg#0 expected pointer to map value")
-int BPF_PROG(cgrp_kfunc_get_null, struct cgroup *cgrp, const char *path)
-{
- struct cgroup *kptr;
-
- /* Cannot use bpf_cgroup_kptr_get() on a NULL pointer. */
- kptr = bpf_cgroup_kptr_get(NULL);
- if (!kptr)
- return 0;
-
- bpf_cgroup_release(kptr);
+ __sink(acquired);
return 0;
}
@@ -185,8 +154,8 @@ int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path)
}
SEC("tp_btf/cgroup_mkdir")
-__failure __msg("Unreleased reference")
-int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path)
+__failure __msg("must be referenced or trusted")
+int BPF_PROG(cgrp_kfunc_rcu_get_release, struct cgroup *cgrp, const char *path)
{
struct cgroup *kptr;
struct __cgrps_kfunc_map_value *v;
@@ -195,17 +164,18 @@ int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path)
if (!v)
return 0;
- kptr = bpf_cgroup_kptr_get(&v->cgrp);
- if (!kptr)
- return 0;
-
- /* Kptr acquired above is never released. */
+ bpf_rcu_read_lock();
+ kptr = v->cgrp;
+ if (kptr)
+ /* Can't release a cgroup kptr stored in a map. */
+ bpf_cgroup_release(kptr);
+ bpf_rcu_read_unlock();
return 0;
}
SEC("tp_btf/cgroup_mkdir")
-__failure __msg("expects refcounted")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path)
{
struct __cgrps_kfunc_map_value *v;
@@ -233,7 +203,7 @@ int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path)
}
SEC("tp_btf/cgroup_mkdir")
-__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path)
{
struct __cgrps_kfunc_map_value local, *v;
@@ -255,6 +225,8 @@ int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path)
return -ENOENT;
acquired = bpf_cgroup_acquire(cgrp);
+ if (!acquired)
+ return -ENOENT;
old = bpf_kptr_xchg(&v->cgrp, acquired);
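
All of the reworked failure tests above encode one rule: bpf_cgroup_acquire()
may now fail and return NULL, so its result must be checked before release.
A minimal sketch of the correct pattern, using the kfunc declarations from
cgrp_kfunc_common.h:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
	void bpf_cgroup_release(struct cgroup *p) __ksym;

	SEC("tp_btf/cgroup_mkdir")
	int BPF_PROG(acquire_release_checked, struct cgroup *cgrp, const char *path)
	{
		struct cgroup *acquired;

		acquired = bpf_cgroup_acquire(cgrp);
		if (!acquired)	/* mandatory: acquire may return NULL now */
			return 0;
		bpf_cgroup_release(acquired);
		return 0;
	}

	char _license[] SEC("license") = "GPL";
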
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
index 030aff700084..5354455a01be 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
@@ -38,7 +38,10 @@ int BPF_PROG(test_cgrp_acquire_release_argument, struct cgroup *cgrp, const char
return 0;
acquired = bpf_cgroup_acquire(cgrp);
- bpf_cgroup_release(acquired);
+ if (!acquired)
+ err = 1;
+ else
+ bpf_cgroup_release(acquired);
return 0;
}
@@ -123,13 +126,11 @@ int BPF_PROG(test_cgrp_get_release, struct cgroup *cgrp, const char *path)
return 0;
}
- kptr = bpf_cgroup_kptr_get(&v->cgrp);
- if (!kptr) {
+ bpf_rcu_read_lock();
+ kptr = v->cgrp;
+ if (!kptr)
err = 3;
- return 0;
- }
-
- bpf_cgroup_release(kptr);
+ bpf_rcu_read_unlock();
return 0;
}
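
With bpf_cgroup_kptr_get() gone, a cgroup kptr stored in a map is read under
bpf_rcu_read_lock()/bpf_rcu_read_unlock() rather than re-acquired, and must
not be released from there. A sketch of that access pattern in isolation
(map and program names are illustrative):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	void bpf_rcu_read_lock(void) __ksym;
	void bpf_rcu_read_unlock(void) __ksym;

	struct map_value {
		struct cgroup __kptr *cgrp;
	};

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 1);
		__type(key, int);
		__type(value, struct map_value);
	} cgrp_map SEC(".maps");

	SEC("tp_btf/cgroup_mkdir")
	int BPF_PROG(read_stored_cgrp, struct cgroup *cgrp, const char *path)
	{
		struct map_value *v;
		struct cgroup *kptr;
		int key = 0;

		v = bpf_map_lookup_elem(&cgrp_map, &key);
		if (!v)
			return 0;

		bpf_rcu_read_lock();
		kptr = v->cgrp;
		if (kptr)
			/* read-only use is fine under RCU; release is not */
			bpf_printk("stored cgroup level %d", kptr->level);
		bpf_rcu_read_unlock();
		return 0;
	}

	char _license[] SEC("license") = "GPL";
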
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c
index 6652d18465b2..8aeba1b75c83 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c
@@ -84,7 +84,6 @@ int BPF_PROG(update_cookie_tracing, struct socket *sock,
struct sockaddr *uaddr, int addr_len, int flags)
{
struct socket_cookie *p;
- struct tcp_sock *tcp_sk;
if (uaddr->sa_family != AF_INET6)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
index 7615dc23d301..4c7844e1dbfa 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
@@ -24,7 +24,6 @@ void bpf_rcu_read_unlock(void) __ksym;
SEC("?iter.s/cgroup")
int cgroup_iter(struct bpf_iter__cgroup *ctx)
{
- struct seq_file *seq = ctx->meta->seq;
struct cgroup *cgrp = ctx->cgroup;
long *ptr;
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index ec25371de789..7ef49ec04838 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -32,7 +32,7 @@
#define IFNAMSIZ 16
#endif
-__attribute__ ((noinline))
+__attribute__ ((noinline)) __weak
int do_bind(struct bpf_sock_addr *ctx)
{
struct sockaddr_in sa = {};
diff --git a/tools/testing/selftests/bpf/progs/core_kern.c b/tools/testing/selftests/bpf/progs/core_kern.c
index 2715fe27d4cf..004f2acef2eb 100644
--- a/tools/testing/selftests/bpf/progs/core_kern.c
+++ b/tools/testing/selftests/bpf/progs/core_kern.c
@@ -77,7 +77,7 @@ int balancer_ingress(struct __sk_buff *ctx)
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
- int ret = 0, nh_off, i = 0;
+ int nh_off, i = 0;
nh_off = 14;
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index 65e5496ca1b2..0c5b785a93e4 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -9,6 +9,9 @@
int err;
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+private(MASK) static struct bpf_cpumask __kptr * global_mask;
+
struct __cpumask_map_value {
struct bpf_cpumask __kptr * cpumask;
};
@@ -23,7 +26,6 @@ struct array_map {
struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym;
-struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumask) __ksym;
u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym;
u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
@@ -51,6 +53,9 @@ void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym
u32 bpf_cpumask_any(const struct cpumask *src) __ksym;
u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
static inline const struct cpumask *cast(struct bpf_cpumask *cpumask)
{
return (const struct cpumask *)cpumask;
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
index c16f7563b84e..a9bf6ea336cf 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_failure.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -23,6 +23,7 @@ int BPF_PROG(test_alloc_no_release, struct task_struct *task, u64 clone_flags)
struct bpf_cpumask *cpumask;
cpumask = create_cpumask();
+ __sink(cpumask);
/* cpumask is never released. */
return 0;
@@ -51,6 +52,7 @@ int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_fla
/* Can't acquire a non-struct bpf_cpumask. */
cpumask = bpf_cpumask_acquire((struct bpf_cpumask *)task->cpus_ptr);
+ __sink(cpumask);
return 0;
}
@@ -63,6 +65,7 @@ int BPF_PROG(test_mutate_cpumask, struct task_struct *task, u64 clone_flags)
/* Can't set the CPU of a non-struct bpf_cpumask. */
bpf_cpumask_set_cpu(0, (struct bpf_cpumask *)task->cpus_ptr);
+ __sink(cpumask);
return 0;
}
@@ -92,35 +95,98 @@ int BPF_PROG(test_insert_remove_no_release, struct task_struct *task, u64 clone_
}
SEC("tp_btf/task_newtask")
-__failure __msg("Unreleased reference")
-int BPF_PROG(test_kptr_get_no_release, struct task_struct *task, u64 clone_flags)
+__failure __msg("NULL pointer passed to trusted arg0")
+int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags)
{
- struct bpf_cpumask *cpumask;
- struct __cpumask_map_value *v;
+ /* NULL passed to KF_TRUSTED_ARGS kfunc. */
+ bpf_cpumask_empty(NULL);
- cpumask = create_cpumask();
- if (!cpumask)
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R2 must be a rcu pointer")
+int BPF_PROG(test_global_mask_out_of_rcu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local, *prev;
+
+ local = create_cpumask();
+ if (!local)
return 0;
- if (cpumask_map_insert(cpumask))
+ prev = bpf_kptr_xchg(&global_mask, local);
+ if (prev) {
+ bpf_cpumask_release(prev);
+ err = 3;
return 0;
+ }
- v = cpumask_map_value_lookup();
- if (!v)
+ bpf_rcu_read_lock();
+ local = global_mask;
+ if (!local) {
+ err = 4;
+ bpf_rcu_read_unlock();
return 0;
+ }
- cpumask = bpf_cpumask_kptr_get(&v->cpumask);
+ bpf_rcu_read_unlock();
+
+ /* RCU region is exited before calling KF_RCU kfunc. */
+
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
- /* cpumask is never released. */
return 0;
}
SEC("tp_btf/task_newtask")
-__failure __msg("NULL pointer passed to trusted arg0")
-int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags)
+__failure __msg("NULL pointer passed to trusted arg1")
+int BPF_PROG(test_global_mask_no_null_check, struct task_struct *task, u64 clone_flags)
{
- /* NULL passed to KF_TRUSTED_ARGS kfunc. */
- bpf_cpumask_empty(NULL);
+ struct bpf_cpumask *local, *prev;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask, local);
+ if (prev) {
+ bpf_cpumask_release(prev);
+ err = 3;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ local = global_mask;
+
+ /* No NULL check is performed on global cpumask kptr. */
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
+
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to helper arg2")
+int BPF_PROG(test_global_mask_rcu_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *prev, *curr;
+
+ curr = bpf_cpumask_create();
+ if (!curr)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask, curr);
+ if (prev)
+ bpf_cpumask_release(prev);
+
+ bpf_rcu_read_lock();
+ curr = global_mask;
+ /* PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU passed to bpf_kptr_xchg() */
+ prev = bpf_kptr_xchg(&global_mask, curr);
+ bpf_rcu_read_unlock();
+ if (prev)
+ bpf_cpumask_release(prev);
return 0;
}
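
The failure tests above bracket the one valid sequence for a global cpumask
kptr, which test_global_mask_rcu in cpumask_success.c below implements for
real: xchg ownership into the global, then re-read it under RCU with a NULL
check before calling any KF_RCU kfunc. A condensed sketch:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
	void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
	bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym;
	void bpf_rcu_read_lock(void) __ksym;
	void bpf_rcu_read_unlock(void) __ksym;

	#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
	private(MASK) static struct bpf_cpumask __kptr *global;

	SEC("tp_btf/task_newtask")
	int BPF_PROG(stash_and_test, struct task_struct *task, u64 clone_flags)
	{
		struct bpf_cpumask *fresh, *prev, *curr;

		fresh = bpf_cpumask_create();
		if (!fresh)
			return 0;

		prev = bpf_kptr_xchg(&global, fresh);	/* ownership moves to .bss */
		if (prev)
			bpf_cpumask_release(prev);

		bpf_rcu_read_lock();
		curr = global;
		if (curr)	/* NULL check required before the KF_RCU kfunc */
			bpf_cpumask_test_cpu(0, (const struct cpumask *)curr);
		bpf_rcu_read_unlock();
		return 0;
	}

	char _license[] SEC("license") = "GPL";
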
diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c
index 1d38bc65d4b0..2fcdd7f68ac7 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_success.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_success.c
@@ -353,7 +353,6 @@ SEC("tp_btf/task_newtask")
int BPF_PROG(test_insert_leave, struct task_struct *task, u64 clone_flags)
{
struct bpf_cpumask *cpumask;
- struct __cpumask_map_value *v;
cpumask = create_cpumask();
if (!cpumask)
@@ -396,31 +395,34 @@ int BPF_PROG(test_insert_remove_release, struct task_struct *task, u64 clone_fla
}
SEC("tp_btf/task_newtask")
-int BPF_PROG(test_insert_kptr_get_release, struct task_struct *task, u64 clone_flags)
+int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags)
{
- struct bpf_cpumask *cpumask;
- struct __cpumask_map_value *v;
+ struct bpf_cpumask *local, *prev;
- cpumask = create_cpumask();
- if (!cpumask)
+ if (!is_test_task())
return 0;
- if (cpumask_map_insert(cpumask)) {
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask, local);
+ if (prev) {
+ bpf_cpumask_release(prev);
err = 3;
return 0;
}
- v = cpumask_map_value_lookup();
- if (!v) {
+ bpf_rcu_read_lock();
+ local = global_mask;
+ if (!local) {
err = 4;
+ bpf_rcu_read_unlock();
return 0;
}
- cpumask = bpf_cpumask_kptr_get(&v->cpumask);
- if (cpumask)
- bpf_cpumask_release(cpumask);
- else
- err = 5;
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
+ bpf_rcu_read_unlock();
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 20ce920d891d..759eb5c245cd 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -271,7 +271,7 @@ SEC("?raw_tp")
__failure __msg("value is outside of the allowed memory range")
int data_slice_out_of_bounds_map_value(void *ctx)
{
- __u32 key = 0, map_val;
+ __u32 map_val;
struct bpf_dynptr ptr;
void *data;
@@ -388,7 +388,6 @@ int data_slice_missing_null_check2(void *ctx)
/* this should fail */
*data2 = 3;
-done:
bpf_ringbuf_discard_dynptr(&ptr, 0);
return 0;
}
@@ -440,6 +439,7 @@ int invalid_write1(void *ctx)
/* this should fail */
data = bpf_dynptr_data(&ptr, 0, 1);
+ __sink(data);
return 0;
}
@@ -1374,6 +1374,7 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb)
* changing packet data
*/
hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ __sink(hdr);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index c8358a7c7924..b2fa6c47ecc0 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -35,7 +35,7 @@ SEC("?tp/syscalls/sys_enter_nanosleep")
int test_read_write(void *ctx)
{
char write_data[64] = "hello there, world!!";
- char read_data[64] = {}, buf[64] = {};
+ char read_data[64] = {};
struct bpf_dynptr ptr;
int i;
@@ -170,7 +170,6 @@ int test_skb_readonly(struct __sk_buff *skb)
{
__u8 write_data[2] = {1, 2};
struct bpf_dynptr ptr;
- __u64 *data;
int ret;
if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
@@ -191,10 +190,8 @@ int test_skb_readonly(struct __sk_buff *skb)
SEC("?cgroup_skb/egress")
int test_dynptr_skb_data(struct __sk_buff *skb)
{
- __u8 write_data[2] = {1, 2};
struct bpf_dynptr ptr;
__u64 *data;
- int ret;
if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
err = 1;
diff --git a/tools/testing/selftests/bpf/progs/err.h b/tools/testing/selftests/bpf/progs/err.h
new file mode 100644
index 000000000000..d66d283d9e59
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/err.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ERR_H__
+#define __ERR_H__
+
+#define MAX_ERRNO 4095
+#define IS_ERR_VALUE(x) ((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
+
+static inline int IS_ERR_OR_NULL(const void *ptr)
+{
+ return !ptr || IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline long PTR_ERR(const void *ptr)
+{
+ return (long) ptr;
+}
+
+#endif /* __ERR_H__ */
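
A short sketch of how this header is meant to be used from a BPF program;
the pointer-returning kfunc below is hypothetical, standing in for any
kernel API that encodes an errno in the pointer value:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include "err.h"

	/* hypothetical err-pointer-returning kfunc, for illustration only */
	extern struct task_struct *example_task_lookup(u32 pid) __ksym;

	SEC("tp_btf/task_newtask")
	int check_lookup(u64 *ctx)
	{
		struct task_struct *p = example_task_lookup(1);

		if (IS_ERR_OR_NULL(p)) {
			bpf_printk("lookup failed: %ld", PTR_ERR(p));
			return 0;
		}
		/* ... use p ... */
		return 0;
	}

	char _license[] SEC("license") = "GPL";
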
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
index 4547b059d487..983b7c233382 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -120,8 +120,6 @@ int new_get_skb_ifindex(int val, struct __sk_buff *skb, int var)
void *data = (void *)(long)skb->data;
struct ipv6hdr ip6, *ip6p;
int ifindex = skb->ifindex;
- __u32 eth_proto;
- __u32 nh_off;
/* check that BPF extension can read packet via direct packet access */
if (data + 14 + sizeof(ip6) > data_end)
diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
index bb2a77c5b62b..370a0e1922e0 100644
--- a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
@@ -23,7 +23,7 @@ struct {
SEC("freplace/handle_kprobe")
int new_handle_kprobe(struct pt_regs *ctx)
{
- struct hmap_elem zero = {}, *val;
+ struct hmap_elem *val;
int key = 0;
val = bpf_map_lookup_elem(&hash_map, &key);
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
new file mode 100644
index 000000000000..6b9b3c56f009
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -0,0 +1,719 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+static volatile int zero = 0;
+
+int my_pid;
+int arr[256];
+int small_arr[16] SEC(".data.small_arr");
+
+#ifdef REAL_TEST
+#define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0
+#else
+#define MY_PID_GUARD() ({ })
+#endif
+
+SEC("?raw_tp")
+__failure __msg("math between map_value pointer and register with unbounded min value is not allowed")
+int iter_err_unsafe_c_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, i = zero; /* obscure initial value of i */
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 1000);
+ while ((v = bpf_iter_num_next(&it))) {
+ i++;
+ }
+ bpf_iter_num_destroy(&it);
+
+ small_arr[i] = 123; /* invalid */
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("unbounded memory access")
+int iter_err_unsafe_asm_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+
+ MY_PID_GUARD();
+
+ asm volatile (
+ "r6 = %[zero];" /* iteration counter */
+ "r1 = %[it];" /* iterator state */
+ "r2 = 0;"
+ "r3 = 1000;"
+ "r4 = 1;"
+ "call %[bpf_iter_num_new];"
+ "loop:"
+ "r1 = %[it];"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto out;"
+ "r6 += 1;"
+ "goto loop;"
+ "out:"
+ "r1 = %[it];"
+ "call %[bpf_iter_num_destroy];"
+ "r1 = %[small_arr];"
+ "r2 = r6;"
+ "r2 <<= 2;"
+ "r1 += r2;"
+ "*(u32 *)(r1 + 0) = r6;" /* invalid */
+ :
+ : [it]"r"(&it),
+ [small_arr]"p"(small_arr),
+ [zero]"p"(zero),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_common, "r6"
+ );
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_while_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 3);
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v);
+ }
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_while_loop_auto_cleanup(const void *ctx)
+{
+ __attribute__((cleanup(bpf_iter_num_destroy))) struct bpf_iter_num it;
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 3);
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v);
+ }
+ /* (!) no explicit bpf_iter_num_destroy() */
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_for_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 5, 10);
+ for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) {
+ bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v);
+ }
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_bpf_for_each_macro(const void *ctx)
+{
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_for_each(num, v, 5, 10) {
+ bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v);
+ }
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_bpf_for_macro(const void *ctx)
+{
+ int i;
+
+ MY_PID_GUARD();
+
+ bpf_for(i, 5, 10) {
+ bpf_printk("ITER_BASIC: E2 VAL: v=%d", i);
+ }
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_pragma_unroll_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, i;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 2);
+#pragma nounroll
+ for (i = 0; i < 3; i++) {
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
+ }
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_manual_unroll_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 100, 200);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_multiple_sequential_loops(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, i;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 3);
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v);
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_iter_num_new(&it, 5, 10);
+ for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) {
+ bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v);
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_iter_num_new(&it, 0, 2);
+#pragma nounroll
+ for (i = 0; i < 3; i++) {
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_iter_num_new(&it, 100, 200);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_limit_cond_break_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, i = 0, sum = 0;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 10);
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_SIMPLE: i=%d v=%d", i, *v);
+ sum += *v;
+
+ i++;
+ if (i > 3)
+ break;
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_printk("ITER_SIMPLE: sum=%d\n", sum);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_obfuscate_counter(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, sum = 0;
+ /* Make i's initial value unknowable for verifier to prevent it from
+ * pruning if/else branch inside the loop body and marking i as precise.
+ */
+ int i = zero;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 10);
+ while ((v = bpf_iter_num_next(&it))) {
+ int x;
+
+ i += 1;
+
+ /* If we initialized i as `int i = 0;` above, verifier would
+ * track that i becomes 1 on first iteration after increment
+ * above, and here verifier would eagerly prune else branch
+ * and mark i as precise, ruining open-coded iterator logic
+ * completely, as each next iteration would have a different
+ * *precise* value of i, and thus there would be no
+ * convergence of state. This would result in reaching maximum
+ * instruction limit, no matter what the limit is.
+ */
+ if (i == 1)
+ x = 123;
+ else
+ x = i * 3 + 1;
+
+ bpf_printk("ITER_OBFUSCATE_COUNTER: i=%d v=%d x=%d", i, *v, x);
+
+ sum += x;
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_printk("ITER_OBFUSCATE_COUNTER: sum=%d\n", sum);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_search_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, *elem = NULL;
+ bool found = false;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 10);
+
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_SEARCH_LOOP: v=%d", *v);
+
+ if (*v == 2) {
+ found = true;
+ elem = v;
+ barrier_var(elem);
+ }
+ }
+
+ /* should fail to verify if bpf_iter_num_destroy() is here */
+
+ if (found)
+ /* the found element will be stale here; we should have copied its
+ * value to a variable, but the point is to make sure we can still
+ * access the memory after the loop
+ */
+ bpf_printk("ITER_SEARCH_LOOP: FOUND IT = %d!\n", *elem);
+ else
+ bpf_printk("ITER_SEARCH_LOOP: NOT FOUND IT!\n");
+
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_array_fill(const void *ctx)
+{
+ int sum, i;
+
+ MY_PID_GUARD();
+
+ bpf_for(i, 0, ARRAY_SIZE(arr)) {
+ arr[i] = i * 2;
+ }
+
+ sum = 0;
+ bpf_for(i, 0, ARRAY_SIZE(arr)) {
+ sum += arr[i];
+ }
+
+ bpf_printk("ITER_ARRAY_FILL: sum=%d (should be %d)\n", sum, 255 * 256);
+
+ return 0;
+}
+
+static int arr2d[4][5];
+static int arr2d_row_sums[4];
+static int arr2d_col_sums[5];
+
+SEC("raw_tp")
+__success
+int iter_nested_iters(const void *ctx)
+{
+ int sum, row, col;
+
+ MY_PID_GUARD();
+
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ arr2d[row][col] = row * col;
+ }
+ }
+
+ /* zero-initialize sums */
+ sum = 0;
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ arr2d_row_sums[row] = 0;
+ }
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ arr2d_col_sums[col] = 0;
+ }
+
+ /* calculate sums */
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ sum += arr2d[row][col];
+ arr2d_row_sums[row] += arr2d[row][col];
+ arr2d_col_sums[col] += arr2d[row][col];
+ }
+ }
+
+ bpf_printk("ITER_NESTED_ITERS: total sum=%d", sum);
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ bpf_printk("ITER_NESTED_ITERS: row #%d sum=%d", row, arr2d_row_sums[row]);
+ }
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ bpf_printk("ITER_NESTED_ITERS: col #%d sum=%d%s",
+ col, arr2d_col_sums[col],
+ col == ARRAY_SIZE(arr2d[0]) - 1 ? "\n" : "");
+ }
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_nested_deeply_iters(const void *ctx)
+{
+ int sum = 0;
+
+ MY_PID_GUARD();
+
+ bpf_repeat(10) {
+ bpf_repeat(10) {
+ bpf_repeat(10) {
+ bpf_repeat(10) {
+ bpf_repeat(10) {
+ sum += 1;
+ }
+ }
+ }
+ }
+ /* validate that we can break from inside bpf_repeat() */
+ break;
+ }
+
+ return sum;
+}
+
+static __noinline void fill_inner_dimension(int row)
+{
+ int col;
+
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ arr2d[row][col] = row * col;
+ }
+}
+
+static __noinline int sum_inner_dimension(int row)
+{
+ int sum = 0, col;
+
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ sum += arr2d[row][col];
+ arr2d_row_sums[row] += arr2d[row][col];
+ arr2d_col_sums[col] += arr2d[row][col];
+ }
+
+ return sum;
+}
+
+SEC("raw_tp")
+__success
+int iter_subprog_iters(const void *ctx)
+{
+ int sum, row, col;
+
+ MY_PID_GUARD();
+
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ fill_inner_dimension(row);
+ }
+
+ /* zero-initialize sums */
+ sum = 0;
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ arr2d_row_sums[row] = 0;
+ }
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ arr2d_col_sums[col] = 0;
+ }
+
+ /* calculate sums */
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ sum += sum_inner_dimension(row);
+ }
+
+ bpf_printk("ITER_SUBPROG_ITERS: total sum=%d", sum);
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ bpf_printk("ITER_SUBPROG_ITERS: row #%d sum=%d",
+ row, arr2d_row_sums[row]);
+ }
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ bpf_printk("ITER_SUBPROG_ITERS: col #%d sum=%d%s",
+ col, arr2d_col_sums[col],
+ col == ARRAY_SIZE(arr2d[0]) - 1 ? "\n" : "");
+ }
+
+ return 0;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1000);
+} arr_map SEC(".maps");
+
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int iter_err_too_permissive1(const void *ctx)
+{
+ int *map_val = NULL;
+ int key = 0;
+
+ MY_PID_GUARD();
+
+ map_val = bpf_map_lookup_elem(&arr_map, &key);
+ if (!map_val)
+ return 0;
+
+ bpf_repeat(1000000) {
+ map_val = NULL;
+ }
+
+ *map_val = 123;
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'map_value_or_null'")
+int iter_err_too_permissive2(const void *ctx)
+{
+ int *map_val = NULL;
+ int key = 0;
+
+ MY_PID_GUARD();
+
+ map_val = bpf_map_lookup_elem(&arr_map, &key);
+ if (!map_val)
+ return 0;
+
+ bpf_repeat(1000000) {
+ map_val = bpf_map_lookup_elem(&arr_map, &key);
+ }
+
+ *map_val = 123;
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'map_value_or_null'")
+int iter_err_too_permissive3(const void *ctx)
+{
+ int *map_val = NULL;
+ int key = 0;
+ bool found = false;
+
+ MY_PID_GUARD();
+
+ bpf_repeat(1000000) {
+ map_val = bpf_map_lookup_elem(&arr_map, &key);
+ found = true;
+ }
+
+ if (found)
+ *map_val = 123;
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_tricky_but_fine(const void *ctx)
+{
+ int *map_val = NULL;
+ int key = 0;
+ bool found = false;
+
+ MY_PID_GUARD();
+
+ bpf_repeat(1000000) {
+ map_val = bpf_map_lookup_elem(&arr_map, &key);
+ if (map_val) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found)
+ *map_val = 123;
+
+ return 0;
+}
+
+#define __bpf_memzero(p, sz) bpf_probe_read_kernel((p), (sz), 0)
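+/* Note: reading from a NULL kernel address fails, and bpf_probe_read_kernel()
+ * zero-fills the destination on failure, so this zeroes the buffer at runtime
+ * without the verifier learning that the stack slots hold known zeros.
+ */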
+
+SEC("raw_tp")
+__success
+int iter_stack_array_loop(const void *ctx)
+{
+ long arr1[16], arr2[16], sum = 0;
+ int i;
+
+ MY_PID_GUARD();
+
+ /* zero-init arr1 and arr2 in such a way that the verifier doesn't
+ * know they are all zeros; if we don't do that, we'll make the BPF
+ * verifier track all combinations of zero/non-zero stack slots for
+ * arr1/arr2, which will lead to O(2^(ARRAY_SIZE(arr1)+ARRAY_SIZE(arr2)))
+ * different states
+ */
+ __bpf_memzero(arr1, sizeof(arr1));
+ __bpf_memzero(arr2, sizeof(arr2));
+
+ /* validate that we can break and continue when using bpf_for() */
+ bpf_for(i, 0, ARRAY_SIZE(arr1)) {
+ if (i & 1) {
+ arr1[i] = i;
+ continue;
+ } else {
+ arr2[i] = i;
+ break;
+ }
+ }
+
+ bpf_for(i, 0, ARRAY_SIZE(arr1)) {
+ sum += arr1[i] + arr2[i];
+ }
+
+ return sum;
+}
+
+static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul)
+{
+ int *t, i;
+
+ while ((t = bpf_iter_num_next(it))) {
+ i = *t;
+ if (i >= n)
+ break;
+ arr[i] = i * mul;
+ }
+}
+
+static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n)
+{
+ int *t, i, sum = 0;
+
+ while ((t = bpf_iter_num_next(it))) {
+ i = *t;
+ if (i >= n)
+ break;
+ sum += arr[i];
+ }
+
+ return sum;
+}
+
+SEC("raw_tp")
+__success
+int iter_pass_iter_ptr_to_subprog(const void *ctx)
+{
+ int arr1[16], arr2[32];
+ struct bpf_iter_num it;
+ int n, sum1, sum2;
+
+ MY_PID_GUARD();
+
+ /* fill arr1 */
+ n = ARRAY_SIZE(arr1);
+ bpf_iter_num_new(&it, 0, n);
+ fill(&it, arr1, n, 2);
+ bpf_iter_num_destroy(&it);
+
+ /* fill arr2 */
+ n = ARRAY_SIZE(arr2);
+ bpf_iter_num_new(&it, 0, n);
+ fill(&it, arr2, n, 10);
+ bpf_iter_num_destroy(&it);
+
+ /* sum arr1 */
+ n = ARRAY_SIZE(arr1);
+ bpf_iter_num_new(&it, 0, n);
+ sum1 = sum(&it, arr1, n);
+ bpf_iter_num_destroy(&it);
+
+ /* sum arr2 */
+ n = ARRAY_SIZE(arr2);
+ bpf_iter_num_new(&it, 0, n);
+ sum2 = sum(&it, arr2, n);
+ bpf_iter_num_destroy(&it);
+
+ bpf_printk("sum1=%d, sum2=%d", sum1, sum2);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c
new file mode 100644
index 000000000000..05fa5ce7fc59
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_looping.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define ITER_HELPERS \
+ __imm(bpf_iter_num_new), \
+ __imm(bpf_iter_num_next), \
+ __imm(bpf_iter_num_destroy)
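+
+/* __imm() (from bpf_misc.h) binds each kfunc as an inline-asm input operand,
+ * letting the asm templates below reference them in "call %[...]" insns.
+ */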
+
+SEC("?raw_tp")
+__success
+int force_clang_to_emit_btf_for_externs(void *ctx)
+{
+ /* we need this as a workaround to force the compiler to emit BTF
+ * information for the bpf_iter_num_{new,next,destroy}() kfuncs, as,
+ * apparently, it doesn't emit it for symbols that are only
+ * referenced from assembly (or from a cleanup attribute, for that
+ * matter)
+ */
+ bpf_repeat(0);
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__success
+int consume_first_item_only(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* consume first item */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+
+ "if r0 == 0 goto +1;"
+ "r0 = *(u32 *)(r0 + 0);"
+
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("R0 invalid mem access 'scalar'")
+int missing_null_check_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* consume first element */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+
+ /* FAIL: deref with no NULL check */
+ "r1 = *(u32 *)(r0 + 0);"
+
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure
+__msg("invalid access to memory, mem_size=4 off=0 size=8")
+__msg("R0 min value is outside of the allowed memory range")
+int wrong_sized_read_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* consume first element */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+
+ "if r0 == 0 goto +1;"
+ /* FAIL: deref more than available 4 bytes */
+ "r0 = *(u64 *)(r0 + 0);"
+
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+int simplest_loop(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ "r6 = 0;" /* init sum */
+
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+
+ "1:"
+ /* consume next item */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+
+ "if r0 == 0 goto 2f;"
+ "r0 = *(u32 *)(r0 + 0);"
+ "r6 += r0;" /* accumulate sum */
+ "goto 1b;"
+
+ "2:"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common, "r6"
+ );
+
+ return 0;
+}
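+
+/* For reference, a rough C equivalent of the asm loop above (a sketch, not
+ * part of the test; r6 plays the role of sum):
+ *
+ *	struct bpf_iter_num iter;
+ *	int *v, sum = 0;
+ *
+ *	bpf_iter_num_new(&iter, 0, 10);
+ *	while ((v = bpf_iter_num_next(&iter)))
+ *		sum += *v;
+ *	bpf_iter_num_destroy(&iter);
+ */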
diff --git a/tools/testing/selftests/bpf/progs/iters_num.c b/tools/testing/selftests/bpf/progs/iters_num.c
new file mode 100644
index 000000000000..7a77a8daee0d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_num.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <limits.h>
+#include <linux/errno.h>
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+const volatile __s64 exp_empty_zero = 0 + 1;
+__s64 res_empty_zero;
+
+SEC("raw_tp/sys_enter")
+int num_empty_zero(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, 0, 0) sum += i;
+ res_empty_zero = 1 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_empty_int_min = 0 + 2;
+__s64 res_empty_int_min;
+
+SEC("raw_tp/sys_enter")
+int num_empty_int_min(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, INT_MIN, INT_MIN) sum += i;
+ res_empty_int_min = 2 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_empty_int_max = 0 + 3;
+__s64 res_empty_int_max;
+
+SEC("raw_tp/sys_enter")
+int num_empty_int_max(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, INT_MAX, INT_MAX) sum += i;
+ res_empty_int_max = 3 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_empty_minus_one = 0 + 4;
+__s64 res_empty_minus_one;
+
+SEC("raw_tp/sys_enter")
+int num_empty_minus_one(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, -1, -1) sum += i;
+ res_empty_minus_one = 4 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_simple_sum = 9 * 10 / 2;
+__s64 res_simple_sum;
+
+SEC("raw_tp/sys_enter")
+int num_simple_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, 0, 10) sum += i;
+ res_simple_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_neg_sum = -11 * 10 / 2;
+__s64 res_neg_sum;
+
+SEC("raw_tp/sys_enter")
+int num_neg_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, -10, 0) sum += i;
+ res_neg_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_very_neg_sum = INT_MIN + (__s64)(INT_MIN + 1);
+__s64 res_very_neg_sum;
+
+SEC("raw_tp/sys_enter")
+int num_very_neg_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, INT_MIN, INT_MIN + 2) sum += i;
+ res_very_neg_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_very_big_sum = (__s64)(INT_MAX - 1) + (__s64)(INT_MAX - 2);
+__s64 res_very_big_sum;
+
+SEC("raw_tp/sys_enter")
+int num_very_big_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, INT_MAX - 2, INT_MAX) sum += i;
+ res_very_big_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_neg_pos_sum = -3;
+__s64 res_neg_pos_sum;
+
+SEC("raw_tp/sys_enter")
+int num_neg_pos_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, -3, 3) sum += i;
+ res_neg_pos_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_invalid_range = -EINVAL;
+__s64 res_invalid_range;
+
+SEC("raw_tp/sys_enter")
+int num_invalid_range(const void *ctx)
+{
+ struct bpf_iter_num it;
+
+ res_invalid_range = bpf_iter_num_new(&it, 1, 0);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
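+
+/* Note: even a failed bpf_iter_num_new() (e.g. -EINVAL above) leaves the
+ * iterator state initialized, so the unconditional destroy is both safe and
+ * required by the verifier.
+ */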
+
+const volatile __s64 exp_max_range = 0 + 10;
+__s64 res_max_range;
+
+SEC("raw_tp/sys_enter")
+int num_max_range(const void *ctx)
+{
+ struct bpf_iter_num it;
+
+ res_max_range = 10 + bpf_iter_num_new(&it, 0, BPF_MAX_LOOPS);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+const volatile __s64 exp_e2big_range = -E2BIG;
+__s64 res_e2big_range;
+
+SEC("raw_tp/sys_enter")
+int num_e2big_range(const void *ctx)
+{
+ struct bpf_iter_num it;
+
+ res_e2big_range = bpf_iter_num_new(&it, -1, BPF_MAX_LOOPS);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+const volatile __s64 exp_succ_elem_cnt = 10;
+__s64 res_succ_elem_cnt;
+
+SEC("raw_tp/sys_enter")
+int num_succ_elem_cnt(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int cnt = 0, *v;
+
+ bpf_iter_num_new(&it, 0, 10);
+ while ((v = bpf_iter_num_next(&it))) {
+ cnt++;
+ }
+ bpf_iter_num_destroy(&it);
+
+ res_succ_elem_cnt = cnt;
+
+ return 0;
+}
+
+const volatile __s64 exp_overfetched_elem_cnt = 5;
+__s64 res_overfetched_elem_cnt;
+
+SEC("raw_tp/sys_enter")
+int num_overfetched_elem_cnt(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int cnt = 0, *v, i;
+
+ bpf_iter_num_new(&it, 0, 5);
+ for (i = 0; i < 10; i++) {
+ v = bpf_iter_num_next(&it);
+ if (v)
+ cnt++;
+ }
+ bpf_iter_num_destroy(&it);
+
+ res_overfetched_elem_cnt = cnt;
+
+ return 0;
+}
+
+const volatile __s64 exp_fail_elem_cnt = 20 + 0;
+__s64 res_fail_elem_cnt;
+
+SEC("raw_tp/sys_enter")
+int num_fail_elem_cnt(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int cnt = 0, *v, i;
+
+ bpf_iter_num_new(&it, 100, 10);
+ for (i = 0; i < 10; i++) {
+ v = bpf_iter_num_next(&it);
+ if (v)
+ cnt++;
+ }
+ bpf_iter_num_destroy(&it);
+
+ res_fail_elem_cnt = 20 + cnt;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c
new file mode 100644
index 000000000000..d47e59aba6de
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define ITER_HELPERS \
+ __imm(bpf_iter_num_new), \
+ __imm(bpf_iter_num_next), \
+ __imm(bpf_iter_num_destroy)
+
+SEC("?raw_tp")
+__success
+int force_clang_to_emit_btf_for_externs(void *ctx)
+{
+ /* we need this as a workaround to force the compiler to emit BTF
+ * information for the bpf_iter_num_{new,next,destroy}() kfuncs, as,
+ * apparently, it doesn't emit it for symbols that are only
+ * referenced from assembly (or from a cleanup attribute, for that
+ * matter)
+ */
+ bpf_repeat(0);
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)")
+int create_and_destroy(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("Unreleased reference id=1")
+int create_and_forget_to_destroy_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #1")
+int destroy_without_creating_fail(void *ctx)
+{
+ /* the iterator is deliberately left uninitialized here, so the
+ * destroy below must be rejected by the verifier
+ */
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #1")
+int compromise_iter_w_direct_write_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* directly write over first half of iter state */
+ "*(u64 *)(%[iter] + 0) = r0;"
+
+ /* (attempt to) destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("Unreleased reference id=1")
+int compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* directly write over first half of iter state */
+ "*(u64 *)(%[iter] + 0) = r0;"
+
+ /* don't destroy iter, leaking ref, which should fail */
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #1")
+int compromise_iter_w_helper_write_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* overwrite 8th byte with bpf_probe_read_kernel() */
+ "r1 = %[iter];"
+ "r1 += 7;"
+ "r2 = 1;"
+ "r3 = 0;" /* NULL */
+ "call %[bpf_probe_read_kernel];"
+
+ /* (attempt to) destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS, __imm(bpf_probe_read_kernel)
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+static __noinline void subprog_with_iter(void)
+{
+ struct bpf_iter_num iter;
+
+ bpf_iter_num_new(&iter, 0, 1);
+
+ return;
+}
+
+SEC("?raw_tp")
+__failure
+/* ensure there was a call to subprog, which might not happen without __noinline */
+__msg("returning from callee:")
+__msg("Unreleased reference id=1")
+int leak_iter_from_subprog_fail(void *ctx)
+{
+ subprog_with_iter();
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)")
+int valid_stack_reuse(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+
+ /* now reuse same stack slots */
+
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected uninitialized iter_num as arg #1")
+int double_create_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* (attempt to) create iterator again */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #1")
+int double_destroy_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ /* (attempt to) destroy iterator again */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #1")
+int next_without_new_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* don't create the iterator and try to iterate anyway */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #1")
+int next_after_destroy_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ /* iterator is already destroyed, but try to fetch the next element anyway */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid read from stack")
+int __naked read_from_iter_slot_fail(void)
+{
+ asm volatile (
+ /* r6 points to struct bpf_iter_num on the stack */
+ "r6 = r10;"
+ "r6 += -24;"
+
+ /* create iterator */
+ "r1 = r6;"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* attempt to leak bpf_iter_num state */
+ "r7 = *(u64 *)(r6 + 0);"
+ "r8 = *(u64 *)(r6 + 8);"
+
+ /* destroy iterator */
+ "r1 = r6;"
+ "call %[bpf_iter_num_destroy];"
+
+ /* leak bpf_iter_num state */
+ "r0 = r7;"
+ "if r7 > r8 goto +1;"
+ "r0 = r8;"
+ "exit;"
+ :
+ : ITER_HELPERS
+ : __clobber_common, "r6", "r7", "r8"
+ );
+}
+
+int zero;
+
+SEC("?raw_tp")
+__failure
+__flag(BPF_F_TEST_STATE_FREQ)
+__msg("Unreleased reference")
+int stacksafe_should_not_conflate_stack_spill_and_iter(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* Create a fork in logic, with general setup as follows:
+ * - fallthrough (first) path is valid;
+ * - branch (second) path is invalid.
+ * Then depending on what we do in fallthrough vs branch path,
+ * we try to detect bugs in func_states_equal(), regsafe(),
+ * refsafe(), stacksafe(), and similar by tricking the verifier
+ * into believing that branch state is a valid subset of
+ * a fallthrough state. Verifier should reject overall
+ * validation, unless there is a bug somewhere in verifier
+ * logic.
+ */
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+
+ "if r6 > r7 goto bad;" /* fork */
+
+ /* spill r6 into stack slot of bpf_iter_num var */
+ "*(u64 *)(%[iter] + 0) = r6;"
+
+ "goto skip_bad;"
+
+ "bad:"
+ /* create iterator in the same stack slot */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* but then forget about it and overwrite it back to r6 spill */
+ "*(u64 *)(%[iter] + 0) = r6;"
+
+ "skip_bad:"
+ "goto +0;" /* force checkpoint */
+
+ /* corrupt stack slots, if they really hold iterator state */
+ "*(u64 *)(%[iter] + 0) = r6;"
+ :
+ : __imm_ptr(iter),
+ __imm_addr(zero),
+ __imm(bpf_get_prandom_u32),
+ __imm(bpf_dynptr_from_mem),
+ ITER_HELPERS
+ : __clobber_common, "r6", "r7"
+ );
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
new file mode 100644
index 000000000000..3873fb6c292a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct bpf_iter_testmod_seq {
+ u64 :64;
+ u64 :64;
+};
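+
+/* The iterator state is opaque to the BPF program: the unnamed 64-bit
+ * bitfields above only reserve space matching the size and alignment of the
+ * kernel-side state, without exposing any of its fields.
+ */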
+
+extern int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt) __ksym;
+extern s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq *it) __ksym;
+extern void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) __ksym;
+
+const volatile __s64 exp_empty = 0 + 1;
+__s64 res_empty;
+
+SEC("raw_tp/sys_enter")
+__success __log_level(2)
+__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
+__msg("call bpf_iter_testmod_seq_destroy")
+int testmod_seq_empty(const void *ctx)
+{
+ __s64 sum = 0, *i;
+
+ bpf_for_each(testmod_seq, i, 1000, 0) sum += *i;
+ res_empty = 1 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_full = 1000000;
+__s64 res_full;
+
+SEC("raw_tp/sys_enter")
+__success __log_level(2)
+__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
+__msg("call bpf_iter_testmod_seq_destroy")
+int testmod_seq_full(const void *ctx)
+{
+ __s64 sum = 0, *i;
+
+ bpf_for_each(testmod_seq, i, 1000, 1000) sum += *i;
+ res_full = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_truncated = 10 * 1000000;
+__s64 res_truncated;
+
+static volatile int zero = 0;
+
+SEC("raw_tp/sys_enter")
+__success __log_level(2)
+__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
+__msg("call bpf_iter_testmod_seq_destroy")
+int testmod_seq_truncated(const void *ctx)
+{
+ __s64 sum = 0, *i;
+ int cnt = zero;
+
+ bpf_for_each(testmod_seq, i, 10, 2000000) {
+ sum += *i;
+ cnt++;
+ if (cnt >= 1000000)
+ break;
+ }
+ res_truncated = sum;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c
index b05571bc67d5..c4b49ceea967 100644
--- a/tools/testing/selftests/bpf/progs/linked_funcs1.c
+++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c
@@ -5,6 +5,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
/* weak and shared between two files */
const volatile int my_tid __weak;
@@ -51,6 +52,7 @@ __weak int set_output_weak(int x)
* cause problems for BPF static linker
*/
whatever = bpf_core_type_size(struct task_struct);
+ __sink(whatever);
output_weak1 = x;
return x;
@@ -71,6 +73,7 @@ int BPF_PROG(handler1, struct pt_regs *regs, long id)
/* make sure we have CO-RE relocations in main program */
whatever = bpf_core_type_size(struct task_struct);
+ __sink(whatever);
set_output_val2(1000);
set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c
index ee7e3848ee4f..013ff0645f0c 100644
--- a/tools/testing/selftests/bpf/progs/linked_funcs2.c
+++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c
@@ -5,6 +5,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
/* weak and shared between both files */
const volatile int my_tid __weak;
@@ -51,6 +52,7 @@ __weak int set_output_weak(int x)
* cause problems for BPF static linker
*/
whatever = 2 * bpf_core_type_size(struct task_struct);
+ __sink(whatever);
output_weak2 = x;
return 2 * x;
@@ -71,6 +73,7 @@ int BPF_PROG(handler2, struct pt_regs *regs, long id)
/* make sure we have CO-RE relocations in main program */
whatever = bpf_core_type_size(struct task_struct);
+ __sink(whatever);
set_output_val1(2000);
set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */
diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c
index 4fa4a9b01bde..53ded51a3abb 100644
--- a/tools/testing/selftests/bpf/progs/linked_list.c
+++ b/tools/testing/selftests/bpf/progs/linked_list.c
@@ -313,7 +313,6 @@ SEC("tc")
int map_list_push_pop_multiple(void *ctx)
{
struct map_value *v;
- int ret;
v = bpf_map_lookup_elem(&array_map, &(int){0});
if (!v)
@@ -326,7 +325,6 @@ int inner_map_list_push_pop_multiple(void *ctx)
{
struct map_value *v;
void *map;
- int ret;
map = bpf_map_lookup_elem(&map_of_maps, &(int){0});
if (!map)
@@ -352,7 +350,6 @@ SEC("tc")
int map_list_in_list(void *ctx)
{
struct map_value *v;
- int ret;
v = bpf_map_lookup_elem(&array_map, &(int){0});
if (!v)
@@ -365,7 +362,6 @@ int inner_map_list_in_list(void *ctx)
{
struct map_value *v;
void *map;
- int ret;
map = bpf_map_lookup_elem(&map_of_maps, &(int){0});
if (!map)
diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c
index 69cdc07cba13..41978b46f58e 100644
--- a/tools/testing/selftests/bpf/progs/linked_list_fail.c
+++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c
@@ -557,7 +557,6 @@ SEC("?tc")
int incorrect_head_off2(void *ctx)
{
struct foo *f;
- struct bar *b;
f = bpf_obj_new(typeof(*f));
if (!f)
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
new file mode 100644
index 000000000000..0ef286da092b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct node_data {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+};
+
+struct map_value {
+ struct prog_test_ref_kfunc *not_kptr;
+ struct prog_test_ref_kfunc __kptr *val;
+ struct node_data __kptr *node;
+};
+
+/* This is necessary so that LLVM generates BTF for the node_data struct.
+ * If it's not included, a fwd reference for node_data will be generated but
+ * no struct. Example BTF of "node" field in map_value when not included:
+ *
+ * [10] PTR '(anon)' type_id=35
+ * [34] FWD 'node_data' fwd_kind=struct
+ * [35] TYPE_TAG 'kptr_ref' type_id=34
+ *
+ * (with no node_data struct defined)
+ * We had to do the same with bpf_kfunc_call_test_release below.
+ */
+struct node_data *just_here_because_btf_bug;
+
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 2);
+} some_nodes SEC(".maps");
+
+static int create_and_stash(int idx, int val)
+{
+ struct map_value *mapval;
+ struct node_data *res;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ res = bpf_obj_new(typeof(*res));
+ if (!res)
+ return 1;
+ res->key = val;
+
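+ /* bpf_kptr_xchg() transfers ownership of res into the map and
+ * returns the previously stashed node (if any), which we now own
+ * and must drop
+ */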
+ res = bpf_kptr_xchg(&mapval->node, res);
+ if (res)
+ bpf_obj_drop(res);
+ return 0;
+}
+
+SEC("tc")
+long stash_rb_nodes(void *ctx)
+{
+ return create_and_stash(0, 41) ?: create_and_stash(1, 42);
+}
+
+SEC("tc")
+long unstash_rb_node(void *ctx)
+{
+ struct map_value *mapval;
+ struct node_data *res;
+ long retval;
+ int key = 1;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &key);
+ if (!mapval)
+ return 1;
+
+ res = bpf_kptr_xchg(&mapval->node, NULL);
+ if (res) {
+ retval = res->key;
+ bpf_obj_drop(res);
+ return retval;
+ }
+ return 1;
+}
+
+SEC("tc")
+long stash_test_ref_kfunc(void *ctx)
+{
+ struct prog_test_ref_kfunc *res;
+ struct map_value *mapval;
+ int key = 0;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &key);
+ if (!mapval)
+ return 1;
+
+ res = bpf_kptr_xchg(&mapval->val, NULL);
+ if (res)
+ bpf_kfunc_call_test_release(res);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 19423ed862e3..bc8ea56671a1 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -16,6 +16,7 @@ char _license[] SEC("license") = "GPL";
int monitored_pid = 0;
int inode_storage_result = -1;
int sk_storage_result = -1;
+int task_storage_result = -1;
struct local_storage {
struct inode *exec_inode;
@@ -50,26 +51,57 @@ struct {
__type(value, struct local_storage);
} task_storage_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct local_storage);
+} task_storage_map2 SEC(".maps");
+
SEC("lsm/inode_unlink")
int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct bpf_local_storage *local_storage;
struct local_storage *storage;
+ struct task_struct *task;
bool is_self_unlink;
if (pid != monitored_pid)
return 0;
- storage = bpf_task_storage_get(&task_storage_map,
- bpf_get_current_task_btf(), 0, 0);
- if (storage) {
- /* Don't let an executable delete itself */
- is_self_unlink = storage->exec_inode == victim->d_inode;
- if (is_self_unlink)
- return -EPERM;
- }
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
- return 0;
+ task_storage_result = -1;
+
+ storage = bpf_task_storage_get(&task_storage_map, task, 0, 0);
+ if (!storage)
+ return 0;
+
+ /* Don't let an executable delete itself */
+ is_self_unlink = storage->exec_inode == victim->d_inode;
+
+ storage = bpf_task_storage_get(&task_storage_map2, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage || storage->value)
+ return 0;
+
+ if (bpf_task_storage_delete(&task_storage_map, task))
+ return 0;
+
+ /* Ensure that the task_storage_map is disconnected from the storage.
+ * The storage memory should not be freed back to the
+ * bpf_mem_alloc.
+ */
+ local_storage = task->bpf_storage;
+ if (!local_storage || local_storage->smap)
+ return 0;
+
+ task_storage_result = 0;
+
+ return is_self_unlink ? -EPERM : 0;
}
SEC("lsm.s/inode_rename")
@@ -77,7 +109,6 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
- __u32 pid = bpf_get_current_pid_tgid() >> 32;
struct local_storage *storage;
int err;
@@ -109,18 +140,17 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
struct local_storage *storage;
- int err;
if (pid != monitored_pid)
return 0;
- storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
+ storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, 0);
if (!storage)
return 0;
+ sk_storage_result = -1;
if (storage->value != DUMMY_STORAGE_VALUE)
- sk_storage_result = -1;
+ return 0;
/* This tests that we can associate multiple elements
* with the local storage.
@@ -130,14 +160,22 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
if (!storage)
return 0;
- err = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
- if (err)
+ if (bpf_sk_storage_delete(&sk_storage_map2, sock->sk))
return 0;
- err = bpf_sk_storage_delete(&sk_storage_map2, sock->sk);
- if (!err)
- sk_storage_result = err;
+ storage = bpf_sk_storage_get(&sk_storage_map2, sock->sk, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ if (bpf_sk_storage_delete(&sk_storage_map, sock->sk))
+ return 0;
+
+ /* Ensure that the sk_storage_map is disconnected from the storage. */
+ if (!sock->sk->sk_bpf_storage || sock->sk->sk_bpf_storage->smap)
+ return 0;
+ sk_storage_result = 0;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
index 38de0331e6b4..e4ff97fbcce1 100644
--- a/tools/testing/selftests/bpf/progs/loop6.c
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -5,6 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -76,6 +77,7 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
return 0;
for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
+ __sink(out_sgs);
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
sgp = __sg_next(sgp)) {
bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
@@ -85,6 +87,7 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
}
for (i = 0; (i < VIRTIO_MAX_SGS) && (i < in_sgs); i++) {
+ __sink(in_sgs);
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
sgp = __sg_next(sgp)) {
bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index dc93887ed34c..fadfdd98707c 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -4,12 +4,12 @@
* Copyright 2020 Google LLC.
*/
-#include "bpf_misc.h"
#include "vmlinux.h"
+#include <errno.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include <errno.h>
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index 3903d30217b8..dae5dab1bbf7 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -515,7 +515,6 @@ int test_ls_map_kptr_ref1(void *ctx)
{
struct task_struct *current;
struct map_value *v;
- int ret;
current = bpf_get_current_task_btf();
if (!current)
@@ -534,7 +533,6 @@ int test_ls_map_kptr_ref2(void *ctx)
{
struct task_struct *current;
struct map_value *v;
- int ret;
current = bpf_get_current_task_btf();
if (!current)
@@ -550,7 +548,6 @@ int test_ls_map_kptr_ref_del(void *ctx)
{
struct task_struct *current;
struct map_value *v;
- int ret;
current = bpf_get_current_task_btf();
if (!current)
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
index 08f9ec18c345..15bf3127dba3 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -20,6 +20,7 @@ struct array_map {
} array_map SEC(".maps");
extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
extern struct prog_test_ref_kfunc *
bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym;
@@ -442,4 +443,26 @@ int kptr_get_ref_state(struct __sk_buff *ctx)
return 0;
}
+SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to helper arg2")
+int kptr_xchg_possibly_null(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+
+ /* PTR_TO_BTF_ID | PTR_MAYBE_NULL passed to bpf_kptr_xchg() */
+ p = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (p)
+ bpf_kfunc_call_test_release(p);
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c
index f718b2c212dc..f9ef8aee56f1 100644
--- a/tools/testing/selftests/bpf/progs/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c
@@ -26,7 +26,6 @@ SEC("cgroup/skb")
int bpf_nextcnt(struct __sk_buff *skb)
{
union percpu_net_cnt *percpu_cnt;
- char fmt[] = "%d %llu %llu\n";
union net_cnt *cnt;
__u64 ts, dt;
int ret;
diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
index 1d8918dfbd3f..c0062645fc68 100644
--- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c
+++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
@@ -53,7 +53,6 @@ static int __strncmp(const void *m1, const void *m2, size_t len)
do { \
static const char _expectedval[EXPECTED_STRSIZE] = \
_expected; \
- static const char _ptrtype[64] = #_type; \
__u64 _hflags = _flags | BTF_F_COMPACT; \
static _type _ptrdata = __VA_ARGS__; \
static struct btf_ptr _ptr = { }; \
diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
index 45204fe0c570..29c1639fc78a 100644
--- a/tools/testing/selftests/bpf/progs/perfbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
@@ -22,7 +22,6 @@ long dropped __attribute__((aligned(128))) = 0;
SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_perfbuf(void *ctx)
{
- __u64 *sample;
int i;
for (i = 0; i < batch_cnt; i++) {
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 875513866032..f799d87e8700 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -6,6 +6,7 @@
#include <bpf/bpf_tracing.h>
#include "profiler.h"
+#include "err.h"
#ifndef NULL
#define NULL 0
@@ -16,7 +17,6 @@
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
-#define MAX_ERRNO 4095
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
@@ -34,7 +34,6 @@
#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
-#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO
#define KILL_DATA_ARRAY_SIZE 8
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 6c7b1fb268d6..026d573ce179 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -7,6 +7,7 @@
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
@@ -294,17 +295,22 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
if (ctx.done)
return 0;
#else
-#ifdef NO_UNROLL
+#if defined(USE_ITER)
+/* no for loop, no unrolling */
+#elif defined(NO_UNROLL)
#pragma clang loop unroll(disable)
-#else
-#ifdef UNROLL_COUNT
+#elif defined(UNROLL_COUNT)
#pragma clang loop unroll_count(UNROLL_COUNT)
#else
#pragma clang loop unroll(full)
-#endif
#endif /* NO_UNROLL */
/* Unwind python stack */
+#ifdef USE_ITER
+ int i;
+ bpf_for(i, 0, STACK_MAX_LEN) {
+#else /* !USE_ITER */
for (int i = 0; i < STACK_MAX_LEN; ++i) {
+#endif
if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
@@ -339,7 +345,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
SEC("raw_tracepoint/kfree_skb")
int on_event(struct bpf_raw_tracepoint_args* ctx)
{
- int i, ret = 0;
+ int ret = 0;
ret |= __on_event(ctx);
ret |= __on_event(ctx);
ret |= __on_event(ctx);
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_iter.c b/tools/testing/selftests/bpf/progs/pyperf600_iter.c
new file mode 100644
index 000000000000..d62e1b200c30
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600_iter.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
+#define STACK_MAX_LEN 600
+#define SUBPROGS
+#define NO_UNROLL
+#define USE_ITER
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
index 6beff7502f4d..520b58c4f8db 100644
--- a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
+++ b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
@@ -2,7 +2,4 @@
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 600
#define NO_UNROLL
-/* clang will not unroll at all.
- * Total program size is around 2k insns
- */
#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c
index 340f97da1084..7651843f5a80 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c
@@ -16,17 +16,6 @@ struct node_data {
struct bpf_list_node node;
};
-static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
-{
- struct node_data *node_a;
- struct node_data *node_b;
-
- node_a = container_of(a, struct node_data, node);
- node_b = container_of(b, struct node_data, node);
-
- return node_a->key < node_b->key;
-}
-
#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
private(A) struct bpf_spin_lock glock;
private(A) struct bpf_rb_root groot __contains(node_data, node);
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
index 1ced900f3fce..46d7d18a218f 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_fail.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -105,7 +105,7 @@ long rbtree_api_remove_unadded_node(void *ctx)
}
SEC("?tc")
-__failure __msg("Unreleased reference id=2 alloc_insn=11")
+__failure __msg("Unreleased reference id=2 alloc_insn=10")
long rbtree_api_remove_no_drop(void *ctx)
{
struct bpf_rb_node *res;
@@ -119,6 +119,7 @@ long rbtree_api_remove_no_drop(void *ctx)
res = bpf_rbtree_remove(&groot, res);
n = container_of(res, struct node_data, node);
+ __sink(n);
bpf_spin_unlock(&glock);
/* bpf_obj_drop(n) is missing here */
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 7250bb76d18a..14fb01437fb8 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -23,7 +23,7 @@ struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
void bpf_key_put(struct bpf_key *key) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
-struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) __ksym;
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
void bpf_task_release(struct task_struct *p) __ksym;
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
@@ -159,13 +159,8 @@ int task_acquire(void *ctx)
goto out;
/* acquire a reference which can be used outside rcu read lock region */
- gparent = bpf_task_acquire_not_zero(gparent);
+ gparent = bpf_task_acquire(gparent);
if (!gparent)
- /* Until we resolve the issues with using task->rcu_users, we
- * expect bpf_task_acquire_not_zero() to return a NULL task.
- * See the comment at the definition of
- * bpf_task_acquire_not_zero() for more details.
- */
goto out;
(void)bpf_task_storage_get(&map_a, gparent, 0, 0);
@@ -179,8 +174,6 @@ SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int miss_lock(void *ctx)
{
struct task_struct *task;
- struct css_set *cgroups;
- struct cgroup *dfl_cgrp;
/* missing bpf_rcu_read_lock() */
task = bpf_get_current_task_btf();
@@ -195,8 +188,6 @@ SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int miss_unlock(void *ctx)
{
struct task_struct *task;
- struct css_set *cgroups;
- struct cgroup *dfl_cgrp;
/* missing bpf_rcu_read_unlock() */
task = bpf_get_current_task_btf();
diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
index a47bb0120719..76556e0b42b2 100644
--- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
+++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
@@ -23,7 +23,6 @@ SEC("raw_tp/sys_enter")
int BPF_PROG(read_bpf_task_storage_busy)
{
int *value;
- int key;
if (!CONFIG_PREEMPT)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c
index 3d1ae8b3402f..59748c95471a 100644
--- a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c
+++ b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c
@@ -17,8 +17,6 @@ SEC("cgroup/recvmsg4")
int recvmsg4_prog(struct bpf_sock_addr *ctx)
{
struct bpf_sock *sk;
- __u32 user_ip4;
- __u16 user_port;
sk = ctx->sk;
if (!sk)
diff --git a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c
index 27dfb21b21b4..d9a4016596d5 100644
--- a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c
+++ b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c
@@ -20,8 +20,6 @@ SEC("cgroup/recvmsg6")
int recvmsg6_prog(struct bpf_sock_addr *ctx)
{
struct bpf_sock *sk;
- __u32 user_ip4;
- __u16 user_port;
sk = ctx->sk;
if (!sk)
diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
index ea75a44cb7fc..351e79aef2fa 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
@@ -21,8 +21,6 @@
SEC("cgroup/sendmsg4")
int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
{
- int prio;
-
if (ctx->type != SOCK_DGRAM)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
index e2468a6d01a5..0660f29dca95 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
@@ -1,6 +1,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
@@ -40,6 +41,9 @@ int bpf_prog2(struct __sk_buff *skb)
__u8 *d = data;
__u8 sk, map;
+ __sink(lport);
+ __sink(rport);
+
if (data + 8 > data_end)
return SK_DROP;
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index e562be6356f3..e02cfd380746 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -391,7 +391,6 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
struct strobe_map_raw map;
void *location;
uint64_t len;
- int i;
descr->tag_len = 0; /* presume no tag is set */
descr->cnt = -1; /* presume no value is set */
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
index 7fab39a3bb12..99c8d1d8a187 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
@@ -20,6 +21,8 @@ int subprog_tail2(struct __sk_buff *skb)
else
bpf_tail_call_static(skb, &jmp_table, 1);
+ __sink(arr[sizeof(arr) - 1]);
+
return skb->len;
}
@@ -30,6 +33,8 @@ int subprog_tail(struct __sk_buff *skb)
bpf_tail_call_static(skb, &jmp_table, 0);
+ __sink(arr[sizeof(arr) - 1]);
+
return skb->len * 2;
}
@@ -38,6 +43,8 @@ int classifier_0(struct __sk_buff *skb)
{
volatile char arr[128] = {};
+ __sink(arr[sizeof(arr) - 1]);
+
return subprog_tail2(skb);
}
@@ -46,6 +53,8 @@ int classifier_1(struct __sk_buff *skb)
{
volatile char arr[128] = {};
+ __sink(arr[sizeof(arr) - 1]);
+
return skb->len * 3;
}
@@ -54,6 +63,8 @@ int entry(struct __sk_buff *skb)
{
volatile char arr[128] = {};
+ __sink(arr[sizeof(arr) - 1]);
+
return subprog_tail(skb);
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c
index 41ce83da78e8..4a9f63bea66c 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
#define __unused __attribute__((unused))
@@ -36,6 +37,8 @@ int entry(struct __sk_buff *skb)
/* Have data on stack which size is not a multiple of 8 */
volatile char arr[1] = {};
+ __sink(arr[0]);
+
return subprog_tail(skb);
}
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
index 4c2a4b0e3a25..41f2d44f49cb 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
@@ -21,9 +21,10 @@ struct hash_map {
} __tasks_kfunc_map SEC(".maps");
struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
-struct task_struct *bpf_task_kptr_get(struct task_struct **pp) __ksym;
void bpf_task_release(struct task_struct *p) __ksym;
struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p)
{
@@ -60,6 +61,9 @@ static inline int tasks_kfunc_map_insert(struct task_struct *p)
}
acquired = bpf_task_acquire(p);
+ if (!acquired)
+ return -ENOENT;
+
old = bpf_kptr_xchg(&v->task, acquired);
if (old) {
bpf_task_release(old);
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
index f19d54eda4f1..dcdea3127086 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
@@ -40,6 +40,9 @@ int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_f
/* Can't invoke bpf_task_acquire() on an untrusted pointer. */
acquired = bpf_task_acquire(v->task);
+ if (!acquired)
+ return 0;
+
bpf_task_release(acquired);
return 0;
@@ -53,38 +56,49 @@ int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags)
/* Can't invoke bpf_task_acquire() on a random frame pointer. */
acquired = bpf_task_acquire((struct task_struct *)&stack_task);
+ if (!acquired)
+ return 0;
+
bpf_task_release(acquired);
return 0;
}
SEC("kretprobe/free_task")
-__failure __msg("reg type unsupported for arg#0 function")
+__failure __msg("calling kernel function bpf_task_acquire is not allowed")
int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
+ /* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
acquired = bpf_task_acquire(task);
- /* Can't release a bpf_task_acquire()'d task without a NULL check. */
+ if (!acquired)
+ return 0;
bpf_task_release(acquired);
return 0;
}
-SEC("tp_btf/task_newtask")
-__failure __msg("R1 must be referenced or trusted")
-int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags)
+SEC("kretprobe/free_task")
+__failure __msg("calling kernel function bpf_task_acquire is not allowed")
+int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe_rcu, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
- /* Can't invoke bpf_task_acquire() on a trusted pointer obtained from walking a struct. */
- acquired = bpf_task_acquire(task->group_leader);
- bpf_task_release(acquired);
+ bpf_rcu_read_lock();
+ if (!task) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+ /* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
+ acquired = bpf_task_acquire(task);
+ if (acquired)
+ bpf_task_release(acquired);
+ bpf_rcu_read_unlock();
return 0;
}
-
SEC("tp_btf/task_newtask")
__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags)
@@ -109,57 +123,7 @@ int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_
acquired = bpf_task_acquire(task);
/* Acquired task is never released. */
-
- return 0;
-}
-
-SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 expected pointer to map value")
-int BPF_PROG(task_kfunc_get_non_kptr_param, struct task_struct *task, u64 clone_flags)
-{
- struct task_struct *kptr;
-
- /* Cannot use bpf_task_kptr_get() on a non-kptr, even on a valid task. */
- kptr = bpf_task_kptr_get(&task);
- if (!kptr)
- return 0;
-
- bpf_task_release(kptr);
-
- return 0;
-}
-
-SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 expected pointer to map value")
-int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clone_flags)
-{
- struct task_struct *kptr, *acquired;
-
- acquired = bpf_task_acquire(task);
-
- /* Cannot use bpf_task_kptr_get() on a non-kptr, even if it was acquired. */
- kptr = bpf_task_kptr_get(&acquired);
- bpf_task_release(acquired);
- if (!kptr)
- return 0;
-
- bpf_task_release(kptr);
-
- return 0;
-}
-
-SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 expected pointer to map value")
-int BPF_PROG(task_kfunc_get_null, struct task_struct *task, u64 clone_flags)
-{
- struct task_struct *kptr;
-
- /* Cannot use bpf_task_kptr_get() on a NULL pointer. */
- kptr = bpf_task_kptr_get(NULL);
- if (!kptr)
- return 0;
-
- bpf_task_release(kptr);
+ __sink(acquired);
return 0;
}
@@ -185,27 +149,20 @@ int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_fla
}
SEC("tp_btf/task_newtask")
-__failure __msg("Unreleased reference")
-int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flags)
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_acquire_release_no_null_check, struct task_struct *task, u64 clone_flags)
{
- struct task_struct *kptr;
- struct __tasks_kfunc_map_value *v;
-
- v = insert_lookup_task(task);
- if (!v)
- return 0;
-
- kptr = bpf_task_kptr_get(&v->task);
- if (!kptr)
- return 0;
+ struct task_struct *acquired;
- /* Kptr acquired above is never released. */
+ acquired = bpf_task_acquire(task);
+ /* Can't invoke bpf_task_release() on an acquired task without a NULL check. */
+ bpf_task_release(acquired);
return 0;
}
SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags)
{
struct __tasks_kfunc_map_value *v;
@@ -233,7 +190,7 @@ int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags)
}
SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
{
struct __tasks_kfunc_map_value local, *v;
@@ -255,12 +212,13 @@ int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
return -ENOENT;
acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return -EEXIST;
old = bpf_kptr_xchg(&v->task, acquired);
/* old cannot be passed to bpf_task_release() without a NULL check. */
bpf_task_release(old);
- bpf_task_release(old);
return 0;
}
@@ -276,7 +234,7 @@ int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_
}
SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
@@ -297,6 +255,72 @@ int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task)
	/* The argument of the lsm task_free hook is untrusted. */
acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return 0;
+
bpf_task_release(acquired);
return 0;
}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("access beyond the end of member comm")
+int BPF_PROG(task_access_comm1, struct task_struct *task, u64 clone_flags)
+{
+ bpf_strncmp(task->comm, 17, "foo");
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("access beyond the end of member comm")
+int BPF_PROG(task_access_comm2, struct task_struct *task, u64 clone_flags)
+{
+ bpf_strncmp(task->comm + 1, 16, "foo");
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("write into memory")
+int BPF_PROG(task_access_comm3, struct task_struct *task, u64 clone_flags)
+{
+ bpf_probe_read_kernel(task->comm, 16, task->comm);
+ return 0;
+}
+
+SEC("fentry/__set_task_comm")
+__failure __msg("R1 type=ptr_ expected")
+int BPF_PROG(task_access_comm4, struct task_struct *task, const char *buf, bool exec)
+{
+ /*
+ * task->comm is a legacy ptr_to_btf_id. The verifier cannot guarantee
+ * its safety. Hence it cannot be accessed with normal load insns.
+ */
+ bpf_strncmp(task->comm, 16, "foo");
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R1 must be referenced or trusted")
+int BPF_PROG(task_kfunc_release_in_map, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *local;
+ struct __tasks_kfunc_map_value *v;
+
+ if (tasks_kfunc_map_insert(task))
+ return 0;
+
+ v = tasks_kfunc_map_value_lookup(task);
+ if (!v)
+ return 0;
+
+ bpf_rcu_read_lock();
+ local = v->task;
+ if (!local) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+ /* Can't release a kptr that's still stored in a map. */
+ bpf_task_release(local);
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
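
The failure cases above all follow from bpf_task_acquire() now being
KF_RET_NULL: its result must be NULL-checked before it may be released or
dereferenced, and it may only be called from trusted contexts. A minimal
sketch of the pattern these tests enforce, assuming a tp_btf/task_newtask
attach point:

SEC("tp_btf/task_newtask")
int BPF_PROG(task_acquire_release_sketch, struct task_struct *task, u64 clone_flags)
{
	struct task_struct *acquired;

	/* bpf_task_acquire() may fail and return NULL, so the result has
	 * to be NULL-checked before it is released or dereferenced.
	 */
	acquired = bpf_task_acquire(task);
	if (!acquired)
		return 0;

	bpf_task_release(acquired);
	return 0;
}
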
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
index 9f359cfd29e7..b09371bba204 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
@@ -17,6 +17,10 @@ int err, pid;
* TP_PROTO(struct task_struct *p, u64 clone_flags)
*/
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
+void invalid_kfunc(void) __ksym __weak;
+void bpf_testmod_test_mod_kfunc(int i) __ksym __weak;
+
static bool is_test_kfunc_task(void)
{
int cur_pid = bpf_get_current_pid_tgid() >> 32;
@@ -26,10 +30,27 @@ static bool is_test_kfunc_task(void)
static int test_acquire_release(struct task_struct *task)
{
- struct task_struct *acquired;
+ struct task_struct *acquired = NULL;
+
+ if (!bpf_ksym_exists(bpf_task_acquire)) {
+ err = 3;
+ return 0;
+ }
+ if (!bpf_ksym_exists(bpf_testmod_test_mod_kfunc)) {
+ err = 4;
+ return 0;
+ }
+ if (bpf_ksym_exists(invalid_kfunc)) {
+ /* the verifier's dead code elimination should remove this */
+ err = 5;
+ asm volatile ("goto -1"); /* for (;;); */
+ }
acquired = bpf_task_acquire(task);
- bpf_task_release(acquired);
+ if (acquired)
+ bpf_task_release(acquired);
+ else
+ err = 6;
return 0;
}
@@ -101,7 +122,7 @@ int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags)
}
SEC("tp_btf/task_newtask")
-int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags)
+int BPF_PROG(test_task_map_acquire_release, struct task_struct *task, u64 clone_flags)
{
struct task_struct *kptr;
struct __tasks_kfunc_map_value *v;
@@ -122,18 +143,18 @@ int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags)
return 0;
}
- kptr = bpf_task_kptr_get(&v->task);
- if (kptr) {
- /* Until we resolve the issues with using task->rcu_users, we
- * expect bpf_task_kptr_get() to return a NULL task. See the
- * comment at the definition of bpf_task_acquire_not_zero() for
- * more details.
- */
- bpf_task_release(kptr);
+ bpf_rcu_read_lock();
+ kptr = v->task;
+ if (!kptr) {
err = 3;
- return 0;
+ } else {
+ kptr = bpf_task_acquire(kptr);
+ if (!kptr)
+ err = 4;
+ else
+ bpf_task_release(kptr);
}
-
+ bpf_rcu_read_unlock();
return 0;
}
@@ -148,7 +169,10 @@ int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 cl
current = bpf_get_current_task_btf();
acquired = bpf_task_acquire(current);
- bpf_task_release(acquired);
+ if (acquired)
+ bpf_task_release(acquired);
+ else
+ err = 1;
return 0;
}
@@ -171,8 +195,6 @@ static void lookup_compare_pid(const struct task_struct *p)
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags)
{
- struct task_struct *acquired;
-
if (!is_test_kfunc_task())
return 0;
@@ -183,8 +205,6 @@ int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags)
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_from_pid_current, struct task_struct *task, u64 clone_flags)
{
- struct task_struct *current, *acquired;
-
if (!is_test_kfunc_task())
return 0;
@@ -208,11 +228,13 @@ static int is_pid_lookup_valid(s32 pid)
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_flags)
{
- struct task_struct *acquired;
-
if (!is_test_kfunc_task())
return 0;
+ bpf_strncmp(task->comm, 12, "foo");
+ bpf_strncmp(task->comm, 16, "foo");
+ bpf_strncmp(&task->comm[8], 4, "foo");
+
if (is_pid_lookup_valid(-1)) {
err = 1;
return 0;
@@ -225,3 +247,19 @@ int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_fla
return 0;
}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+	/* task->group_leader is listed as a trusted, non-NULL field of struct task_struct. */
+ acquired = bpf_task_acquire(task->group_leader);
+ if (acquired)
+ bpf_task_release(acquired);
+ else
+ err = 1;
+
+ return 0;
+}
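
The __ksym __weak declarations above let the object load even when a kfunc
is absent; bpf_ksym_exists() then gates the call so the verifier prunes the
unreachable branch. A condensed sketch of that guard, assuming the
bpf_task_acquire() kfunc may be missing at load time:

struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;

SEC("tp_btf/task_newtask")
int BPF_PROG(weak_kfunc_guard_sketch, struct task_struct *task, u64 clone_flags)
{
	struct task_struct *acquired;

	/* When the kfunc is missing, this branch is known-taken and the
	 * call below is removed as dead code before verification.
	 */
	if (!bpf_ksym_exists(bpf_task_acquire))
		return 0;

	acquired = bpf_task_acquire(task);
	if (acquired)
		bpf_task_release(acquired);
	return 0;
}
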
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
new file mode 100644
index 000000000000..b93a0ed33057
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int ca1_cnt = 0;
+int ca2_cnt = 0;
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+ return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/ca_update_1_init")
+void BPF_PROG(ca_update_1_init, struct sock *sk)
+{
+ ca1_cnt++;
+}
+
+SEC("struct_ops/ca_update_2_init")
+void BPF_PROG(ca_update_2_init, struct sock *sk)
+{
+ ca2_cnt++;
+}
+
+SEC("struct_ops/ca_update_cong_control")
+void BPF_PROG(ca_update_cong_control, struct sock *sk,
+ const struct rate_sample *rs)
+{
+}
+
+SEC("struct_ops/ca_update_ssthresh")
+__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/ca_update_undo_cwnd")
+__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_update_1 = {
+ .init = (void *)ca_update_1_init,
+ .cong_control = (void *)ca_update_cong_control,
+ .ssthresh = (void *)ca_update_ssthresh,
+ .undo_cwnd = (void *)ca_update_undo_cwnd,
+ .name = "tcp_ca_update",
+};
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_update_2 = {
+ .init = (void *)ca_update_2_init,
+ .cong_control = (void *)ca_update_cong_control,
+ .ssthresh = (void *)ca_update_ssthresh,
+ .undo_cwnd = (void *)ca_update_undo_cwnd,
+ .name = "tcp_ca_update",
+};
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_wrong = {
+ .cong_control = (void *)ca_update_cong_control,
+ .ssthresh = (void *)ca_update_ssthresh,
+ .undo_cwnd = (void *)ca_update_undo_cwnd,
+ .name = "tcp_ca_wrong",
+};
+
+SEC(".struct_ops")
+struct tcp_congestion_ops ca_no_link = {
+ .cong_control = (void *)ca_update_cong_control,
+ .ssthresh = (void *)ca_update_ssthresh,
+ .undo_cwnd = (void *)ca_update_undo_cwnd,
+ .name = "tcp_ca_no_link",
+};
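
Maps placed in SEC(".struct_ops.link") are meant to be registered through a
BPF link, which is what allows ca_update_1 to be swapped for ca_update_2 on
a live link, while ca_no_link in plain SEC(".struct_ops") cannot be used
that way. A hedged user-space sketch using the libbpf link API, with a
hypothetical generated skeleton name:

#include <bpf/libbpf.h>
#include "tcp_ca_update.skel.h" /* hypothetical skeleton header */

int switch_ca(void)
{
	struct tcp_ca_update *skel;
	struct bpf_link *link;
	int err = 0;

	skel = tcp_ca_update__open_and_load();
	if (!skel)
		return -1;

	/* Register ca_update_1 through a link. */
	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
	if (link) {
		/* Atomically replace it with ca_update_2 on the same link. */
		err = bpf_link__update_map(link, skel->maps.ca_update_2);
		bpf_link__destroy(link);
	}

	tcp_ca_update__destroy(skel);
	return err;
}
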
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
index 43447704cf0e..0724a79cec78 100644
--- a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
@@ -16,6 +16,16 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk)
return (struct tcp_sock *)sk;
}
+static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
+{
+ return tp->sacked_out + tp->lost_out;
+}
+
+static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
+{
+ return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
+}
+
SEC("struct_ops/write_sk_pacing_init")
void BPF_PROG(write_sk_pacing_init, struct sock *sk)
{
@@ -31,11 +41,12 @@ SEC("struct_ops/write_sk_pacing_cong_control")
void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
const struct rate_sample *rs)
{
- const struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
unsigned long rate =
((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) /
(tp->srtt_us ?: 1U << 3);
sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate);
+ tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ?: 1;
}
SEC("struct_ops/write_sk_pacing_ssthresh")
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 9fc603c9d673..77ad8adf68da 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -75,7 +75,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
struct bpf_sock_tuple bpf_tuple;
struct nf_conn *ct;
- int err;
__builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
index f45a7095de7a..f41c81212ee9 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
@@ -455,7 +455,6 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, struct bpf_dynptr *dynpt
static ret_t skip_next_hops(__u64 *offset, int n)
{
- __u32 res;
switch (n) {
case 1:
*offset += sizeof(struct in_addr);
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
index ab1e647aeb31..b86fdda2a6ea 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
@@ -42,7 +42,6 @@ int test_core_bitfields(void *ctx)
{
struct core_reloc_bitfields *in = (void *)&data.in;
struct core_reloc_bitfields_output *out = (void *)&data.out;
- uint64_t res;
out->ub1 = BPF_CORE_READ_BITFIELD_PROBED(in, ub1);
out->ub2 = BPF_CORE_READ_BITFIELD_PROBED(in, ub2);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c
index 23970a20b324..b85fc8c423ba 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func1.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func1.c
@@ -18,6 +18,8 @@ int f1(struct __sk_buff *skb)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return f0(0, skb) + skb->len;
}
@@ -34,6 +36,8 @@ int f3(int val, struct __sk_buff *skb, int var)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return skb->ifindex * val * var;
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func2.c b/tools/testing/selftests/bpf/progs/test_global_func2.c
index 3dce97fb52a4..2beab9c3b68a 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func2.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func2.c
@@ -18,6 +18,8 @@ int f1(struct __sk_buff *skb)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return f0(0, skb) + skb->len;
}
@@ -34,6 +36,8 @@ int f3(int val, struct __sk_buff *skb, int var)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return skb->ifindex * val * var;
}
diff --git a/tools/testing/selftests/bpf/progs/test_hash_large_key.c b/tools/testing/selftests/bpf/progs/test_hash_large_key.c
index 473a22794a62..8b438128f46b 100644
--- a/tools/testing/selftests/bpf/progs/test_hash_large_key.c
+++ b/tools/testing/selftests/bpf/progs/test_hash_large_key.c
@@ -28,7 +28,7 @@ struct bigelement {
SEC("raw_tracepoint/sys_enter")
int bpf_hash_large_key_test(void *ctx)
{
- int zero = 0, err = 1, value = 42;
+ int zero = 0, value = 42;
struct bigelement *key;
key = bpf_map_lookup_elem(&key_map, &zero);
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
index a72a5bf3812a..27109b877714 100644
--- a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
@@ -35,7 +35,6 @@ SEC("raw_tp/sys_enter")
int handler2(const void *ctx)
{
int *active;
- __u32 cpu;
active = bpf_this_cpu_ptr(&bpf_prog_active);
write_active(active);
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
index 5f8379aadb29..d00268c91e19 100644
--- a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
@@ -20,6 +20,8 @@ __u64 out__non_existent_typed = -1;
/* test existing weak symbols can be resolved. */
extern const struct rq runqueues __ksym __weak; /* typed */
extern const void bpf_prog_active __ksym __weak; /* typeless */
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
+void bpf_testmod_test_mod_kfunc(int i) __ksym __weak;
/* non-existent weak symbols. */
@@ -29,6 +31,7 @@ extern const void bpf_link_fops1 __ksym __weak;
/* typed symbols, default to zero. */
extern const int bpf_link_fops2 __ksym __weak;
+void invalid_kfunc(void) __ksym __weak;
SEC("raw_tp/sys_enter")
int pass_handler(const void *ctx)
@@ -37,7 +40,7 @@ int pass_handler(const void *ctx)
/* tests existing symbols. */
rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
- if (rq)
+ if (rq && bpf_ksym_exists(&runqueues))
out__existing_typed = rq->cpu;
out__existing_typeless = (__u64)&bpf_prog_active;
@@ -50,6 +53,18 @@ int pass_handler(const void *ctx)
if (&bpf_link_fops2) /* can't happen */
out__non_existent_typed = (__u64)bpf_per_cpu_ptr(&bpf_link_fops2, 0);
+ if (!bpf_ksym_exists(bpf_task_acquire))
+ /* dead code won't be seen by the verifier */
+ bpf_task_acquire(0);
+
+ if (!bpf_ksym_exists(bpf_testmod_test_mod_kfunc))
+ /* dead code won't be seen by the verifier */
+ bpf_testmod_test_mod_kfunc(0);
+
+ if (bpf_ksym_exists(invalid_kfunc))
+ /* dead code won't be seen by the verifier */
+ invalid_kfunc();
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_legacy_printk.c b/tools/testing/selftests/bpf/progs/test_legacy_printk.c
index 64c2d9ced529..42718cd8e6a4 100644
--- a/tools/testing/selftests/bpf/progs/test_legacy_printk.c
+++ b/tools/testing/selftests/bpf/progs/test_legacy_printk.c
@@ -56,7 +56,7 @@ int handle_legacy(void *ctx)
SEC("tp/raw_syscalls/sys_enter")
int handle_modern(void *ctx)
{
- int zero = 0, cur_pid;
+ int cur_pid;
cur_pid = bpf_get_current_pid_tgid() >> 32;
if (cur_pid != my_pid_var)
diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c
index acf073db9e8b..1c02511b73cd 100644
--- a/tools/testing/selftests/bpf/progs/test_map_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_map_lock.c
@@ -33,7 +33,7 @@ struct {
SEC("cgroup/skb")
int bpf_map_lock_test(struct __sk_buff *skb)
{
- struct hmap_elem zero = {}, *val;
+ struct hmap_elem *val;
int rnd = bpf_get_prandom_u32();
int key = 0, err = 1, i;
struct array_elem *q;
diff --git a/tools/testing/selftests/bpf/progs/test_map_ops.c b/tools/testing/selftests/bpf/progs/test_map_ops.c
new file mode 100644
index 000000000000..b53b46a090c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_ops.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK);
+ __uint(max_entries, 1);
+ __type(value, int);
+} stack_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} array_map SEC(".maps");
+
+const volatile pid_t pid;
+long err = 0;
+
+static u64 callback(u64 map, u64 key, u64 val, u64 ctx, u64 flags)
+{
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getpid")
+int map_update(void *ctx)
+{
+ const int key = 0;
+ const int val = 1;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_update_elem(&hash_map, &key, &val, BPF_NOEXIST);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getppid")
+int map_delete(void *ctx)
+{
+ const int key = 0;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_delete_elem(&hash_map, &key);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getuid")
+int map_push(void *ctx)
+{
+ const int val = 1;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_push_elem(&stack_map, &val, 0);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_geteuid")
+int map_pop(void *ctx)
+{
+ int val;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_pop_elem(&stack_map, &val);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getgid")
+int map_peek(void *ctx)
+{
+ int val;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_peek_elem(&stack_map, &val);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_gettid")
+int map_for_each_pass(void *ctx)
+{
+ const int key = 0;
+ const int val = 1;
+ const u64 flags = 0;
+ int callback_ctx;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ bpf_map_update_elem(&array_map, &key, &val, flags);
+
+ err = bpf_for_each_map_elem(&array_map, callback, &callback_ctx, flags);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int map_for_each_fail(void *ctx)
+{
+ const int key = 0;
+ const int val = 1;
+ const u64 flags = BPF_NOEXIST;
+ int callback_ctx;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ bpf_map_update_elem(&array_map, &key, &val, flags);
+
+	/* calling for_each with non-zero flags will return an error */
+ err = bpf_for_each_map_elem(&array_map, callback, &callback_ctx, flags);
+
+ return 0;
+}
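
map_for_each_fail relies on bpf_for_each_map_elem() rejecting any non-zero
flags value, while on success the helper returns the number of elements it
visited. A short sketch of the typed callback form the helper also accepts,
with illustrative names:

static __u64 check_elem(struct bpf_map *map, int *key, int *val, void *ctx)
{
	/* A non-zero return value stops the iteration early. */
	return 0;
}

/* From a program: ret < 0 on error, otherwise the elements visited. */
ret = bpf_for_each_map_elem(&array_map, check_elem, NULL, 0);
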
diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c
index ded71b3ff6b4..2850ae788a91 100644
--- a/tools/testing/selftests/bpf/progs/test_obj_id.c
+++ b/tools/testing/selftests/bpf/progs/test_obj_id.c
@@ -4,6 +4,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -19,6 +20,7 @@ int test_obj_id(void *ctx)
__u64 *value;
value = bpf_map_lookup_elem(&test_map_id, &key);
+ __sink(value);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c
index 79bab9b50e9e..d9b2ba7ac340 100644
--- a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c
+++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c
@@ -87,7 +87,6 @@ int xdp_ingress_v6(struct xdp_md *xdp)
__u8 tcp_hdr_opt_len = 0;
struct tcphdr *tcp_hdr;
__u64 tcp_offset = 0;
- __u32 off;
int err;
tcp_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c
index d3b319722e30..dc6e43bc6a62 100644
--- a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c
@@ -30,7 +30,7 @@ __u32 server_id;
static int parse_hdr_opt(struct bpf_dynptr *ptr, __u32 *off, __u8 *hdr_bytes_remaining,
__u32 *server_id)
{
- __u8 *tcp_opt, kind, hdr_len;
+ __u8 kind, hdr_len;
__u8 buffer[sizeof(kind) + sizeof(hdr_len) + sizeof(*server_id)];
__u8 *data;
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index 5cd7c096f62d..bce7173152c6 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -13,6 +13,7 @@
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
/* llvm will optimize both subprograms into exactly the same BPF assembly
*
@@ -51,6 +52,8 @@ int get_skb_len(struct __sk_buff *skb)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return skb->len;
}
@@ -73,6 +76,8 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return skb->ifindex * val * var;
}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index 5bdc0d38efc0..501cefa97633 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -41,7 +41,6 @@ int test_ringbuf(void *ctx)
{
int cur_pid = bpf_get_current_pid_tgid() >> 32;
struct sample *sample;
- int zero = 0;
if (cur_pid != pid)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c
index 2760bf60d05a..21bb7da90ea5 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c
@@ -53,6 +53,7 @@ int test_ringbuf_mem_map_key(void *ctx)
/* test using 'sample' (PTR_TO_MEM | MEM_ALLOC) as map key arg
*/
lookup_val = (int *)bpf_map_lookup_elem(&hash_map, sample);
+ __sink(lookup_val);
/* workaround - memcpy is necessary so that verifier doesn't
* complain with:
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
index e416e0ce12b7..9626baa6779c 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -59,7 +59,6 @@ int test_ringbuf(void *ctx)
int cur_pid = bpf_get_current_pid_tgid() >> 32;
struct sample *sample;
void *rb;
- int zero = 0;
if (cur_pid != pid)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 7d56ed47cd4d..5eb25c6ad75b 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -64,7 +64,7 @@ SEC("sk_reuseport")
int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
{
__u32 linum, index = 0, flags = 0, index_zero = 0;
- __u32 *result_cnt, *linum_value;
+ __u32 *result_cnt;
struct data_check data_check = {};
struct cmd *cmd, cmd_copy;
void *data, *data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 21b19b758c4e..3079244c7f96 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -15,6 +15,7 @@
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
#if defined(IPROUTE2_HAVE_LIBBPF)
/* Use a new-style map definition. */
@@ -57,7 +58,6 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
void *data = (void *)(long)skb->data;
struct bpf_sock_tuple *result;
struct ethhdr *eth;
- __u64 tuple_len;
__u8 proto = 0;
__u64 ihl_len;
@@ -94,6 +94,7 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
return NULL;
*tcp = (proto == IPPROTO_TCP);
+ __sink(ihl_len);
return result;
}
@@ -173,7 +174,6 @@ int bpf_sk_assign_test(struct __sk_buff *skb)
struct bpf_sock_tuple *tuple;
bool ipv4 = false;
bool tcp = false;
- int tuple_len;
int ret = 0;
tuple = get_tuple(skb, &ipv4, &tcp);
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index 6058dcb11b36..71f844b9b902 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -391,7 +391,6 @@ SEC("sk_lookup")
int ctx_narrow_access(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
- int err, family;
__u32 val_u32;
bool v4;
@@ -645,9 +644,7 @@ static __always_inline int select_server_a(struct bpf_sk_lookup *ctx)
SEC("sk_lookup")
int multi_prog_redir1(struct bpf_sk_lookup *ctx)
{
- int ret;
-
- ret = select_server_a(ctx);
+ (void)select_server_a(ctx);
bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
return SK_PASS;
}
@@ -655,9 +652,7 @@ int multi_prog_redir1(struct bpf_sk_lookup *ctx)
SEC("sk_lookup")
int multi_prog_redir2(struct bpf_sk_lookup *ctx)
{
- int ret;
-
- ret = select_server_a(ctx);
+ (void)select_server_a(ctx);
bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
return SK_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index 6ccf6d546074..e9efc3263022 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -110,7 +110,6 @@ int err_modify_sk_pointer(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
- __u32 family;
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
if (sk) {
@@ -125,7 +124,6 @@ int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
- __u32 family;
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
sk += 1;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
index 6dc1f28fc4b6..02e718f06e0f 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
@@ -92,4 +92,20 @@ int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern,
return 0;
}
+SEC("tp_btf/tcp_retransmit_synack")
+int BPF_PROG(tcp_retransmit_synack, struct sock *sk, struct request_sock *req)
+{
+ /* load only test */
+ bpf_sk_storage_get(&sk_stg_map, sk, 0, 0);
+ bpf_sk_storage_get(&sk_stg_map, req->sk, 0, 0);
+ return 0;
+}
+
+SEC("tp_btf/tcp_bad_csum")
+int BPF_PROG(tcp_bad_csum, struct sk_buff *skb)
+{
+ bpf_sk_storage_get(&sk_stg_map, skb->sk, 0, 0);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 9f4b8f9f1181..bbad3c2d9aa5 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -121,7 +121,7 @@ static void tpcpy(struct bpf_tcp_sock *dst,
SEC("cgroup_skb/egress")
int egress_read_sock_fields(struct __sk_buff *skb)
{
- struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
+ struct bpf_spinlock_cnt cli_cnt_init = { .lock = {}, .cnt = 0xeB9F };
struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
struct bpf_tcp_sock *tp, *tp_ret;
struct bpf_sock *sk, *sk_ret;
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 6c85b00f27b2..baf9ebc6d903 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -14,6 +14,7 @@
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
/* Sockmap sample program connects a client and a backend together
* using cgroups.
@@ -111,12 +112,15 @@ int bpf_prog2(struct __sk_buff *skb)
int len, *f, ret, zero = 0;
__u64 flags = 0;
+ __sink(rport);
if (lport == 10000)
ret = 10;
else
ret = 1;
len = (__u32)skb->data_end - (__u32)skb->data;
+ __sink(len);
+
f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
if (f && *f) {
ret = 3;
@@ -180,7 +184,6 @@ int bpf_prog3(struct __sk_buff *skb)
if (err)
return SK_DROP;
bpf_write_pass(skb, 13);
-tls_out:
return ret;
}
@@ -188,8 +191,7 @@ SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
__u32 lport, rport;
- int op, err = 0, index, key, ret;
-
+ int op, err, ret;
op = (int) skops->op;
@@ -228,6 +230,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
break;
}
+ __sink(err);
+
return 0;
}
@@ -321,6 +325,10 @@ int bpf_prog8(struct sk_msg_md *msg)
} else {
return SK_DROP;
}
+
+ __sink(data_end);
+ __sink(data);
+
return SK_PASS;
}
SEC("sk_msg4")
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index 5bd10409285b..b2440a0ff422 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <linux/version.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
struct hmap_elem {
volatile int cnt;
@@ -89,6 +90,8 @@ int bpf_spin_lock_test(struct __sk_buff *skb)
credit = q->credit;
bpf_spin_unlock(&q->lock);
+ __sink(credit);
+
/* spin_lock in cgroup local storage */
cls = bpf_get_local_storage(&cls_map, 0);
bpf_spin_lock(&cls->lock);
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index 728dbd39eff0..47568007b668 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -38,7 +38,7 @@ struct {
__type(value, stack_trace_t);
} stack_amap SEC(".maps");
-/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
char prev_comm[TASK_COMM_LEN];
diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
index 125beec31834..74ec09f040b7 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
@@ -163,9 +163,9 @@ static int skb_get_type(struct __sk_buff *skb)
ip6h = data + sizeof(struct ethhdr);
if (ip6h + 1 > data_end)
return -1;
- if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src))
+ if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_src}}))
ns = SRC_NS;
- else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst))
+ else if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_dst}}))
ns = DST_NS;
inet_proto = ip6h->nexthdr;
trans = ip6h + 1;
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
index 3e32ea375ab4..de15155f2609 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -94,7 +94,7 @@ int tc_dst(struct __sk_buff *skb)
redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src));
break;
case __bpf_constant_htons(ETH_P_IPV6):
- redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src);
+ redirect = is_remote_ep_v6(skb, (struct in6_addr){{ip6_src}});
break;
}
@@ -119,7 +119,7 @@ int tc_src(struct __sk_buff *skb)
redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst));
break;
case __bpf_constant_htons(ETH_P_IPV6):
- redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst);
+ redirect = is_remote_ep_v6(skb, (struct in6_addr){{ip6_dst}});
break;
}
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 3ded05280757..cf7ed8cbb1fe 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -46,8 +46,6 @@ int bpf_testcb(struct bpf_sock_ops *skops)
struct bpf_sock_ops *reuse = skops;
struct tcphdr *thdr;
int window_clamp = 9216;
- int good_call_rv = 0;
- int bad_call_rv = 0;
int save_syn = 1;
int rv = -1;
int v = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c
index 43bd7a20cc50..4cb8bbb6a320 100644
--- a/tools/testing/selftests/bpf/progs/test_tracepoint.c
+++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c
@@ -4,7 +4,7 @@
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
char prev_comm[TASK_COMM_LEN];
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index 95b4aa0928ba..f66af753bbbb 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -52,6 +52,21 @@ struct vxlan_metadata {
__u32 gbp;
};
+struct bpf_fou_encap {
+ __be16 sport;
+ __be16 dport;
+};
+
+enum bpf_fou_encap_type {
+ FOU_BPF_ENCAP_FOU,
+ FOU_BPF_ENCAP_GUE,
+};
+
+int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
+ struct bpf_fou_encap *encap, int type) __ksym;
+int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
+ struct bpf_fou_encap *encap) __ksym;
+
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
@@ -209,7 +224,6 @@ int erspan_get_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
struct erspan_metadata md;
- __u32 index;
int ret;
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
@@ -289,7 +303,6 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
struct erspan_metadata md;
- __u32 index;
int ret;
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
@@ -405,8 +418,6 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb)
int ret;
struct bpf_tunnel_key key;
struct vxlan_metadata md;
- __u32 orig_daddr;
- __u32 index = 0;
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_FLAGS);
@@ -443,9 +454,7 @@ int veth_set_outer_dst(struct __sk_buff *skb)
void *data_end = (void *)(long)skb->data_end;
struct udphdr *udph;
struct iphdr *iph;
- __u32 index = 0;
int ret = 0;
- int shrink;
__s64 csum;
if ((void *)eth + sizeof(*eth) > data_end) {
@@ -756,6 +765,108 @@ int ipip_get_tunnel(struct __sk_buff *skb)
}
SEC("tc")
+int ipip_gue_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key = {};
+ struct bpf_fou_encap encap = {};
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph = data;
+ void *data_end = (void *)(long)skb->data_end;
+ int ret;
+
+ if (data + sizeof(*iph) > data_end) {
+ log_err(1);
+ return TC_ACT_SHOT;
+ }
+
+ key.tunnel_ttl = 64;
+ if (iph->protocol == IPPROTO_ICMP)
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ encap.sport = 0;
+ encap.dport = bpf_htons(5555);
+
+ ret = bpf_skb_set_fou_encap(skb, &encap, FOU_BPF_ENCAP_GUE);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int ipip_fou_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key = {};
+ struct bpf_fou_encap encap = {};
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph = data;
+ void *data_end = (void *)(long)skb->data_end;
+ int ret;
+
+ if (data + sizeof(*iph) > data_end) {
+ log_err(1);
+ return TC_ACT_SHOT;
+ }
+
+ key.tunnel_ttl = 64;
+ if (iph->protocol == IPPROTO_ICMP)
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ encap.sport = 0;
+ encap.dport = bpf_htons(5555);
+
+ ret = bpf_skb_set_fou_encap(skb, &encap, FOU_BPF_ENCAP_FOU);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int ipip_encap_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key = {};
+ struct bpf_fou_encap encap = {};
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_fou_encap(skb, &encap);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ if (bpf_ntohs(encap.dport) != 5555)
+ return TC_ACT_SHOT;
+
+ bpf_printk("%d remote ip 0x%x, sport %d, dport %d\n", ret,
+ key.remote_ipv4, bpf_ntohs(encap.sport),
+ bpf_ntohs(encap.dport));
+ return TC_ACT_OK;
+}
+
+SEC("tc")
int ipip6_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key = {};
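
For context: FOU_BPF_ENCAP_FOU wraps the inner packet directly in UDP
(foo-over-UDP), while FOU_BPF_ENCAP_GUE inserts a GUE header that carries
the inner protocol number. Both setters above steer ICMP toward
172.16.1.100 with UDP destination port 5555, and ipip_encap_get_tunnel
reads the parameters back through bpf_skb_get_fou_encap() to verify them.
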
diff --git a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
index aa6de32b50d1..962f3462066a 100644
--- a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
+++ b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
@@ -18,8 +18,6 @@ int usdt_100_sum;
SEC("usdt//proc/self/exe:test:usdt_100")
int BPF_USDT(usdt_100, int x)
{
- long tmp;
-
if (my_pid != (bpf_get_current_pid_tgid() >> 32))
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
index ac6135d9374c..323a73fb2e8c 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale1.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
@@ -11,7 +11,7 @@ int balancer_ingress(struct __sk_buff *ctx)
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
- int ret = 0, nh_off, i = 0;
+ int nh_off, i = 0;
nh_off = 14;
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
index f90ffcafd1e8..f5318f757084 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
@@ -11,7 +11,7 @@ int balancer_ingress(struct __sk_buff *ctx)
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
- int ret = 0, nh_off, i = 0;
+ int nh_off, i = 0;
nh_off = 14;
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
index ca33a9b711c4..2e06dbb1ad5c 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale3.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
@@ -11,7 +11,7 @@ int balancer_ingress(struct __sk_buff *ctx)
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
- int ret = 0, nh_off, i = 0;
+ int nh_off, i = 0;
nh_off = 32;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index 297c260fc364..81bb38d72ced 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -5,8 +5,6 @@
SEC("xdp")
int _xdp_adjust_tail_grow(struct xdp_md *xdp)
{
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
int data_len = bpf_xdp_get_buff_len(xdp);
int offset = 0;
/* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
index 3379d303f41a..ee48c4963971 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
@@ -45,8 +45,6 @@ SEC("fentry/FUNC")
int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
{
struct meta meta;
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
meta.ifindex = xdp->rxq->dev->ifindex;
meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
index 77a123071940..5baaafed0d2d 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
@@ -4,6 +4,19 @@
#define ETH_ALEN 6
#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
+
+/**
+ * enum frame_mark - magics to distinguish page/packet paths
+ * @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC.
+ * @MARK_IN: frame is being processed by the input XDP prog.
+ * @MARK_SKB: frame did hit the TC ingress hook as an skb.
+ */
+enum frame_mark {
+ MARK_XMIT = 0U,
+ MARK_IN = 0x42,
+ MARK_SKB = 0x45,
+};
+
const volatile int ifindex_out;
const volatile int ifindex_in;
const volatile __u8 expect_dst[ETH_ALEN];
@@ -34,12 +47,12 @@ int xdp_redirect(struct xdp_md *xdp)
if (*metadata != 0x42)
return XDP_ABORTED;
- if (*payload == 0) {
- *payload = 0x42;
+ if (*payload == MARK_XMIT)
pkts_seen_zero++;
- }
- if (bpf_xdp_adjust_meta(xdp, 4))
+ *payload = MARK_IN;
+
+ if (bpf_xdp_adjust_meta(xdp, sizeof(__u64)))
return XDP_ABORTED;
if (retcode > XDP_PASS)
@@ -51,7 +64,7 @@ int xdp_redirect(struct xdp_md *xdp)
return ret;
}
-static bool check_pkt(void *data, void *data_end)
+static bool check_pkt(void *data, void *data_end, const __u32 mark)
{
struct ipv6hdr *iph = data + sizeof(struct ethhdr);
__u8 *payload = data + HDR_SZ;
@@ -59,13 +72,13 @@ static bool check_pkt(void *data, void *data_end)
if (payload + 1 > data_end)
return false;
- if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
+ if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN)
return false;
/* reset the payload so the same packet doesn't get counted twice when
* it cycles back through the kernel path and out the dst veth
*/
- *payload = 0;
+ *payload = mark;
return true;
}
@@ -75,11 +88,11 @@ int xdp_count_pkts(struct xdp_md *xdp)
void *data = (void *)(long)xdp->data;
void *data_end = (void *)(long)xdp->data_end;
- if (check_pkt(data, data_end))
+ if (check_pkt(data, data_end, MARK_XMIT))
pkts_seen_xdp++;
- /* Return XDP_DROP to make sure the data page is recycled, like when it
- * exits a physical NIC. Recycled pages will be counted in the
+	/* Return %XDP_DROP to recycle the data page with %MARK_XMIT, as if
+	 * it had exited a physical NIC. Those pages will be counted in the
* pkts_seen_zero counter above.
*/
return XDP_DROP;
@@ -91,9 +104,12 @@ int tc_count_pkts(struct __sk_buff *skb)
void *data = (void *)(long)skb->data;
void *data_end = (void *)(long)skb->data_end;
- if (check_pkt(data, data_end))
+ if (check_pkt(data, data_end, MARK_SKB))
pkts_seen_tc++;
+	/* Will be either recycled or freed; %MARK_SKB makes sure it won't
+ * hit any of the counters above.
+ */
return 0;
}
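
Taken together, the marks trace a page through its whole round trip:
xdp_redirect stamps each frame with MARK_IN, xdp_count_pkts counts it and
drops it with MARK_XMIT so the recycled page is tallied in pkts_seen_zero
on its next pass through xdp_redirect, and tc_count_pkts rewrites it to
MARK_SKB so a frame that went to the stack as an skb is not later mistaken
for an xmitted page by either counter.
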
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
index 7521a805b506..25ee4a22e48d 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
@@ -82,7 +82,6 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp, struct bpf_dynptr *xd
struct iptnl_info *tnl;
struct ethhdr *new_eth;
struct ethhdr *old_eth;
- __u32 transport_hdr_sz;
struct iphdr *iph;
__u16 *next_iph;
__u16 payload_len;
@@ -165,7 +164,6 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp, struct bpf_dynptr *xd
struct iptnl_info *tnl;
struct ethhdr *new_eth;
struct ethhdr *old_eth;
- __u32 transport_hdr_sz;
struct ipv6hdr *ip6h;
__u16 payload_len;
struct vip vip = {};
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index ba48fcb98ab2..42c8f6ded0e4 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -372,45 +372,6 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
}
static __attribute__ ((noinline))
-bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
-{
- struct eth_hdr *new_eth;
- struct eth_hdr *old_eth;
-
- old_eth = *data;
- new_eth = *data + sizeof(struct ipv6hdr);
- memcpy(new_eth->eth_source, old_eth->eth_source, 6);
- memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
- if (inner_v4)
- new_eth->eth_proto = 8;
- else
- new_eth->eth_proto = 56710;
- if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
- return false;
- *data = (void *)(long)xdp->data;
- *data_end = (void *)(long)xdp->data_end;
- return true;
-}
-
-static __attribute__ ((noinline))
-bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
-{
- struct eth_hdr *new_eth;
- struct eth_hdr *old_eth;
-
- old_eth = *data;
- new_eth = *data + sizeof(struct iphdr);
- memcpy(new_eth->eth_source, old_eth->eth_source, 6);
- memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
- new_eth->eth_proto = 8;
- if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
- return false;
- *data = (void *)(long)xdp->data;
- *data_end = (void *)(long)xdp->data_end;
- return true;
-}
-
-static __attribute__ ((noinline))
int swap_mac_and_send(void *data, void *data_end)
{
unsigned char tmp_mac[6];
@@ -430,7 +391,6 @@ int send_icmp_reply(void *data, void *data_end)
__u16 *next_iph_u16;
__u32 tmp_addr = 0;
struct iphdr *iph;
- __u32 csum1 = 0;
__u32 csum = 0;
__u64 off = 0;
@@ -662,7 +622,6 @@ static int process_l3_headers_v4(struct packet_description *pckt,
void *data_end)
{
struct iphdr *iph;
- __u64 iph_len;
int action;
iph = data + off;
@@ -696,7 +655,6 @@ static int process_packet(void *data, __u64 off, void *data_end,
struct packet_description pckt = { };
struct vip_definition vip = { };
struct lb_stats *data_stats;
- struct eth_hdr *eth = data;
void *lru_map = &lru_cache;
struct vip_meta *vip_info;
__u32 lru_stats_key = 513;
@@ -704,7 +662,6 @@ static int process_packet(void *data, __u64 off, void *data_end,
__u32 stats_key = 512;
struct ctl_value *cval;
__u16 pkt_bytes;
- __u64 iph_len;
__u8 protocol;
__u32 vip_num;
int action;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
index 4ddcb6dfe500..f3ec8086482d 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
@@ -210,19 +210,6 @@ int xdp_prognum2(struct xdp_md *ctx)
}
static __always_inline
-void shift_mac_4bytes_16bit(void *data)
-{
- __u16 *p = data;
-
- p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */
- p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */
- p[5] = p[3];
- p[4] = p[2];
- p[3] = p[1];
- p[2] = p[0];
-}
-
-static __always_inline
void shift_mac_4bytes_32bit(void *data)
{
__u32 *p = data;
diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c
index eb78e6f03129..a9629ac230fd 100644
--- a/tools/testing/selftests/bpf/progs/type_cast.c
+++ b/tools/testing/selftests/bpf/progs/type_cast.c
@@ -63,7 +63,6 @@ SEC("?tp_btf/sys_enter")
int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id)
{
struct task_struct *task, *task_dup;
- long *ptr;
task = bpf_get_current_task_btf();
task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct));
diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c
index 165e3c2dd9a3..4767451b59ac 100644
--- a/tools/testing/selftests/bpf/progs/udp_limit.c
+++ b/tools/testing/selftests/bpf/progs/udp_limit.c
@@ -17,7 +17,6 @@ SEC("cgroup/sock_create")
int sock(struct bpf_sock *ctx)
{
int *sk_storage;
- __u32 key;
if (ctx->type != SOCK_DGRAM)
return 1;
@@ -46,7 +45,6 @@ SEC("cgroup/sock_release")
int sock_release(struct bpf_sock *ctx)
{
int *sk_storage;
- __u32 key;
if (ctx->type != SOCK_DGRAM)
return 1;
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
index 0ade1110613b..dd3bdf672633 100644
--- a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
@@ -162,8 +162,6 @@ SEC("fentry/" SYS_PREFIX "sys_prctl")
int test_user_ringbuf_protocol(void *ctx)
{
long status = 0;
- struct sample *sample = NULL;
- struct bpf_dynptr ptr;
if (!is_test_process())
return 0;
@@ -183,10 +181,6 @@ int test_user_ringbuf_protocol(void *ctx)
SEC("fentry/" SYS_PREFIX "sys_getpgid")
int test_user_ringbuf(void *ctx)
{
- int status = 0;
- struct sample *sample = NULL;
- struct bpf_dynptr ptr;
-
if (!is_test_process())
return 0;
diff --git a/tools/testing/selftests/bpf/progs/verifier_and.c b/tools/testing/selftests/bpf/progs/verifier_and.c
new file mode 100644
index 000000000000..e97e518516b6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_and.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/and.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("invalid and of negative number")
+__failure __msg("R0 max value is outside of the allowed memory range")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void invalid_and_of_negative_number(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= -4; \
+ r1 <<= 2; \
+ r0 += r1; \
+l0_%=: r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid range check")
+__failure __msg("R0 max value is outside of the allowed memory range")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void invalid_range_check(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ r9 = 1; \
+ w1 %%= 2; \
+ w1 += 1; \
+ w9 &= w1; \
+ w9 += 1; \
+ w9 >>= 1; \
+ w3 = 1; \
+ w3 -= w9; \
+ w3 *= 0x10000000; \
+ r0 += r3; \
+ *(u32*)(r0 + 0) = r3; \
+l0_%=: r0 = r0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check known subreg with unknown reg")
+__success __failure_unpriv __msg_unpriv("R1 !read_ok")
+__retval(0)
+__naked void known_subreg_with_unknown_reg(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 <<= 32; \
+ r0 += 1; \
+ r0 &= 0xFFFF1234; \
+	/* Upper bits are unknown, but the AND above masks out the 1, zeroing lower bits */\
+ if w0 < 1 goto l0_%=; \
+ r1 = *(u32*)(r1 + 512); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c
new file mode 100644
index 000000000000..95d7ecc12963
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/array_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+ __uint(map_flags, BPF_F_RDONLY_PROG);
+} map_array_ro SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+ __uint(map_flags, BPF_F_WRONLY_PROG);
+} map_array_wo SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("valid map access into an array with a constant")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void an_array_with_a_constant_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid map access into an array with a register")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void an_array_with_a_register_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 4; \
+ r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid map access into an array with a variable")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void an_array_with_a_variable_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if r1 >= %[max_entries] goto l0_%=; \
+ r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid map access into an array with a signed variable")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void array_with_a_signed_variable(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if w1 s> 0xffffffff goto l1_%=; \
+ w1 = 0; \
+l1_%=: w2 = %[max_entries]; \
+ if r2 s> r1 goto l2_%=; \
+ w1 = 0; \
+l2_%=: w1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a constant")
+__failure __msg("invalid access to map value, value_size=48 off=48 size=8")
+__failure_unpriv
+__naked void an_array_with_a_constant_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + %[__imm_0]) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, (MAX_ENTRIES + 1) << 2),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a register")
+__failure __msg("R0 min value is outside of the allowed memory range")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void an_array_with_a_register_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[__imm_0]; \
+ r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, MAX_ENTRIES + 1),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a variable")
+__failure
+__msg("R0 unbounded memory access, make sure to bounds check any such access")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void an_array_with_a_variable_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with no floor check")
+__failure __msg("R0 unbounded memory access")
+__failure_unpriv __msg_unpriv("R0 leaks addr")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void array_with_no_floor_check(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r0 + 0); \
+ w2 = %[max_entries]; \
+ if r2 s> r1 goto l1_%=; \
+ w1 = 0; \
+l1_%=: w1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a invalid max check")
+__failure __msg("invalid access to map value, value_size=48 off=44 size=8")
+__failure_unpriv __msg_unpriv("R0 leaks addr")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void with_a_invalid_max_check_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ w2 = %[__imm_0]; \
+ if r2 > r1 goto l1_%=; \
+ w1 = 0; \
+l1_%=: w1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, MAX_ENTRIES + 1),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a invalid max check")
+__failure __msg("R0 pointer += pointer")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void with_a_invalid_max_check_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r8 = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 += r8; \
+ r0 = *(u32*)(r0 + %[test_val_foo]); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid read map access into a read-only array 1")
+__success __success_unpriv __retval(28)
+__naked void a_read_only_array_1_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 = *(u32*)(r0 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("valid read map access into a read-only array 2")
+__success __retval(65507)
+__naked void a_read_only_array_2_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 &= 0xffff; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid write map access into a read-only array 1")
+__failure __msg("write into map forbidden")
+__failure_unpriv
+__naked void a_read_only_array_1_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("invalid write map access into a read-only array 2")
+__failure __msg("write into map forbidden")
+__naked void a_read_only_array_2_2(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = r0; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_skb_load_bytes),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
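+/* Writes through helpers count as writes too: bpf_skb_load_bytes()
+ * stores into the buffer passed in r3, so pointing r3 at a
+ * BPF_F_RDONLY_PROG map value fails with the same "write into map
+ * forbidden" message as a direct store.
+ */
+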
+SEC("socket")
+__description("valid write map access into a write-only array 1")
+__success __success_unpriv __retval(1)
+__naked void a_write_only_array_1_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_wo] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_wo)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("valid write map access into a write-only array 2")
+__success __retval(0)
+__naked void a_write_only_array_2_1(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_wo] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = r0; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_skb_load_bytes),
+ __imm_addr(map_array_wo)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid read map access into a write-only array 1")
+__failure __msg("read from map forbidden")
+__failure_unpriv
+__naked void a_write_only_array_1_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_wo] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 = *(u64*)(r0 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_wo)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("invalid read map access into a write-only array 2")
+__failure __msg("read from map forbidden")
+__naked void a_write_only_array_2_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_wo] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_wo)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
new file mode 100644
index 000000000000..359df865a8f3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/basic_stack.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("stack out of bounds")
+__failure __msg("invalid write to stack")
+__failure_unpriv
+__naked void stack_out_of_bounds(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 + 8) = r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
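+/* r10 is the read-only frame pointer and the stack occupies
+ * [r10 - 512, r10), so a store at r10 + 8 lands above the frame and is
+ * rejected as an invalid stack write.
+ */
+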
+SEC("socket")
+__description("uninitialized stack1")
+__failure __msg("invalid indirect read from stack")
+__failure_unpriv
+__naked void uninitialized_stack1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("uninitialized stack2")
+__failure __msg("invalid read from stack")
+__failure_unpriv
+__naked void uninitialized_stack2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r0 = *(u64*)(r2 - 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("invalid fp arithmetic")
+__failure __msg("R1 subtraction from stack pointer")
+__failure_unpriv
+__naked void invalid_fp_arithmetic(void)
+{
+ /* If this gets ever changed, make sure JITs can deal with it. */
+ asm volatile (" \
+ r0 = 0; \
+ r1 = r10; \
+ r1 -= 8; \
+ *(u64*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("non-invalid fp arithmetic")
+__success __success_unpriv __retval(0)
+__naked void non_invalid_fp_arithmetic(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned read from stack")
+__failure __msg("misaligned stack access")
+__failure_unpriv
+__naked void misaligned_read_from_stack(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r0 = *(u64*)(r2 - 4); \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
new file mode 100644
index 000000000000..c506afbdd936
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/bounds_deduction.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("check deducing bounds from const, 1")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_1(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ if r0 s>= 1 goto l0_%=; \
+l0_%=: r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
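+/* "r0 = 1" makes "if r0 s>= 1" always taken, so the verifier reaches
+ * "r0 -= r1" knowing r0 is the constant 1. Subtracting a pointer (r1,
+ * the context) from a scalar is rejected; unprivileged mode already
+ * refuses the pointer ALU itself.
+ */
+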
+SEC("socket")
+__description("check deducing bounds from const, 2")
+__success __failure_unpriv
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__retval(1)
+__naked void deducing_bounds_from_const_2(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ if r0 s>= 1 goto l0_%=; \
+ exit; \
+l0_%=: if r0 s<= 1 goto l1_%=; \
+ exit; \
+l1_%=: r1 -= r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 3")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_3(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s<= 0 goto l0_%=; \
+l0_%=: r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 4")
+__success __failure_unpriv
+__msg_unpriv("R6 has pointer with unsupported alu operation")
+__retval(0)
+__naked void deducing_bounds_from_const_4(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r0 = 0; \
+ if r0 s<= 0 goto l0_%=; \
+ exit; \
+l0_%=: if r0 s>= 0 goto l1_%=; \
+ exit; \
+l1_%=: r6 -= r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 5")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_5(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s>= 1 goto l0_%=; \
+ r0 -= r1; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 6")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_6(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s>= 0 goto l0_%=; \
+ exit; \
+l0_%=: r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 7")
+__failure __msg("dereference of modified ctx ptr")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void deducing_bounds_from_const_7(void)
+{
+ asm volatile (" \
+ r0 = %[__imm_0]; \
+ if r0 s>= 0 goto l0_%=; \
+l0_%=: r1 -= r0; \
+ r0 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, ~0),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 8")
+__failure __msg("negative offset ctx ptr R1 off=-1 disallowed")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void deducing_bounds_from_const_8(void)
+{
+ asm volatile (" \
+ r0 = %[__imm_0]; \
+ if r0 s>= 0 goto l0_%=; \
+ r1 += r0; \
+l0_%=: r0 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, ~0),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
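+/* Tests 7 and 8 move the context pointer by a known ~0 (-1). A ctx
+ * pointer must be dereferenced at its original offset: the subtraction
+ * (ctx + 1) trips "modified ctx ptr" and the addition (ctx - 1) trips
+ * the negative-offset check.
+ */
+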
+SEC("socket")
+__description("check deducing bounds from const, 9")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_9(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s>= 0 goto l0_%=; \
+l0_%=: r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 10")
+__failure
+__msg("math between ctx pointer and register with unbounded min value is not allowed")
+__failure_unpriv
+__naked void deducing_bounds_from_const_10(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s<= 0 goto l0_%=; \
+l0_%=: /* Marks reg as unknown. */ \
+ r0 = -r0; \
+ r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c
new file mode 100644
index 000000000000..823f727cf210
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c
@@ -0,0 +1,639 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <non_const> == <const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 3 goto l0_%=; \
+ r2 = 2; \
+ if r0 == r2 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
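+/* This file exercises bounds deduction against another register rather
+ * than an immediate. Above, the fall-through of "if r0 < 3" leaves
+ * r0 >= 3, so "if r0 == r2" with r2 = 2 can never be taken; the
+ * verifier must prove that branch dead, since the scalar-minus-pointer
+ * "r0 -= r1" on it would otherwise be rejected.
+ */
+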
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <non_const> == <const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_2(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 > 3 goto l0_%=; \
+ r2 = 4; \
+ if r0 == r2 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <non_const> != <const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_3(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 3 goto l0_%=; \
+ r2 = 2; \
+ if r0 != r2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <non_const> != <const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_4(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 > 3 goto l0_%=; \
+ r2 = 4; \
+ if r0 != r2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <non_const> == <const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_5(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 4 goto l0_%=; \
+ w2 = 3; \
+ if w0 == w2 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <non_const> == <const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_6(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 > 4 goto l0_%=; \
+ w2 = 5; \
+ if w0 == w2 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <non_const> != <const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_7(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 3 goto l0_%=; \
+ w2 = 2; \
+ if w0 != w2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <non_const> != <const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_8(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 > 3 goto l0_%=; \
+ w2 = 4; \
+ if w0 != w2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> > <non_const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_9(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ r2 = 0; \
+ if r2 > r0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> > <non_const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_10(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 > r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> >= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_11(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 4 goto l0_%=; \
+ r2 = 3; \
+ if r2 >= r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> < <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_12(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 > 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 < r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> <= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_13(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 >= 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 <= r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> == <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_14(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 3 goto l0_%=; \
+ r2 = 2; \
+ if r2 == r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> s> <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_15(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 s< 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 s> r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> s>= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_16(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 s< 4 goto l0_%=; \
+ r2 = 3; \
+ if r2 s>= r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> s< <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_17(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 s> 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 s< r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> s<= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_18(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 s> 4 goto l0_%=; \
+ r2 = 5; \
+ if r2 s<= r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> != <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_19(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 3 goto l0_%=; \
+ r2 = 2; \
+ if r2 != r0 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> > <non_const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_20(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ w2 = 0; \
+ if w2 > w0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> > <non_const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_21(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 > w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> >= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_22(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 4 goto l0_%=; \
+ w2 = 3; \
+ if w2 >= w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> < <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_23(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 > 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 < w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> <= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_24(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 >= 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 <= w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> == <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_25(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 4 goto l0_%=; \
+ w2 = 3; \
+ if w2 == w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> s> <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_26(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 s< 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 s> w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> s>= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_27(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 s< 4 goto l0_%=; \
+ w2 = 3; \
+ if w2 s>= w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> s< <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_28(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 s> 4 goto l0_%=; \
+ w2 = 5; \
+ if w2 s< w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> s<= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_29(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 s>= 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 s<= w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> != <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_30(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 3 goto l0_%=; \
+ w2 = 2; \
+ if w2 != w0 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c
new file mode 100644
index 000000000000..4f40144748a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c
@@ -0,0 +1,554 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, positive bounds")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_positive_bounds(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = 2; \
+ if r2 >= r1 goto l0_%=; \
+ if r1 s> 4 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
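+/* Above, falling through "if r2 >= r1" (r2 = 2) gives r1 the unsigned
+ * range [3, U64_MAX], and "if r1 s> 4" only caps the signed maximum at
+ * 4. Large unsigned values are negative when reinterpreted as signed,
+ * so the signed minimum stays unbounded and r1 is unusable as a
+ * map-value offset: hence "unbounded min value".
+ */
+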
+SEC("socket")
+__description("bounds checks mixing signed and unsigned")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void checks_mixing_signed_and_unsigned(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r1 > r2 goto l0_%=; \
+ if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 2")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_2(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r1 > r2 goto l0_%=; \
+ r8 = 0; \
+ r8 += r1; \
+ if r8 s> 1 goto l0_%=; \
+ r0 += r8; \
+ r0 = 0; \
+ *(u8*)(r8 + 0) = r0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 3")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_3(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r1 > r2 goto l0_%=; \
+ r8 = r1; \
+ if r8 s> 1 goto l0_%=; \
+ r0 += r8; \
+ r0 = 0; \
+ *(u8*)(r8 + 0) = r0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 4")
+__success __success_unpriv __retval(0)
+__naked void signed_and_unsigned_variant_4(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = 1; \
+ r1 &= r2; \
+ if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
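+/* Variant 4 is the safe counterpart: "r1 &= r2" with r2 = 1 bounds r1
+ * to [0, 1] in both the signed and unsigned domains, so the signed
+ * check and the pointer addition verify cleanly.
+ */
+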
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 5")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_5(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r1 > r2 goto l0_%=; \
+ if r1 s> 1 goto l0_%=; \
+ r0 += 4; \
+ r0 -= r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 6")
+__failure __msg("R4 min value is negative, either use unsigned")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_6(void)
+{
+ asm volatile (" \
+ r9 = r1; \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = r9; \
+ r2 = 0; \
+ r3 = r10; \
+ r3 += -512; \
+ r4 = *(u64*)(r10 - 16); \
+ r6 = -1; \
+ if r4 > r6 goto l0_%=; \
+ if r4 s> 1 goto l0_%=; \
+ r4 += 1; \
+ r5 = 0; \
+ r6 = 0; \
+ *(u16*)(r10 - 512) = r6; \
+ call %[bpf_skb_load_bytes]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 7")
+__success __success_unpriv __retval(0)
+__naked void signed_and_unsigned_variant_7(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = %[__imm_0]; \
+ if r1 > r2 goto l0_%=; \
+ if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__imm_0, 1024 * 1024 * 1024)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 8")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_8(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r2 > r1 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 9")
+__success __success_unpriv __retval(0)
+__naked void signed_and_unsigned_variant_9(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -9223372036854775808ULL ll; \
+ if r2 > r1 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 10")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_10(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r2 > r1 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 11")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_11(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r2 >= r1 goto l1_%=; \
+ /* Dead branch. */ \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 12")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_12(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -6; \
+ if r2 >= r1 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 13")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_13(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = 2; \
+ if r2 >= r1 goto l0_%=; \
+ r7 = 1; \
+ if r7 s> 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r7 += r1; \
+ if r7 s> 4 goto l2_%=; \
+ r0 += r7; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 14")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_14(void)
+{
+ asm volatile (" \
+ r9 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ r8 = 2; \
+ if r9 == 42 goto l1_%=; \
+ if r8 s> r1 goto l2_%=; \
+l3_%=: if r1 s> 1 goto l2_%=; \
+ r0 += r1; \
+l0_%=: r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l2_%=: r0 = 0; \
+ exit; \
+l1_%=: if r1 > r2 goto l2_%=; \
+ goto l3_%=; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 15")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_15(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -6; \
+ if r2 >= r1 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 += r1; \
+ if r0 > 1 goto l2_%=; \
+ r0 = 0; \
+ exit; \
+l2_%=: r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cfg.c b/tools/testing/selftests/bpf/progs/verifier_cfg.c
new file mode 100644
index 000000000000..df7697b94007
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cfg.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cfg.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("unreachable")
+__failure __msg("unreachable")
+__failure_unpriv
+__naked void unreachable(void)
+{
+ asm volatile (" \
+ exit; \
+ exit; \
+" ::: __clobber_all);
+}
+
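+/* The verifier runs a control-flow check before simulating anything:
+ * every instruction must be reachable, jump targets must stay in
+ * range, and loops are accepted only where provably bounded
+ * (unprivileged programs get a plain "back-edge" rejection). The tests
+ * below poke each of those checks in turn.
+ */
+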
+SEC("socket")
+__description("unreachable2")
+__failure __msg("unreachable")
+__failure_unpriv
+__naked void unreachable2(void)
+{
+ asm volatile (" \
+ goto l0_%=; \
+ goto l0_%=; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("out of range jump")
+__failure __msg("jump out of range")
+__failure_unpriv
+__naked void out_of_range_jump(void)
+{
+ asm volatile (" \
+ goto l0_%=; \
+ exit; \
+l0_%=: \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("out of range jump2")
+__failure __msg("jump out of range")
+__failure_unpriv
+__naked void out_of_range_jump2(void)
+{
+ asm volatile (" \
+ goto -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("loop (back-edge)")
+__failure __msg("unreachable insn 1")
+__msg_unpriv("back-edge")
+__naked void loop_back_edge(void)
+{
+ asm volatile (" \
+l0_%=: goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("loop2 (back-edge)")
+__failure __msg("unreachable insn 4")
+__msg_unpriv("back-edge")
+__naked void loop2_back_edge(void)
+{
+ asm volatile (" \
+l0_%=: r1 = r0; \
+ r2 = r0; \
+ r3 = r0; \
+ goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("conditional loop")
+__failure __msg("infinite loop detected")
+__msg_unpriv("back-edge")
+__naked void conditional_loop(void)
+{
+ asm volatile (" \
+ r0 = r1; \
+l0_%=: r2 = r0; \
+ r3 = r0; \
+ if r1 == 0 goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c
new file mode 100644
index 000000000000..d6c4a7f3f790
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test1")
+__failure __msg("R0 has value (0x0; 0xffffffff)")
+__naked void with_invalid_return_code_test1(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
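+/* cgroup/sock programs may only return 0 or 1, and the verifier
+ * enforces this from R0's tracked value range at exit: an unknown
+ * 32-bit load (test1), a two-bit mask (test3), or the constant 2
+ * (test5) all fail, while "r0 &= 1" and "r0 = 1" pass.
+ */
+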
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test2")
+__success
+__naked void with_invalid_return_code_test2(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ r0 &= 1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test3")
+__failure __msg("R0 has value (0x0; 0x3)")
+__naked void with_invalid_return_code_test3(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ r0 &= 3; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test4")
+__success
+__naked void with_invalid_return_code_test4(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test5")
+__failure __msg("R0 has value (0x2; 0x0)")
+__naked void with_invalid_return_code_test5(void)
+{
+ asm volatile (" \
+ r0 = 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test6")
+__failure __msg("R0 is not a known value (ctx)")
+__naked void with_invalid_return_code_test6(void)
+{
+ asm volatile (" \
+ r0 = r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test7")
+__failure __msg("R0 has unknown scalar value")
+__naked void with_invalid_return_code_test7(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ r2 = *(u32*)(r1 + 4); \
+ r0 *= r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c
new file mode 100644
index 000000000000..5ee3d349d6d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cgroup_skb.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("cgroup/skb")
+__description("direct packet read test#1 for CGROUP_SKB")
+__success __failure_unpriv
+__msg_unpriv("invalid bpf_context access off=76 size=4")
+__retval(0)
+__naked void test_1_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_pkt_type]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ *(u32*)(r1 + %[__sk_buff_mark]) = r6; \
+ r7 = *(u32*)(r1 + %[__sk_buff_queue_mapping]); \
+ r8 = *(u32*)(r1 + %[__sk_buff_protocol]); \
+ r9 = *(u32*)(r1 + %[__sk_buff_vlan_present]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_pkt_type, offsetof(struct __sk_buff, pkt_type)),
+ __imm_const(__sk_buff_protocol, offsetof(struct __sk_buff, protocol)),
+ __imm_const(__sk_buff_queue_mapping, offsetof(struct __sk_buff, queue_mapping)),
+ __imm_const(__sk_buff_vlan_present, offsetof(struct __sk_buff, vlan_present))
+ : __clobber_all);
+}
+
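+/* cgroup/skb programs get direct packet access through the data and
+ * data_end context fields, but only for privileged loaders: the
+ * unprivileged failure above is the 4-byte read of data at context
+ * offset 76.
+ */
+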
+SEC("cgroup/skb")
+__description("direct packet read test#2 for CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void test_2_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r4 = *(u32*)(r1 + %[__sk_buff_vlan_tci]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_vlan_proto]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_priority]); \
+ *(u32*)(r1 + %[__sk_buff_priority]) = r6; \
+ r7 = *(u32*)(r1 + %[__sk_buff_ingress_ifindex]);\
+ r8 = *(u32*)(r1 + %[__sk_buff_tc_index]); \
+ r9 = *(u32*)(r1 + %[__sk_buff_hash]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_hash, offsetof(struct __sk_buff, hash)),
+ __imm_const(__sk_buff_ingress_ifindex, offsetof(struct __sk_buff, ingress_ifindex)),
+ __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority)),
+ __imm_const(__sk_buff_tc_index, offsetof(struct __sk_buff, tc_index)),
+ __imm_const(__sk_buff_vlan_proto, offsetof(struct __sk_buff, vlan_proto)),
+ __imm_const(__sk_buff_vlan_tci, offsetof(struct __sk_buff, vlan_tci))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("direct packet read test#3 for CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void test_3_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r4 = *(u32*)(r1 + %[__sk_buff_cb_0]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_cb_1]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_cb_2]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_cb_3]); \
+ r8 = *(u32*)(r1 + %[__sk_buff_cb_4]); \
+ r9 = *(u32*)(r1 + %[__sk_buff_napi_id]); \
+ *(u32*)(r1 + %[__sk_buff_cb_0]) = r4; \
+ *(u32*)(r1 + %[__sk_buff_cb_1]) = r5; \
+ *(u32*)(r1 + %[__sk_buff_cb_2]) = r6; \
+ *(u32*)(r1 + %[__sk_buff_cb_3]) = r7; \
+ *(u32*)(r1 + %[__sk_buff_cb_4]) = r8; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0])),
+ __imm_const(__sk_buff_cb_1, offsetof(struct __sk_buff, cb[1])),
+ __imm_const(__sk_buff_cb_2, offsetof(struct __sk_buff, cb[2])),
+ __imm_const(__sk_buff_cb_3, offsetof(struct __sk_buff, cb[3])),
+ __imm_const(__sk_buff_cb_4, offsetof(struct __sk_buff, cb[4])),
+ __imm_const(__sk_buff_napi_id, offsetof(struct __sk_buff, napi_id))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("direct packet read test#4 for CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void test_4_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_family]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_remote_ip4]); \
+ r4 = *(u32*)(r1 + %[__sk_buff_local_ip4]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_0]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_1]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_2]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_3]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_0]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_1]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_2]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_3]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_remote_port]); \
+ r8 = *(u32*)(r1 + %[__sk_buff_local_port]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_family, offsetof(struct __sk_buff, family)),
+ __imm_const(__sk_buff_local_ip4, offsetof(struct __sk_buff, local_ip4)),
+ __imm_const(__sk_buff_local_ip6_0, offsetof(struct __sk_buff, local_ip6[0])),
+ __imm_const(__sk_buff_local_ip6_1, offsetof(struct __sk_buff, local_ip6[1])),
+ __imm_const(__sk_buff_local_ip6_2, offsetof(struct __sk_buff, local_ip6[2])),
+ __imm_const(__sk_buff_local_ip6_3, offsetof(struct __sk_buff, local_ip6[3])),
+ __imm_const(__sk_buff_local_port, offsetof(struct __sk_buff, local_port)),
+ __imm_const(__sk_buff_remote_ip4, offsetof(struct __sk_buff, remote_ip4)),
+ __imm_const(__sk_buff_remote_ip6_0, offsetof(struct __sk_buff, remote_ip6[0])),
+ __imm_const(__sk_buff_remote_ip6_1, offsetof(struct __sk_buff, remote_ip6[1])),
+ __imm_const(__sk_buff_remote_ip6_2, offsetof(struct __sk_buff, remote_ip6[2])),
+ __imm_const(__sk_buff_remote_ip6_3, offsetof(struct __sk_buff, remote_ip6[3])),
+ __imm_const(__sk_buff_remote_port, offsetof(struct __sk_buff, remote_port))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid access of tc_classid for CGROUP_SKB")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void tc_classid_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_tc_classid]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tc_classid, offsetof(struct __sk_buff, tc_classid))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid access of data_meta for CGROUP_SKB")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void data_meta_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_data_meta]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data_meta, offsetof(struct __sk_buff, data_meta))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid access of flow_keys for CGROUP_SKB")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void flow_keys_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_flow_keys]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_flow_keys, offsetof(struct __sk_buff, flow_keys))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid write access to napi_id for CGROUP_SKB")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void napi_id_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r9 = *(u32*)(r1 + %[__sk_buff_napi_id]); \
+ *(u32*)(r1 + %[__sk_buff_napi_id]) = r9; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_napi_id, offsetof(struct __sk_buff, napi_id))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("write tstamp from CGROUP_SKB")
+__success __failure_unpriv
+__msg_unpriv("invalid bpf_context access off=152 size=8")
+__retval(0)
+__naked void write_tstamp_from_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r1 + %[__sk_buff_tstamp]) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tstamp, offsetof(struct __sk_buff, tstamp))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("read tstamp from CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void read_tstamp_from_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = *(u64*)(r1 + %[__sk_buff_tstamp]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tstamp, offsetof(struct __sk_buff, tstamp))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c
new file mode 100644
index 000000000000..9a13f5c11ac7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cgroup_storage.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __uint(max_entries, 0);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, char[TEST_DATA_LEN]);
+} cgroup_storage SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+ __uint(max_entries, 0);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, char[64]);
+} percpu_cgroup_storage SEC(".maps");
+
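+/* bpf_get_local_storage() accepts only cgroup storage maps (regular or
+ * per-CPU) and a zero flags argument. The returned pointer acts like a
+ * map value of the declared size, so out-of-bounds and negative
+ * offsets are rejected just as for any other map value.
+ */
+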
+SEC("cgroup/skb")
+__description("valid cgroup storage access")
+__success __success_unpriv __retval(0)
+__naked void valid_cgroup_storage_access(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 1")
+__failure __msg("cannot pass map_type 1 into func bpf_get_local_storage")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_1(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 2")
+__failure __msg("fd 1 is not pointing to valid bpf_map")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_2(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ .8byte %[ld_map_fd]; \
+ .8byte 0; \
+ call %[bpf_get_local_storage]; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_insn(ld_map_fd, BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 1))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 3")
+__failure __msg("invalid access to map value, value_size=64 off=256 size=4")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_3(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 256); \
+ r1 += 1; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 4")
+__failure __msg("invalid access to map value, value_size=64 off=-2 size=4")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void invalid_cgroup_storage_access_4(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 - 2); \
+ r0 = r1; \
+ r1 += 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 5")
+__failure __msg("get_local_storage() doesn't support non-zero flags")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_5(void)
+{
+ asm volatile (" \
+ r2 = 7; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 6")
+__failure __msg("get_local_storage() doesn't support non-zero flags")
+__msg_unpriv("R2 leaks addr into helper function")
+__naked void invalid_cgroup_storage_access_6(void)
+{
+ asm volatile (" \
+ r2 = r1; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("valid per-cpu cgroup storage access")
+__success __success_unpriv __retval(0)
+__naked void per_cpu_cgroup_storage_access(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[percpu_cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(percpu_cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 1")
+__failure __msg("cannot pass map_type 1 into func bpf_get_local_storage")
+__failure_unpriv
+__naked void cpu_cgroup_storage_access_1(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 2")
+__failure __msg("fd 1 is not pointing to valid bpf_map")
+__failure_unpriv
+__naked void cpu_cgroup_storage_access_2(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ .8byte %[ld_map_fd]; \
+ .8byte 0; \
+ call %[bpf_get_local_storage]; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_insn(ld_map_fd, BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 1))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 3")
+__failure __msg("invalid access to map value, value_size=64 off=256 size=4")
+__failure_unpriv
+__naked void cpu_cgroup_storage_access_3(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[percpu_cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 256); \
+ r1 += 1; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(percpu_cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 4")
+__failure __msg("invalid access to map value, value_size=64 off=-2 size=4")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void cpu_cgroup_storage_access_4(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 - 2); \
+ r0 = r1; \
+ r1 += 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 5")
+__failure __msg("get_local_storage() doesn't support non-zero flags")
+__failure_unpriv
+__naked void cpu_cgroup_storage_access_5(void)
+{
+ asm volatile (" \
+ r2 = 7; \
+ r1 = %[percpu_cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(percpu_cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 6")
+__failure __msg("get_local_storage() doesn't support non-zero flags")
+__msg_unpriv("R2 leaks addr into helper function")
+__naked void cpu_cgroup_storage_access_6(void)
+{
+ asm volatile (" \
+ r2 = r1; \
+ r1 = %[percpu_cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(percpu_cgroup_storage)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_const_or.c b/tools/testing/selftests/bpf/progs/verifier_const_or.c
new file mode 100644
index 000000000000..ba8922b2eebd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_const_or.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/const_or.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
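+/* These tests pass fp-48 as the destination of bpf_probe_read_kernel()
+ * with an OR-derived size in r2: 34 | 13 == 47 still fits the 48-byte
+ * stack window, while 34 | 24 == 58 overruns it, matching the size=58
+ * in the expected error message.
+ */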
+SEC("tracepoint")
+__description("constant register |= constant should keep constant type")
+__success
+__naked void constant_should_keep_constant_type(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -48; \
+ r2 = 34; \
+ r2 |= 13; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("constant register |= constant should not bypass stack boundary checks")
+__failure __msg("invalid indirect access to stack R1 off=-48 size=58")
+__naked void not_bypass_stack_boundary_checks_1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -48; \
+ r2 = 34; \
+ r2 |= 24; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("constant register |= constant register should keep constant type")
+__success
+__naked void register_should_keep_constant_type(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -48; \
+ r2 = 34; \
+ r4 = 13; \
+ r2 |= r4; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("constant register |= constant register should not bypass stack boundary checks")
+__failure __msg("invalid indirect access to stack R1 off=-48 size=58")
+__naked void not_bypass_stack_boundary_checks_2(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -48; \
+ r2 = 34; \
+ r4 = 24; \
+ r2 |= r4; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c b/tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c
new file mode 100644
index 000000000000..65edc89799f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ctx_sk_msg.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("sk_msg")
+__description("valid access family in SK_MSG")
+__success
+__naked void access_family_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_family]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_family, offsetof(struct sk_msg_md, family))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access remote_ip4 in SK_MSG")
+__success
+__naked void remote_ip4_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip4]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_remote_ip4, offsetof(struct sk_msg_md, remote_ip4))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access local_ip4 in SK_MSG")
+__success
+__naked void local_ip4_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip4]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_local_ip4, offsetof(struct sk_msg_md, local_ip4))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access remote_port in SK_MSG")
+__success
+__naked void remote_port_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_port]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_remote_port, offsetof(struct sk_msg_md, remote_port))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access local_port in SK_MSG")
+__success
+__naked void local_port_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_port]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_local_port, offsetof(struct sk_msg_md, local_port))
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("valid access remote_ip6 in SK_MSG")
+__success
+__naked void remote_ip6_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_0]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_1]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_2]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_3]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_remote_ip6_0, offsetof(struct sk_msg_md, remote_ip6[0])),
+ __imm_const(sk_msg_md_remote_ip6_1, offsetof(struct sk_msg_md, remote_ip6[1])),
+ __imm_const(sk_msg_md_remote_ip6_2, offsetof(struct sk_msg_md, remote_ip6[2])),
+ __imm_const(sk_msg_md_remote_ip6_3, offsetof(struct sk_msg_md, remote_ip6[3]))
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("valid access local_ip6 in SK_MSG")
+__success
+__naked void local_ip6_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_0]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_1]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_2]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_3]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_local_ip6_0, offsetof(struct sk_msg_md, local_ip6[0])),
+ __imm_const(sk_msg_md_local_ip6_1, offsetof(struct sk_msg_md, local_ip6[1])),
+ __imm_const(sk_msg_md_local_ip6_2, offsetof(struct sk_msg_md, local_ip6[2])),
+ __imm_const(sk_msg_md_local_ip6_3, offsetof(struct sk_msg_md, local_ip6[3]))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access size in SK_MSG")
+__success
+__naked void access_size_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_size]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_size, offsetof(struct sk_msg_md, size))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("invalid 64B read of size in SK_MSG")
+__failure __msg("invalid bpf_context access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void of_size_in_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[sk_msg_md_size]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_size, offsetof(struct sk_msg_md, size))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("invalid read past end of SK_MSG")
+__failure __msg("invalid bpf_context access")
+__naked void past_end_of_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__imm_0]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, offsetof(struct sk_msg_md, size) + 4)
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("invalid read offset in SK_MSG")
+__failure __msg("invalid bpf_context access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void read_offset_in_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__imm_0]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, offsetof(struct sk_msg_md, family) + 1)
+ : __clobber_all);
+}
+
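+/* sk_msg programs also get direct payload access through data and
+ * data_end, with the same bounds-check discipline as skb/xdp packet
+ * pointers.
+ */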
+SEC("sk_msg")
+__description("direct packet read for SK_MSG")
+__success
+__naked void packet_read_for_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[sk_msg_md_data]); \
+ r3 = *(u64*)(r1 + %[sk_msg_md_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_data, offsetof(struct sk_msg_md, data)),
+ __imm_const(sk_msg_md_data_end, offsetof(struct sk_msg_md, data_end))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("direct packet write for SK_MSG")
+__success
+__naked void packet_write_for_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[sk_msg_md_data]); \
+ r3 = *(u64*)(r1 + %[sk_msg_md_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_data, offsetof(struct sk_msg_md, data)),
+ __imm_const(sk_msg_md_data_end, offsetof(struct sk_msg_md, data_end))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("overlapping checks for direct packet access SK_MSG")
+__success
+__naked void direct_packet_access_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[sk_msg_md_data]); \
+ r3 = *(u64*)(r1 + %[sk_msg_md_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u16*)(r2 + 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_data, offsetof(struct sk_msg_md, data)),
+ __imm_const(sk_msg_md_data_end, offsetof(struct sk_msg_md, data_end))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c b/tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c
new file mode 100644
index 000000000000..c538c6893552
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
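+/* Two large immediate additions would wrap a 32-bit offset around; the
+ * verifier must track the full 64-bit value and reject the resulting
+ * out-of-range stack pointer rather than let it alias the stack again.
+ */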
+SEC("socket")
+__description("direct stack access with 32-bit wraparound. test1")
+__failure __msg("fp pointer and 2147483647")
+__failure_unpriv
+__naked void with_32_bit_wraparound_test1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0x7fffffff; \
+ r1 += 0x7fffffff; \
+ w0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("direct stack access with 32-bit wraparound. test2")
+__failure __msg("fp pointer and 1073741823")
+__failure_unpriv
+__naked void with_32_bit_wraparound_test2(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0x3fffffff; \
+ r1 += 0x3fffffff; \
+ w0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("direct stack access with 32-bit wraparound. test3")
+__failure __msg("fp pointer offset 1073741822")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void with_32_bit_wraparound_test3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0x1fffffff; \
+ r1 += 0x1fffffff; \
+ w0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_div0.c b/tools/testing/selftests/bpf/progs/verifier_div0.c
new file mode 100644
index 000000000000..cca5ea18fc28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_div0.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/div0.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
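+/* BPF defines the runtime behaviour of division by zero: DIV sets the
+ * destination register to 0 and MOD leaves it unchanged, so none of
+ * these programs can fault and the __retval() values follow directly.
+ */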
+SEC("socket")
+__description("DIV32 by 0, zero check 1")
+__success __success_unpriv __retval(42)
+__naked void by_0_zero_check_1_1(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = 1; \
+ w2 /= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("DIV32 by 0, zero check 2")
+__success __success_unpriv __retval(42)
+__naked void by_0_zero_check_2_1(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ r1 = 0xffffffff00000000LL ll; \
+ w2 = 1; \
+ w2 /= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("DIV64 by 0, zero check")
+__success __success_unpriv __retval(42)
+__naked void div64_by_0_zero_check(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = 1; \
+ r2 /= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOD32 by 0, zero check 1")
+__success __success_unpriv __retval(42)
+__naked void by_0_zero_check_1_2(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = 1; \
+ w2 %%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOD32 by 0, zero check 2")
+__success __success_unpriv __retval(42)
+__naked void by_0_zero_check_2_2(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ r1 = 0xffffffff00000000LL ll; \
+ w2 = 1; \
+ w2 %%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOD64 by 0, zero check")
+__success __success_unpriv __retval(42)
+__naked void mod64_by_0_zero_check(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = 1; \
+ r2 %%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("DIV32 by 0, zero check ok, cls")
+__success __retval(8)
+__naked void _0_zero_check_ok_cls_1(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 2; \
+ w2 = 16; \
+ w2 /= w1; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("DIV32 by 0, zero check 1, cls")
+__success __retval(0)
+__naked void _0_zero_check_1_cls_1(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = 1; \
+ w0 /= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("DIV32 by 0, zero check 2, cls")
+__success __retval(0)
+__naked void _0_zero_check_2_cls_1(void)
+{
+ asm volatile (" \
+ r1 = 0xffffffff00000000LL ll; \
+ w0 = 1; \
+ w0 /= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("DIV64 by 0, zero check, cls")
+__success __retval(0)
+__naked void by_0_zero_check_cls(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = 1; \
+ r0 /= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 by 0, zero check ok, cls")
+__success __retval(2)
+__naked void _0_zero_check_ok_cls_2(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 3; \
+ w2 = 5; \
+ w2 %%= w1; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 by 0, zero check 1, cls")
+__success __retval(1)
+__naked void _0_zero_check_1_cls_2(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = 1; \
+ w0 %%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 by 0, zero check 2, cls")
+__success __retval(1)
+__naked void _0_zero_check_2_cls_2(void)
+{
+ asm volatile (" \
+ r1 = 0xffffffff00000000LL ll; \
+ w0 = 1; \
+ w0 %%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD64 by 0, zero check 1, cls")
+__success __retval(2)
+__naked void _0_zero_check_1_cls_3(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = 2; \
+ r0 %%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD64 by 0, zero check 2, cls")
+__success __retval(-1)
+__naked void _0_zero_check_2_cls_3(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = -1; \
+ r0 %%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
new file mode 100644
index 000000000000..458984da804c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/div_overflow.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+#include "bpf_misc.h"
+
+/* Just make sure that JITs use udiv/umod, as otherwise we would get an
+ * exception from INT_MIN/-1 overflow, similar to the div-by-zero case.
+ */
+
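+/* Legacy BPF division is unsigned, so INT_MIN / -1 is evaluated as
+ * 0x80000000 / 0xffffffff == 0, and INT_MIN % -1 leaves 0x80000000,
+ * which is what the __retval() annotations below expect.
+ */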
+SEC("tc")
+__description("DIV32 overflow, check 1")
+__success __retval(0)
+__naked void div32_overflow_check_1(void)
+{
+ asm volatile (" \
+ w1 = -1; \
+ w0 = %[int_min]; \
+ w0 /= w1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("DIV32 overflow, check 2")
+__success __retval(0)
+__naked void div32_overflow_check_2(void)
+{
+ asm volatile (" \
+ w0 = %[int_min]; \
+ w0 /= -1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("DIV64 overflow, check 1")
+__success __retval(0)
+__naked void div64_overflow_check_1(void)
+{
+ asm volatile (" \
+ r1 = -1; \
+ r2 = %[llong_min] ll; \
+ r2 /= r1; \
+ w0 = 0; \
+ if r0 == r2 goto l0_%=; \
+ w0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("DIV64 overflow, check 2")
+__success __retval(0)
+__naked void div64_overflow_check_2(void)
+{
+ asm volatile (" \
+ r1 = %[llong_min] ll; \
+ r1 /= -1; \
+ w0 = 0; \
+ if r0 == r1 goto l0_%=; \
+ w0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 overflow, check 1")
+__success __retval(INT_MIN)
+__naked void mod32_overflow_check_1(void)
+{
+ asm volatile (" \
+ w1 = -1; \
+ w0 = %[int_min]; \
+ w0 %%= w1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 overflow, check 2")
+__success __retval(INT_MIN)
+__naked void mod32_overflow_check_2(void)
+{
+ asm volatile (" \
+ w0 = %[int_min]; \
+ w0 %%= -1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("MOD64 overflow, check 1")
+__success __retval(1)
+__naked void mod64_overflow_check_1(void)
+{
+ asm volatile (" \
+ r1 = -1; \
+ r2 = %[llong_min] ll; \
+ r3 = r2; \
+ r2 %%= r1; \
+ w0 = 0; \
+ if r3 != r2 goto l0_%=; \
+ w0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("MOD64 overflow, check 2")
+__success __retval(1)
+__naked void mod64_overflow_check_2(void)
+{
+ asm volatile (" \
+ r2 = %[llong_min] ll; \
+ r3 = r2; \
+ r2 %%= -1; \
+ w0 = 0; \
+ if r3 != r2 goto l0_%=; \
+ w0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c
new file mode 100644
index 000000000000..50c6b22606f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c
@@ -0,0 +1,825 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_access_var_len.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
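+/* sizeof(struct test_val) is 48 bytes (4-byte index plus 11 * 4-byte
+ * foo[]), which is where value_size=48 in the expected error messages
+ * comes from.
+ */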
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
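+/* A ring buffer's size must be a power-of-two multiple of the page
+ * size; 4096 is the smallest value accepted on 4K-page systems.
+ */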
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} map_ringbuf SEC(".maps");
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, bitwise AND + JMP, correct bounds")
+__success
+__naked void bitwise_and_jmp_correct_bounds(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -64; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ *(u64*)(r10 - 32) = r0; \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = 16; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ r2 &= 64; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("helper access to variable memory: stack, bitwise AND, zero included")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid indirect read from stack R2 off -64+0 size 64")
+__retval(0)
+__naked void stack_bitwise_and_zero_included(void)
+{
+ asm volatile (" \
+ /* set max stack size */ \
+ r6 = 0; \
+ *(u64*)(r10 - 128) = r6; \
+ /* set r3 to a random value */ \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ /* use bitwise AND to limit r3 range to [0, 64] */\
+ r3 &= 64; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = r10; \
+ r2 += -64; \
+ r4 = 0; \
+ /* Call bpf_ringbuf_output(); it is one of the few helpers with an\
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.\
+ * For unpriv this should signal an error, because the memory at\
+ * &fp[-64] is not initialized. \
+ */ \
+ call %[bpf_ringbuf_output]; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_ringbuf_output),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, bitwise AND + JMP, wrong max")
+__failure __msg("invalid indirect access to stack R1 off=-64 size=65")
+__naked void bitwise_and_jmp_wrong_max(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ r2 &= 65; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP, correct bounds")
+__success
+__naked void memory_stack_jmp_correct_bounds(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -64; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ *(u64*)(r10 - 32) = r0; \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = 16; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 > 64 goto l0_%=; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP (signed), correct bounds")
+__success
+__naked void stack_jmp_signed_correct_bounds(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -64; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ *(u64*)(r10 - 32) = r0; \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = 16; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 s> 64 goto l0_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP, bounds + offset")
+__failure __msg("invalid indirect access to stack R1 off=-64 size=65")
+__naked void memory_stack_jmp_bounds_offset(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 > 64 goto l0_%=; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r2 += 1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP, wrong max")
+__failure __msg("invalid indirect access to stack R1 off=-64 size=65")
+__naked void memory_stack_jmp_wrong_max(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 > 65 goto l0_%=; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP, no max check")
+__failure
+/* because max wasn't checked, signed min is negative */
+__msg("R2 min value is negative, either use unsigned or 'var &= const'")
+__naked void stack_jmp_no_max_check(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("helper access to variable memory: stack, JMP, no min check")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid indirect read from stack R2 off -64+0 size 64")
+__retval(0)
+__naked void stack_jmp_no_min_check(void)
+{
+ asm volatile (" \
+ /* set max stack size */ \
+ r6 = 0; \
+ *(u64*)(r10 - 128) = r6; \
+ /* set r3 to a random value */ \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ /* use JMP to limit r3 range to [0, 64] */ \
+ if r3 > 64 goto l0_%=; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = r10; \
+ r2 += -64; \
+ r4 = 0; \
+ /* Call bpf_ringbuf_output(); it is one of the few helpers with an\
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.\
+ * For unpriv this should signal an error, because the memory at\
+ * &fp[-64] is not initialized. \
+ */ \
+ call %[bpf_ringbuf_output]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_ringbuf_output),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP (signed), no min check")
+__failure __msg("R2 min value is negative")
+__naked void jmp_signed_no_min_check(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 s> 64 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: map, JMP, correct bounds")
+__success
+__naked void memory_map_jmp_correct_bounds(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[sizeof_test_val]; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ if r2 s> %[sizeof_test_val] goto l1_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l1_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(sizeof_test_val, sizeof(struct test_val))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: map, JMP, wrong max")
+__failure __msg("invalid access to map value, value_size=48 off=0 size=49")
+__naked void memory_map_jmp_wrong_max(void)
+{
+ asm volatile (" \
+ r6 = *(u64*)(r1 + 8); \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = r6; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ if r2 s> %[__imm_0] goto l1_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l1_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) + 1)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: map adjusted, JMP, correct bounds")
+__success
+__naked void map_adjusted_jmp_correct_bounds(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += 20; \
+ r2 = %[sizeof_test_val]; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ if r2 s> %[__imm_0] goto l1_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l1_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - 20),
+ __imm_const(sizeof_test_val, sizeof(struct test_val))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: map adjusted, JMP, wrong max")
+__failure __msg("R1 min value is outside of the allowed memory range")
+__naked void map_adjusted_jmp_wrong_max(void)
+{
+ asm volatile (" \
+ r6 = *(u64*)(r1 + 8); \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += 20; \
+ r2 = r6; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ if r2 s> %[__imm_0] goto l1_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l1_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - 19)
+ : __clobber_all);
+}
+
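+/* bpf_csum_diff() takes ARG_PTR_TO_MEM_OR_NULL buffers: NULL is legal
+ * only when the paired ARG_CONST_SIZE_OR_ZERO length is provably zero.
+ * The bpf_probe_read_kernel() tests further down show the contrast for
+ * plain pointer arguments, where NULL is rejected even with size 0.
+ */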
+SEC("tc")
+__description("helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ r2 = 0; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)")
+__failure __msg("R1 type=scalar expected=fp")
+__naked void ptr_to_mem_or_null_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + 0); \
+ r1 = 0; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ r2 &= 64; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -8; \
+ r2 = 0; \
+ *(u64*)(r1 + 0) = r2; \
+ r2 &= 8; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 0; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_5(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = *(u64*)(r0 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r1 = r10; \
+ r1 += -8; \
+ *(u64*)(r1 + 0) = r2; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_6(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = *(u64*)(r0 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+/* csum_diff of 64-byte packet */
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void ptr_to_mem_or_null_7(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r6; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r1 = r6; \
+ r2 = *(u64*)(r6 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)")
+__failure __msg("R1 type=scalar expected=fp")
+__naked void ptr_to_mem_or_null_8(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ r2 = 0; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)")
+__failure __msg("R1 type=scalar expected=fp")
+__naked void ptr_to_mem_or_null_9(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ r2 = 1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)")
+__success
+__naked void ptr_to_mem_or_null_10(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -8; \
+ r2 = 0; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)")
+__success
+__naked void ptr_to_mem_or_null_11(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 0; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)")
+__success
+__naked void ptr_to_mem_or_null_12(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = *(u64*)(r0 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r1 = r10; \
+ r1 += -8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)")
+__success
+__naked void ptr_to_mem_or_null_13(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = *(u64*)(r0 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("helper access to variable memory: 8 bytes leak")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid indirect read from stack R2 off -64+32 size 64")
+__retval(0)
+__naked void variable_memory_8_bytes_leak(void)
+{
+ asm volatile (" \
+ /* set max stack size */ \
+ r6 = 0; \
+ *(u64*)(r10 - 128) = r6; \
+ /* set r3 to a random value */ \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = r10; \
+ r2 += -64; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ /* Note: fp[-32] left uninitialized */ \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ /* Limit r3 range to [1, 64] */ \
+ r3 &= 63; \
+ r3 += 1; \
+ r4 = 0; \
+ /* Call bpf_ringbuf_output(); it is one of the few helpers with an\
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.\
+ * For unpriv this should signal an error, because the memory region\
+ * [1, 64] at &fp[-64] is not fully initialized. \
+ */ \
+ call %[bpf_ringbuf_output]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_ringbuf_output),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: 8 bytes no leak (init memory)")
+__success
+__naked void bytes_no_leak_init_memory(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r0 = 0; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ *(u64*)(r10 - 32) = r0; \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ r1 += -64; \
+ r2 = 0; \
+ r2 &= 32; \
+ r2 += 32; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ r1 = *(u64*)(r10 - 16); \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c
new file mode 100644
index 000000000000..74f5f9cd153d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_packet_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
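+/* A packet pointer may only be handed to a helper after the program
+ * proves ptr + len <= data_end; the failure cases below either skip
+ * that check or prove a range shorter than what the helper will touch.
+ */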
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("xdp")
+__description("helper access to packet: test1, valid packet_ptr range")
+__success __retval(0)
+__naked void test1_valid_packet_ptr_range(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ r3 = r2; \
+ r4 = 0; \
+ call %[bpf_map_update_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("helper access to packet: test2, unchecked packet_ptr")
+__failure __msg("invalid access to packet")
+__naked void packet_test2_unchecked_packet_ptr(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("helper access to packet: test3, variable add")
+__success __retval(0)
+__naked void to_packet_test3_variable_add(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r3 goto l0_%=; \
+ r5 = *(u8*)(r2 + 0); \
+ r4 = r2; \
+ r4 += r5; \
+ r5 = r4; \
+ r5 += 8; \
+ if r5 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ r2 = r4; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("helper access to packet: test4, packet_ptr with bad range")
+__failure __msg("invalid access to packet")
+__naked void packet_ptr_with_bad_range_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r4 = r2; \
+ r4 += 4; \
+ if r4 > r3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("helper access to packet: test5, packet_ptr with too short range")
+__failure __msg("invalid access to packet")
+__naked void ptr_with_too_short_range_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r2 += 1; \
+ r4 = r2; \
+ r4 += 7; \
+ if r4 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test6, cls valid packet_ptr range")
+__success __retval(0)
+__naked void cls_valid_packet_ptr_range(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ r3 = r2; \
+ r4 = 0; \
+ call %[bpf_map_update_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test7, cls unchecked packet_ptr")
+__failure __msg("invalid access to packet")
+__naked void test7_cls_unchecked_packet_ptr(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test8, cls variable add")
+__success __retval(0)
+__naked void packet_test8_cls_variable_add(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r3 goto l0_%=; \
+ r5 = *(u8*)(r2 + 0); \
+ r4 = r2; \
+ r4 += r5; \
+ r5 = r4; \
+ r5 += 8; \
+ if r5 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ r2 = r4; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test9, cls packet_ptr with bad range")
+__failure __msg("invalid access to packet")
+__naked void packet_ptr_with_bad_range_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = r2; \
+ r4 += 4; \
+ if r4 > r3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test10, cls packet_ptr with too short range")
+__failure __msg("invalid access to packet")
+__naked void ptr_with_too_short_range_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r2 += 1; \
+ r4 = r2; \
+ r4 += 7; \
+ if r4 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test11, cls unsuitable helper 1")
+__failure __msg("helper access to the packet")
+__naked void test11_cls_unsuitable_helper_1(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r3 = r6; \
+ r3 += 7; \
+ if r3 > r7 goto l0_%=; \
+ r2 = 0; \
+ r4 = 42; \
+ r5 = 0; \
+ call %[bpf_skb_store_bytes]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_skb_store_bytes),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test12, cls unsuitable helper 2")
+__failure __msg("helper access to the packet")
+__naked void test12_cls_unsuitable_helper_2(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r3 = r6; \
+ r6 += 8; \
+ if r6 > r7 goto l0_%=; \
+ r2 = 0; \
+ r4 = 4; \
+ call %[bpf_skb_load_bytes]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test13, cls helper ok")
+__success __retval(0)
+__naked void packet_test13_cls_helper_ok(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test14, cls helper ok sub")
+__success __retval(0)
+__naked void test14_cls_helper_ok_sub(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 -= 4; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test15, cls helper fail sub")
+__failure __msg("invalid access to packet")
+__naked void test15_cls_helper_fail_sub(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 -= 12; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test16, cls helper fail range 1")
+__failure __msg("invalid access to packet")
+__naked void cls_helper_fail_range_1(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test17, cls helper fail range 2")
+__failure __msg("R2 min value is negative")
+__naked void cls_helper_fail_range_2(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = -9; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test18, cls helper fail range 3")
+__failure __msg("R2 min value is negative")
+__naked void cls_helper_fail_range_3(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__imm_0, ~0),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test19, cls helper range zero")
+__success __retval(0)
+__naked void test19_cls_helper_range_zero(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test20, pkt end as input")
+__failure __msg("R1 type=pkt_end expected=fp")
+__naked void test20_pkt_end_as_input(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r7; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test21, wrong reg")
+__failure __msg("invalid access to packet")
+__naked void to_packet_test21_wrong_reg(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
new file mode 100644
index 000000000000..0ede0ccd090c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_restricted.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct val {
+ int cnt;
+ struct bpf_spin_lock l;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct val);
+} map_spin_lock SEC(".maps");
+
+struct timer {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct timer);
+} map_timer SEC(".maps");
+
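+/* bpf_ktime_get_coarse_ns(), bpf_timer_init() and bpf_spin_lock() are
+ * rejected in the tracing program types (kprobe, tracepoint, perf_event,
+ * raw_tracepoint), presumably because a tracing program can fire inside
+ * the timekeeping or locking code these helpers depend on.  Each test
+ * below calls one of the helpers from one of those program types and
+ * expects the program to be rejected at load time.
+ */
+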
+SEC("kprobe")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE")
+__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__naked void in_bpf_prog_type_kprobe_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT")
+__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__naked void in_bpf_prog_type_tracepoint_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("perf_event")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT")
+__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__naked void bpf_prog_type_perf_event_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("raw_tracepoint")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
+__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__naked void bpf_prog_type_raw_tracepoint_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("kprobe")
+__description("bpf_timer_init isn restricted in BPF_PROG_TYPE_KPROBE")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void in_bpf_prog_type_kprobe_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("perf_event")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_PERF_EVENT")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void bpf_prog_type_perf_event_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void in_bpf_prog_type_tracepoint_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("raw_tracepoint")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void bpf_prog_type_raw_tracepoint_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("kprobe")
+__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_KPROBE")
+__failure __msg("tracing progs cannot use bpf_spin_lock yet")
+__naked void in_bpf_prog_type_kprobe_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_spin_lock]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_spin_lock yet")
+__naked void in_bpf_prog_type_tracepoint_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_spin_lock]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("perf_event")
+__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_PERF_EVENT")
+__failure __msg("tracing progs cannot use bpf_spin_lock yet")
+__naked void bpf_prog_type_perf_event_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_spin_lock]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("raw_tracepoint")
+__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_spin_lock yet")
+__naked void bpf_prog_type_raw_tracepoint_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_spin_lock]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
new file mode 100644
index 000000000000..692216c0ad3d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
@@ -0,0 +1,1245 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_value_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct other_val {
+ long long foo;
+ long long bar;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct other_val);
+} map_hash_16b SEC(".maps");
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
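+/* Common pattern below: look up a map value, then pass the value pointer
+ * (r1) and a length (r2) to a helper, forcing the verifier to prove that
+ * the whole range [r1, r1 + r2) fits inside the map value.  The
+ * "adjusted map" variants first move r1 by a constant or by a
+ * bounds-checked variable.  bpf_probe_read_kernel() tolerates a zero
+ * length, so the empty-range tests use bpf_trace_printk() instead.
+ */
+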
+SEC("tracepoint")
+__description("helper access to map: full range")
+__success
+__naked void access_to_map_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[sizeof_test_val]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(sizeof_test_val, sizeof(struct test_val))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: partial range")
+__success
+__naked void access_to_map_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: empty range")
+__failure __msg("invalid access to map value, value_size=48 off=0 size=0")
+__naked void access_to_map_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 0; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: out-of-bound range")
+__failure __msg("invalid access to map value, value_size=48 off=0 size=56")
+__naked void map_out_of_bound_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) + 8)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: negative range")
+__failure __msg("R2 min value is negative")
+__naked void access_to_map_negative_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = -8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): full range")
+__success
+__naked void via_const_imm_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo)),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): partial range")
+__success
+__naked void via_const_imm_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): empty range")
+__failure __msg("invalid access to map value, value_size=48 off=4 size=0")
+__naked void via_const_imm_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = 0; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): out-of-bound range")
+__failure __msg("invalid access to map value, value_size=48 off=4 size=52")
+__naked void imm_out_of_bound_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): negative range (> adjustment)")
+__failure __msg("R2 min value is negative")
+__naked void const_imm_negative_range_adjustment_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = -8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): negative range (< adjustment)")
+__failure __msg("R2 min value is negative")
+__naked void const_imm_negative_range_adjustment_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = -1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): full range")
+__success
+__naked void via_const_reg_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo)),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): partial range")
+__success
+__naked void via_const_reg_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): empty range")
+__failure __msg("R1 min value is outside of the allowed memory range")
+__naked void via_const_reg_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = 0; \
+ r1 += r3; \
+ r2 = 0; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): out-of-bound range")
+__failure __msg("invalid access to map value, value_size=48 off=4 size=52")
+__naked void reg_out_of_bound_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): negative range (> adjustment)")
+__failure __msg("R2 min value is negative")
+__naked void const_reg_negative_range_adjustment_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = -8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): negative range (< adjustment)")
+__failure __msg("R2 min value is negative")
+__naked void const_reg_negative_range_adjustment_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = -1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): full range")
+__success
+__naked void map_via_variable_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[test_val_foo] goto l0_%=; \
+ r1 += r3; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo)),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): partial range")
+__success
+__naked void map_via_variable_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[test_val_foo] goto l0_%=; \
+ r1 += r3; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): empty range")
+__failure __msg("R1 min value is outside of the allowed memory range")
+__naked void map_via_variable_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[test_val_foo] goto l0_%=; \
+ r1 += r3; \
+ r2 = 0; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): no max check")
+__failure __msg("R1 unbounded memory access")
+__naked void via_variable_no_max_check_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ r1 += r3; \
+ r2 = 1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): wrong max check")
+__failure __msg("invalid access to map value, value_size=48 off=4 size=45")
+__naked void via_variable_wrong_max_check_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[test_val_foo] goto l0_%=; \
+ r1 += r3; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo) + 1),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
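+/* The bounds-check tests index the map value with r3 loaded from the map
+ * itself, so its value is unknown to the verifier: the access is accepted
+ * only when the preceding <, <=, s< or s<= branches establish both an
+ * upper and a non-negative lower bound for r3.
+ */
+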
+SEC("tracepoint")
+__description("helper access to map: bounds check using <, good access")
+__success
+__naked void bounds_check_using_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 < 32 goto l1_%=; \
+ r0 = 0; \
+l0_%=: exit; \
+l1_%=: r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using <, bad access")
+__failure __msg("R1 unbounded memory access")
+__naked void bounds_check_using_bad_access_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 < 32 goto l1_%=; \
+ r1 += r3; \
+l0_%=: r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using <=, good access")
+__success
+__naked void bounds_check_using_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 <= 32 goto l1_%=; \
+ r0 = 0; \
+l0_%=: exit; \
+l1_%=: r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using <=, bad access")
+__failure __msg("R1 unbounded memory access")
+__naked void bounds_check_using_bad_access_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 <= 32 goto l1_%=; \
+ r1 += r3; \
+l0_%=: r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<, good access")
+__success
+__naked void check_using_s_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 s< 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s< 0 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<, good access 2")
+__success
+__naked void using_s_good_access_2_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 s< 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s< -3 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<, bad access")
+__failure __msg("R1 min value is negative")
+__naked void check_using_s_bad_access_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u64*)(r0 + 0); \
+ if r3 s< 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s< -3 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<=, good access")
+__success
+__naked void check_using_s_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 s<= 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s<= 0 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<=, good access 2")
+__success
+__naked void using_s_good_access_2_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 s<= 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s<= -3 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<=, bad access")
+__failure __msg("R1 min value is negative")
+__naked void check_using_s_bad_access_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u64*)(r0 + 0); \
+ if r3 s<= 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s<= -3 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map lookup helper access to map")
+__success
+__naked void lookup_helper_access_to_map(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map update helper access to map")
+__success
+__naked void update_helper_access_to_map(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r4 = 0; \
+ r3 = r0; \
+ r2 = r0; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_update_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map update helper access to map: wrong size")
+__failure __msg("invalid access to map value, value_size=8 off=0 size=16")
+__naked void access_to_map_wrong_size(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r4 = 0; \
+ r3 = r0; \
+ r2 = r0; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_update_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_16b),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const imm)")
+__success
+__naked void adjusted_map_via_const_imm(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r2 += %[other_val_bar]; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(other_val_bar, offsetof(struct other_val, bar))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const imm): out-of-bound 1")
+__failure __msg("invalid access to map value, value_size=16 off=12 size=8")
+__naked void imm_out_of_bound_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r2 += %[__imm_0]; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(__imm_0, sizeof(struct other_val) - 4)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const imm): out-of-bound 2")
+__failure __msg("invalid access to map value, value_size=16 off=-4 size=8")
+__naked void imm_out_of_bound_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r2 += -4; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const reg)")
+__success
+__naked void adjusted_map_via_const_reg(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = %[other_val_bar]; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(other_val_bar, offsetof(struct other_val, bar))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const reg): out-of-bound 1")
+__failure __msg("invalid access to map value, value_size=16 off=12 size=8")
+__naked void reg_out_of_bound_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = %[__imm_0]; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(__imm_0, sizeof(struct other_val) - 4)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const reg): out-of-bound 2")
+__failure __msg("invalid access to map value, value_size=16 off=-4 size=8")
+__naked void reg_out_of_bound_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = -4; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via variable)")
+__success
+__naked void to_adjusted_map_via_variable(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[other_val_bar] goto l0_%=; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(other_val_bar, offsetof(struct other_val, bar))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via variable): no max check")
+__failure
+__msg("R2 unbounded memory access, make sure to bounds check any such access")
+__naked void via_variable_no_max_check_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via variable): wrong max check")
+__failure __msg("invalid access to map value, value_size=16 off=9 size=8")
+__naked void via_variable_wrong_max_check_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[__imm_0] goto l0_%=; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(__imm_0, offsetof(struct other_val, bar) + 1)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
new file mode 100644
index 000000000000..b054f9c48143
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/int_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
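+/* Every test feeds bpf_strtoul() a stack buffer holding the string "600"
+ * (0x00303036 is "600\0" in little-endian) and varies only the fourth
+ * argument, the ARG_PTR_TO_LONG result slot, which is expected to be an
+ * initialized, aligned, long-sized region of the stack.
+ */
+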
+SEC("cgroup/sysctl")
+__description("ARG_PTR_TO_LONG uninitialized")
+__failure __msg("invalid indirect read from stack R4 off -16+0 size 8")
+__naked void arg_ptr_to_long_uninitialized(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -8; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += -8; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ARG_PTR_TO_LONG half-uninitialized")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid indirect read from stack R4 off -16+4 size 8")
+__retval(0)
+__naked void ptr_to_long_half_uninitialized(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -8; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += -8; \
+ *(u32*)(r7 + 0) = r0; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+SEC("cgroup/sysctl")
+__description("ARG_PTR_TO_LONG misaligned")
+__failure __msg("misaligned stack access off (0x0; 0x0)+-20+0 size 8")
+__naked void arg_ptr_to_long_misaligned(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -8; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += -12; \
+ r0 = 0; \
+ *(u32*)(r7 + 0) = r0; \
+ *(u64*)(r7 + 4) = r0; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+SEC("cgroup/sysctl")
+__description("ARG_PTR_TO_LONG size < sizeof(long)")
+__failure __msg("invalid indirect access to stack R4 off=-4 size=8")
+__naked void to_long_size_sizeof_long(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -16; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += 12; \
+ *(u32*)(r7 + 0) = r0; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+SEC("cgroup/sysctl")
+__description("ARG_PTR_TO_LONG initialized")
+__success
+__naked void arg_ptr_to_long_initialized(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -8; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += -8; \
+ *(u64*)(r7 + 0) = r0; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ld_ind.c b/tools/testing/selftests/bpf/progs/verifier_ld_ind.c
new file mode 100644
index 000000000000..c925ba9a2e74
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ld_ind.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ld_ind.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
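+/* BPF_LD_IND is a legacy packet-load instruction: it implicitly reads
+ * the skb pointer from r6, returns the loaded word in r0 and, like a
+ * helper call, clobbers the caller-saved registers r1-r5.  Reading any
+ * of r1-r5 afterwards is therefore flagged "!read_ok", while the
+ * callee-saved r7 keeps its value.
+ */
+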
+SEC("socket")
+__description("ld_ind: check calling conv, r1")
+__failure __msg("R1 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r1(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r2")
+__failure __msg("R2 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r2(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r2 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r2; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r3")
+__failure __msg("R3 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r3(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r3 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r3; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r4")
+__failure __msg("R4 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r4(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r4 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r4; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r5")
+__failure __msg("R5 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r5(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r5 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r5; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r7")
+__success __success_unpriv __retval(1)
+__naked void ind_check_calling_conv_r7(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r7 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r7; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_leak_ptr.c b/tools/testing/selftests/bpf/progs/verifier_leak_ptr.c
new file mode 100644
index 000000000000..d153fbe50055
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_leak_ptr.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/leak_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
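+/* Two properties are checked here: atomic read-modify-write instructions
+ * may not target the ctx at all, and an unprivileged program may not
+ * store kernel pointers (a map pointer, the stack pointer in r10)
+ * anywhere user space could later read them, such as skb->cb[] or a map
+ * value.
+ */
+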
+SEC("socket")
+__description("leak pointer into ctx 1")
+__failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__failure_unpriv __msg_unpriv("R2 leaks addr into mem")
+__naked void leak_pointer_into_ctx_1(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r2 = %[map_hash_8b] ll; \
+ lock *(u64 *)(r1 + %[__sk_buff_cb_0]) += r2; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("leak pointer into ctx 2")
+__failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__failure_unpriv __msg_unpriv("R10 leaks addr into mem")
+__naked void leak_pointer_into_ctx_2(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ lock *(u64 *)(r1 + %[__sk_buff_cb_0]) += r10; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("leak pointer into ctx 3")
+__success __failure_unpriv __msg_unpriv("R2 leaks addr into ctx")
+__retval(0)
+__naked void leak_pointer_into_ctx_3(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r2 = %[map_hash_8b] ll; \
+ *(u64*)(r1 + %[__sk_buff_cb_0]) = r2; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("leak pointer into map val")
+__success __failure_unpriv __msg_unpriv("R6 leaks addr into mem")
+__retval(0)
+__naked void leak_pointer_into_map_val(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r3 = 0; \
+ *(u64*)(r0 + 0) = r3; \
+ lock *(u64 *)(r0 + 0) += r6; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
new file mode 100644
index 000000000000..11a079145966
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/map_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+struct other_val {
+ long long foo;
+ long long bar;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct other_val);
+} map_hash_16b SEC(".maps");
+
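+/* These tests load directly through the map pointer itself (i.e. the
+ * kernel's struct bpf_map): only aligned reads of exposed fields are
+ * supported, writes are rejected, and unprivileged programs may neither
+ * read the struct nor perform ALU on the pointer.
+ */
+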
+SEC("socket")
+__description("bpf_map_ptr: read with negative offset rejected")
+__failure __msg("R1 is bpf_array invalid negative access: off=-8")
+__failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__naked void read_with_negative_offset_rejected(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 = %[map_array_48b] ll; \
+ r6 = *(u64*)(r1 - 8); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: write rejected")
+__failure __msg("only read from bpf_array is supported")
+__failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__naked void bpf_map_ptr_write_rejected(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ *(u64*)(r1 + 0) = r2; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: read non-existent field rejected")
+__failure
+__msg("cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4")
+__failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void read_non_existent_field_rejected(void)
+{
+ asm volatile (" \
+ r6 = 0; \
+ r1 = %[map_array_48b] ll; \
+ r6 = *(u32*)(r1 + 1); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: read ops field accepted")
+__success __failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__retval(1)
+__naked void ptr_read_ops_field_accepted(void)
+{
+ asm volatile (" \
+ r6 = 0; \
+ r1 = %[map_array_48b] ll; \
+ r6 = *(u64*)(r1 + 0); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: r = 0, map_ptr = map_ptr + r")
+__success __failure_unpriv
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__retval(0)
+__naked void map_ptr_map_ptr_r(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r0 = 0; \
+ r1 = %[map_hash_16b] ll; \
+ r1 += r0; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: r = 0, r = r + map_ptr")
+__success __failure_unpriv
+__msg_unpriv("R0 has pointer with unsupported alu operation")
+__retval(0)
+__naked void _0_r_r_map_ptr(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ r0 = %[map_hash_16b] ll; \
+ r1 += r0; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ret_val.c b/tools/testing/selftests/bpf/progs/verifier_map_ret_val.c
new file mode 100644
index 000000000000..1639628b832d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ret_val.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/map_ret_val.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
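+/* The first test hands BPF_PSEUDO_MAP_FD a zero fd, which refers to no
+ * map; the rest exercise bpf_map_lookup_elem()'s PTR_TO_MAP_VALUE_OR_NULL
+ * return: the program must branch on r0 == 0 before dereferencing, and
+ * with BPF_F_STRICT_ALIGNMENT the accesses must be naturally aligned too.
+ */
+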
+SEC("socket")
+__description("invalid map_fd for function call")
+__failure __msg("fd 0 is not pointing to valid bpf_map")
+__failure_unpriv
+__naked void map_fd_for_function_call(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ r2 = r10; \
+ r2 += -8; \
+ .8byte %[ld_map_fd]; \
+ .8byte 0; \
+ call %[bpf_map_delete_elem]; \
+ exit; \
+" :
+ : __imm(bpf_map_delete_elem),
+ __imm_insn(ld_map_fd, BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("don't check return value before access")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__failure_unpriv
+__naked void check_return_value_before_access(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r1 = 0; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("access memory with incorrect alignment")
+__failure __msg("misaligned value access")
+__failure_unpriv
+__flag(BPF_F_STRICT_ALIGNMENT)
+__naked void access_memory_with_incorrect_alignment_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r0 + 4) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sometimes access memory with incorrect alignment")
+__failure __msg("R0 invalid mem access")
+__msg_unpriv("R0 leaks addr")
+__flag(BPF_F_STRICT_ALIGNMENT)
+__naked void access_memory_with_incorrect_alignment_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+l0_%=: r1 = 1; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_masking.c b/tools/testing/selftests/bpf/progs/verifier_masking.c
new file mode 100644
index 000000000000..5732cc1b4c47
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_masking.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/masking.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
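+/* Each test runs, by hand, the masking sequence the verifier emits to
+ * sanitize pointer arithmetic against Spectre v1: with an index in r1
+ * and w2 = size - 1,
+ *
+ *      r2 -= r1; r2 |= r1; r2 = -r2; r2 s>>= 63; r1 &= r2;
+ *
+ * leaves r1 untouched when 0 <= r1 <= size - 1 and forces it to zero
+ * otherwise, so a mispredicted bounds check can never yield an
+ * out-of-range index.
+ */
+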
+SEC("socket")
+__description("masking, test out of bounds 1")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_1(void)
+{
+ asm volatile (" \
+ w1 = 5; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 5 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 2")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_2(void)
+{
+ asm volatile (" \
+ w1 = 1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 3")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_3(void)
+{
+ asm volatile (" \
+ w1 = 0xffffffff; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 4")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_4(void)
+{
+ asm volatile (" \
+ w1 = 0xffffffff; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 5")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_5(void)
+{
+ asm volatile (" \
+ w1 = -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 6")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_6(void)
+{
+ asm volatile (" \
+ w1 = -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 7")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_7(void)
+{
+ asm volatile (" \
+ r1 = 5; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 5 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 8")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_8(void)
+{
+ asm volatile (" \
+ r1 = 1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 9")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_9(void)
+{
+ asm volatile (" \
+ r1 = 0xffffffff; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 10")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_10(void)
+{
+ asm volatile (" \
+ r1 = 0xffffffff; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 11")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_11(void)
+{
+ asm volatile (" \
+ r1 = -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 12")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_12(void)
+{
+ asm volatile (" \
+ r1 = -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 1")
+__success __success_unpriv __retval(4)
+__naked void masking_test_in_bounds_1(void)
+{
+ asm volatile (" \
+ w1 = 4; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 5 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 2")
+__success __success_unpriv __retval(0)
+__naked void masking_test_in_bounds_2(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 3")
+__success __success_unpriv __retval(0xfffffffe)
+__naked void masking_test_in_bounds_3(void)
+{
+ asm volatile (" \
+ w1 = 0xfffffffe; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 4")
+__success __success_unpriv __retval(0xabcde)
+__naked void masking_test_in_bounds_4(void)
+{
+ asm volatile (" \
+ w1 = 0xabcde; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xabcdef - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 5")
+__success __success_unpriv __retval(0)
+__naked void masking_test_in_bounds_5(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 6")
+__success __success_unpriv __retval(46)
+__naked void masking_test_in_bounds_6(void)
+{
+ asm volatile (" \
+ w1 = 46; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 47 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 7")
+__success __success_unpriv __retval(46)
+__naked void masking_test_in_bounds_7(void)
+{
+ asm volatile (" \
+ r3 = -46; \
+ r3 *= -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r3; \
+ r2 |= r3; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r3 &= r2; \
+ r0 = r3; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 47 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 8")
+__success __success_unpriv __retval(0)
+__naked void masking_test_in_bounds_8(void)
+{
+ asm volatile (" \
+ r3 = -47; \
+ r3 *= -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r3; \
+ r2 |= r3; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r3 &= r2; \
+ r0 = r3; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 47 - 1)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_meta_access.c b/tools/testing/selftests/bpf/progs/verifier_meta_access.c
new file mode 100644
index 000000000000..d81722fb5f19
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_meta_access.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/meta_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("xdp")
+__description("meta access, test1")
+__success __retval(0)
+__naked void meta_access_test1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test2")
+__failure __msg("invalid access to packet, off=-8")
+__naked void meta_access_test2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r2; \
+ r0 -= 8; \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r3 goto l0_%=; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test3")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test4")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r4 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r4; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test5")
+__failure __msg("R3 !read_ok")
+__naked void meta_access_test5(void)
+{
+ asm volatile (" \
+ r3 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r4 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r3; \
+ r0 += 8; \
+ if r0 > r4 goto l0_%=; \
+ r2 = -8; \
+ call %[bpf_xdp_adjust_meta]; \
+ r0 = *(u8*)(r3 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_xdp_adjust_meta),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test6")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r3; \
+ r0 += 8; \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r0 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test7")
+__success __retval(0)
+__naked void meta_access_test7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r3; \
+ r0 += 8; \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test8")
+__success __retval(0)
+__naked void meta_access_test8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r4 = r2; \
+ r4 += 0xFFFF; \
+ if r4 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test9")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test9(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r4 = r2; \
+ r4 += 0xFFFF; \
+ r4 += 1; \
+ if r4 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test10")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test10(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r4 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r5 = 42; \
+ r6 = 24; \
+ *(u64*)(r10 - 8) = r5; \
+ lock *(u64 *)(r10 - 8) += r6; \
+ r5 = *(u64*)(r10 - 8); \
+ if r5 > 100 goto l0_%=; \
+ r3 += r5; \
+ r5 = r3; \
+ r6 = r2; \
+ r6 += 8; \
+ if r6 > r5 goto l0_%=; \
+ r2 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test11")
+__success __retval(0)
+__naked void meta_access_test11(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r5 = 42; \
+ r6 = 24; \
+ *(u64*)(r10 - 8) = r5; \
+ lock *(u64 *)(r10 - 8) += r6; \
+ r5 = *(u64*)(r10 - 8); \
+ if r5 > 100 goto l0_%=; \
+ r2 += r5; \
+ r5 = r2; \
+ r6 = r2; \
+ r6 += 8; \
+ if r6 > r3 goto l0_%=; \
+ r5 = *(u8*)(r5 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test12")
+__success __retval(0)
+__naked void meta_access_test12(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r4 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r5 = r3; \
+ r5 += 16; \
+ if r5 > r4 goto l0_%=; \
+ r0 = *(u8*)(r3 + 0); \
+ r5 = r2; \
+ r5 += 16; \
+ if r5 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
new file mode 100644
index 000000000000..efbfc3a4ad6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/raw_stack.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("tc")
+__description("raw_stack: no skb_load_bytes")
+__failure __msg("invalid read from stack R6 off=-8 size=8")
+__naked void stack_no_skb_load_bytes(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = 8; \
+ /* Call to skb_load_bytes() omitted. */ \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, negative len")
+__failure __msg("R4 min value is negative")
+__naked void skb_load_bytes_negative_len(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = -8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, negative len 2")
+__failure __msg("R4 min value is negative")
+__naked void load_bytes_negative_len_2(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = %[__imm_0]; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__imm_0, ~0)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, zero len")
+__failure __msg("invalid zero-sized read")
+__naked void skb_load_bytes_zero_len(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = 0; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, no init")
+__success __retval(0)
+__naked void skb_load_bytes_no_init(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, init")
+__success __retval(0)
+__naked void stack_skb_load_bytes_init(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = 0xcafe; \
+ *(u64*)(r6 + 0) = r3; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, spilled regs around bounds")
+__success __retval(0)
+__naked void bytes_spilled_regs_around_bounds(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -16; \
+ *(u64*)(r6 - 8) = r1; \
+ *(u64*)(r6 + 8) = r1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 - 8); \
+ r2 = *(u64*)(r6 + 8); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ r2 = *(u32*)(r2 + %[__sk_buff_priority]); \
+ r0 += r2; \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, spilled regs corruption")
+__failure __msg("R0 invalid mem access 'scalar'")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void load_bytes_spilled_regs_corruption(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, spilled regs corruption 2")
+__failure __msg("R3 invalid mem access 'scalar'")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void bytes_spilled_regs_corruption_2(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -16; \
+ *(u64*)(r6 - 8) = r1; \
+ *(u64*)(r6 + 0) = r1; \
+ *(u64*)(r6 + 8) = r1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 - 8); \
+ r2 = *(u64*)(r6 + 8); \
+ r3 = *(u64*)(r6 + 0); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ r2 = *(u32*)(r2 + %[__sk_buff_priority]); \
+ r0 += r2; \
+ r3 = *(u32*)(r3 + %[__sk_buff_pkt_type]); \
+ r0 += r3; \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_pkt_type, offsetof(struct __sk_buff, pkt_type)),
+ __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, spilled regs + data")
+__success __retval(0)
+__naked void load_bytes_spilled_regs_data(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -16; \
+ *(u64*)(r6 - 8) = r1; \
+ *(u64*)(r6 + 0) = r1; \
+ *(u64*)(r6 + 8) = r1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 - 8); \
+ r2 = *(u64*)(r6 + 8); \
+ r3 = *(u64*)(r6 + 0); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ r2 = *(u32*)(r2 + %[__sk_buff_priority]); \
+ r0 += r2; \
+ r0 += r3; \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 1")
+__failure __msg("invalid indirect access to stack R3 off=-513 size=8")
+__naked void load_bytes_invalid_access_1(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -513; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 2")
+__failure __msg("invalid indirect access to stack R3 off=-1 size=8")
+__naked void load_bytes_invalid_access_2(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 3")
+__failure __msg("R4 min value is negative")
+__naked void load_bytes_invalid_access_3(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += 0xffffffff; \
+ r3 = r6; \
+ r4 = 0xffffffff; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 4")
+__failure
+__msg("R4 unbounded memory access, use 'var &= const' or 'if (var < const)'")
+__naked void load_bytes_invalid_access_4(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -1; \
+ r3 = r6; \
+ r4 = 0x7fffffff; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 5")
+__failure
+__msg("R4 unbounded memory access, use 'var &= const' or 'if (var < const)'")
+__naked void load_bytes_invalid_access_5(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -512; \
+ r3 = r6; \
+ r4 = 0x7fffffff; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 6")
+__failure __msg("invalid zero-sized read")
+__naked void load_bytes_invalid_access_6(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -512; \
+ r3 = r6; \
+ r4 = 0; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, large access")
+__success __retval(0)
+__naked void skb_load_bytes_large_access(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -512; \
+ r3 = r6; \
+ r4 = 512; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c b/tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c
new file mode 100644
index 000000000000..14a0172e2141
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/raw_tp_writable.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("raw_tracepoint.w")
+__description("raw_tracepoint_writable: reject variable offset")
+__failure
+__msg("R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void tracepoint_writable_reject_variable_offset(void)
+{
+ asm volatile (" \
+ /* r6 is our tp buffer */ \
+ r6 = *(u64*)(r1 + 0); \
+ r1 = %[map_hash_8b] ll; \
+ /* move the key (== 0) to r10-8 */ \
+ w0 = 0; \
+ r2 = r10; \
+ r2 += -8; \
+ *(u64*)(r2 + 0) = r0; \
+ /* lookup in the map */ \
+ call %[bpf_map_lookup_elem]; \
+ /* exit clean if null */ \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: /* shift the buffer pointer to a variable location */\
+ r0 = *(u32*)(r0 + 0); \
+ r6 += r0; \
+ /* clobber whatever's there */ \
+ r7 = 4242; \
+ *(u64*)(r6 + 0) = r7; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ringbuf.c b/tools/testing/selftests/bpf/progs/verifier_ringbuf.c
new file mode 100644
index 000000000000..ae1d521f326c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ringbuf.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ringbuf.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} map_ringbuf SEC(".maps");
+
+SEC("socket")
+__description("ringbuf: invalid reservation offset 1")
+__failure __msg("R1 must have zero offset when passed to release func")
+__failure_unpriv
+__naked void ringbuf_invalid_reservation_offset_1(void)
+{
+ asm volatile (" \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ /* store a pointer to the reserved memory in R6 */\
+ r6 = r0; \
+ /* check whether the reservation was successful */\
+ if r0 == 0 goto l0_%=; \
+ /* spill R6(mem) into the stack */ \
+ *(u64*)(r10 - 8) = r6; \
+ /* fill it back in R7 */ \
+ r7 = *(u64*)(r10 - 8); \
+ /* should be able to access *(R7) = 0 */ \
+ r1 = 0; \
+ *(u64*)(r7 + 0) = r1; \
+ /* submit the reserved ringbuf memory */ \
+ r1 = r7; \
+ /* add invalid offset to reserved ringbuf memory */\
+ r1 += 0xcafe; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ringbuf: invalid reservation offset 2")
+__failure __msg("R7 min value is outside of the allowed memory range")
+__failure_unpriv
+__naked void ringbuf_invalid_reservation_offset_2(void)
+{
+ asm volatile (" \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ /* store a pointer to the reserved memory in R6 */\
+ r6 = r0; \
+ /* check whether the reservation was successful */\
+ if r0 == 0 goto l0_%=; \
+ /* spill R6(mem) into the stack */ \
+ *(u64*)(r10 - 8) = r6; \
+ /* fill it back in R7 */ \
+ r7 = *(u64*)(r10 - 8); \
+ /* add invalid offset to reserved ringbuf memory */\
+ r7 += 0xcafe; \
+ /* should be able to access *(R7) = 0 */ \
+ r1 = 0; \
+ *(u64*)(r7 + 0) = r1; \
+ /* submit the reserved ringbuf memory */ \
+ r1 = r7; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("ringbuf: check passing rb mem to helpers")
+__success __retval(0)
+__naked void passing_rb_mem_to_helpers(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ r7 = r0; \
+ /* check whether the reservation was successful */\
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: /* pass allocated ring buffer memory to fib lookup */\
+ r1 = r6; \
+ r2 = r0; \
+ r3 = 8; \
+ r4 = 0; \
+ call %[bpf_fib_lookup]; \
+ /* submit the ringbuf memory */ \
+ r1 = r7; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_fib_lookup),
+ __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
new file mode 100644
index 000000000000..136e5530b72c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/spill_fill.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} map_ringbuf SEC(".maps");
+
+SEC("socket")
+__description("check valid spill/fill")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(POINTER_VALUE)
+__naked void check_valid_spill_fill(void)
+{
+ asm volatile (" \
+ /* spill R1(ctx) into stack */ \
+ *(u64*)(r10 - 8) = r1; \
+ /* fill it back into R2 */ \
+ r2 = *(u64*)(r10 - 8); \
+ /* should be able to access R0 = *(R2 + 8) */ \
+ /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */\
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check valid spill/fill, skb mark")
+__success __success_unpriv __retval(0)
+__naked void valid_spill_fill_skb_mark(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ *(u64*)(r10 - 8) = r6; \
+ r0 = *(u64*)(r10 - 8); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check valid spill/fill, ptr to mem")
+__success __success_unpriv __retval(0)
+__naked void spill_fill_ptr_to_mem(void)
+{
+ asm volatile (" \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ /* store a pointer to the reserved memory in R6 */\
+ r6 = r0; \
+ /* check whether the reservation was successful */\
+ if r0 == 0 goto l0_%=; \
+ /* spill R6(mem) into the stack */ \
+ *(u64*)(r10 - 8) = r6; \
+ /* fill it back in R7 */ \
+ r7 = *(u64*)(r10 - 8); \
+ /* should be able to access *(R7) = 0 */ \
+ r1 = 0; \
+ *(u64*)(r7 + 0) = r1; \
+ /* submit the reserved ringbuf memory */ \
+ r1 = r7; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check with invalid reg offset 0")
+__failure __msg("R0 pointer arithmetic on ringbuf_mem_or_null prohibited")
+__failure_unpriv
+__naked void with_invalid_reg_offset_0(void)
+{
+ asm volatile (" \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ /* store a pointer to the reserved memory in R6 */\
+ r6 = r0; \
+ /* add invalid offset to memory or NULL */ \
+ r0 += 1; \
+ /* check whether the reservation was successful */\
+ if r0 == 0 goto l0_%=; \
+	/* should not be able to access *(R6) = 0 */	\
+ r1 = 0; \
+ *(u32*)(r6 + 0) = r1; \
+ /* submit the reserved ringbuf memory */ \
+ r1 = r6; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check corrupted spill/fill")
+__failure __msg("R0 invalid mem access 'scalar'")
+__msg_unpriv("attempt to corrupt spilled")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void check_corrupted_spill_fill(void)
+{
+ asm volatile (" \
+ /* spill R1(ctx) into stack */ \
+ *(u64*)(r10 - 8) = r1; \
+ /* mess up with R1 pointer on stack */ \
+ r0 = 0x23; \
+ *(u8*)(r10 - 7) = r0; \
+ /* fill back into R0 is fine for priv. \
+ * R0 now becomes SCALAR_VALUE. \
+ */ \
+ r0 = *(u64*)(r10 - 8); \
+ /* Load from R0 should fail. */ \
+ r0 = *(u64*)(r0 + 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check corrupted spill/fill, LSB")
+__success __failure_unpriv __msg_unpriv("attempt to corrupt spilled")
+__retval(POINTER_VALUE)
+__naked void check_corrupted_spill_fill_lsb(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r1; \
+ r0 = 0xcafe; \
+ *(u16*)(r10 - 8) = r0; \
+ r0 = *(u64*)(r10 - 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check corrupted spill/fill, MSB")
+__success __failure_unpriv __msg_unpriv("attempt to corrupt spilled")
+__retval(POINTER_VALUE)
+__naked void check_corrupted_spill_fill_msb(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r1; \
+ r0 = 0x12345678; \
+ *(u32*)(r10 - 4) = r0; \
+ r0 = *(u64*)(r10 - 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("Spill and refill a u32 const scalar. Offset to skb->data")
+__success __retval(0)
+__naked void scalar_offset_to_skb_data_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ r4 = *(u32*)(r10 - 8); \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */ \
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,off=20,r=20 R2=pkt,r=20 R3=pkt_end R4=20 */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("Spill a u32 const, refill from another half of the uninit u32 from the stack")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid read from stack off -4+0 size 4")
+__retval(0)
+__naked void uninit_u32_from_the_stack(void)
+{
+ asm volatile (" \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+	/* r4 = *(u32 *)(r10 -4) fp-8=????rrrr */	\
+ r4 = *(u32*)(r10 - 4); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("Spill a u32 const scalar. Refill as u16. Offset to skb->data")
+__failure __msg("invalid access to packet")
+__naked void u16_offset_to_skb_data(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ r4 = *(u16*)(r10 - 8); \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill u32 const scalars. Refill as u64. Offset to skb->data")
+__failure __msg("invalid access to packet")
+__naked void u64_offset_to_skb_data(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w6 = 0; \
+ w7 = 20; \
+ *(u32*)(r10 - 4) = r6; \
+ *(u32*)(r10 - 8) = r7; \
+ r4 = *(u16*)(r10 - 8); \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data")
+__failure __msg("invalid access to packet")
+__naked void _6_offset_to_skb_data(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ r4 = *(u16*)(r10 - 6); \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill and refill a u32 const scalar at non 8byte aligned stack addr. Offset to skb->data")
+__failure __msg("invalid access to packet")
+__naked void addr_offset_to_skb_data(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ *(u32*)(r10 - 4) = r4; \
+ r4 = *(u32*)(r10 - 4); \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=U32_MAX */\
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill and refill a umax=40 bounded scalar. Offset to skb->data")
+__success __retval(0)
+__naked void scalar_offset_to_skb_data_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = *(u64*)(r1 + %[__sk_buff_tstamp]); \
+ if r4 <= 40 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: /* *(u32 *)(r10 -8) = r4 R4=umax=40 */ \
+ *(u32*)(r10 - 8) = r4; \
+	/* r4 = *(u32 *)(r10 - 8) */			\
+ r4 = *(u32*)(r10 - 8); \
+ /* r2 += r4 R2=pkt R4=umax=40 */ \
+ r2 += r4; \
+ /* r0 = r2 R2=pkt,umax=40 R4=umax=40 */ \
+ r0 = r2; \
+ /* r2 += 20 R0=pkt,umax=40 R2=pkt,umax=40 */ \
+ r2 += 20; \
+ /* if (r2 > r3) R0=pkt,umax=40 R2=pkt,off=20,umax=40 */\
+ if r2 > r3 goto l1_%=; \
+ /* r0 = *(u32 *)r0 R0=pkt,r=20,umax=40 R2=pkt,off=20,r=20,umax=40 */\
+ r0 = *(u32*)(r0 + 0); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_tstamp, offsetof(struct __sk_buff, tstamp))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill a u32 scalar at fp-4 and then at fp-8")
+__success __retval(0)
+__naked void and_then_at_fp_8(void)
+{
+ asm volatile (" \
+ w4 = 4321; \
+ *(u32*)(r10 - 4) = r4; \
+ *(u32*)(r10 - 8) = r4; \
+ r4 = *(u64*)(r10 - 8); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
new file mode 100644
index 000000000000..e0f77e3e7869
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/stack_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+SEC("socket")
+__description("PTR_TO_STACK store/load")
+__success __success_unpriv __retval(0xfaceb00c)
+__naked void ptr_to_stack_store_load(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -10; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 2) = r0; \
+ r0 = *(u64*)(r1 + 2); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK store/load - bad alignment on off")
+__failure __msg("misaligned stack access off (0x0; 0x0)+-8+2 size 8")
+__failure_unpriv
+__naked void load_bad_alignment_on_off(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -8; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 2) = r0; \
+ r0 = *(u64*)(r1 + 2); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK store/load - bad alignment on reg")
+__failure __msg("misaligned stack access off (0x0; 0x0)+-10+8 size 8")
+__failure_unpriv
+__naked void load_bad_alignment_on_reg(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -10; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 8) = r0; \
+ r0 = *(u64*)(r1 + 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK store/load - out of bounds low")
+__failure __msg("invalid write to stack R1 off=-79992 size=8")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void load_out_of_bounds_low(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -80000; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 8) = r0; \
+ r0 = *(u64*)(r1 + 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK store/load - out of bounds high")
+__failure __msg("invalid write to stack R1 off=0 size=8")
+__failure_unpriv
+__naked void load_out_of_bounds_high(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -8; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 8) = r0; \
+ r0 = *(u64*)(r1 + 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 1")
+__success __success_unpriv __retval(42)
+__naked void to_stack_check_high_1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -1; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 2")
+__success __success_unpriv __retval(42)
+__naked void to_stack_check_high_2(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r0 = 42; \
+ *(u8*)(r1 - 1) = r0; \
+ r0 = *(u8*)(r1 - 1); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 3")
+__success __failure_unpriv
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__retval(42)
+__naked void to_stack_check_high_3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0; \
+ r0 = 42; \
+ *(u8*)(r1 - 1) = r0; \
+ r0 = *(u8*)(r1 - 1); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 4")
+__failure __msg("invalid write to stack R1 off=0 size=1")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_high_4(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 5")
+__failure __msg("invalid write to stack R1")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_high_5(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(__imm_0, (1 << 29) - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 6")
+__failure __msg("invalid write to stack")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_high_6(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 + %[shrt_max]) = r0; \
+ r0 = *(u8*)(r1 + %[shrt_max]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, (1 << 29) - 1),
+ __imm_const(shrt_max, SHRT_MAX)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 7")
+__failure __msg("fp pointer offset")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_high_7(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 + %[shrt_max]) = r0; \
+ r0 = *(u8*)(r1 + %[shrt_max]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, (1 << 29) - 1),
+ __imm_const(shrt_max, SHRT_MAX)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 1")
+__success __success_unpriv __retval(42)
+__naked void to_stack_check_low_1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 2")
+__success __failure_unpriv
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__retval(42)
+__naked void to_stack_check_low_2(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -513; \
+ r0 = 42; \
+ *(u8*)(r1 + 1) = r0; \
+ r0 = *(u8*)(r1 + 1); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 3")
+__failure __msg("invalid write to stack R1 off=-513 size=1")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_low_3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -513; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 4")
+__failure __msg("math between fp pointer")
+__failure_unpriv
+__naked void to_stack_check_low_4(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[int_min]; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 5")
+__failure __msg("invalid write to stack")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_low_5(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(__imm_0, -((1 << 29) - 1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 6")
+__failure __msg("invalid write to stack")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_low_6(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 %[shrt_min]) = r0; \
+ r0 = *(u8*)(r1 %[shrt_min]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, -((1 << 29) - 1)),
+ __imm_const(shrt_min, SHRT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 7")
+__failure __msg("fp pointer offset")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_low_7(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 %[shrt_min]) = r0; \
+ r0 = *(u8*)(r1 %[shrt_min]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, -((1 << 29) - 1)),
+ __imm_const(shrt_min, SHRT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK mixed reg/k, 1")
+__success __success_unpriv __retval(42)
+__naked void stack_mixed_reg_k_1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -3; \
+ r2 = -3; \
+ r1 += r2; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK mixed reg/k, 2")
+__success __success_unpriv __retval(42)
+__naked void stack_mixed_reg_k_2(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ r0 = 0; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = r10; \
+ r1 += -3; \
+ r2 = -3; \
+ r1 += r2; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r5 = r10; \
+ r0 = *(u8*)(r5 - 6); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK mixed reg/k, 3")
+__success __success_unpriv __retval(-3)
+__naked void stack_mixed_reg_k_3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -3; \
+ r2 = -3; \
+ r1 += r2; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK reg")
+__success __success_unpriv __retval(42)
+__naked void ptr_to_stack_reg(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r2 = -3; \
+ r1 += r2; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("stack pointer arithmetic")
+__success __success_unpriv __retval(0)
+__naked void stack_pointer_arithmetic(void)
+{
+ asm volatile (" \
+ r1 = 4; \
+ goto l0_%=; \
+l0_%=: r7 = r10; \
+ r7 += -10; \
+ r7 += -10; \
+ r2 = r7; \
+ r2 += r1; \
+ r0 = 0; \
+ *(u32*)(r2 + 4) = r0; \
+ r2 = r7; \
+ r2 += 8; \
+ r0 = 0; \
+ *(u32*)(r2 + 4) = r0; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("store PTR_TO_STACK in R10 to array map using BPF_B")
+__success __retval(42)
+__naked void array_map_using_bpf_b(void)
+{
+ asm volatile (" \
+ /* Load pointer to map. */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ r0 = 2; \
+ exit; \
+l0_%=: r1 = r0; \
+ /* Copy R10 to R9. */ \
+ r9 = r10; \
+ /* Pollute other registers with unaligned values. */\
+ r2 = -1; \
+ r3 = -1; \
+ r4 = -1; \
+ r5 = -1; \
+ r6 = -1; \
+ r7 = -1; \
+ r8 = -1; \
+ /* Store both R9 and R10 with BPF_B and read back. */\
+ *(u8*)(r1 + 0) = r10; \
+ r2 = *(u8*)(r1 + 0); \
+ *(u8*)(r1 + 0) = r9; \
+ r3 = *(u8*)(r1 + 0); \
+ /* Should read back as same value. */ \
+ if r2 == r3 goto l1_%=; \
+ r0 = 1; \
+ exit; \
+l1_%=: r0 = 42; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_uninit.c b/tools/testing/selftests/bpf/progs/verifier_uninit.c
new file mode 100644
index 000000000000..7718cd7d19ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_uninit.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/uninit.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("read uninitialized register")
+__failure __msg("R2 !read_ok")
+__failure_unpriv
+__naked void read_uninitialized_register(void)
+{
+ asm volatile (" \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("read invalid register")
+__failure __msg("R15 is invalid")
+__failure_unpriv
+__naked void read_invalid_register(void)
+{
+ asm volatile (" \
+ .8byte %[mov64_reg]; \
+ exit; \
+" :
+ : __imm_insn(mov64_reg, BPF_MOV64_REG(BPF_REG_0, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("program doesn't init R0 before exit")
+__failure __msg("R0 !read_ok")
+__failure_unpriv
+__naked void t_init_r0_before_exit(void)
+{
+ asm volatile (" \
+ r2 = r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("program doesn't init R0 before exit in all branches")
+__failure __msg("R0 !read_ok")
+__msg_unpriv("R1 pointer comparison")
+__naked void before_exit_in_all_branches(void)
+{
+ asm volatile (" \
+ if r1 >= 0 goto l0_%=; \
+ r0 = 1; \
+ r0 += 2; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value.c b/tools/testing/selftests/bpf/progs/verifier_value.c
new file mode 100644
index 000000000000..b5af6b6f5acd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("map element value store of cleared call register")
+__failure __msg("R1 !read_ok")
+__failure_unpriv __msg_unpriv("R1 !read_ok")
+__naked void store_of_cleared_call_register(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value with unaligned store")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void element_value_with_unaligned_store(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 += 3; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+ r1 = 43; \
+ *(u64*)(r0 + 2) = r1; \
+ r1 = 44; \
+ *(u64*)(r0 - 2) = r1; \
+ r8 = r0; \
+ r1 = 32; \
+ *(u64*)(r8 + 0) = r1; \
+ r1 = 33; \
+ *(u64*)(r8 + 2) = r1; \
+ r1 = 34; \
+ *(u64*)(r8 - 2) = r1; \
+ r8 += 5; \
+ r1 = 22; \
+ *(u64*)(r8 + 0) = r1; \
+ r1 = 23; \
+ *(u64*)(r8 + 4) = r1; \
+ r1 = 24; \
+ *(u64*)(r8 - 7) = r1; \
+ r7 = r8; \
+ r7 += 3; \
+ r1 = 22; \
+ *(u64*)(r7 + 0) = r1; \
+ r1 = 23; \
+ *(u64*)(r7 + 4) = r1; \
+ r1 = 24; \
+ *(u64*)(r7 - 4) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value with unaligned load")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void element_value_with_unaligned_load(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if r1 >= %[max_entries] goto l0_%=; \
+ r0 += 3; \
+ r7 = *(u64*)(r0 + 0); \
+ r7 = *(u64*)(r0 + 2); \
+ r8 = r0; \
+ r7 = *(u64*)(r8 + 0); \
+ r7 = *(u64*)(r8 + 2); \
+ r0 += 5; \
+ r7 = *(u64*)(r0 + 0); \
+ r7 = *(u64*)(r0 + 4); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value is preserved across register spilling")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void is_preserved_across_register_spilling(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 += %[test_val_foo]; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+ r1 = r10; \
+ r1 += -184; \
+ *(u64*)(r1 + 0) = r0; \
+ r3 = *(u64*)(r1 + 0); \
+ r1 = 42; \
+ *(u64*)(r3 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c b/tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c
new file mode 100644
index 000000000000..d7a5ba9bbe6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value_adj_spill.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("map element value is preserved across register spilling")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void is_preserved_across_register_spilling(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+ r1 = r10; \
+ r1 += -184; \
+ *(u64*)(r1 + 0) = r0; \
+ r3 = *(u64*)(r1 + 0); \
+ r1 = 42; \
+ *(u64*)(r3 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value or null is marked on register spilling")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void is_marked_on_register_spilling(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r1 = r10; \
+ r1 += -152; \
+ *(u64*)(r1 + 0) = r0; \
+ if r0 == 0 goto l0_%=; \
+ r3 = *(u64*)(r1 + 0); \
+ r1 = 42; \
+ *(u64*)(r3 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_or_null.c b/tools/testing/selftests/bpf/progs/verifier_value_or_null.c
new file mode 100644
index 000000000000..8ff668a242eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value_or_null.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value_or_null.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("tc")
+__description("multiple registers share map_lookup_elem result")
+__success __retval(0)
+__naked void share_map_lookup_elem_result(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("alu ops on ptr_to_map_value_or_null, 1")
+__failure __msg("R4 pointer arithmetic on map_value_or_null")
+__naked void map_value_or_null_1(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ r4 += -2; \
+ r4 += 2; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("alu ops on ptr_to_map_value_or_null, 2")
+__failure __msg("R4 pointer arithmetic on map_value_or_null")
+__naked void map_value_or_null_2(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ r4 &= -1; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("alu ops on ptr_to_map_value_or_null, 3")
+__failure __msg("R4 pointer arithmetic on map_value_or_null")
+__naked void map_value_or_null_3(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ r4 <<= 1; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("invalid memory access with multiple map_lookup_elem calls")
+__failure __msg("R4 !read_ok")
+__naked void multiple_map_lookup_elem_calls(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ r8 = r1; \
+ r7 = r2; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ r1 = r8; \
+ r2 = r7; \
+ call %[bpf_map_lookup_elem]; \
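+	/* The second call clobbers caller-saved r0-r5, so r4 no\
+	 * longer holds the first lookup's result and reading	\
+	 * through it must be rejected.	\
+	 */	\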
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("valid indirect map_lookup_elem access with 2nd lookup in branch")
+__success __retval(0)
+__naked void with_2nd_lookup_in_branch(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ r8 = r1; \
+ r7 = r2; \
+ call %[bpf_map_lookup_elem]; \
+ r2 = 10; \
+ if r2 != 0 goto l0_%=; \
+ r1 = r8; \
+ r2 = r7; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r4 = r0; \
+ if r0 == 0 goto l1_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access from else condition")
+__failure __msg("R0 unbounded memory access")
+__failure_unpriv __msg_unpriv("R0 leaks addr")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void map_access_from_else_condition(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if r1 >= %[__imm_0] goto l1_%=; \
+ r1 += 1; \
+l1_%=: r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, MAX_ENTRIES-1),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("map lookup and null branch prediction")
+__success __retval(0)
+__naked void lookup_and_null_branch_prediction(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r6 = r0; \
+ if r6 == 0 goto l0_%=; \
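+	/* r6 is known non-zero on the fall-through path, so the\
+	 * branch below is always taken and the invalid frame	\
+	 * pointer write is dead code.	\
+	 */	\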
+ if r6 != 0 goto l0_%=; \
+ r10 += 10; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("MAP_VALUE_OR_NULL check_ids() in regsafe()")
+__failure __msg("R8 invalid mem access 'map_value_or_null'")
+__failure_unpriv __msg_unpriv("")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void null_check_ids_in_regsafe(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ /* r9 = map_lookup_elem(...) */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r9 = r0; \
+ /* r8 = map_lookup_elem(...) */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r8 = r0; \
+ /* r7 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r7 = r0; \
+ /* r6 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r6 = r0; \
+ /* if r6 > r7 goto +1 ; no new information about the state is derived from\
+	 *			  ; this check, thus the produced verifier states differ\
+ * ; only in 'insn_idx' \
+ * r9 = r8 ; optionally share ID between r9 and r8\
+ */ \
+ if r6 > r7 goto l0_%=; \
+ r9 = r8; \
+l0_%=: /* if r9 == 0 goto <exit> */ \
+ if r9 == 0 goto l1_%=; \
+ /* read map value via r8, this is not always \
+	 * safe because r8 might not be equal to r9.	\
+ */ \
+ r0 = *(u64*)(r8 + 0); \
+l1_%=: /* exit 0 */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c
new file mode 100644
index 000000000000..83a90afba785
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/var_off.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("lwt_in")
+__description("variable-offset ctx access")
+__failure __msg("variable ctx access var_off=(0x0; 0x4)")
+__naked void variable_offset_ctx_access(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ /* add it to skb. We now have either &skb->len or\
+ * &skb->pkt_type, but we don't know which \
+ */ \
+ r1 += r2; \
+ /* dereference it */ \
+ r0 = *(u32*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("variable-offset stack read, priv vs unpriv")
+__success __failure_unpriv
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__retval(0)
+__naked void stack_read_priv_vs_unpriv(void)
+{
+ asm volatile (" \
+ /* Fill the top 8 bytes of the stack */ \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ r2 -= 8; \
+ /* add it to fp. We now have either fp-4 or fp-8, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* dereference it for a stack read */ \
+ r0 = *(u32*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("lwt_in")
+__description("variable-offset stack read, uninitialized")
+__failure __msg("invalid variable-offset read from stack R2")
+__naked void variable_offset_stack_read_uninitialized(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ r2 -= 8; \
+ /* add it to fp. We now have either fp-4 or fp-8, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* dereference it for a stack read */ \
+ r0 = *(u32*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("variable-offset stack write, priv vs unpriv")
+__success __failure_unpriv
+/* Variable-offset stack access is rejected for unprivileged programs.
+ */
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__retval(0)
+__naked void stack_write_priv_vs_unpriv(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 8-byte aligned */ \
+ r2 &= 8; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-8 or fp-16, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* Dereference it for a stack write */ \
+ r0 = 0; \
+ *(u64*)(r2 + 0) = r0; \
+ /* Now read from the address we just wrote. This shows\
+	 * that, after a variable-offset write, a privileged\
+	 * program can read the slots that were in the range of\
+	 * that write (even if the verifier doesn't actually know\
+	 * whether the slot being read was really written to or not).\
+ */ \
+ r3 = *(u64*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("variable-offset stack write clobbers spilled regs")
+__failure
+/* In the privileged case, dereferencing a spilled-and-then-filled
+ * register is rejected because the previous variable offset stack
+ * write might have overwritten the spilled pointer (i.e. we lose track
+ * of the spilled register when we analyze the write).
+ */
+__msg("R2 invalid mem access 'scalar'")
+__failure_unpriv
+/* The unprivileged case is not too interesting; variable
+ * stack access is rejected.
+ */
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__naked void stack_write_clobbers_spilled_regs(void)
+{
+ asm volatile (" \
+ /* Dummy instruction; needed because we need to patch the next one\
+ * and we can't patch the first instruction. \
+ */ \
+ r6 = 0; \
+ /* Make R0 a map ptr */ \
+ r0 = %[map_hash_8b] ll; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 8-byte aligned */ \
+ r2 &= 8; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-8 or fp-16, but\
+ * we don't know which. \
+ */ \
+ r2 += r10; \
+ /* Spill R0(map ptr) into stack */ \
+ *(u64*)(r10 - 8) = r0; \
+ /* Dereference the unknown value for a stack write */\
+ r0 = 0; \
+ *(u64*)(r2 + 0) = r0; \
+ /* Fill the register back into R2 */ \
+ r2 = *(u64*)(r10 - 8); \
+ /* Try to dereference R2 for a memory load */ \
+ r0 = *(u64*)(r2 + 8); \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("sockops")
+__description("indirect variable-offset stack access, unbounded")
+__failure __msg("invalid unbounded variable-offset indirect access to stack R4")
+__naked void variable_offset_stack_access_unbounded(void)
+{
+ asm volatile (" \
+ r2 = 6; \
+ r3 = 28; \
+ /* Fill the top 16 bytes of the stack. */ \
+ r4 = 0; \
+ *(u64*)(r10 - 16) = r4; \
+ r4 = 0; \
+ *(u64*)(r10 - 8) = r4; \
+ /* Get an unknown value. */ \
+ r4 = *(u64*)(r1 + %[bpf_sock_ops_bytes_received]);\
+ /* Check the lower bound but don't check the upper one. */\
+ if r4 s< 0 goto l0_%=; \
+ /* Point the lower bound to initialized stack. Offset is now in range\
+ * from fp-16 to fp+0x7fffffffffffffef, i.e. max value is unbounded.\
+ */ \
+ r4 -= 16; \
+ r4 += r10; \
+ r5 = 8; \
+ /* Dereference it indirectly. */ \
+ call %[bpf_getsockopt]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_getsockopt),
+ __imm_const(bpf_sock_ops_bytes_received, offsetof(struct bpf_sock_ops, bytes_received))
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("indirect variable-offset stack access, max out of bound")
+__failure __msg("invalid variable-offset indirect access to stack R2")
+__naked void access_max_out_of_bound(void)
+{
+ asm volatile (" \
+ /* Fill the top 8 bytes of the stack */ \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ r2 -= 8; \
+ /* add it to fp. We now have either fp-4 or fp-8, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* dereference it indirectly */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("indirect variable-offset stack access, min out of bound")
+__failure __msg("invalid variable-offset indirect access to stack R2")
+__naked void access_min_out_of_bound(void)
+{
+ asm volatile (" \
+ /* Fill the top 8 bytes of the stack */ \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ r2 -= 516; \
+ /* add it to fp. We now have either fp-516 or fp-512, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* dereference it indirectly */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("indirect variable-offset stack access, min_off < min_initialized")
+__failure __msg("invalid indirect read from stack R2 var_off")
+__naked void access_min_off_min_initialized(void)
+{
+ asm volatile (" \
+ /* Fill only the top 8 bytes of the stack. */ \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned. */ \
+ r2 &= 4; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-12 or fp-16, but we don't know\
+	 * which. An 8-byte read at fp-16 would touch uninitialized stack.\
+ */ \
+ r2 += r10; \
+ /* Dereference it indirectly. */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("indirect variable-offset stack access, priv vs unpriv")
+__success __failure_unpriv
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__retval(0)
+__naked void stack_access_priv_vs_unpriv(void)
+{
+ asm volatile (" \
+ /* Fill the top 16 bytes of the stack. */ \
+ r2 = 0; \
+ *(u64*)(r10 - 16) = r2; \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value. */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned. */ \
+ r2 &= 4; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know\
+ * which, but either way it points to initialized stack.\
+ */ \
+ r2 += r10; \
+ /* Dereference it indirectly. */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("indirect variable-offset stack access, ok")
+__success __retval(0)
+__naked void variable_offset_stack_access_ok(void)
+{
+ asm volatile (" \
+ /* Fill the top 16 bytes of the stack. */ \
+ r2 = 0; \
+ *(u64*)(r10 - 16) = r2; \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value. */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned. */ \
+ r2 &= 4; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know\
+ * which, but either way it points to initialized stack.\
+ */ \
+ r2 += r10; \
+ /* Dereference it indirectly. */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_xadd.c b/tools/testing/selftests/bpf/progs/verifier_xadd.c
new file mode 100644
index 000000000000..05a0a55adb45
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_xadd.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/xadd.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("tc")
+__description("xadd/w check unaligned stack")
+__failure __msg("misaligned stack access off")
+__naked void xadd_w_check_unaligned_stack(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ *(u64*)(r10 - 8) = r0; \
+ lock *(u32 *)(r10 - 7) += w0; \
+ r0 = *(u64*)(r10 - 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("xadd/w check unaligned map")
+__failure __msg("misaligned value access off")
+__naked void xadd_w_check_unaligned_map(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = 1; \
+ lock *(u32 *)(r0 + 3) += w1; \
+ r0 = *(u32*)(r0 + 3); \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("xadd/w check unaligned pkt")
+__failure __msg("BPF_ATOMIC stores into R2 pkt is not allowed")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void xadd_w_check_unaligned_pkt(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ r0 = 99; \
+ goto l1_%=; \
+l0_%=: r0 = 1; \
+ r1 = 0; \
+ *(u32*)(r2 + 0) = r1; \
+ r1 = 0; \
+ *(u32*)(r2 + 3) = r1; \
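+	/* Atomic RMW on a packet pointer is rejected outright,\
+	 * even with BPF_F_ANY_ALIGNMENT, so neither add below	\
+	 * is allowed.	\
+	 */	\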
+ lock *(u32 *)(r2 + 1) += w0; \
+ lock *(u32 *)(r2 + 2) += w0; \
+ r0 = *(u32*)(r2 + 1); \
+l1_%=: exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("xadd/w check whether src/dst got mangled, 1")
+__success __retval(3)
+__naked void src_dst_got_mangled_1(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ r6 = r0; \
+ r7 = r10; \
+ *(u64*)(r10 - 8) = r0; \
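+	/* Two atomic adds of 1 on top of the stored 1 must leave\
+	 * 3 in the slot; r6/r7 then verify that neither the src\
+	 * (r0) nor the dst base (r10) register got mangled.	\
+	 */	\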
+ lock *(u64 *)(r10 - 8) += r0; \
+ lock *(u64 *)(r10 - 8) += r0; \
+ if r6 != r0 goto l0_%=; \
+ if r7 != r10 goto l0_%=; \
+ r0 = *(u64*)(r10 - 8); \
+ exit; \
+l0_%=: r0 = 42; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("xadd/w check whether src/dst got mangled, 2")
+__success __retval(3)
+__naked void src_dst_got_mangled_2(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ r6 = r0; \
+ r7 = r10; \
+ *(u32*)(r10 - 8) = r0; \
+ lock *(u32 *)(r10 - 8) += w0; \
+ lock *(u32 *)(r10 - 8) += w0; \
+ if r6 != r0 goto l0_%=; \
+ if r7 != r10 goto l0_%=; \
+ r0 = *(u32*)(r10 - 8); \
+ exit; \
+l0_%=: r0 = 42; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_xdp.c b/tools/testing/selftests/bpf/progs/verifier_xdp.c
new file mode 100644
index 000000000000..50768ed179b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_xdp.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/xdp.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
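+/* The ingress_ifindex of a real netdev is always >= 1, so the
+ * program is expected to return 1 at run time.
+ */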
+SEC("xdp")
+__description("XDP, using ifindex from netdev")
+__success __retval(1)
+__naked void xdp_using_ifindex_from_netdev(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r2 = *(u32*)(r1 + %[xdp_md_ingress_ifindex]); \
+ if r2 < 1 goto l0_%=; \
+ r0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(xdp_md_ingress_ifindex, offsetof(struct xdp_md, ingress_ifindex))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
new file mode 100644
index 000000000000..df2dfd1b15d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
@@ -0,0 +1,1722 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
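+/* Each test below pins a relation between two packet pointers
+ * (pkt_data, pkt_end or pkt_meta) and checks that the verifier
+ * propagates the resulting bounds to accesses on both branch edges.
+ */
+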
+SEC("xdp")
+__description("XDP pkt read, pkt_end mangling, bad access 1")
+__failure __msg("R3 pointer arithmetic on pkt_end")
+__naked void end_mangling_bad_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ r3 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end mangling, bad access 2")
+__failure __msg("R3 pointer arithmetic on pkt_end")
+__naked void end_mangling_bad_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ r3 -= 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_corner_case_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_1_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 4); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_2_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 9); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_pkt_data_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 6); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_pkt_end_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 6); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_2_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_corner_case_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_1_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 4); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 9); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_pkt_end_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u32*)(r1 - 5); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_2_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_corner_case_good_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_1_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 4); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 9); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_corner_case_good_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_1_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 4); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_2_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 9); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_pkt_data_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u32*)(r1 - 5); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_1_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 4); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_9(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 9); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_9(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_pkt_meta_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_10(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 6); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_2_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_corner_case_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_10(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_pkt_data_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_11(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 6); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_11(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_corner_case_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_1_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 4); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_2_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_12(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 9); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_12(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_pkt_data_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u32*)(r1 - 5); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_13(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_13(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_corner_case_good_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_1_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 4); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_2_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_14(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 9); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_14(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_1_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 4); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_15(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 9); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_15(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_pkt_meta_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u32*)(r1 - 5); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_16(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_2_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_corner_case_good_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_16(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_features.c b/tools/testing/selftests/bpf/progs/xdp_features.c
index 87c247d56f72..67424084a38a 100644
--- a/tools/testing/selftests/bpf/progs/xdp_features.c
+++ b/tools/testing/selftests/bpf/progs/xdp_features.c
@@ -70,7 +70,6 @@ xdp_process_echo_packet(struct xdp_md *xdp, bool dut)
struct tlv_hdr *tlv;
struct udphdr *uh;
__be16 port;
- __u8 *cmd;
if (eh + 1 > (struct ethhdr *)data_end)
return -EINVAL;
diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
index 4c55b4d79d3d..e1c787815e44 100644
--- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -12,10 +12,14 @@ struct {
__type(value, __u32);
} xsk SEC(".maps");
+__u64 pkts_skip = 0;
+__u64 pkts_fail = 0;
+__u64 pkts_redir = 0;
+
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
-extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
- __u32 *hash) __ksym;
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+ enum xdp_rss_hash_type *rss_type) __ksym;
SEC("xdp")
int rx(struct xdp_md *ctx)
@@ -26,7 +30,7 @@ int rx(struct xdp_md *ctx)
struct udphdr *udp = NULL;
struct iphdr *iph = NULL;
struct xdp_meta *meta;
- int ret;
+ int err;
data = (void *)(long)ctx->data;
data_end = (void *)(long)ctx->data_end;
@@ -46,17 +50,20 @@ int rx(struct xdp_md *ctx)
udp = NULL;
}
- if (!udp)
+ if (!udp) {
+ __sync_add_and_fetch(&pkts_skip, 1);
return XDP_PASS;
+ }
- if (udp->dest != bpf_htons(9091))
+ /* Forwarding UDP:9091 to AF_XDP */
+ if (udp->dest != bpf_htons(9091)) {
+ __sync_add_and_fetch(&pkts_skip, 1);
return XDP_PASS;
+ }
- bpf_printk("forwarding UDP:9091 to AF_XDP");
-
- ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
- if (ret != 0) {
- bpf_printk("bpf_xdp_adjust_meta returned %d", ret);
+ err = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
+ if (err) {
+ __sync_add_and_fetch(&pkts_fail, 1);
return XDP_PASS;
}
@@ -65,20 +72,19 @@ int rx(struct xdp_md *ctx)
meta = data_meta;
if (meta + 1 > data) {
- bpf_printk("bpf_xdp_adjust_meta doesn't appear to work");
+ __sync_add_and_fetch(&pkts_fail, 1);
return XDP_PASS;
}
- if (!bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp))
- bpf_printk("populated rx_timestamp with %llu", meta->rx_timestamp);
- else
+ err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp);
+ if (err)
meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */
- if (!bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash))
- bpf_printk("populated rx_hash with %u", meta->rx_hash);
- else
- meta->rx_hash = 0; /* Used by AF_XDP as not avail signal */
+ err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
+ if (err < 0)
+ meta->rx_hash_err = err; /* Used by AF_XDP as no hash signal */
+ __sync_add_and_fetch(&pkts_redir, 1);
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
index 77678b034389..d151d406a123 100644
--- a/tools/testing/selftests/bpf/progs/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
@@ -21,8 +21,8 @@ struct {
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
-extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
- __u32 *hash) __ksym;
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+ enum xdp_rss_hash_type *rss_type) __ksym;
SEC("xdp")
int rx(struct xdp_md *ctx)
@@ -56,7 +56,7 @@ int rx(struct xdp_md *ctx)
if (timestamp == 0)
meta->rx_timestamp = 1;
- bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash);
+ bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata2.c b/tools/testing/selftests/bpf/progs/xdp_metadata2.c
index cf69d05451c3..85f88d9d7a78 100644
--- a/tools/testing/selftests/bpf/progs/xdp_metadata2.c
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata2.c
@@ -5,17 +5,18 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
- __u32 *hash) __ksym;
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+ enum xdp_rss_hash_type *rss_type) __ksym;
int called;
SEC("freplace/rx")
int freplace_rx(struct xdp_md *ctx)
{
+ enum xdp_rss_hash_type type = 0;
u32 hash = 0;
/* Call _any_ metadata function to make sure we don't crash. */
- bpf_xdp_metadata_rx_hash(ctx, &hash);
+ bpf_xdp_metadata_rx_hash(ctx, &hash, &type);
called++;
return XDP_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
index 4ad73847b8a5..54cf1765118b 100644
--- a/tools/testing/selftests/bpf/progs/xdping_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -89,7 +89,6 @@ static __always_inline int icmp_check(struct xdp_md *ctx, int type)
SEC("xdp")
int xdping_client(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct pinginfo *pinginfo = NULL;
struct ethhdr *eth = data;
@@ -153,7 +152,6 @@ int xdping_client(struct xdp_md *ctx)
SEC("xdp")
int xdping_server(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct ethhdr *eth = data;
struct icmphdr *icmph;
diff --git a/tools/testing/selftests/bpf/progs/xdpwall.c b/tools/testing/selftests/bpf/progs/xdpwall.c
index 7a891a0c3a39..c2dd0c28237a 100644
--- a/tools/testing/selftests/bpf/progs/xdpwall.c
+++ b/tools/testing/selftests/bpf/progs/xdpwall.c
@@ -321,7 +321,6 @@ int edgewall(struct xdp_md *ctx)
void *data = (void *)(long)(ctx->data);
struct fw_match_info match_info = {};
struct pkt_info info = {};
- __u8 parse_err = NO_ERR;
void *transport_hdr;
struct ethhdr *eth;
bool filter_res;
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
index 744a01d0e57d..a630c95c7471 100644
--- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "xsk_xdp_metadata.h"
struct {
__uint(type, BPF_MAP_TYPE_XSKMAP);
@@ -12,6 +13,7 @@ struct {
} xsk SEC(".maps");
static unsigned int idx;
+int count = 0;
SEC("xdp") int xsk_def_prog(struct xdp_md *xdp)
{
@@ -27,4 +29,27 @@ SEC("xdp") int xsk_xdp_drop(struct xdp_md *xdp)
return bpf_redirect_map(&xsk, 0, XDP_DROP);
}
+SEC("xdp") int xsk_xdp_populate_metadata(struct xdp_md *xdp)
+{
+ void *data, *data_meta;
+ struct xdp_info *meta;
+ int err;
+
+ /* Reserve enough for all custom metadata. */
+ err = bpf_xdp_adjust_meta(xdp, -(int)sizeof(struct xdp_info));
+ if (err)
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_meta = (void *)(long)xdp->data_meta;
+
+ if (data_meta + sizeof(struct xdp_info) > data)
+ return XDP_DROP;
+
+ meta = data_meta;
+ meta->count = count++;
+
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
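+
+/* Sketch (not part of this patch): an AF_XDP consumer can read this
+ * metadata back from just in front of the packet data, since
+ * bpf_xdp_adjust_meta() placed it immediately before the frame:
+ *
+ *   struct xdp_info *meta = (struct xdp_info *)(pkt_data - sizeof(*meta));
+ */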
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_ftrace.sh b/tools/testing/selftests/bpf/test_ftrace.sh
index 20de7bb873bc..f5109eb0e951 100755
--- a/tools/testing/selftests/bpf/test_ftrace.sh
+++ b/tools/testing/selftests/bpf/test_ftrace.sh
@@ -1,6 +1,11 @@
#!/bin/bash
-TR=/sys/kernel/debug/tracing/
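+# Since Linux 4.1 tracefs has its own mount point at /sys/kernel/tracing;
+# fall back to the legacy debugfs path on older systems.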
+if [[ -e /sys/kernel/tracing/trace ]]; then
+ TR=/sys/kernel/tracing/
+else
+ TR=/sys/kernel/debug/tracing/
+fi
+
clear_trace() { # reset trace output
echo > $TR/trace
}
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index bf41390157bf..47e9e076bc8f 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -1,9 +1,14 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <linux/capability.h>
#include <stdlib.h>
#include <test_progs.h>
#include <bpf/btf.h>
+#include "autoconf_helper.h"
+#include "unpriv_helpers.h"
+#include "cap_helpers.h"
+
#define str_has_pfx(str, pfx) \
(strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)
@@ -12,16 +17,48 @@
#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
+#define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv"
+#define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv"
+#define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv="
#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
+#define TEST_TAG_DESCRIPTION_PFX "comment:test_description="
+#define TEST_TAG_RETVAL_PFX "comment:test_retval="
+#define TEST_TAG_RETVAL_PFX_UNPRIV "comment:test_retval_unpriv="
-struct test_spec {
- const char *name;
+/* Warning: duplicated in bpf_misc.h */
+#define POINTER_VALUE 0xcafe4all
+#define TEST_DATA_LEN 64
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#define EFFICIENT_UNALIGNED_ACCESS 1
+#else
+#define EFFICIENT_UNALIGNED_ACCESS 0
+#endif
+
+static int sysctl_unpriv_disabled = -1;
+
+enum mode {
+ PRIV = 1,
+ UNPRIV = 2
+};
+
+struct test_subspec {
+ char *name;
bool expect_failure;
const char **expect_msgs;
size_t expect_msg_cnt;
+ int retval;
+ bool execute;
+};
+
+struct test_spec {
+ const char *prog_name;
+ struct test_subspec priv;
+ struct test_subspec unpriv;
int log_level;
int prog_flags;
+ int mode_mask;
};
static int tester_init(struct test_loader *tester)
@@ -44,17 +81,87 @@ void test_loader_fini(struct test_loader *tester)
free(tester->log_buf);
}
+static void free_test_spec(struct test_spec *spec)
+{
+ free(spec->priv.name);
+ free(spec->unpriv.name);
+ free(spec->priv.expect_msgs);
+ free(spec->unpriv.expect_msgs);
+}
+
+static int push_msg(const char *msg, struct test_subspec *subspec)
+{
+ void *tmp;
+
+ tmp = realloc(subspec->expect_msgs, (1 + subspec->expect_msg_cnt) * sizeof(void *));
+ if (!tmp) {
+ ASSERT_FAIL("failed to realloc memory for messages\n");
+ return -ENOMEM;
+ }
+ subspec->expect_msgs = tmp;
+ subspec->expect_msgs[subspec->expect_msg_cnt++] = msg;
+
+ return 0;
+}
+
+static int parse_int(const char *str, int *val, const char *name)
+{
+ char *end;
+ long tmp;
+
+ errno = 0;
+ if (str_has_pfx(str, "0x"))
+ tmp = strtol(str + 2, &end, 16);
+ else
+ tmp = strtol(str, &end, 10);
+ if (errno || end[0] != '\0') {
+ PRINT_FAIL("failed to parse %s from '%s'\n", name, str);
+ return -EINVAL;
+ }
+ *val = tmp;
+ return 0;
+}
+
+static int parse_retval(const char *str, int *val, const char *name)
+{
+ struct {
+ char *name;
+ int val;
+ } named_values[] = {
+ { "INT_MIN" , INT_MIN },
+ { "POINTER_VALUE", POINTER_VALUE },
+ { "TEST_DATA_LEN", TEST_DATA_LEN },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(named_values); ++i) {
+ if (strcmp(str, named_values[i].name) != 0)
+ continue;
+ *val = named_values[i].val;
+ return 0;
+ }
+
+ return parse_int(str, val, name);
+}
+
+/* Uses btf_decl_tag attributes to describe the expected test
+ * behavior, see bpf_misc.h for detailed description of each attribute
+ * and attribute combinations.
+ */
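+/* For example (a sketch only; names are hypothetical, the macros come
+ * from bpf_misc.h and expand to btf_decl_tag attributes):
+ *
+ *   SEC("xdp")
+ *   __description("some test")
+ *   __success __retval(0)
+ *   __naked void some_test(void) { ... }
+ */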
static int parse_test_spec(struct test_loader *tester,
struct bpf_object *obj,
struct bpf_program *prog,
struct test_spec *spec)
{
+ const char *description = NULL;
+ bool has_unpriv_result = false;
+ bool has_unpriv_retval = false;
+ int func_id, i, err = 0;
struct btf *btf;
- int func_id, i;
memset(spec, 0, sizeof(*spec));
- spec->name = bpf_program__name(prog);
+ spec->prog_name = bpf_program__name(prog);
btf = bpf_object__btf(obj);
if (!btf) {
@@ -62,16 +169,16 @@ static int parse_test_spec(struct test_loader *tester,
return -EINVAL;
}
- func_id = btf__find_by_name_kind(btf, spec->name, BTF_KIND_FUNC);
+ func_id = btf__find_by_name_kind(btf, spec->prog_name, BTF_KIND_FUNC);
if (func_id < 0) {
- ASSERT_FAIL("failed to find FUNC BTF type for '%s'", spec->name);
+ ASSERT_FAIL("failed to find FUNC BTF type for '%s'", spec->prog_name);
return -EINVAL;
}
for (i = 1; i < btf__type_cnt(btf); i++) {
+ const char *s, *val, *msg;
const struct btf_type *t;
- const char *s, *val;
- char *e;
+ int tmp;
t = btf__type_by_id(btf, i);
if (!btf_is_decl_tag(t))
@@ -81,31 +188,54 @@ static int parse_test_spec(struct test_loader *tester,
continue;
s = btf__str_by_offset(btf, t->name_off);
- if (strcmp(s, TEST_TAG_EXPECT_FAILURE) == 0) {
- spec->expect_failure = true;
+ if (str_has_pfx(s, TEST_TAG_DESCRIPTION_PFX)) {
+ description = s + sizeof(TEST_TAG_DESCRIPTION_PFX) - 1;
+ } else if (strcmp(s, TEST_TAG_EXPECT_FAILURE) == 0) {
+ spec->priv.expect_failure = true;
+ spec->mode_mask |= PRIV;
} else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) {
- spec->expect_failure = false;
+ spec->priv.expect_failure = false;
+ spec->mode_mask |= PRIV;
+ } else if (strcmp(s, TEST_TAG_EXPECT_FAILURE_UNPRIV) == 0) {
+ spec->unpriv.expect_failure = true;
+ spec->mode_mask |= UNPRIV;
+ has_unpriv_result = true;
+ } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS_UNPRIV) == 0) {
+ spec->unpriv.expect_failure = false;
+ spec->mode_mask |= UNPRIV;
+ has_unpriv_result = true;
} else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) {
- void *tmp;
- const char **msg;
-
- tmp = realloc(spec->expect_msgs,
- (1 + spec->expect_msg_cnt) * sizeof(void *));
- if (!tmp) {
- ASSERT_FAIL("failed to realloc memory for messages\n");
- return -ENOMEM;
- }
- spec->expect_msgs = tmp;
- msg = &spec->expect_msgs[spec->expect_msg_cnt++];
- *msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1;
+ msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1;
+ err = push_msg(msg, &spec->priv);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= PRIV;
+ } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV)) {
+ msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX_UNPRIV) - 1;
+ err = push_msg(msg, &spec->unpriv);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if (str_has_pfx(s, TEST_TAG_RETVAL_PFX)) {
+ val = s + sizeof(TEST_TAG_RETVAL_PFX) - 1;
+ err = parse_retval(val, &spec->priv.retval, "__retval");
+ if (err)
+ goto cleanup;
+ spec->priv.execute = true;
+ spec->mode_mask |= PRIV;
+ } else if (str_has_pfx(s, TEST_TAG_RETVAL_PFX_UNPRIV)) {
+ val = s + sizeof(TEST_TAG_RETVAL_PFX_UNPRIV) - 1;
+ err = parse_retval(val, &spec->unpriv.retval, "__retval_unpriv");
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ spec->unpriv.execute = true;
+ has_unpriv_retval = true;
} else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) {
val = s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1;
- errno = 0;
- spec->log_level = strtol(val, &e, 0);
- if (errno || e[0] != '\0') {
- ASSERT_FAIL("failed to parse test log level from '%s'", s);
- return -EINVAL;
- }
+ err = parse_int(val, &spec->log_level, "test log level");
+ if (err)
+ goto cleanup;
} else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) {
val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1;
if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) {
@@ -121,17 +251,74 @@ static int parse_test_spec(struct test_loader *tester,
} else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) {
spec->prog_flags |= BPF_F_XDP_HAS_FRAGS;
} else /* assume numeric value */ {
- errno = 0;
- spec->prog_flags |= strtol(val, &e, 0);
- if (errno || e[0] != '\0') {
- ASSERT_FAIL("failed to parse test prog flags from '%s'", s);
- return -EINVAL;
- }
+ err = parse_int(val, &tmp, "test prog flags");
+ if (err)
+ goto cleanup;
+ spec->prog_flags |= tmp;
+ }
+ }
+ }
+
+ if (spec->mode_mask == 0)
+ spec->mode_mask = PRIV;
+
+ if (!description)
+ description = spec->prog_name;
+
+ if (spec->mode_mask & PRIV) {
+ spec->priv.name = strdup(description);
+ if (!spec->priv.name) {
+ PRINT_FAIL("failed to allocate memory for priv.name\n");
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ }
+
+ if (spec->mode_mask & UNPRIV) {
+ int descr_len = strlen(description);
+ const char *suffix = " @unpriv";
+ char *name;
+
+ name = malloc(descr_len + strlen(suffix) + 1);
+ if (!name) {
+ PRINT_FAIL("failed to allocate memory for unpriv.name\n");
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ strcpy(name, description);
+ strcpy(&name[descr_len], suffix);
+ spec->unpriv.name = name;
+ }
+
+ if (spec->mode_mask & UNPRIV) {
+ if (!has_unpriv_result)
+ spec->unpriv.expect_failure = spec->priv.expect_failure;
+
+ if (!has_unpriv_retval) {
+ spec->unpriv.retval = spec->priv.retval;
+ spec->unpriv.execute = spec->priv.execute;
+ }
+
+ if (!spec->unpriv.expect_msgs) {
+ size_t sz = spec->priv.expect_msg_cnt * sizeof(void *);
+
+ spec->unpriv.expect_msgs = malloc(sz);
+ if (!spec->unpriv.expect_msgs) {
+ PRINT_FAIL("failed to allocate memory for unpriv.expect_msgs\n");
+ err = -ENOMEM;
+ goto cleanup;
}
+ memcpy(spec->unpriv.expect_msgs, spec->priv.expect_msgs, sz);
+ spec->unpriv.expect_msg_cnt = spec->priv.expect_msg_cnt;
}
}
return 0;
+
+cleanup:
+ free_test_spec(spec);
+ return err;
}
static void prepare_case(struct test_loader *tester,
@@ -148,7 +335,7 @@ static void prepare_case(struct test_loader *tester,
bpf_program__set_log_buf(prog, tester->log_buf, tester->log_buf_sz);
- /* Make sure we set at least minimal log level, unless test requirest
+ /* Make sure we set at least minimal log level, unless test requires
* even higher level already. Make sure to preserve independent log
* level 4 (verifier stats), though.
*/
@@ -172,18 +359,18 @@ static void emit_verifier_log(const char *log_buf, bool force)
}
static void validate_case(struct test_loader *tester,
- struct test_spec *spec,
+ struct test_subspec *subspec,
struct bpf_object *obj,
struct bpf_program *prog,
int load_err)
{
int i, j;
- for (i = 0; i < spec->expect_msg_cnt; i++) {
+ for (i = 0; i < subspec->expect_msg_cnt; i++) {
char *match;
const char *expect_msg;
- expect_msg = spec->expect_msgs[i];
+ expect_msg = subspec->expect_msgs[i];
match = strstr(tester->log_buf + tester->next_match_pos, expect_msg);
if (!ASSERT_OK_PTR(match, "expect_msg")) {
@@ -191,7 +378,8 @@ static void validate_case(struct test_loader *tester,
if (env.verbosity == VERBOSE_NONE)
emit_verifier_log(tester->log_buf, true /*force*/);
for (j = 0; j < i; j++)
- fprintf(stderr, "MATCHED MSG: '%s'\n", spec->expect_msgs[j]);
+ fprintf(stderr,
+ "MATCHED MSG: '%s'\n", subspec->expect_msgs[j]);
fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg);
return;
}
@@ -200,17 +388,229 @@ static void validate_case(struct test_loader *tester,
}
}
+struct cap_state {
+ __u64 old_caps;
+ bool initialized;
+};
+
+static int drop_capabilities(struct cap_state *caps)
+{
+ const __u64 caps_to_drop = (1ULL << CAP_SYS_ADMIN | 1ULL << CAP_NET_ADMIN |
+ 1ULL << CAP_PERFMON | 1ULL << CAP_BPF);
+ int err;
+
+ err = cap_disable_effective(caps_to_drop, &caps->old_caps);
+ if (err) {
+ PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(err));
+ return err;
+ }
+
+ caps->initialized = true;
+ return 0;
+}
+
+static int restore_capabilities(struct cap_state *caps)
+{
+ int err;
+
+ if (!caps->initialized)
+ return 0;
+
+ err = cap_enable_effective(caps->old_caps, NULL);
+ if (err)
+ PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(err));
+ caps->initialized = false;
+ return err;
+}
+
+static bool can_execute_unpriv(struct test_loader *tester, struct test_spec *spec)
+{
+ if (sysctl_unpriv_disabled < 0)
+ sysctl_unpriv_disabled = get_unpriv_disabled() ? 1 : 0;
+ if (sysctl_unpriv_disabled)
+ return false;
+ if ((spec->prog_flags & BPF_F_ANY_ALIGNMENT) && !EFFICIENT_UNALIGNED_ACCESS)
+ return false;
+ return true;
+}
+
+static bool is_unpriv_capable_map(struct bpf_map *map)
+{
+ enum bpf_map_type type;
+ __u32 flags;
+
+ type = bpf_map__type(map);
+
+ switch (type) {
+ case BPF_MAP_TYPE_HASH:
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ flags = bpf_map__map_flags(map);
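+ /* BPF_F_ZERO_SEED requires CAP_SYS_ADMIN, so hash maps asking
+ * for a fixed seed cannot be created unprivileged. */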
+ return !(flags & BPF_F_ZERO_SEED);
+ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_ARRAY:
+ case BPF_MAP_TYPE_RINGBUF:
+ case BPF_MAP_TYPE_PROG_ARRAY:
+ case BPF_MAP_TYPE_CGROUP_ARRAY:
+ case BPF_MAP_TYPE_PERCPU_ARRAY:
+ case BPF_MAP_TYPE_USER_RINGBUF:
+ case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+ case BPF_MAP_TYPE_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int do_prog_test_run(int fd_prog, int *retval)
+{
+ __u8 tmp_out[TEST_DATA_LEN << 2] = {};
+ __u8 tmp_in[TEST_DATA_LEN] = {};
+ int err, saved_errno;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = tmp_in,
+ .data_size_in = sizeof(tmp_in),
+ .data_out = tmp_out,
+ .data_size_out = sizeof(tmp_out),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_run_opts(fd_prog, &topts);
+ saved_errno = errno;
+
+ if (err) {
+ PRINT_FAIL("FAIL: Unexpected bpf_prog_test_run error: %d (%s) ",
+ saved_errno, strerror(saved_errno));
+ return err;
+ }
+
+ ASSERT_OK(0, "bpf_prog_test_run");
+ *retval = topts.retval;
+
+ return 0;
+}
+
+static bool should_do_test_run(struct test_spec *spec, struct test_subspec *subspec)
+{
+ if (!subspec->execute)
+ return false;
+
+ if (subspec->expect_failure)
+ return false;
+
+ if ((spec->prog_flags & BPF_F_ANY_ALIGNMENT) && !EFFICIENT_UNALIGNED_ACCESS) {
+ if (env.verbosity != VERBOSE_NONE)
+ printf("alignment prevents execution\n");
+ return false;
+ }
+
+ return true;
+}
+
/* this function is forced noinline and has short generic name to look better
* in test_progs output (in case of a failure)
*/
static noinline
void run_subtest(struct test_loader *tester,
- const char *skel_name,
- skel_elf_bytes_fn elf_bytes_factory)
+ struct bpf_object_open_opts *open_opts,
+ const void *obj_bytes,
+ size_t obj_byte_cnt,
+ struct test_spec *spec,
+ bool unpriv)
+{
+ struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv;
+ struct cap_state caps = {};
+ struct bpf_program *tprog;
+ struct bpf_object *tobj;
+ struct bpf_map *map;
+ int retval;
+ int err;
+
+ if (!test__start_subtest(subspec->name))
+ return;
+
+ if (unpriv) {
+ if (!can_execute_unpriv(tester, spec)) {
+ test__skip();
+ test__end_subtest();
+ return;
+ }
+ if (drop_capabilities(&caps)) {
+ test__end_subtest();
+ return;
+ }
+ }
+
+ tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, open_opts);
+ if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */
+ goto subtest_cleanup;
+
+ bpf_object__for_each_program(tprog, tobj)
+ bpf_program__set_autoload(tprog, false);
+
+ bpf_object__for_each_program(tprog, tobj) {
+ /* only load specified program */
+ if (strcmp(bpf_program__name(tprog), spec->prog_name) == 0) {
+ bpf_program__set_autoload(tprog, true);
+ break;
+ }
+ }
+
+ prepare_case(tester, spec, tobj, tprog);
+
+ /* By default bpf_object__load() automatically creates all
+ * maps declared in the skeleton. Some map types are only
+ * allowed in priv mode. Disable autocreate for such maps in
+ * unpriv mode.
+ */
+ bpf_object__for_each_map(map, tobj)
+ bpf_map__set_autocreate(map, !unpriv || is_unpriv_capable_map(map));
+
+ err = bpf_object__load(tobj);
+ if (subspec->expect_failure) {
+ if (!ASSERT_ERR(err, "unexpected_load_success")) {
+ emit_verifier_log(tester->log_buf, false /*force*/);
+ goto tobj_cleanup;
+ }
+ } else {
+ if (!ASSERT_OK(err, "unexpected_load_failure")) {
+ emit_verifier_log(tester->log_buf, true /*force*/);
+ goto tobj_cleanup;
+ }
+ }
+
+ emit_verifier_log(tester->log_buf, false /*force*/);
+ validate_case(tester, subspec, tobj, tprog, err);
+
+ if (should_do_test_run(spec, subspec)) {
+ /* For some reason test_verifier executes programs
+ * with all capabilities restored. Do the same here.
+ */
+ if (!restore_capabilities(&caps))
+ goto tobj_cleanup;
+
+ do_prog_test_run(bpf_program__fd(tprog), &retval);
+ if (retval != subspec->retval && subspec->retval != POINTER_VALUE) {
+ PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
+ goto tobj_cleanup;
+ }
+ }
+
+tobj_cleanup:
+ bpf_object__close(tobj);
+subtest_cleanup:
+ test__end_subtest();
+ restore_capabilities(&caps);
+}
+
+static void process_subtest(struct test_loader *tester,
+ const char *skel_name,
+ skel_elf_bytes_fn elf_bytes_factory)
{
LIBBPF_OPTS(bpf_object_open_opts, open_opts, .object_name = skel_name);
- struct bpf_object *obj = NULL, *tobj;
- struct bpf_program *prog, *tprog;
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
const void *obj_bytes;
size_t obj_byte_cnt;
int err;
@@ -224,52 +624,22 @@ void run_subtest(struct test_loader *tester,
return;
bpf_object__for_each_program(prog, obj) {
- const char *prog_name = bpf_program__name(prog);
struct test_spec spec;
- if (!test__start_subtest(prog_name))
- continue;
-
/* if we can't derive test specification, go to the next test */
err = parse_test_spec(tester, obj, prog, &spec);
- if (!ASSERT_OK(err, "parse_test_spec"))
+ if (err) {
+ PRINT_FAIL("Can't parse test spec for program '%s'\n",
+ bpf_program__name(prog));
continue;
-
- tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts);
- if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */
- continue;
-
- bpf_object__for_each_program(tprog, tobj)
- bpf_program__set_autoload(tprog, false);
-
- bpf_object__for_each_program(tprog, tobj) {
- /* only load specified program */
- if (strcmp(bpf_program__name(tprog), prog_name) == 0) {
- bpf_program__set_autoload(tprog, true);
- break;
- }
- }
-
- prepare_case(tester, &spec, tobj, tprog);
-
- err = bpf_object__load(tobj);
- if (spec.expect_failure) {
- if (!ASSERT_ERR(err, "unexpected_load_success")) {
- emit_verifier_log(tester->log_buf, false /*force*/);
- goto tobj_cleanup;
- }
- } else {
- if (!ASSERT_OK(err, "unexpected_load_failure")) {
- emit_verifier_log(tester->log_buf, true /*force*/);
- goto tobj_cleanup;
- }
}
- emit_verifier_log(tester->log_buf, false /*force*/);
- validate_case(tester, &spec, tobj, tprog, err);
+ if (spec.mode_mask & PRIV)
+ run_subtest(tester, &open_opts, obj_bytes, obj_byte_cnt, &spec, false);
+ if (spec.mode_mask & UNPRIV)
+ run_subtest(tester, &open_opts, obj_bytes, obj_byte_cnt, &spec, true);
-tobj_cleanup:
- bpf_object__close(tobj);
+ free_test_spec(&spec);
}
bpf_object__close(obj);
@@ -280,5 +650,5 @@ void test_loader__run_subtests(struct test_loader *tester,
skel_elf_bytes_fn elf_bytes_factory)
{
/* see comment in run_subtest() for why we do this function nesting */
- run_subtest(tester, skel_name, elf_bytes_factory);
+ process_subtest(tester, skel_name, elf_bytes_factory);
}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6d5e3022c75f..ea82921110da 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -18,6 +18,7 @@
#include <sys/socket.h>
#include <sys/un.h>
#include <bpf/btf.h>
+#include "json_writer.h"
static bool verbose(void)
{
@@ -269,10 +270,23 @@ static void print_subtest_name(int test_num, int subtest_num,
fprintf(env.stdout, "\n");
}
+static void jsonw_write_log_message(json_writer_t *w, char *log_buf, size_t log_cnt)
+{
+ /* open_memstream (from stdio_hijack_init) ensures that log_buf is terminated by a
+ * null byte. However, in parallel mode, log_buf will be NULL if there is no message.
+ */
+ if (log_cnt) {
+ jsonw_string_field(w, "message", log_buf);
+ } else {
+ jsonw_string_field(w, "message", "");
+ }
+}
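+
+/* With --json-summary the overall document has this shape (a sketch,
+ * values illustrative; only tests with errors are listed under "results"):
+ *
+ *   { "success": 10, "success_subtest": 20, "skipped": 1, "failed": 1,
+ *     "results": [ { "name": "t", "number": 5, "message": "...",
+ *                    "failed": true, "subtests": [ ... ] } ] }
+ */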
+
static void dump_test_log(const struct prog_test_def *test,
const struct test_state *test_state,
bool skip_ok_subtests,
- bool par_exec_result)
+ bool par_exec_result,
+ json_writer_t *w)
{
bool test_failed = test_state->error_cnt > 0;
bool force_log = test_state->force_log;
@@ -296,6 +310,16 @@ static void dump_test_log(const struct prog_test_def *test,
if (test_state->log_cnt && print_test)
print_test_log(test_state->log_buf, test_state->log_cnt);
+ if (w && print_test) {
+ jsonw_start_object(w);
+ jsonw_string_field(w, "name", test->test_name);
+ jsonw_uint_field(w, "number", test->test_num);
+ jsonw_write_log_message(w, test_state->log_buf, test_state->log_cnt);
+ jsonw_bool_field(w, "failed", test_failed);
+ jsonw_name(w, "subtests");
+ jsonw_start_array(w);
+ }
+
for (i = 0; i < test_state->subtest_num; i++) {
subtest_state = &test_state->subtest_states[i];
subtest_failed = subtest_state->error_cnt;
@@ -314,6 +338,20 @@ static void dump_test_log(const struct prog_test_def *test,
test->test_name, subtest_state->name,
test_result(subtest_state->error_cnt,
subtest_state->skipped));
+
+ if (w && print_subtest) {
+ jsonw_start_object(w);
+ jsonw_string_field(w, "name", subtest_state->name);
+ jsonw_uint_field(w, "number", i+1);
+ jsonw_write_log_message(w, subtest_state->log_buf, subtest_state->log_cnt);
+ jsonw_bool_field(w, "failed", subtest_failed);
+ jsonw_end_object(w);
+ }
+ }
+
+ if (w && print_test) {
+ jsonw_end_array(w);
+ jsonw_end_object(w);
}
print_test_result(test, test_state);
@@ -591,31 +629,6 @@ out:
return err;
}
-int extract_build_id(char *build_id, size_t size)
-{
- FILE *fp;
- char *line = NULL;
- size_t len = 0;
-
- fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r");
- if (fp == NULL)
- return -1;
-
- if (getline(&line, &len, fp) == -1)
- goto err;
- pclose(fp);
-
- if (len > size)
- len = size;
- memcpy(build_id, line, len);
- build_id[len] = '\0';
- free(line);
- return 0;
-err:
- pclose(fp);
- return -1;
-}
-
static int finit_module(int fd, const char *param_values, int flags)
{
return syscall(__NR_finit_module, fd, param_values, flags);
@@ -715,6 +728,7 @@ enum ARG_KEYS {
ARG_TEST_NAME_GLOB_DENYLIST = 'd',
ARG_NUM_WORKERS = 'j',
ARG_DEBUG = -1,
+ ARG_JSON_SUMMARY = 'J'
};
static const struct argp_option opts[] = {
@@ -740,6 +754,7 @@ static const struct argp_option opts[] = {
"Number of workers to run in parallel, default to number of cpus." },
{ "debug", ARG_DEBUG, NULL, 0,
"print extra debug information for test_progs." },
+ { "json-summary", ARG_JSON_SUMMARY, "FILE", 0, "Write report in json format to this file."},
{},
};
@@ -870,6 +885,13 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case ARG_DEBUG:
env->debug = true;
break;
+ case ARG_JSON_SUMMARY:
+ env->json = fopen(arg, "w");
+ if (env->json == NULL) {
+ perror("Failed to open json summary file");
+ return -errno;
+ }
+ break;
case ARGP_KEY_ARG:
argp_usage(state);
break;
@@ -1017,7 +1039,7 @@ void crash_handler(int signum)
stdio_restore();
if (env.test) {
env.test_state->error_cnt++;
- dump_test_log(env.test, env.test_state, true, false);
+ dump_test_log(env.test, env.test_state, true, false, NULL);
}
if (env.worker_id != -1)
fprintf(stderr, "[%d]: ", env.worker_id);
@@ -1124,7 +1146,7 @@ static void run_one_test(int test_num)
stdio_restore();
- dump_test_log(test, state, false, false);
+ dump_test_log(test, state, false, false, NULL);
}
struct dispatch_data {
@@ -1283,7 +1305,7 @@ static void *dispatch_thread(void *ctx)
} while (false);
pthread_mutex_lock(&stdout_output_lock);
- dump_test_log(test, state, false, true);
+ dump_test_log(test, state, false, true, NULL);
pthread_mutex_unlock(&stdout_output_lock);
} /* while (true) */
error:
@@ -1308,6 +1330,7 @@ static void calculate_summary_and_print_errors(struct test_env *env)
{
int i;
int succ_cnt = 0, fail_cnt = 0, sub_succ_cnt = 0, skip_cnt = 0;
+ json_writer_t *w = NULL;
for (i = 0; i < prog_test_cnt; i++) {
struct test_state *state = &test_states[i];
@@ -1324,6 +1347,22 @@ static void calculate_summary_and_print_errors(struct test_env *env)
succ_cnt++;
}
+ if (env->json) {
+ w = jsonw_new(env->json);
+ if (!w)
+ fprintf(env->stderr, "Failed to create new JSON stream.");
+ }
+
+ if (w) {
+ jsonw_start_object(w);
+ jsonw_uint_field(w, "success", succ_cnt);
+ jsonw_uint_field(w, "success_subtest", sub_succ_cnt);
+ jsonw_uint_field(w, "skipped", skip_cnt);
+ jsonw_uint_field(w, "failed", fail_cnt);
+ jsonw_name(w, "results");
+ jsonw_start_array(w);
+ }
+
/*
* We only print error logs summary when there are failed tests and
 * verbose mode is not enabled. Otherwise, results may be inconsistent.
@@ -1340,10 +1379,19 @@ static void calculate_summary_and_print_errors(struct test_env *env)
if (!state->tested || !state->error_cnt)
continue;
- dump_test_log(test, state, true, true);
+ dump_test_log(test, state, true, true, w);
}
}
+ if (w) {
+ jsonw_end_array(w);
+ jsonw_end_object(w);
+ jsonw_destroy(&w);
+ }
+
+ if (env->json)
+ fclose(env->json);
+
printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt);
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 3cbf005747ed..10ba43250668 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -114,6 +114,7 @@ struct test_env {
FILE *stdout;
FILE *stderr;
int nr_cpus;
+ FILE *json;
int succ_cnt; /* successful tests */
int sub_succ_cnt; /* successful sub-tests */
@@ -404,7 +405,6 @@ static inline void *u64_to_ptr(__u64 ptr)
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
-int extract_build_id(char *build_id, size_t size);
int kern_sync_rcu(void);
int trigger_module_test_read(int read_sz);
int trigger_module_test_write(int write_sz);
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index 06857b689c11..2dec7dbf29a2 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -571,8 +571,13 @@ setup_xfrm_tunnel()
test_xfrm_tunnel()
{
+ if [[ -e /sys/kernel/tracing/trace ]]; then
+ TRACE=/sys/kernel/tracing/trace
+ else
+ TRACE=/sys/kernel/debug/tracing/trace
+ fi
config_device
- > /sys/kernel/debug/tracing/trace
+ > ${TRACE}
setup_xfrm_tunnel
mkdir -p ${BPF_PIN_TUNNEL_DIR}
bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}
@@ -581,11 +586,11 @@ test_xfrm_tunnel()
${BPF_PIN_TUNNEL_DIR}/xfrm_get_state
ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
sleep 1
- grep "reqid 1" /sys/kernel/debug/tracing/trace
+ grep "reqid 1" ${TRACE}
check_err $?
- grep "spi 0x1" /sys/kernel/debug/tracing/trace
+ grep "spi 0x1" ${TRACE}
check_err $?
- grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace
+ grep "remote ip 0xac100164" ${TRACE}
check_err $?
cleanup
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 49a70d9beb0b..e4657c5bc3f1 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -33,13 +33,8 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#ifdef HAVE_GENHDR
-# include "autoconf.h"
-#else
-# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__)
-# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
-# endif
-#endif
+#include "autoconf_helper.h"
+#include "unpriv_helpers.h"
#include "cap_helpers.h"
#include "bpf_rand.h"
#include "bpf_util.h"
@@ -1084,7 +1079,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
}
if (*fixup_map_ringbuf) {
map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0,
- 0, 4096);
+ 0, getpagesize());
do {
prog[*fixup_map_ringbuf].imm = map_fds[20];
fixup_map_ringbuf++;
@@ -1665,22 +1660,6 @@ static bool is_admin(void)
return (caps & ADMIN_CAPS) == ADMIN_CAPS;
}
-static void get_unpriv_disabled()
-{
- char buf[2];
- FILE *fd;
-
- fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
- if (!fd) {
- perror("fopen /proc/sys/"UNPRIV_SYSCTL);
- unpriv_disabled = true;
- return;
- }
- if (fgets(buf, 2, fd) == buf && atoi(buf))
- unpriv_disabled = true;
- fclose(fd);
-}
-
static bool test_as_unpriv(struct bpf_test *test)
{
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
deleted file mode 100644
index 70feda97cee5..000000000000
--- a/tools/testing/selftests/bpf/test_verifier_log.c
+++ /dev/null
@@ -1,175 +0,0 @@
-#include <errno.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <linux/filter.h>
-#include <linux/unistd.h>
-
-#include <bpf/bpf.h>
-
-#define LOG_SIZE (1 << 20)
-
-#define err(str...) printf("ERROR: " str)
-
-static const struct bpf_insn code_sample[] = {
- /* We need a few instructions to pass the min log length */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
-};
-
-static inline __u64 ptr_to_u64(const void *ptr)
-{
- return (__u64) (unsigned long) ptr;
-}
-
-static int load(char *log, size_t log_len, int log_level)
-{
- union bpf_attr attr;
-
- bzero(&attr, sizeof(attr));
- attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- attr.insn_cnt = (__u32)(sizeof(code_sample) / sizeof(struct bpf_insn));
- attr.insns = ptr_to_u64(code_sample);
- attr.license = ptr_to_u64("GPL");
- attr.log_buf = ptr_to_u64(log);
- attr.log_size = log_len;
- attr.log_level = log_level;
-
- return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
-}
-
-static void check_ret(int ret, int exp_errno)
-{
- if (ret > 0) {
- close(ret);
- err("broken sample loaded successfully!?\n");
- exit(1);
- }
-
- if (!ret || errno != exp_errno) {
- err("Program load returned: ret:%d/errno:%d, expected ret:%d/errno:%d\n",
- ret, errno, -1, exp_errno);
- exit(1);
- }
-}
-
-static void check_ones(const char *buf, size_t len, const char *msg)
-{
- while (len--)
- if (buf[len] != 1) {
- err("%s", msg);
- exit(1);
- }
-}
-
-static void test_log_good(char *log, size_t buf_len, size_t log_len,
- size_t exp_len, int exp_errno, const char *full_log)
-{
- size_t len;
- int ret;
-
- memset(log, 1, buf_len);
-
- ret = load(log, log_len, 1);
- check_ret(ret, exp_errno);
-
- len = strnlen(log, buf_len);
- if (len == buf_len) {
- err("verifier did not NULL terminate the log\n");
- exit(1);
- }
- if (exp_len && len != exp_len) {
- err("incorrect log length expected:%zd have:%zd\n",
- exp_len, len);
- exit(1);
- }
-
- if (strchr(log, 1)) {
- err("verifier leaked a byte through\n");
- exit(1);
- }
-
- check_ones(log + len + 1, buf_len - len - 1,
- "verifier wrote bytes past NULL termination\n");
-
- if (memcmp(full_log, log, LOG_SIZE)) {
- err("log did not match expected output\n");
- exit(1);
- }
-}
-
-static void test_log_bad(char *log, size_t log_len, int log_level)
-{
- int ret;
-
- ret = load(log, log_len, log_level);
- check_ret(ret, EINVAL);
- if (log)
- check_ones(log, LOG_SIZE,
- "verifier touched log with bad parameters\n");
-}
-
-int main(int argc, char **argv)
-{
- char full_log[LOG_SIZE];
- char log[LOG_SIZE];
- size_t want_len;
- int i;
-
- memset(log, 1, LOG_SIZE);
-
- /* Use libbpf 1.0 API mode */
- libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
- /* Test incorrect attr */
- printf("Test log_level 0...\n");
- test_log_bad(log, LOG_SIZE, 0);
-
- printf("Test log_size < 128...\n");
- test_log_bad(log, 15, 1);
-
- printf("Test log_buff = NULL...\n");
- test_log_bad(NULL, LOG_SIZE, 1);
-
- /* Test with log big enough */
- printf("Test oversized buffer...\n");
- test_log_good(full_log, LOG_SIZE, LOG_SIZE, 0, EACCES, full_log);
-
- want_len = strlen(full_log);
-
- printf("Test exact buffer...\n");
- test_log_good(log, LOG_SIZE, want_len + 2, want_len, EACCES, full_log);
-
- printf("Test undersized buffers...\n");
- for (i = 0; i < 64; i++) {
- full_log[want_len - i + 1] = 1;
- full_log[want_len - i] = 0;
-
- test_log_good(log, LOG_SIZE, want_len + 1 - i, want_len - i,
- ENOSPC, full_log);
- }
-
- printf("test_verifier_log: OK\n");
- return 0;
-}
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index b077cf58f825..377fb157a57c 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -116,6 +116,7 @@ setup_vethPairs() {
ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4
if [ -f /proc/net/if_inet6 ]; then
echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
+ echo 1 > /proc/sys/net/ipv6/conf/${VETH1}/disable_ipv6
fi
if [[ $verbose -eq 1 ]]; then
echo "setting up ${VETH1}"
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 6c44153755e6..0b5e0829e5be 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -195,7 +195,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
goto err_out;
}
- if (type != BPF_PROG_TYPE_UNSPEC)
+ if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type)
bpf_program__set_type(prog, type);
flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32;
@@ -229,3 +229,23 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
return bpf_prog_load(type, NULL, license, insns, insns_cnt, &opts);
}
+
+__u64 read_perf_max_sample_freq(void)
+{
+ __u64 sample_freq = 5000; /* fallback to 5000 on error */
+ FILE *f;
+
+ f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
+ if (f == NULL) {
+ printf("Failed to open /proc/sys/kernel/perf_event_max_sample_rate: err %d\n"
+ "return default value: 5000\n", -errno);
+ return sample_freq;
+ }
+ if (fscanf(f, "%llu", &sample_freq) != 1) {
+ printf("Failed to parse /proc/sys/kernel/perf_event_max_sample_rate: err %d\n"
+ "return default value: 5000\n", -errno);
+ }
+
+ fclose(f);
+ return sample_freq;
+}
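+
+/* Example use (a sketch, not part of this patch): cap a perf_event_attr
+ * sampling frequency at the sysctl limit:
+ *
+ *   attr.freq = 1;
+ *   attr.sample_freq = read_perf_max_sample_freq();
+ */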
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 6ec00bf79cb5..eb8790f928e4 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -20,3 +20,5 @@ struct test_filter_set;
int parse_test_list(const char *s,
struct test_filter_set *test_set,
bool is_glob_pattern);
+
+__u64 read_perf_max_sample_freq(void);
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 09a16a77bae4..9b070cdf44ac 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -11,8 +11,12 @@
#include <linux/perf_event.h>
#include <sys/mman.h>
#include "trace_helpers.h"
+#include <linux/limits.h>
+#include <libelf.h>
+#include <gelf.h>
-#define DEBUGFS "/sys/kernel/debug/tracing/"
+#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
+#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
#define MAX_SYMS 300000
static struct ksym syms[MAX_SYMS];
@@ -136,7 +140,10 @@ void read_trace_pipe(void)
{
int trace_fd;
- trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+ if (access(TRACEFS_PIPE, F_OK) == 0)
+ trace_fd = open(TRACEFS_PIPE, O_RDONLY, 0);
+ else
+ trace_fd = open(DEBUGFS_PIPE, O_RDONLY, 0);
if (trace_fd < 0)
return;
@@ -230,3 +237,82 @@ ssize_t get_rel_offset(uintptr_t addr)
fclose(f);
return -EINVAL;
}
+
+static int
+parse_build_id_buf(const void *note_start, Elf32_Word note_size, char *build_id)
+{
+ Elf32_Word note_offs = 0;
+
+ while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
+ Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
+
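+ /* n_type 3 is NT_GNU_BUILD_ID; its payload follows the
+ * 4-byte-aligned "GNU" owner name. */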
+ if (nhdr->n_type == 3 && nhdr->n_namesz == sizeof("GNU") &&
+ !strcmp((char *)(nhdr + 1), "GNU") && nhdr->n_descsz > 0 &&
+ nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
+ memcpy(build_id, note_start + note_offs +
+ ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), nhdr->n_descsz);
+ memset(build_id + nhdr->n_descsz, 0, BPF_BUILD_ID_SIZE - nhdr->n_descsz);
+ return (int) nhdr->n_descsz;
+ }
+
+ note_offs = note_offs + sizeof(Elf32_Nhdr) +
+ ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
+ }
+
+ return -ENOENT;
+}
+
+/* Reads the GNU build id from the binary at *path* and returns it in the
+ * *build_id* buffer, whose *size* must be at least BPF_BUILD_ID_SIZE bytes.
+ * Returns the size of the build id on success, or a negative error value
+ * on failure.
+ */
+int read_build_id(const char *path, char *build_id, size_t size)
+{
+ int fd, err = -EINVAL;
+ Elf *elf = NULL;
+ GElf_Ehdr ehdr;
+ size_t max, i;
+
+ if (size < BPF_BUILD_ID_SIZE)
+ return -EINVAL;
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ (void)elf_version(EV_CURRENT);
+
+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!elf)
+ goto out;
+ if (elf_kind(elf) != ELF_K_ELF)
+ goto out;
+ if (!gelf_getehdr(elf, &ehdr))
+ goto out;
+
+ for (i = 0; i < ehdr.e_phnum; i++) {
+ GElf_Phdr mem, *phdr;
+ char *data;
+
+ phdr = gelf_getphdr(elf, i, &mem);
+ if (!phdr)
+ goto out;
+ if (phdr->p_type != PT_NOTE)
+ continue;
+ data = elf_rawfile(elf, &max);
+ if (!data)
+ goto out;
+ if (phdr->p_offset + phdr->p_memsz > max)
+ goto out;
+ err = parse_build_id_buf(data + phdr->p_offset, phdr->p_memsz, build_id);
+ if (err > 0)
+ break;
+ }
+
+out:
+ if (elf)
+ elf_end(elf);
+ close(fd);
+ return err;
+}
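+
+/* Example (sketch): read the id of a test binary shipped with the
+ * selftests:
+ *
+ *   char id[BPF_BUILD_ID_SIZE];
+ *   int len = read_build_id("./urandom_read", id, sizeof(id));
+ */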
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 53efde0e2998..876f3e711df6 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -4,6 +4,9 @@
#include <bpf/libbpf.h>
+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
+
struct ksym {
long addr;
char *name;
@@ -23,4 +26,6 @@ void read_trace_pipe(void);
ssize_t get_uprobe_offset(const void *addr);
ssize_t get_rel_offset(uintptr_t addr);
+int read_build_id(const char *path, char *build_id, size_t size);
+
#endif
diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c
new file mode 100644
index 000000000000..2a6efbd0401e
--- /dev/null
+++ b/tools/testing/selftests/bpf/unpriv_helpers.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <error.h>
+#include <stdio.h>
+
+#include "unpriv_helpers.h"
+
+bool get_unpriv_disabled(void)
+{
+ bool disabled;
+ char buf[2];
+ FILE *fd;
+
+ fd = fopen("/proc/sys/" UNPRIV_SYSCTL, "r");
+ if (fd) {
+ disabled = (fgets(buf, 2, fd) == buf && atoi(buf));
+ fclose(fd);
+ } else {
+ perror("fopen /proc/sys/" UNPRIV_SYSCTL);
+ disabled = true;
+ }
+
+ return disabled;
+}
diff --git a/tools/testing/selftests/bpf/unpriv_helpers.h b/tools/testing/selftests/bpf/unpriv_helpers.h
new file mode 100644
index 000000000000..151f67329665
--- /dev/null
+++ b/tools/testing/selftests/bpf/unpriv_helpers.h
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdbool.h>
+
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+
+bool get_unpriv_disabled(void);
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
deleted file mode 100644
index 7d7ebee5cc7a..000000000000
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ /dev/null
@@ -1,68 +0,0 @@
-{
- "invalid and of negative number",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 max value is outside of the allowed memory range",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid range check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 12),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_9, 1),
- BPF_ALU32_IMM(BPF_MOD, BPF_REG_1, 2),
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_ALU32_REG(BPF_AND, BPF_REG_9, BPF_REG_1),
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1),
- BPF_ALU32_IMM(BPF_RSH, BPF_REG_9, 1),
- BPF_MOV32_IMM(BPF_REG_3, 1),
- BPF_ALU32_REG(BPF_SUB, BPF_REG_3, BPF_REG_9),
- BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0x10000000),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
- BPF_MOV64_REG(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 max value is outside of the allowed memory range",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "check known subreg with unknown reg",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 32),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFFFF1234),
- /* Upper bits are unknown but AND above masks out 1 zero'ing lower bits */
- BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 1, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 512),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 !read_ok",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .retval = 0
-},
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
deleted file mode 100644
index 1b138cd2b187..000000000000
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ /dev/null
@@ -1,379 +0,0 @@
-{
- "valid map access into an array with a constant",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "valid map access into an array with a register",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "valid map access into an array with a variable",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "valid map access into an array with a signed variable",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP32_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with a constant",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2,
- offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=48 size=8",
- .result = REJECT,
-},
-{
- "invalid map access into an array with a register",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 min value is outside of the allowed memory range",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with a variable",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 unbounded memory access, make sure to bounds check any such access",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with no floor check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .errstr = "R0 unbounded memory access",
- .result_unpriv = REJECT,
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with a invalid max check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .errstr = "invalid access to map value, value_size=48 off=44 size=8",
- .result_unpriv = REJECT,
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with a invalid max check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3, 11 },
- .errstr = "R0 pointer += pointer",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "valid read map access into a read-only array 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_ro = { 3 },
- .result = ACCEPT,
- .retval = 28,
-},
-{
- "valid read map access into a read-only array 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_csum_diff),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_array_ro = { 3 },
- .result = ACCEPT,
- .retval = 65507,
-},
-{
- "invalid write map access into a read-only array 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_ro = { 3 },
- .result = REJECT,
- .errstr = "write into map forbidden",
-},
-{
- "invalid write map access into a read-only array 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_skb_load_bytes),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_array_ro = { 4 },
- .result = REJECT,
- .errstr = "write into map forbidden",
-},
-{
- "valid write map access into a write-only array 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_wo = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "valid write map access into a write-only array 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_skb_load_bytes),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_array_wo = { 4 },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "invalid read map access into a write-only array 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_wo = { 3 },
- .result = REJECT,
- .errstr = "read from map forbidden",
-},
-{
- "invalid read map access into a write-only array 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_array_wo = { 3 },
- .result = REJECT,
- .errstr = "read from map forbidden",
-},
diff --git a/tools/testing/selftests/bpf/verifier/basic_stack.c b/tools/testing/selftests/bpf/verifier/basic_stack.c
deleted file mode 100644
index f995777dddb3..000000000000
--- a/tools/testing/selftests/bpf/verifier/basic_stack.c
+++ /dev/null
@@ -1,64 +0,0 @@
-{
- "stack out of bounds",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid write to stack",
- .result = REJECT,
-},
-{
- "uninitialized stack1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 2 },
- .errstr = "invalid indirect read from stack",
- .result = REJECT,
-},
-{
- "uninitialized stack2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid read from stack",
- .result = REJECT,
-},
-{
- "invalid fp arithmetic",
- /* If this gets ever changed, make sure JITs can deal with it. */
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 8),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 subtraction from stack pointer",
- .result = REJECT,
-},
-{
- "non-invalid fp arithmetic",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
-},
-{
- "misaligned read from stack",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
- BPF_EXIT_INSN(),
- },
- .errstr = "misaligned stack access",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index 33125d5f6772..43942ce8cf15 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -753,3 +753,132 @@
.result_unpriv = REJECT,
.result = ACCEPT,
},
+{
+ "bound check with JMP_JLT for crossing 64-bit signed boundary",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 8),
+
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0x7fffffffffffff10),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+
+ BPF_LD_IMM64(BPF_REG_0, 0x8000000000000000),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ /* r1 unsigned range is [0x7fffffffffffff10, 0x800000000000000f] */
+ BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_1, -2),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+},
+{
+ "bound check with JMP_JSLT for crossing 64-bit signed boundary",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 13),
+
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0x7fffffffffffff10),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+
+ BPF_LD_IMM64(BPF_REG_2, 0x8000000000000fff),
+ BPF_LD_IMM64(BPF_REG_0, 0x8000000000000000),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_2, 3),
+ /* r1 signed range is [S64_MIN, S64_MAX] */
+ BPF_JMP_REG(BPF_JSLT, BPF_REG_0, BPF_REG_1, -3),
+
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+},
+{
+ "bound check for loop upper bound greater than U32_MAX",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 8),
+
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0x100000000),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+
+ BPF_LD_IMM64(BPF_REG_0, 0x100000000),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_1, -2),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+},
+{
+ "bound check with JMP32_JLT for crossing 32-bit signed boundary",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 6),
+
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0x7fffff10),
+ BPF_ALU32_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+
+ BPF_MOV32_IMM(BPF_REG_0, 0x80000000),
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 1),
+ /* r1 unsigned range is [0, 0x8000000f] */
+ BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_1, -2),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+},
+{
+ "bound check with JMP32_JSLT for crossing 32-bit signed boundary",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 10),
+
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0x7fffff10),
+ BPF_ALU32_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+
+ BPF_MOV32_IMM(BPF_REG_2, 0x80000fff),
+ BPF_MOV32_IMM(BPF_REG_0, 0x80000000),
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_JMP32_REG(BPF_JSGT, BPF_REG_0, BPF_REG_2, 3),
+ /* r1 signed range is [S32_MIN, S32_MAX] */
+ BPF_JMP32_REG(BPF_JSLT, BPF_REG_0, BPF_REG_1, -3),
+
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+},
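
The JMP_JLT case above encodes, in raw BPF, roughly the C loop below,
whose induction variable steps across the 64-bit signed boundary. This
is a hand-written analog for readability, not generated from the test:

    #include <stdint.h>

    /* byte is the packet byte the test loads, so it lies in [0, 255] */
    uint64_t cross_signed_boundary(uint8_t byte)
    {
    	uint64_t i = 0x7fffffffffffff10ULL + byte;
    	uint64_t j = 0x8000000000000000ULL;

    	do {
    		j++;            /* BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1) */
    	} while (j < i);        /* BPF_JMP_REG(BPF_JLT, ...) back-edge  */

    	return j;
    }
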
diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c
deleted file mode 100644
index 3931c481e30c..000000000000
--- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c
+++ /dev/null
@@ -1,136 +0,0 @@
-{
- "check deducing bounds from const, 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 has pointer with unsupported alu operation",
- .errstr = "R0 tried to subtract pointer from scalar",
- .result = REJECT,
-},
-{
- "check deducing bounds from const, 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 has pointer with unsupported alu operation",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "check deducing bounds from const, 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 has pointer with unsupported alu operation",
- .errstr = "R0 tried to subtract pointer from scalar",
- .result = REJECT,
-},
-{
- "check deducing bounds from const, 4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R6 has pointer with unsupported alu operation",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "check deducing bounds from const, 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 has pointer with unsupported alu operation",
- .errstr = "R0 tried to subtract pointer from scalar",
- .result = REJECT,
-},
-{
- "check deducing bounds from const, 6",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 has pointer with unsupported alu operation",
- .errstr = "R0 tried to subtract pointer from scalar",
- .result = REJECT,
-},
-{
- "check deducing bounds from const, 7",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, ~0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 has pointer with unsupported alu operation",
- .errstr = "dereference of modified ctx ptr",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "check deducing bounds from const, 8",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, ~0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 has pointer with unsupported alu operation",
- .errstr = "negative offset ctx ptr R1 off=-1 disallowed",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "check deducing bounds from const, 9",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 has pointer with unsupported alu operation",
- .errstr = "R0 tried to subtract pointer from scalar",
- .result = REJECT,
-},
-{
- "check deducing bounds from const, 10",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
- /* Marks reg as unknown. */
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr = "math between ctx pointer and register with unbounded min value is not allowed",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
deleted file mode 100644
index bf82b923c5fe..000000000000
--- a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
+++ /dev/null
@@ -1,411 +0,0 @@
-{
- "bounds checks mixing signed and unsigned, positive bounds",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 2),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 4, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 2",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
- BPF_MOV64_IMM(BPF_REG_8, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_1),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
- BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 3",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
- BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 4",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .result = ACCEPT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 5",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 4),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512),
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_6, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_4, 1, 4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_ST_MEM(BPF_H, BPF_REG_10, -512, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R4 min value is negative, either use unsigned",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 7",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .result = ACCEPT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 8",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 9",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .result = ACCEPT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 10",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 11",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- /* Dead branch. */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 12",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -6),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 13",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 2),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_7, 1),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_1),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 4, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_7),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 14",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_8, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 42, 6),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_8, BPF_REG_1, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
- BPF_JMP_IMM(BPF_JA, 0, 0, -7),
- },
- .fixup_map_hash_8b = { 6 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 15",
- .insns = {
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -6),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "unbounded min value",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/btf_ctx_access.c b/tools/testing/selftests/bpf/verifier/btf_ctx_access.c
index 6340db6b46dc..0484d3de040d 100644
--- a/tools/testing/selftests/bpf/verifier/btf_ctx_access.c
+++ b/tools/testing/selftests/bpf/verifier/btf_ctx_access.c
@@ -10,3 +10,16 @@
.expected_attach_type = BPF_TRACE_FENTRY,
.kfunc = "bpf_modify_return_test",
},
+
+{
+ "btf_ctx_access u32 pointer accept",
+ .insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), /* load 1st argument value (u32 pointer) */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "bpf_fentry_test9",
+},
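
In C, the accepted program corresponds roughly to the fentry skeleton
below; bpf_fentry_test9 is taken from the test's .kfunc field, while the
program name and comment are illustrative:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    SEC("fentry/bpf_fentry_test9")
    int BPF_PROG(fentry_test9, __u32 *arg)
    {
    	/* The raw test above only loads the ctx slot holding arg;
    	 * BPF_PROG expresses the same access via a typed parameter.
    	 */
    	return 0;
    }

    char _license[] SEC("license") = "GPL";
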
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 5702fc9761ef..1bdf2b43e49e 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -109,7 +109,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket",
+ .errstr = "Possibly NULL pointer passed to trusted arg0",
.fixup_kfunc_btf_id = {
{ "bpf_kfunc_call_test_acquire", 3 },
{ "bpf_kfunc_call_test_release", 5 },
@@ -165,19 +165,23 @@
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 16),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -4),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.fixup_kfunc_btf_id = {
{ "bpf_kfunc_call_test_acquire", 3 },
- { "bpf_kfunc_call_test_release", 9 },
+ { "bpf_kfunc_call_test_offset", 9 },
+ { "bpf_kfunc_call_test_release", 12 },
},
.result_unpriv = REJECT,
.result = REJECT,
diff --git a/tools/testing/selftests/bpf/verifier/cfg.c b/tools/testing/selftests/bpf/verifier/cfg.c
deleted file mode 100644
index 4eb76ed739ce..000000000000
--- a/tools/testing/selftests/bpf/verifier/cfg.c
+++ /dev/null
@@ -1,73 +0,0 @@
-{
- "unreachable",
- .insns = {
- BPF_EXIT_INSN(),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable",
- .result = REJECT,
-},
-{
- "unreachable2",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable",
- .result = REJECT,
-},
-{
- "out of range jump",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_EXIT_INSN(),
- },
- .errstr = "jump out of range",
- .result = REJECT,
-},
-{
- "out of range jump2",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, -2),
- BPF_EXIT_INSN(),
- },
- .errstr = "jump out of range",
- .result = REJECT,
-},
-{
- "loop (back-edge)",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable insn 1",
- .errstr_unpriv = "back-edge",
- .result = REJECT,
-},
-{
- "loop2 (back-edge)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_JMP_IMM(BPF_JA, 0, 0, -4),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable insn 4",
- .errstr_unpriv = "back-edge",
- .result = REJECT,
-},
-{
- "conditional loop",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
- BPF_EXIT_INSN(),
- },
- .errstr = "infinite loop detected",
- .errstr_unpriv = "back-edge",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c b/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c
deleted file mode 100644
index 6d65fe3e7321..000000000000
--- a/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c
+++ /dev/null
@@ -1,72 +0,0 @@
-{
- "bpf_exit with invalid return code. test1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 has value (0x0; 0xffffffff)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 has value (0x0; 0x3)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 has value (0x2; 0x0)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 is not a known value (ctx)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test7",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 has unknown scalar value",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
diff --git a/tools/testing/selftests/bpf/verifier/cgroup_skb.c b/tools/testing/selftests/bpf/verifier/cgroup_skb.c
deleted file mode 100644
index 52e4c03b076b..000000000000
--- a/tools/testing/selftests/bpf/verifier/cgroup_skb.c
+++ /dev/null
@@ -1,197 +0,0 @@
-{
- "direct packet read test#1 for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, pkt_type)),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, queue_mapping)),
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, protocol)),
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, vlan_present)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "invalid bpf_context access off=76 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "direct packet read test#2 for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, vlan_tci)),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, vlan_proto)),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, priority)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
- offsetof(struct __sk_buff, priority)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, ingress_ifindex)),
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, tc_index)),
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, hash)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "direct packet read test#3 for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, cb[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, cb[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, cb[3])),
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, cb[4])),
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, napi_id)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4,
- offsetof(struct __sk_buff, cb[0])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5,
- offsetof(struct __sk_buff, cb[1])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
- offsetof(struct __sk_buff, cb[2])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7,
- offsetof(struct __sk_buff, cb[3])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8,
- offsetof(struct __sk_buff, cb[4])),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "direct packet read test#4 for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, family)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip4)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip4)),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip6[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip6[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip6[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip6[3])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip6[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip6[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip6[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip6[3])),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, remote_port)),
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, local_port)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid access of tc_classid for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid access of data_meta for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, data_meta)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid access of flow_keys for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, flow_keys)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid write access to napi_id for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, napi_id)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9,
- offsetof(struct __sk_buff, napi_id)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "write tstamp from CGROUP_SKB",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, tstamp)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "invalid bpf_context access off=152 size=8",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "read tstamp from CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tstamp)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
diff --git a/tools/testing/selftests/bpf/verifier/cgroup_storage.c b/tools/testing/selftests/bpf/verifier/cgroup_storage.c
deleted file mode 100644
index 97057c0a1b8a..000000000000
--- a/tools/testing/selftests/bpf/verifier/cgroup_storage.c
+++ /dev/null
@@ -1,220 +0,0 @@
-{
- "valid cgroup storage access",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .result = REJECT,
- .errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "fd 1 is not pointing to valid bpf_map",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=64 off=256 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=64 off=-2 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid cgroup storage access 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 7),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "get_local_storage() doesn't support non-zero flags",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "get_local_storage() doesn't support non-zero flags",
- .errstr_unpriv = "R2 leaks addr into helper function",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "valid per-cpu cgroup storage access",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_percpu_cgroup_storage = { 1 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .result = REJECT,
- .errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "fd 1 is not pointing to valid bpf_map",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_percpu_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=64 off=256 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=64 off=-2 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid per-cpu cgroup storage access 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 7),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_percpu_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "get_local_storage() doesn't support non-zero flags",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_percpu_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "get_local_storage() doesn't support non-zero flags",
- .errstr_unpriv = "R2 leaks addr into helper function",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
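
The deleted cases above pin down bpf_get_local_storage(): the map argument must be a (per-CPU) cgroup storage map, the flags argument must be zero, and reads must stay inside the declared value size. For reference, a minimal BPF C sketch of the accepted pattern; the map name, section name, and value size are illustrative, not taken from this diff:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Cgroup storage keys are fixed by the kernel; only the value is chosen. */
struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
	__type(key, struct bpf_cgroup_storage_key);
	__type(value, __u64);
} cg_storage SEC(".maps");

SEC("cgroup_skb/egress")
int cg_storage_read(struct __sk_buff *skb)
{
	/* flags must be 0; the "access 5/6" REJECT cases above show the
	 * verifier refusing non-zero flags outright.
	 */
	__u64 *val = bpf_get_local_storage(&cg_storage, 0);

	return *val & 1;	/* in-bounds read of the 8-byte value */
}

char _license[] SEC("license") = "GPL";
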
diff --git a/tools/testing/selftests/bpf/verifier/const_or.c b/tools/testing/selftests/bpf/verifier/const_or.c
deleted file mode 100644
index 0719b0ddec04..000000000000
--- a/tools/testing/selftests/bpf/verifier/const_or.c
+++ /dev/null
@@ -1,60 +0,0 @@
-{
- "constant register |= constant should keep constant type",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "constant register |= constant should not bypass stack boundary checks",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect access to stack R1 off=-48 size=58",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "constant register |= constant register should keep constant type",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_MOV64_IMM(BPF_REG_4, 13),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "constant register |= constant register should not bypass stack boundary checks",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_MOV64_IMM(BPF_REG_4, 24),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect access to stack R1 off=-48 size=58",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
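
const_or.c exercised constant tracking through BPF_OR: both operands of 34|13 and 34|24 are compile-time-known to the verifier, and only the first result (47) fits the 48-byte stack buffer, while the second (58) triggers the off=-48 size=58 rejection. The arithmetic, worked out in plain C:

#include <stdio.h>

int main(void)
{
	/* Both operands known => the OR result is a known constant,
	 * which is exactly what the verifier tracks in these tests.
	 */
	int ok  = 34 | 13;	/* 0b100010 | 0b001101 = 47 <= 48 */
	int bad = 34 | 24;	/* 0b100010 | 0b011000 = 58 >  48 */

	printf("34|13=%d 34|24=%d\n", ok, bad);	/* prints 47 58 */
	return 0;
}
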
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c b/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c
deleted file mode 100644
index c6c69220a569..000000000000
--- a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c
+++ /dev/null
@@ -1,181 +0,0 @@
-{
- "valid access family in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, family)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access remote_ip4 in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip4)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access local_ip4 in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip4)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access remote_port in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_port)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access local_port in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_port)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access remote_ip6 in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip6[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip6[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip6[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip6[3])),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_SKB,
-},
-{
- "valid access local_ip6 in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip6[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip6[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip6[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip6[3])),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_SKB,
-},
-{
- "valid access size in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, size)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "invalid 64B read of size in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, size)),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid read past end of SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, size) + 4),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "invalid read offset in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, family) + 1),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet read for SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, data)),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
- offsetof(struct sk_msg_md, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "direct packet write for SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, data)),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
- offsetof(struct sk_msg_md, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "overlapping checks for direct packet access SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, data)),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
- offsetof(struct sk_msg_md, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
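
ctx_sk_msg.c covered the sk_msg_md context access rules: 32-bit loads at field offsets are valid, wider or misaligned loads are "invalid bpf_context access", and direct data access needs a data/data_end bounds check first. A minimal BPF C sketch of the accepted shape, assuming bpf_helpers.h; the section and function names are illustrative:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sk_msg")
int msg_filter(struct sk_msg_md *msg)
{
	void *data = msg->data;
	void *data_end = msg->data_end;

	/* 32-bit context field read, as in "valid access family". */
	if (msg->family != 2 /* AF_INET */)
		return SK_PASS;

	/* Bounds check before touching the payload, mirroring the
	 * "direct packet read for SK_MSG" case above.
	 */
	if (data + 8 > data_end)
		return SK_PASS;

	return ((__u8 *)data)[0] ? SK_PASS : SK_DROP;
}

char _license[] SEC("license") = "GPL";
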
diff --git a/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c b/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c
deleted file mode 100644
index 698e3779fdd2..000000000000
--- a/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c
+++ /dev/null
@@ -1,40 +0,0 @@
-{
- "direct stack access with 32-bit wraparound. test1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "fp pointer and 2147483647",
- .result = REJECT
-},
-{
- "direct stack access with 32-bit wraparound. test2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "fp pointer and 1073741823",
- .result = REJECT
-},
-{
- "direct stack access with 32-bit wraparound. test3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "fp pointer offset 1073741822",
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .result = REJECT
-},
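
These three cases add two large constants to the frame pointer so that the 32-bit sum wraps (or nearly does); the verifier must reject the resulting pointer rather than treat it as a small offset. The constants, checked in plain C:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* test1: 0x7fffffff + 0x7fffffff wraps to -2 in 32 bits */
	uint32_t t1 = 0x7fffffffu + 0x7fffffffu;	/* 0xfffffffe */
	/* test3: 0x1fffffff + 0x1fffffff stays positive and matches
	 * the "fp pointer offset 1073741822" error string above
	 */
	uint32_t t3 = 0x1fffffffu + 0x1fffffffu;

	printf("t1=%d t3=%u\n", (int32_t)t1, t3);	/* -2 1073741822 */
	return 0;
}
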
diff --git a/tools/testing/selftests/bpf/verifier/div0.c b/tools/testing/selftests/bpf/verifier/div0.c
deleted file mode 100644
index 7685edfbcf71..000000000000
--- a/tools/testing/selftests/bpf/verifier/div0.c
+++ /dev/null
@@ -1,184 +0,0 @@
-{
- "DIV32 by 0, zero check 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "DIV32 by 0, zero check 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "DIV64 by 0, zero check",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "MOD32 by 0, zero check 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "MOD32 by 0, zero check 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "MOD64 by 0, zero check",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "DIV32 by 0, zero check ok, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 2),
- BPF_MOV32_IMM(BPF_REG_2, 16),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 8,
-},
-{
- "DIV32 by 0, zero check 1, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV32 by 0, zero check 2, cls",
- .insns = {
- BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV64 by 0, zero check, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "MOD32 by 0, zero check ok, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 3),
- BPF_MOV32_IMM(BPF_REG_2, 5),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 2,
-},
-{
- "MOD32 by 0, zero check 1, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "MOD32 by 0, zero check 2, cls",
- .insns = {
- BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "MOD64 by 0, zero check 1, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, 2),
- BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 2,
-},
-{
- "MOD64 by 0, zero check 2, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, -1),
- BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = -1,
-},
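
div0.c documents the BPF runtime contract for division by zero: DIV yields 0 and MOD leaves the dividend unchanged, instead of faulting, and the ACCEPT/retval pairs above encode exactly that. A user-space model of the same contract, hand-rolled since native C `/` and `%` by zero are undefined behavior:

#include <stdint.h>
#include <stdio.h>

/* BPF semantics: div-by-0 => 0, mod-by-0 => dividend unchanged. */
static uint64_t bpf_div(uint64_t a, uint64_t b) { return b ? a / b : 0; }
static uint64_t bpf_mod(uint64_t a, uint64_t b) { return b ? a % b : a; }

int main(void)
{
	/* Matches "DIV32 by 0, zero check 1, cls" (retval 0) and
	 * "MOD64 by 0, zero check 1, cls" (retval 2) above.
	 */
	printf("%llu %llu\n",
	       (unsigned long long)bpf_div(1, 0),
	       (unsigned long long)bpf_mod(2, 0));
	return 0;
}
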
diff --git a/tools/testing/selftests/bpf/verifier/div_overflow.c b/tools/testing/selftests/bpf/verifier/div_overflow.c
deleted file mode 100644
index acab4f00819f..000000000000
--- a/tools/testing/selftests/bpf/verifier/div_overflow.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Just make sure that JITs use udiv/umod, as otherwise we get an
- * exception from INT_MIN/-1 overflow, similar to division by zero.
- */
-{
- "DIV32 overflow, check 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV32 overflow, check 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
- BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV64 overflow, check 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
- BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV64 overflow, check 2",
- .insns = {
- BPF_LD_IMM64(BPF_REG_1, LLONG_MIN),
- BPF_ALU64_IMM(BPF_DIV, BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "MOD32 overflow, check 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = INT_MIN,
-},
-{
- "MOD32 overflow, check 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
- BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = INT_MIN,
-},
-{
- "MOD64 overflow, check 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "MOD64 overflow, check 2",
- .insns = {
- BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
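
Because BPF division is unsigned at runtime, INT_MIN / -1 cannot trap the way a signed x86 idiv does: the operands are reinterpreted as 0x80000000 and 0xffffffff, the quotient is 0, and the remainder is the dividend, matching the retvals above. Worked out in plain C:

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* BPF DIV/MOD are unsigned, so INT_MIN / -1 becomes
	 * 0x80000000 / 0xffffffff: quotient 0, remainder the
	 * dividend, matching retval 0 (DIV) and INT_MIN (MOD32).
	 */
	uint32_t a = (uint32_t)INT_MIN;	/* 0x80000000 */
	uint32_t b = (uint32_t)-1;	/* 0xffffffff */

	printf("div=%u mod=%d\n", a / b, (int32_t)(a % b));
	return 0;
}
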
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
deleted file mode 100644
index 9c4885885aba..000000000000
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ /dev/null
@@ -1,650 +0,0 @@
-{
- "helper access to variable memory: stack, bitwise AND + JMP, correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_MOV64_IMM(BPF_REG_2, 16),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, bitwise AND, zero included",
- .insns = {
- /* set max stack size */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0),
- /* set r3 to a random value */
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- /* use bitwise AND to limit r3 range to [0, 64] */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 64),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- /* Call bpf_ringbuf_output(); it is one of the few helper functions
- * with an ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.
- * For unpriv this should signal an error, because memory at &fp[-64]
- * is not initialized.
- */
- BPF_EMIT_CALL(BPF_FUNC_ringbuf_output),
- BPF_EXIT_INSN(),
- },
- .fixup_map_ringbuf = { 4 },
- .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64",
- .result_unpriv = REJECT,
- /* in privileged mode reads from uninitialized stack locations are permitted */
- .result = ACCEPT,
-},
-{
- "helper access to variable memory: stack, bitwise AND + JMP, wrong max",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect access to stack R1 off=-64 size=65",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_MOV64_IMM(BPF_REG_2, 16),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP (signed), correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_MOV64_IMM(BPF_REG_2, 16),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, bounds + offset",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect access to stack R1 off=-64 size=65",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, wrong max",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect access to stack R1 off=-64 size=65",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, no max check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- /* because max wasn't checked, signed min is negative */
- .errstr = "R2 min value is negative, either use unsigned or 'var &= const'",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, no min check",
- .insns = {
- /* set max stack size */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0),
- /* set r3 to a random value */
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- /* use JMP to limit r3 range to [0, 64] */
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 64, 6),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- /* Call bpf_ringbuf_output(); it is one of the few helper functions
- * with an ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.
- * For unpriv this should signal an error, because memory at &fp[-64]
- * is not initialized.
- */
- BPF_EMIT_CALL(BPF_FUNC_ringbuf_output),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_ringbuf = { 4 },
- .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64",
- .result_unpriv = REJECT,
- /* in privileged mode reads from uninitialized stack locations are permitted */
- .result = ACCEPT,
-},
-{
- "helper access to variable memory: stack, JMP (signed), no min check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: map, JMP, correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val), 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: map, JMP, wrong max",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) + 1, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .errstr = "invalid access to map value, value_size=48 off=0 size=49",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: map adjusted, JMP, correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 20, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: map adjusted, JMP, wrong max",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 19, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .errstr = "R1 min value is outside of the allowed memory range",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 type=scalar expected=fp",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 0 /* csum_diff of 64-byte packet */,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 type=scalar expected=fp",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 type=scalar expected=fp",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: 8 bytes leak",
- .insns = {
- /* set max stack size */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0),
- /* set r3 to a random value */
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- /* Note: fp[-32] left uninitialized */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- /* Limit r3 range to [1, 64] */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 63),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- /* Call bpf_ringbuf_output(); it is one of the few helper functions
- * with an ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.
- * For unpriv this should signal an error, because the memory region
- * [1, 64] at &fp[-64] is not fully initialized.
- */
- BPF_EMIT_CALL(BPF_FUNC_ringbuf_output),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_ringbuf = { 3 },
- .errstr_unpriv = "invalid indirect read from stack R2 off -64+32 size 64",
- .result_unpriv = REJECT,
- /* in privileged mode reads from uninitialized stack locations are permitted */
- .result = ACCEPT,
-},
-{
- "helper access to variable memory: 8 bytes no leak (init memory)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
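
All of the variable-memory cases reduce to one pattern: clamp a scalar with AND or a JMP pair so the verifier can bound it, then pass it to a helper as a size. As a sketch, the idiomatic BPF C form of the accepted "bitwise AND + JMP, correct bounds" case, assuming bpf_helpers.h; the tracepoint and names are illustrative:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("tracepoint/syscalls/sys_enter_openat")
int var_len_read(void *ctx)
{
	char buf[64] = {};		/* initialized stack buffer */
	__u64 len = bpf_get_prandom_u32();

	/* AND bounds len to [0, 64]; omitting the bound entirely
	 * produces the "R2 min value is negative" rejection above.
	 */
	len &= 64;
	if (len > 0)
		/* NULL source is illustrative; the probe read simply
		 * fails at runtime, the verifier only checks bounds.
		 */
		bpf_probe_read_kernel(buf, len, NULL);

	return 0;
}

char _license[] SEC("license") = "GPL";
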
diff --git a/tools/testing/selftests/bpf/verifier/helper_packet_access.c b/tools/testing/selftests/bpf/verifier/helper_packet_access.c
deleted file mode 100644
index ae54587e9829..000000000000
--- a/tools/testing/selftests/bpf/verifier/helper_packet_access.c
+++ /dev/null
@@ -1,460 +0,0 @@
-{
- "helper access to packet: test1, valid packet_ptr range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .result_unpriv = ACCEPT,
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test2, unchecked packet_ptr",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test3, variable add",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
- BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 11 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test4, packet_ptr with bad range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 7 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test5, packet_ptr with too short range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 6 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test6, cls valid packet_ptr range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test7, cls unchecked packet_ptr",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test8, cls variable add",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
- BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 11 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test9, cls packet_ptr with bad range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 7 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test10, cls packet_ptr with too short range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 6 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test11, cls unsuitable helper 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_4, 42),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_store_bytes),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "helper access to the packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test12, cls unsuitable helper 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_4, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "helper access to the packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test13, cls helper ok",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test14, cls helper ok sub",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test15, cls helper fail sub",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test16, cls helper fail range 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test17, cls helper fail range 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, -9),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R2 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test18, cls helper fail range 3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, ~0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R2 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test19, cls helper range zero",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test20, pkt end as input",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R1 type=pkt_end expected=fp",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test21, wrong reg",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
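All of the deleted csum_diff entries above share one skeleton: load skb->data and skb->data_end, prove a window with a JGT check against data_end, then pass (ptr, len) to the helper in R1/R2. The rejected variants each break that contract in one way: a length past the proven window, a negative or unbounded length, or pkt_end / an unchecked register as the source pointer. For contrast, a minimal accepted variant in the same table format (the test name is illustrative; length 4 fits inside the 7-byte window proven by the same +7 check used above):

	{
		"helper access to packet: sketch, csum_diff within checked window",
		.insns = {
		BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
			    offsetof(struct __sk_buff, data)),
		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
			    offsetof(struct __sk_buff, data_end)),
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
		BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),	/* window unproven: skip the call */
		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),		/* from = data + 1 */
		BPF_MOV64_IMM(BPF_REG_2, 4),			/* from_size fits the proven window */
		BPF_MOV64_IMM(BPF_REG_3, 0),
		BPF_MOV64_IMM(BPF_REG_4, 0),
		BPF_MOV64_IMM(BPF_REG_5, 0),
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
		},
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
	},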
diff --git a/tools/testing/selftests/bpf/verifier/helper_restricted.c b/tools/testing/selftests/bpf/verifier/helper_restricted.c
deleted file mode 100644
index a067b7098b97..000000000000
--- a/tools/testing/selftests/bpf/verifier/helper_restricted.c
+++ /dev/null
@@ -1,196 +0,0 @@
-{
- "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "unknown func bpf_ktime_get_coarse_ns",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_KPROBE,
-},
-{
- "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "unknown func bpf_ktime_get_coarse_ns",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "unknown func bpf_ktime_get_coarse_ns",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_PERF_EVENT,
-},
-{
- "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "unknown func bpf_ktime_get_coarse_ns",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT,
-},
-{
- "bpf_timer_init isn restricted in BPF_PROG_TYPE_KPROBE",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_EMIT_CALL(BPF_FUNC_timer_init),
- BPF_EXIT_INSN(),
- },
- .fixup_map_timer = { 3, 8 },
- .errstr = "tracing progs cannot use bpf_timer yet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_KPROBE,
-},
-{
- "bpf_timer_init is forbidden in BPF_PROG_TYPE_PERF_EVENT",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_EMIT_CALL(BPF_FUNC_timer_init),
- BPF_EXIT_INSN(),
- },
- .fixup_map_timer = { 3, 8 },
- .errstr = "tracing progs cannot use bpf_timer yet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_PERF_EVENT,
-},
-{
- "bpf_timer_init is forbidden in BPF_PROG_TYPE_TRACEPOINT",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_EMIT_CALL(BPF_FUNC_timer_init),
- BPF_EXIT_INSN(),
- },
- .fixup_map_timer = { 3, 8 },
- .errstr = "tracing progs cannot use bpf_timer yet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "bpf_timer_init is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_EMIT_CALL(BPF_FUNC_timer_init),
- BPF_EXIT_INSN(),
- },
- .fixup_map_timer = { 3, 8 },
- .errstr = "tracing progs cannot use bpf_timer yet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT,
-},
-{
- "bpf_spin_lock is forbidden in BPF_PROG_TYPE_KPROBE",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_spin_lock),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .errstr = "tracing progs cannot use bpf_spin_lock yet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_KPROBE,
-},
-{
- "bpf_spin_lock is forbidden in BPF_PROG_TYPE_TRACEPOINT",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_spin_lock),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .errstr = "tracing progs cannot use bpf_spin_lock yet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "bpf_spin_lock is forbidden in BPF_PROG_TYPE_PERF_EVENT",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_spin_lock),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .errstr = "tracing progs cannot use bpf_spin_lock yet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_PERF_EVENT,
-},
-{
- "bpf_spin_lock is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_spin_lock),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .errstr = "tracing progs cannot use bpf_spin_lock yet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT,
-},
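Every entry in the deleted helper_restricted.c uses the same preamble: build a zeroed stack key, look up a map element, and bail out on NULL, so the restricted helper is reached with otherwise valid arguments and the rejection is attributable purely to the tracing program type. A commented copy of that preamble (the indices in .fixup_map_timer / .fixup_map_spin_lock name the BPF_LD_MAP_FD placeholders to patch; BPF_LD_MAP_FD expands to two instructions, which is why the second fixup index is 8, not 7):

	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),	/* r2 = frame pointer */
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),	/* r2 = fp - 8: key slot */
	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),	/* zero the key */
	BPF_LD_MAP_FD(BPF_REG_1, 0),		/* map fd patched in by the fixup */
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),	/* bail out on NULL */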
diff --git a/tools/testing/selftests/bpf/verifier/helper_value_access.c b/tools/testing/selftests/bpf/verifier/helper_value_access.c
deleted file mode 100644
index 1c7882ddfa63..000000000000
--- a/tools/testing/selftests/bpf/verifier/helper_value_access.c
+++ /dev/null
@@ -1,953 +0,0 @@
-{
- "helper access to map: full range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: partial range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: empty range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_trace_printk),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=0 size=0",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: out-of-bound range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=0 size=56",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: negative range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): full range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) - offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): partial range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): empty range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_trace_printk),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=0",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): out-of-bound range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=52",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): negative range (> adjustment)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): negative range (< adjustment)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): full range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) - offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): partial range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): empty range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_trace_printk),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R1 min value is outside of the allowed memory range",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): out-of-bound range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) -
- offsetof(struct test_val, foo) + 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=52",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): negative range (> adjustment)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): negative range (< adjustment)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): full range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) - offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): partial range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): empty range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_trace_printk),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R1 min value is outside of the allowed memory range",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): no max check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R1 unbounded memory access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): wrong max check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) -
- offsetof(struct test_val, foo) + 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=45",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using <, good access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using <, bad access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 unbounded memory access",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using <=, good access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using <=, bad access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 unbounded memory access",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<, good access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 0, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<, good access 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<, bad access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 min value is negative",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<=, good access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<=, good access 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<=, bad access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 min value is negative",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map lookup helper access to map",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 8 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map update helper access to map",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map update helper access to map: wrong size",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .fixup_map_hash_16b = { 10 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=8 off=0 size=16",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const imm)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, offsetof(struct other_val, bar)),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 9 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const imm): out-of-bound 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, sizeof(struct other_val) - 4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 9 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=12 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const imm): out-of-bound 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 9 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const reg)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct other_val, bar)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const reg): out-of-bound 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, sizeof(struct other_val) - 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=12 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const reg): out-of-bound 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, -4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via variable)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct other_val, bar), 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 11 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via variable): no max check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = REJECT,
- .errstr = "R2 unbounded memory access, make sure to bounds check any such access",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via variable): wrong max check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct other_val, bar) + 1, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 11 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=9 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
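The "via variable" entries above encode the canonical bounds check the verifier expects before a variable offset is added to a map-value pointer: load the untrusted index, cap it with an unsigned compare, and only then do the arithmetic; the s< / s<= "good access" entries show that signed compares additionally need a branch excluding negatives. The core pattern, commented (the jump offset must skip past the helper call, as in the entries above):

	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),	/* r3 = untrusted u32 from the value */
	BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4),	/* unsigned cap */
	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),	/* r1 = value + bounded offset */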
diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c
deleted file mode 100644
index 02d9e004260b..000000000000
--- a/tools/testing/selftests/bpf/verifier/int_ptr.c
+++ /dev/null
@@ -1,161 +0,0 @@
-{
- "ARG_PTR_TO_LONG uninitialized",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "invalid indirect read from stack R4 off -16+0 size 8",
-},
-{
- "ARG_PTR_TO_LONG half-uninitialized",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result_unpriv = REJECT,
- .errstr_unpriv = "invalid indirect read from stack R4 off -16+4 size 8",
- /* in privileged mode reads from uninitialized stack locations are permitted */
- .result = ACCEPT,
-},
-{
- "ARG_PTR_TO_LONG misaligned",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -12),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "misaligned stack access off (0x0; 0x0)+-20+0 size 8",
-},
-{
- "ARG_PTR_TO_LONG size < sizeof(long)",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 12),
- BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "invalid indirect access to stack R4 off=-4 size=8",
-},
-{
- "ARG_PTR_TO_LONG initialized",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
-},
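The deleted int_ptr.c entries all probe one contract: bpf_strtoul()'s ARG_PTR_TO_LONG result argument must point at a stack slot that is 8-byte aligned, large enough for a long, and (at least for unprivileged loads) fully initialized before the call. The passing setup from the final entry, condensed with comments:

	BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),		/* r7: from fp - 8 (buf) down to fp - 16, aligned */
	BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),	/* initialize all 8 bytes of res */
	BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),		/* arg4 = &res */
	BPF_EMIT_CALL(BPF_FUNC_strtoul),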
diff --git a/tools/testing/selftests/bpf/verifier/ld_ind.c b/tools/testing/selftests/bpf/verifier/ld_ind.c
deleted file mode 100644
index 079734227538..000000000000
--- a/tools/testing/selftests/bpf/verifier/ld_ind.c
+++ /dev/null
@@ -1,72 +0,0 @@
-{
- "ld_ind: check calling conv, r1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R2 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .errstr = "R3 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
- BPF_EXIT_INSN(),
- },
- .errstr = "R4 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r5",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_5, 1),
- BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
- BPF_EXIT_INSN(),
- },
- .errstr = "R5 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r7",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_7, 1),
- BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 1,
-},
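BPF_LD_IND keeps the classic-BPF calling convention: the context is taken implicitly from r6, the index comes from the named register, the loaded word lands in r0, and r1-r5 are treated as clobbered by the call into the load helper. The entries above encode exactly that; reading any of r1-r5 afterwards is "!read_ok", while callee-saved r7 survives. The accepted variant, with comments added:

	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),	/* ctx must live in r6 */
	BPF_MOV64_IMM(BPF_REG_7, 1),
	BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),/* index r7; result in r0; r1-r5 clobbered */
	BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),	/* r7 is callee-saved: still readable, retval 1 */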
diff --git a/tools/testing/selftests/bpf/verifier/leak_ptr.c b/tools/testing/selftests/bpf/verifier/leak_ptr.c
deleted file mode 100644
index 73f0dea95546..000000000000
--- a/tools/testing/selftests/bpf/verifier/leak_ptr.c
+++ /dev/null
@@ -1,67 +0,0 @@
-{
- "leak pointer into ctx 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_1, BPF_REG_2,
- offsetof(struct __sk_buff, cb[0])),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 2 },
- .errstr_unpriv = "R2 leaks addr into mem",
- .result_unpriv = REJECT,
- .result = REJECT,
- .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed",
-},
-{
- "leak pointer into ctx 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_1, BPF_REG_10,
- offsetof(struct __sk_buff, cb[0])),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R10 leaks addr into mem",
- .result_unpriv = REJECT,
- .result = REJECT,
- .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed",
-},
-{
- "leak pointer into ctx 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
- offsetof(struct __sk_buff, cb[0])),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .errstr_unpriv = "R2 leaks addr into ctx",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "leak pointer into map val",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr_unpriv = "R6 leaks addr into mem",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
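The leak_ptr.c entries separate two failure modes: in privileged mode the verifier rejects atomic read-modify-write into ctx outright ("BPF_ATOMIC stores into R1 ctx is not allowed"), while in unprivileged mode it additionally flags any store that would let a kernel pointer escape into memory a user can observe ("R2/R6/R10 leaks addr"). The unpriv-only case in miniature, taken from "leak pointer into ctx 3" with comments added:

	BPF_LD_MAP_FD(BPF_REG_2, 0),			/* r2 = map pointer, a kernel address */
	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
		    offsetof(struct __sk_buff, cb[0])),	/* priv: ACCEPT; unpriv: leaks addr into ctx */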
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
deleted file mode 100644
index 17ee84dc7766..000000000000
--- a/tools/testing/selftests/bpf/verifier/map_ptr.c
+++ /dev/null
@@ -1,99 +0,0 @@
-{
- "bpf_map_ptr: read with negative offset rejected",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
- .result = REJECT,
- .errstr = "R1 is bpf_array invalid negative access: off=-8",
-},
-{
- "bpf_map_ptr: write rejected",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
- .result = REJECT,
- .errstr = "only read from bpf_array is supported",
-},
-{
- "bpf_map_ptr: read non-existent field rejected",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_6, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 1),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
- .result = REJECT,
- .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "bpf_map_ptr: read ops field accepted",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_6, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "bpf_map_ptr: r = 0, map_ptr = map_ptr + r",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 4 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 has pointer with unsupported alu operation",
- .result = ACCEPT,
-},
-{
- "bpf_map_ptr: r = 0, r = r + map_ptr",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_LD_MAP_FD(BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 4 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 has pointer with unsupported alu operation",
- .result = ACCEPT,
-},
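Taken together, the map_ptr.c entries pin down direct access to the struct bpf_map object: reads must be aligned, non-negative, and must not partially overlap a pointer member; writes are rejected entirely; unprivileged programs need CAP_PERFMON or CAP_SYS_ADMIN for any of it; and pointer arithmetic on a map pointer passes privileged verification only when the added scalar is provably zero. A two-line sketch of the one accepted shape:

	BPF_LD_MAP_FD(BPF_REG_1, 0),			/* r1 = &map (CONST_PTR_TO_MAP) */
	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),	/* aligned, read-only load of map->ops: ACCEPT */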
diff --git a/tools/testing/selftests/bpf/verifier/map_ret_val.c b/tools/testing/selftests/bpf/verifier/map_ret_val.c
deleted file mode 100644
index bdd0e8d18333..000000000000
--- a/tools/testing/selftests/bpf/verifier/map_ret_val.c
+++ /dev/null
@@ -1,65 +0,0 @@
-{
- "invalid map_fd for function call",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
- BPF_EXIT_INSN(),
- },
- .errstr = "fd 0 is not pointing to valid bpf_map",
- .result = REJECT,
-},
-{
- "don't check return value before access",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 invalid mem access 'map_value_or_null'",
- .result = REJECT,
-},
-{
- "access memory with incorrect alignment",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "misaligned value access",
- .result = REJECT,
- .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
-},
-{
- "sometimes access memory with incorrect alignment",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- BPF_EXIT_INSN(),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 invalid mem access",
- .errstr_unpriv = "R0 leaks addr",
- .result = REJECT,
- .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
-},
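The map_ret_val.c group enforces that bpf_map_lookup_elem()'s return value keeps type map_value_or_null until it has been compared against NULL; dereferencing first is "invalid mem access", and under F_LOAD_WITH_STRICT_ALIGNMENT even a checked pointer must be accessed at aligned offsets. The mandatory check, as the entries above encode it:

	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),	/* NULL check promotes r0 to map_value */
	BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),	/* legal: checked pointer, aligned offset */
	BPF_EXIT_INSN(),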
diff --git a/tools/testing/selftests/bpf/verifier/masking.c b/tools/testing/selftests/bpf/verifier/masking.c
deleted file mode 100644
index 6e1358c544fd..000000000000
--- a/tools/testing/selftests/bpf/verifier/masking.c
+++ /dev/null
@@ -1,322 +0,0 @@
-{
- "masking, test out of bounds 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 5),
- BPF_MOV32_IMM(BPF_REG_2, 5 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 3",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0xffffffff),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 4",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0xffffffff),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 5",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 6",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 7",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 5),
- BPF_MOV32_IMM(BPF_REG_2, 5 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 8",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 9",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0xffffffff),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 10",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0xffffffff),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 11",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 12",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test in bounds 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 4),
- BPF_MOV32_IMM(BPF_REG_2, 5 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 4,
-},
-{
- "masking, test in bounds 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test in bounds 3",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0xfffffffe),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0xfffffffe,
-},
-{
- "masking, test in bounds 4",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0xabcde),
- BPF_MOV32_IMM(BPF_REG_2, 0xabcdef - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0xabcde,
-},
-{
- "masking, test in bounds 5",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test in bounds 6",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 46),
- BPF_MOV32_IMM(BPF_REG_2, 47 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 46,
-},
-{
- "masking, test in bounds 7",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, -46),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1),
- BPF_MOV32_IMM(BPF_REG_2, 47 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 46,
-},
-{
- "masking, test in bounds 8",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, -47),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1),
- BPF_MOV32_IMM(BPF_REG_2, 47 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
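
Note: all of these cases drive one idiom, a branchless Spectre-v1-style bound check that forces an out-of-range or "negative" index to zero instead of branching on it: the out-of-bounds tests all return 0, the in-bounds tests pass the index through. A userspace C sketch of the same arithmetic, assuming two's-complement wrap and an arithmetic right shift on signed types (which gcc/clang provide and which BPF_NEG/BPF_ARSH guarantee):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t mask_index(uint64_t idx, uint64_t max)
    {
        uint64_t m = (max - 1) - idx;   /* bit 63 set iff idx > max - 1 */

        m |= idx;                       /* bit 63 also set for "negative" idx */
        /* 0 when out of bounds, else all ones (idx == 0 also gives 0, harmlessly) */
        m = (uint64_t)((int64_t)(0 - m) >> 63);
        return idx & m;
    }

    int main(void)
    {
        printf("%llu\n", (unsigned long long)mask_index(4, 5));     /* 4: in bounds */
        printf("%llu\n", (unsigned long long)mask_index(5, 5));     /* 0: out of bounds */
        printf("%llu\n", (unsigned long long)mask_index(~0ULL, 5)); /* 0: "negative" */
        return 0;
    }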
diff --git a/tools/testing/selftests/bpf/verifier/meta_access.c b/tools/testing/selftests/bpf/verifier/meta_access.c
deleted file mode 100644
index b45e8af41420..000000000000
--- a/tools/testing/selftests/bpf/verifier/meta_access.c
+++ /dev/null
@@ -1,235 +0,0 @@
-{
- "meta access, test1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet, off=-8",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test4",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test5",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3),
- BPF_MOV64_IMM(BPF_REG_2, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_xdp_adjust_meta),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R3 !read_ok",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test6",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test7",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test8",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test9",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test10",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_IMM(BPF_REG_5, 42),
- BPF_MOV64_IMM(BPF_REG_6, 24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test11",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_IMM(BPF_REG_5, 42),
- BPF_MOV64_IMM(BPF_REG_6, 24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test12",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
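
Note: the common thread in these cases is that XDP metadata lives in [data_meta, data) and every access must be bounds-checked against ctx->data, never data_end; test5 additionally shows that bpf_xdp_adjust_meta() invalidates previously derived pointers. A hedged C sketch of the accepted pattern from test1/test7 (section name and libbpf build environment assumed):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("xdp")
    int read_meta(struct xdp_md *ctx)
    {
        void *meta = (void *)(long)ctx->data_meta;
        void *data = (void *)(long)ctx->data;
        __u8 first;

        /* every meta access needs this check against data, not data_end */
        if (meta + 8 > data)
            return XDP_PASS;

        first = *(__u8 *)meta;  /* provably inside [data_meta, data) now */
        return first ? XDP_PASS : XDP_DROP;
    }

    char _license[] SEC("license") = "GPL";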
diff --git a/tools/testing/selftests/bpf/verifier/raw_stack.c b/tools/testing/selftests/bpf/verifier/raw_stack.c
deleted file mode 100644
index eb5ed936580b..000000000000
--- a/tools/testing/selftests/bpf/verifier/raw_stack.c
+++ /dev/null
@@ -1,305 +0,0 @@
-{
- "raw_stack: no skb_load_bytes",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- /* Call to skb_load_bytes() omitted. */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid read from stack R6 off=-8 size=8",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, negative len",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, negative len 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, ~0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, zero len",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid zero-sized read",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, no init",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, init",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xcafe),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, spilled regs around bounds",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
- offsetof(struct __sk_buff, priority)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, spilled regs corruption",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 invalid mem access 'scalar'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "raw_stack: skb_load_bytes, spilled regs corruption 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
- offsetof(struct __sk_buff, priority)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_3,
- offsetof(struct __sk_buff, pkt_type)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R3 invalid mem access 'scalar'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "raw_stack: skb_load_bytes, spilled regs + data",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
- offsetof(struct __sk_buff, priority)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -513),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid indirect access to stack R3 off=-513 size=8",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid indirect access to stack R3 off=-1 size=8",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 0xffffffff),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 6",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid zero-sized read",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, large access",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 512),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
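
Note: these cases pin down how a helper may write into raw (even uninitialized) stack memory: the buffer must lie inside the 512-byte frame and the length must be a verifier-visible, non-zero, non-negative value. A hedged C sketch of the accepted shape from "skb_load_bytes, no init" (section name and build setup assumed):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("tc")
    int load_bytes(struct __sk_buff *skb)
    {
        __u8 buf[8];    /* may stay uninitialized: the helper overwrites it */

        /* fixed offset and a constant, non-zero length within the stack */
        if (bpf_skb_load_bytes(skb, 4, buf, sizeof(buf)) < 0)
            return 0;
        return buf[0];
    }

    char _license[] SEC("license") = "GPL";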
diff --git a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
deleted file mode 100644
index 2978fb5a769d..000000000000
--- a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
+++ /dev/null
@@ -1,35 +0,0 @@
-{
- "raw_tracepoint_writable: reject variable offset",
- .insns = {
- /* r6 is our tp buffer */
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
-
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- /* move the key (== 0) to r10-8 */
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- /* lookup in the map */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_map_lookup_elem),
-
- /* exit clean if null */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
-
- /* shift the buffer pointer to a variable location */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_0),
- /* clobber whatever's there */
- BPF_MOV64_IMM(BPF_REG_7, 4242),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, 0),
-
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1, },
- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
- .errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
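
Note: for writable raw tracepoints, the buffer handed to the program may only be written at constant offsets; adding a runtime-variable offset, as above, is rejected. A hedged C sketch of the compliant form (hook name illustrative, buffer size assumed large enough):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("raw_tp.w/bpf_testmod_test_writable_bare")  /* hook name illustrative */
    int write_fixed(struct bpf_raw_tracepoint_args *ctx)
    {
        __u64 *buf = (__u64 *)(long)ctx->args[0];

        buf[0] = 4242;  /* constant offset: accepted */
        /* buf[i] with a runtime-variable i would be rejected with
         * "invalid variable buffer offset", as in the test above
         */
        return 0;
    }

    char _license[] SEC("license") = "GPL";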
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 9540164712b7..5a2e154dd1e0 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -142,7 +142,7 @@
.kfunc = "bpf",
.expected_attach_type = BPF_LSM_MAC,
.flags = BPF_F_SLEEPABLE,
- .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket",
+ .errstr = "Possibly NULL pointer passed to trusted arg0",
.fixup_kfunc_btf_id = {
{ "bpf_lookup_user_key", 2 },
{ "bpf_key_put", 4 },
@@ -163,7 +163,7 @@
.kfunc = "bpf",
.expected_attach_type = BPF_LSM_MAC,
.flags = BPF_F_SLEEPABLE,
- .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket",
+ .errstr = "Possibly NULL pointer passed to trusted arg0",
.fixup_kfunc_btf_id = {
{ "bpf_lookup_system_key", 1 },
{ "bpf_key_put", 3 },
@@ -182,7 +182,7 @@
.kfunc = "bpf",
.expected_attach_type = BPF_LSM_MAC,
.flags = BPF_F_SLEEPABLE,
- .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar",
+ .errstr = "Possibly NULL pointer passed to trusted arg0",
.fixup_kfunc_btf_id = {
{ "bpf_key_put", 1 },
},
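
Note: the three updated strings track a verifier behavior change: a possibly-NULL pointer passed to a trusted kfunc argument is now reported up front as such, rather than as a type mismatch. On the program side the cure is an explicit NULL check between acquire and release; a hedged sketch assuming a vmlinux.h build (key serial illustrative):

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
    void bpf_key_put(struct bpf_key *bkey) __ksym;

    SEC("lsm.s/bpf")
    int BPF_PROG(key_check, int cmd, union bpf_attr *attr, unsigned int size)
    {
        struct bpf_key *bkey = bpf_lookup_user_key(0, 0); /* serial 0 illustrative */

        if (!bkey)      /* without this, arg0 is possibly NULL: rejected */
            return 0;
        bpf_key_put(bkey);
        return 0;
    }

    char _license[] SEC("license") = "GPL";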
diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c
deleted file mode 100644
index 92e3f6a61a79..000000000000
--- a/tools/testing/selftests/bpf/verifier/ringbuf.c
+++ /dev/null
@@ -1,95 +0,0 @@
-{
- "ringbuf: invalid reservation offset 1",
- .insns = {
- /* reserve 8 byte ringbuf memory */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
- /* store a pointer to the reserved memory in R6 */
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- /* check whether the reservation was successful */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- /* spill R6(mem) into the stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
- /* fill it back in R7 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
- /* should be able to access *(R7) = 0 */
- BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
- /* submit the reserved ringbuf memory */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- /* add invalid offset to reserved ringbuf memory */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xcafe),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_ringbuf = { 1 },
- .result = REJECT,
- .errstr = "R1 must have zero offset when passed to release func",
-},
-{
- "ringbuf: invalid reservation offset 2",
- .insns = {
- /* reserve 8 byte ringbuf memory */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
- /* store a pointer to the reserved memory in R6 */
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- /* check whether the reservation was successful */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- /* spill R6(mem) into the stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
- /* fill it back in R7 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
- /* add invalid offset to reserved ringbuf memory */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0xcafe),
- /* should be able to access *(R7) = 0 */
- BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
- /* submit the reserved ringbuf memory */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_ringbuf = { 1 },
- .result = REJECT,
- .errstr = "R7 min value is outside of the allowed memory range",
-},
-{
- "ringbuf: check passing rb mem to helpers",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- /* reserve 8 byte ringbuf memory */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- /* check whether the reservation was successful */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- /* pass allocated ring buffer memory to fib lookup */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 8),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_fib_lookup),
- /* submit the ringbuf memory */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_ringbuf = { 2 },
- .prog_type = BPF_PROG_TYPE_XDP,
- .result = ACCEPT,
-},
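
Note: the ringbuf cases all enforce one rule: bpf_ringbuf_submit() must receive back exactly the pointer bpf_ringbuf_reserve() returned, at zero offset, after a NULL check. A hedged C sketch of the accepted round trip (ring size and section name illustrative):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
        __uint(type, BPF_MAP_TYPE_RINGBUF);
        __uint(max_entries, 4096);
    } rb SEC(".maps");

    SEC("xdp")
    int emit(struct xdp_md *ctx)
    {
        __u64 *slot = bpf_ringbuf_reserve(&rb, sizeof(*slot), 0);

        if (!slot)
            return XDP_PASS;
        *slot = 42;                  /* stores inside the reservation are fine */
        bpf_ringbuf_submit(slot, 0); /* must get the pointer back at offset 0 */
        return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";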
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
deleted file mode 100644
index d1463bf4949a..000000000000
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ /dev/null
@@ -1,345 +0,0 @@
-{
- "check valid spill/fill",
- .insns = {
- /* spill R1(ctx) into stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- /* fill it back into R2 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
- /* should be able to access R0 = *(R2 + 8) */
- /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .retval = POINTER_VALUE,
-},
-{
- "check valid spill/fill, skb mark",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = ACCEPT,
-},
-{
- "check valid spill/fill, ptr to mem",
- .insns = {
- /* reserve 8 byte ringbuf memory */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
- /* store a pointer to the reserved memory in R6 */
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- /* check whether the reservation was successful */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- /* spill R6(mem) into the stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
- /* fill it back in R7 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
- /* should be able to access *(R7) = 0 */
- BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
- /* submit the reserved ringbuf memory */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_ringbuf = { 1 },
- .result = ACCEPT,
- .result_unpriv = ACCEPT,
-},
-{
- "check with invalid reg offset 0",
- .insns = {
- /* reserve 8 byte ringbuf memory */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
- /* store a pointer to the reserved memory in R6 */
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- /* add invalid offset to memory or NULL */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- /* check whether the reservation was successful */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
-	/* should not be able to access *(R6) = 0 */
- BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0),
- /* submit the reserved ringbuf memory */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_ringbuf = { 1 },
- .result = REJECT,
- .errstr = "R0 pointer arithmetic on ringbuf_mem_or_null prohibited",
-},
-{
- "check corrupted spill/fill",
- .insns = {
- /* spill R1(ctx) into stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- /* mess up with R1 pointer on stack */
- BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23),
-	/* filling back into R0 is fine for priv.
- * R0 now becomes SCALAR_VALUE.
- */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- /* Load from R0 should fail. */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .errstr = "R0 invalid mem access 'scalar'",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "check corrupted spill/fill, LSB",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_H, BPF_REG_10, -8, 0xcafe),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .retval = POINTER_VALUE,
-},
-{
- "check corrupted spill/fill, MSB",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x12345678),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .retval = POINTER_VALUE,
-},
-{
- "Spill and refill a u32 const scalar. Offset to skb->data",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- /* r4 = 20 */
- BPF_MOV32_IMM(BPF_REG_4, 20),
- /* *(u32 *)(r10 -8) = r4 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
- /* r4 = *(u32 *)(r10 -8) */
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -8),
- /* r0 = r2 */
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
- /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- /* r0 = *(u32 *)r2 R0=pkt,off=20,r=20 R2=pkt,r=20 R3=pkt_end R4=20 */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "Spill a u32 const, refill from another half of the uninit u32 from the stack",
- .insns = {
- /* r4 = 20 */
- BPF_MOV32_IMM(BPF_REG_4, 20),
- /* *(u32 *)(r10 -8) = r4 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
-	/* r4 = *(u32 *)(r10 -4) fp-8=????rrrr */
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result_unpriv = REJECT,
- .errstr_unpriv = "invalid read from stack off -4+0 size 4",
- /* in privileged mode reads from uninitialized stack locations are permitted */
- .result = ACCEPT,
-},
-{
- "Spill a u32 const scalar. Refill as u16. Offset to skb->data",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- /* r4 = 20 */
- BPF_MOV32_IMM(BPF_REG_4, 20),
- /* *(u32 *)(r10 -8) = r4 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
- /* r4 = *(u16 *)(r10 -8) */
- BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8),
- /* r0 = r2 */
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
- /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "Spill u32 const scalars. Refill as u64. Offset to skb->data",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- /* r6 = 0 */
- BPF_MOV32_IMM(BPF_REG_6, 0),
- /* r7 = 20 */
- BPF_MOV32_IMM(BPF_REG_7, 20),
- /* *(u32 *)(r10 -4) = r6 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -4),
- /* *(u32 *)(r10 -8) = r7 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, -8),
-	/* r4 = *(u16 *)(r10 -8) */
- BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8),
- /* r0 = r2 */
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
- /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- /* r4 = 20 */
- BPF_MOV32_IMM(BPF_REG_4, 20),
- /* *(u32 *)(r10 -8) = r4 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
- /* r4 = *(u16 *)(r10 -6) */
- BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -6),
- /* r0 = r2 */
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
- /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "Spill and refill a u32 const scalar at non 8byte aligned stack addr. Offset to skb->data",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- /* r4 = 20 */
- BPF_MOV32_IMM(BPF_REG_4, 20),
- /* *(u32 *)(r10 -8) = r4 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
- /* *(u32 *)(r10 -4) = r4 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -4),
-	/* r4 = *(u32 *)(r10 -4) */
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -4),
- /* r0 = r2 */
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=U32_MAX */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
- /* if (r0 > r3) R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- /* r0 = *(u32 *)r2 R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "Spill and refill a umax=40 bounded scalar. Offset to skb->data",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, tstamp)),
- BPF_JMP_IMM(BPF_JLE, BPF_REG_4, 40, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- /* *(u32 *)(r10 -8) = r4 R4=umax=40 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
-	/* r4 = *(u32 *)(r10 - 8) */
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -8),
- /* r2 += r4 R2=pkt R4=umax=40 */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_4),
- /* r0 = r2 R2=pkt,umax=40 R4=umax=40 */
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- /* r2 += 20 R0=pkt,umax=40 R2=pkt,umax=40 */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 20),
- /* if (r2 > r3) R0=pkt,umax=40 R2=pkt,off=20,umax=40 */
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_3, 1),
- /* r0 = *(u32 *)r0 R0=pkt,r=20,umax=40 R2=pkt,off=20,r=20,umax=40 */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "Spill a u32 scalar at fp-4 and then at fp-8",
- .insns = {
- /* r4 = 4321 */
- BPF_MOV32_IMM(BPF_REG_4, 4321),
- /* *(u32 *)(r10 -4) = r4 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -4),
- /* *(u32 *)(r10 -8) = r4 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
- /* r4 = *(u64 *)(r10 -8) */
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
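
Note: the spill/fill cases encode a simple rule: a fill preserves the spilled register's bounds only when it reads back the same slot at the same width; partial, misaligned, or wider fills degrade it to an unknown scalar. A hedged C analog of the accepted full-width round trip (the volatile is an approximation to force a real stack store/load, since the deleted tests drive the insn-level form directly and a compiler may otherwise optimize the spill away):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("tc")
    int spill_roundtrip(struct __sk_buff *skb)
    {
        void *data = (void *)(long)skb->data;
        void *data_end = (void *)(long)skb->data_end;
        volatile __u32 slot;    /* forces an actual stack store/load */
        __u32 off = 20;

        slot = off;             /* 4-byte spill of a known constant */
        off = slot;             /* same-width fill: bounds survive */
        if (data + off > data_end)
            return 0;
        return *(__u8 *)data;   /* in bounds thanks to the tracked fill */
    }

    char _license[] SEC("license") = "GPL";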
diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c
deleted file mode 100644
index 8ab94d65f3d5..000000000000
--- a/tools/testing/selftests/bpf/verifier/stack_ptr.c
+++ /dev/null
@@ -1,359 +0,0 @@
-{
- "PTR_TO_STACK store/load",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0xfaceb00c,
-},
-{
- "PTR_TO_STACK store/load - bad alignment on off",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
-},
-{
- "PTR_TO_STACK store/load - bad alignment on reg",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
-},
-{
- "PTR_TO_STACK store/load - out of bounds low",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid write to stack R1 off=-79992 size=8",
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
-},
-{
- "PTR_TO_STACK store/load - out of bounds high",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid write to stack R1 off=0 size=8",
-},
-{
- "PTR_TO_STACK check high 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check high 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check high 3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check high 4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "invalid write to stack R1 off=0 size=1",
- .result = REJECT,
-},
-{
- "PTR_TO_STACK check high 5",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "invalid write to stack R1",
-},
-{
- "PTR_TO_STACK check high 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1),
- BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "invalid write to stack",
-},
-{
- "PTR_TO_STACK check high 7",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1),
- BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "fp pointer offset",
-},
-{
- "PTR_TO_STACK check low 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -512),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check low 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 1, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 1),
- BPF_EXIT_INSN(),
- },
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check low 3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "invalid write to stack R1 off=-513 size=1",
- .result = REJECT,
-},
-{
- "PTR_TO_STACK check low 4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, INT_MIN),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "math between fp pointer",
-},
-{
- "PTR_TO_STACK check low 5",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "invalid write to stack",
-},
-{
- "PTR_TO_STACK check low 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)),
- BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid write to stack",
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
-},
-{
- "PTR_TO_STACK check low 7",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)),
- BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "fp pointer offset",
-},
-{
- "PTR_TO_STACK mixed reg/k, 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3),
- BPF_MOV64_IMM(BPF_REG_2, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK mixed reg/k, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3),
- BPF_MOV64_IMM(BPF_REG_2, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_5, -6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK mixed reg/k, 3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3),
- BPF_MOV64_IMM(BPF_REG_2, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = -3,
-},
-{
- "PTR_TO_STACK reg",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_2, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "stack pointer arithmetic",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
- BPF_ST_MEM(0, BPF_REG_2, 4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
- BPF_ST_MEM(0, BPF_REG_2, 4, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
-},
-{
- "store PTR_TO_STACK in R10 to array map using BPF_B",
- .insns = {
- /* Load pointer to map. */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- /* Copy R10 to R9. */
- BPF_MOV64_REG(BPF_REG_9, BPF_REG_10),
- /* Pollute other registers with unaligned values. */
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_3, -1),
- BPF_MOV64_IMM(BPF_REG_4, -1),
- BPF_MOV64_IMM(BPF_REG_5, -1),
- BPF_MOV64_IMM(BPF_REG_6, -1),
- BPF_MOV64_IMM(BPF_REG_7, -1),
- BPF_MOV64_IMM(BPF_REG_8, -1),
- /* Store both R9 and R10 with BPF_B and read back. */
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, 0),
- BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_1, 0),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_9, 0),
- BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_1, 0),
- /* Should read back as same value. */
- BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 42,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
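
Note: stack_ptr boils down to one invariant: fp-relative pointers may only be dereferenced inside the 512-byte frame, with natural alignment for the access size, and unprivileged programs additionally may not even form out-of-range stack pointers. A hedged C sketch of the legal extremes (section name illustrative):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("socket")
    int frame_bounds(void *ctx)
    {
        char buf[512];  /* exactly the frame the verifier allows */

        buf[0] = 42;    /* fp-512: lowest valid byte */
        buf[511] = 42;  /* fp-1:  highest valid byte */
        /* buf[-1] or buf[512] would draw "invalid write to stack" */
        return buf[511];
    }

    char _license[] SEC("license") = "GPL";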
diff --git a/tools/testing/selftests/bpf/verifier/uninit.c b/tools/testing/selftests/bpf/verifier/uninit.c
deleted file mode 100644
index 987a5871ff1d..000000000000
--- a/tools/testing/selftests/bpf/verifier/uninit.c
+++ /dev/null
@@ -1,39 +0,0 @@
-{
- "read uninitialized register",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R2 !read_ok",
- .result = REJECT,
-},
-{
- "read invalid register",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R15 is invalid",
- .result = REJECT,
-},
-{
- "program doesn't init R0 before exit",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 !read_ok",
- .result = REJECT,
-},
-{
- "program doesn't init R0 before exit in all branches",
- .insns = {
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 !read_ok",
- .errstr_unpriv = "R1 pointer comparison",
- .result = REJECT,
-},
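
Note: these four cases guard the ground rules: every register must be written before it is read, and R0 must be set on every path reaching exit. Compiler-built programs satisfy this automatically; the tests matter for hand-assembled insns. The C equivalent, as a minimal sketch (section name illustrative):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("socket")
    int ret_on_all_paths(void *ctx)
    {
        int ret = 0;    /* R0 is written before any exit */

        if (ctx)
            ret = 1;
        return ret;
    }

    char _license[] SEC("license") = "GPL";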
diff --git a/tools/testing/selftests/bpf/verifier/value.c b/tools/testing/selftests/bpf/verifier/value.c
deleted file mode 100644
index 0e42592b1218..000000000000
--- a/tools/testing/selftests/bpf/verifier/value.c
+++ /dev/null
@@ -1,104 +0,0 @@
-{
- "map element value store of cleared call register",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R1 !read_ok",
- .errstr = "R1 !read_ok",
- .result = REJECT,
- .result_unpriv = REJECT,
-},
-{
- "map element value with unaligned store",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3),
- BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22),
- BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23),
- BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map element value with unaligned load",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map element value is preserved across register spilling",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, offsetof(struct test_val, foo)),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
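Each `.fixup_map_hash_48b = { 3 }` entry tells the test runner to patch instruction 3 (the BPF_LD_MAP_FD placeholder) with the fd of a hash map whose values are 48 bytes wide. For reference, the fixture these cases index into is the test_val struct from test_verifier.c (reproduced from memory, so treat the exact definition as an assumption); 4 + 4*11 bytes gives the 48-byte value size:

	#define MAX_ENTRIES 11

	struct test_val {
		unsigned int index;
		int foo[MAX_ENTRIES];	/* sizeof(struct test_val) == 48 */
	};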
diff --git a/tools/testing/selftests/bpf/verifier/value_adj_spill.c b/tools/testing/selftests/bpf/verifier/value_adj_spill.c
deleted file mode 100644
index 7135e8021b81..000000000000
--- a/tools/testing/selftests/bpf/verifier/value_adj_spill.c
+++ /dev/null
@@ -1,43 +0,0 @@
-{
- "map element value is preserved across register spilling",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
-},
-{
- "map element value or null is marked on register spilling",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
-},
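Both cases spill the map-value pointer to a stack slot and fill it back: the verifier must preserve the precise pointer type across the round trip (first case) and propagate the null check through the spilled copy (second case). The core of the first case, translated by hand into BPF assembler mnemonics (an annotation of the macros above, key setup omitted):

	r0 = bpf_map_lookup_elem(map, &key)	/* r0: PTR_TO_MAP_VALUE_OR_NULL */
	if r0 == 0 goto exit			/* r0 is now PTR_TO_MAP_VALUE */
	*(u64 *)(r0 + 0) = 42			/* direct store through r0: ok */
	r1 = r10				/* r10 is the frame pointer */
	r1 += -184
	*(u64 *)(r1 + 0) = r0			/* spill the pointer to fp-184 */
	r3 = *(u64 *)(r1 + 0)			/* fill it back into r3 */
	*(u64 *)(r3 + 0) = 42			/* r3 must still be a map value */
	exit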
diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c
deleted file mode 100644
index 52a8bca14f03..000000000000
--- a/tools/testing/selftests/bpf/verifier/value_or_null.c
+++ /dev/null
@@ -1,220 +0,0 @@
-{
- "multiple registers share map_lookup_elem result",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "alu ops on ptr_to_map_value_or_null, 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "R4 pointer arithmetic on map_value_or_null",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "alu ops on ptr_to_map_value_or_null, 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "R4 pointer arithmetic on map_value_or_null",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "alu ops on ptr_to_map_value_or_null, 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "R4 pointer arithmetic on map_value_or_null",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "invalid memory access with multiple map_lookup_elem calls",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .result = REJECT,
- .errstr = "R4 !read_ok",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "valid indirect map_lookup_elem access with 2nd lookup in branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_2, 10),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "invalid map access from else condition",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES-1, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 unbounded memory access",
- .result = REJECT,
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map lookup and null branch prediction",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_10, 10),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "MAP_VALUE_OR_NULL check_ids() in regsafe()",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* r9 = map_lookup_elem(...) */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
- /* r8 = map_lookup_elem(...) */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- /* r7 = ktime_get_ns() */
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- /* r6 = ktime_get_ns() */
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- /* if r6 > r7 goto +1 ; no new information about the state is derived from
- * ; this check, thus the produced verifier states differ
- * ; only in 'insn_idx'
- * r9 = r8 ; optionally share ID between r9 and r8
- */
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
- BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
- /* if r9 == 0 goto <exit> */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1),
- /* read the map value via r8; this is not always
- * safe because r8 might not be equal to r9.
- */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
- /* exit 0 */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .flags = BPF_F_TEST_STATE_FREQ,
- .fixup_map_hash_8b = { 3, 9 },
- .result = REJECT,
- .errstr = "R8 invalid mem access 'map_value_or_null'",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
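The last case above is the subtle one: two lookups return two maybe-NULL pointers with distinct IDs, one branch aliases them, and a null check on r9 proves r8 non-NULL only on the aliased path. regsafe() must therefore compare IDs (via check_ids()) before treating two verifier states as equivalent. A C-level sketch of the unsafe shape (illustrative only; the map `m` is a placeholder):

	SEC("cgroup/skb")
	int check_ids_sketch(struct __sk_buff *skb)
	{
		__u32 k = 0;
		__u64 *r9, *r8;

		r9 = bpf_map_lookup_elem(&m, &k); /* id=1, may be NULL */
		r8 = bpf_map_lookup_elem(&m, &k); /* id=2, may be NULL */
		if (bpf_ktime_get_ns() > bpf_ktime_get_ns())
			r9 = r8;		/* r9 and r8 now share id=2 */
		if (!r9)
			return 0;	/* proves r9 != NULL; proves r8 only if ids match */
		return *r8;		/* unsafe on the path where r9 != r8 */
	}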
diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
deleted file mode 100644
index b183e26c03f1..000000000000
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ /dev/null
@@ -1,291 +0,0 @@
-{
- "variable-offset ctx access",
- .insns = {
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- /* add it to skb. We now have either &skb->len or
- * &skb->pkt_type, but we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- /* dereference it */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "variable ctx access var_off=(0x0; 0x4)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "variable-offset stack read, priv vs unpriv",
- .insns = {
- /* Fill the top 8 bytes of the stack */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
- /* add it to fp. We now have either fp-4 or fp-8, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* dereference it for a stack read */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R2 variable stack access prohibited for !root",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "variable-offset stack read, uninitialized",
- .insns = {
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
- /* add it to fp. We now have either fp-4 or fp-8, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* dereference it for a stack read */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid variable-offset read from stack R2",
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "variable-offset stack write, priv vs unpriv",
- .insns = {
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 8-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-8 or fp-16, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it for a stack write */
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- /* Now read from the address we just wrote. This shows
- * that, after a variable-offset write, a privileged
- * program can read the slots that were in the range of
- * that write (even if the verifier doesn't actually know
- * if the slot being read was really written to or not).
- */
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- /* Variable stack access is rejected for unprivileged.
- */
- .errstr_unpriv = "R2 variable stack access prohibited for !root",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "variable-offset stack write clobbers spilled regs",
- .insns = {
- /* Dummy instruction; needed because we need to patch the next one
- * and we can't patch the first instruction.
- */
- BPF_MOV64_IMM(BPF_REG_6, 0),
- /* Make R0 a map ptr */
- BPF_LD_MAP_FD(BPF_REG_0, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 8-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-8 or fp-16, but
- * we don't know which.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Spill R0(map ptr) into stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- /* Dereference the unknown value for a stack write */
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- /* Fill the register back into R2 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
- /* Try to dereference R2 for a memory load */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- /* The unprivileged case is not too interesting; variable
- * stack access is rejected.
- */
- .errstr_unpriv = "R2 variable stack access prohibited for !root",
- .result_unpriv = REJECT,
- /* In the privileged case, dereferencing a spilled-and-then-filled
- * register is rejected because the previous variable offset stack
- * write might have overwritten the spilled pointer (i.e. we lose track
- * of the spilled register when we analyze the write).
- */
- .errstr = "R2 invalid mem access 'scalar'",
- .result = REJECT,
-},
-{
- "indirect variable-offset stack access, unbounded",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 6),
- BPF_MOV64_IMM(BPF_REG_3, 28),
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops,
- bytes_received)),
- /* Check the lower bound but don't check the upper one. */
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_4, 0, 4),
- /* Point the lower bound to initialized stack. Offset is now in range
- * from fp-16 to fp+0x7fffffffffffffef, i.e. max value is unbounded.
- */
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_5, 8),
- /* Dereference it indirectly. */
- BPF_EMIT_CALL(BPF_FUNC_getsockopt),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid unbounded variable-offset indirect access to stack R4",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
- "indirect variable-offset stack access, max out of bound",
- .insns = {
- /* Fill the top 8 bytes of the stack */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
- /* add it to fp. We now have either fp-4 or fp-8, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* dereference it indirectly */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "invalid variable-offset indirect access to stack R2",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "indirect variable-offset stack access, min out of bound",
- .insns = {
- /* Fill the top 8 bytes of the stack */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 516),
- /* add it to fp. We now have either fp-516 or fp-512, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* dereference it indirectly */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "invalid variable-offset indirect access to stack R2",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "indirect variable-offset stack access, min_off < min_initialized",
- .insns = {
- /* Fill only the top 8 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
- * which. An 8-byte access at fp-16 partially covers uninitialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "invalid indirect read from stack R2 var_off",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "indirect variable-offset stack access, priv vs unpriv",
- .insns = {
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, we don't know
- * which, but either way it points to initialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 6 },
- .errstr_unpriv = "R2 variable stack access prohibited for !root",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "indirect variable-offset stack access, ok",
- .insns = {
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, we don't know
- * which, but either way it points to initialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 6 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
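All of these cases follow one recipe: mask an unknown value down to a small, aligned range, add it to a base pointer, and see whether the verifier tracks the resulting variable offset. A rough C-level equivalent of the "variable-offset stack read, priv vs unpriv" shape (a sketch; the compiler is not guaranteed to emit exactly this access pattern):

	SEC("cgroup/skb")
	int var_off_sketch(struct __sk_buff *skb)
	{
		__u64 buf = 0;				/* initializes fp-8..fp-1 */
		__s64 off = (__s64)(skb->len & 4) - 8;	/* -8 or -4: small, aligned */
		char *p = (char *)(&buf + 1) + off;	/* fp-8 or fp-4 */

		return *(__u32 *)p;	/* root: accepted; unpriv: rejected */
	}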
diff --git a/tools/testing/selftests/bpf/verifier/xadd.c b/tools/testing/selftests/bpf/verifier/xadd.c
deleted file mode 100644
index b96ef3526815..000000000000
--- a/tools/testing/selftests/bpf/verifier/xadd.c
+++ /dev/null
@@ -1,97 +0,0 @@
-{
- "xadd/w check unaligned stack",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -7),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "misaligned stack access off",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "xadd/w check unaligned map",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 3),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = REJECT,
- .errstr = "misaligned value access off",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "xadd/w check unaligned pkt",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 99),
- BPF_JMP_IMM(BPF_JA, 0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
- BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_2, BPF_REG_0, 1),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_2, BPF_REG_0, 2),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "BPF_ATOMIC stores into R2 pkt is not allowed",
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "xadd/w check whether src/dst got mangled, 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_0, -8),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_0, -8),
- BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
- BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 42),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 3,
-},
-{
- "xadd/w check whether src/dst got mangled, 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -8),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -8),
- BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
- BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 42),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 3,
-},
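BPF_ATOMIC_OP(BPF_W, BPF_ADD, dst, src, off) performs *(u32 *)(dst + off) += src atomically, without fetching the old value back into src. The two "mangled" cases rely on exactly that: the slot starts at 1, is atomically incremented twice by r0 == 1, and the program returns the slot, so the expected retval is 3 while r0 (checked against its copy in r6) and r10 (checked against r7) must be untouched. In C terms (a sketch; clang emits the non-fetching atomic when the result of __sync_fetch_and_add is unused):

	__u32 slot = 1;	/* the value stored at fp-8 */
	__u32 inc = 1;	/* r0, with a copy kept in r6 */

	__sync_fetch_and_add(&slot, inc);	/* first xadd: slot == 2 */
	__sync_fetch_and_add(&slot, inc);	/* second xadd: slot == 3 */
	/* inc is still 1 and &slot is unchanged; returning slot yields 3 */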
diff --git a/tools/testing/selftests/bpf/verifier/xdp.c b/tools/testing/selftests/bpf/verifier/xdp.c
deleted file mode 100644
index 5ac390508139..000000000000
--- a/tools/testing/selftests/bpf/verifier/xdp.c
+++ /dev/null
@@ -1,14 +0,0 @@
-{
- "XDP, using ifindex from netdev",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, ingress_ifindex)),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .retval = 1,
-},
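A rough C equivalent of the single case above (a sketch): ingress_ifindex is at least 1 for any attached netdev, so the test run is expected to return 1.

	SEC("xdp")
	int ifindex_sketch(struct xdp_md *ctx)
	{
		return ctx->ingress_ifindex < 1 ? 0 : 1;
	}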
diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
deleted file mode 100644
index b4ec228eb95d..000000000000
--- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
+++ /dev/null
@@ -1,1468 +0,0 @@
-{
- "XDP pkt read, pkt_end mangling, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R3 pointer arithmetic on pkt_end",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "XDP pkt read, pkt_end mangling, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R3 pointer arithmetic on pkt_end",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "XDP pkt read, pkt_data' > pkt_end, corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' > pkt_end, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' > pkt_end, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' > pkt_end, corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end > pkt_data', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end > pkt_data', corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end > pkt_data', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end > pkt_data', corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end > pkt_data', corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' < pkt_end, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' < pkt_end, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' < pkt_end, corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' < pkt_end, corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end < pkt_data', corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end < pkt_data', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end < pkt_data', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end < pkt_data', corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end < pkt_data', corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' >= pkt_end, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' >= pkt_end, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' >= pkt_end, corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end >= pkt_data', corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end >= pkt_data', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end >= pkt_data', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end >= pkt_data', corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' <= pkt_end, corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' <= pkt_end, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' <= pkt_end, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end <= pkt_data', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end <= pkt_data', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end <= pkt_data', corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end <= pkt_data', corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' > pkt_data, corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' > pkt_data, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' > pkt_data, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data > pkt_meta', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data > pkt_meta', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data > pkt_meta', corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data > pkt_meta', corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' < pkt_data, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' < pkt_data, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' < pkt_data, corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data < pkt_meta', corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data < pkt_meta', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data < pkt_meta', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data < pkt_meta', corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' >= pkt_data, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' >= pkt_data, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' >= pkt_data, corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data >= pkt_meta', corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data >= pkt_meta', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data >= pkt_meta', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' <= pkt_data, corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' <= pkt_data, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' <= pkt_data, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data <= pkt_meta', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data <= pkt_meta', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data <= pkt_meta', corner case, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 83231456d3c5..1db7185181da 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -1,10 +1,9 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#define _GNU_SOURCE
#include <argp.h>
#include <string.h>
#include <stdlib.h>
-#include <linux/compiler.h>
#include <sched.h>
#include <pthread.h>
#include <dirent.h>
@@ -15,10 +14,15 @@
#include <sys/sysinfo.h>
#include <sys/stat.h>
#include <bpf/libbpf.h>
+#include <bpf/btf.h>
#include <libelf.h>
#include <gelf.h>
#include <float.h>
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
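
For context, the ARRAY_SIZE fallback above is the usual element-count idiom. A tiny illustration of what it does and its one classic pitfall (not part of the patch; names are made up):

	/* Illustrative only: ARRAY_SIZE counts elements of a true array. */
	int stats[8];
	_Static_assert(sizeof(stats) / sizeof(stats[0]) == 8, "8 elements");
	/* Applied to a pointer instead of an array, the same expression
	 * silently yields a wrong answer, which is why some codebases add
	 * an extra type check to their ARRAY_SIZE definition.
	 */
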
enum stat_id {
VERDICT,
DURATION,
@@ -135,12 +139,17 @@ static struct env {
char **filenames;
int filename_cnt;
bool verbose;
+ bool debug;
bool quiet;
- int log_level;
enum resfmt out_fmt;
+ bool show_version;
bool comparison_mode;
bool replay_mode;
+ int log_level;
+ int log_size;
+ bool log_fixed;
+
struct verif_stats *prog_stats;
int prog_stat_cnt;
@@ -169,23 +178,37 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va
{
if (!env.verbose)
return 0;
- if (level == LIBBPF_DEBUG /* && !env.verbose */)
+ if (level == LIBBPF_DEBUG && !env.debug)
return 0;
return vfprintf(stderr, format, args);
}
-const char *argp_program_version = "veristat";
+#ifndef VERISTAT_VERSION
+#define VERISTAT_VERSION "<kernel>"
+#endif
+
+const char *argp_program_version = "veristat v" VERISTAT_VERSION;
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] =
"veristat BPF verifier stats collection and comparison tool.\n"
"\n"
"USAGE: veristat <obj-file> [<obj-file>...]\n"
-" OR: veristat -C <baseline.csv> <comparison.csv>\n";
+" OR: veristat -C <baseline.csv> <comparison.csv>\n"
+" OR: veristat -R <results.csv>\n";
+
+enum {
+ OPT_LOG_FIXED = 1000,
+ OPT_LOG_SIZE = 1001,
+};
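
The enum above follows the common argp convention: long-only options get key values above the printable ASCII range so they cannot collide with single-character short options. A minimal self-contained sketch of that pattern, with made-up names:

	/* Sketch of the argp long-only-option pattern; illustrative names. */
	#include <argp.h>

	enum { OPT_EXAMPLE = 1000 };	/* outside printable ASCII */

	static const struct argp_option example_opts[] = {
		{ "example", OPT_EXAMPLE, NULL, 0, "A long-only option" },
		{},
	};

	static error_t example_parse(int key, char *arg, struct argp_state *state)
	{
		switch (key) {
		case OPT_EXAMPLE:
			/* handle --example here */
			return 0;
		default:
			return ARGP_ERR_UNKNOWN;
		}
	}
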
static const struct argp_option opts[] = {
{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+ { "version", 'V', NULL, 0, "Print version" },
{ "verbose", 'v', NULL, 0, "Verbose mode" },
+ { "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
+ { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
+ { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
{ "quiet", 'q', NULL, 0, "Quiet mode" },
{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
{ "sort", 's', "SPEC", 0, "Specify sort order" },
@@ -209,9 +232,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 'h':
argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
break;
+ case 'V':
+ env.show_version = true;
+ break;
case 'v':
env.verbose = true;
break;
+ case 'd':
+ env.debug = true;
+ env.verbose = true;
+ break;
case 'q':
env.quiet = true;
break;
@@ -243,6 +273,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
argp_usage(state);
}
break;
+ case OPT_LOG_FIXED:
+ env.log_fixed = true;
+ break;
+ case OPT_LOG_SIZE:
+ errno = 0;
+ env.log_size = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid log size: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
case 'C':
env.comparison_mode = true;
break;
@@ -772,7 +813,62 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *
return 0;
}
-static void fixup_obj(struct bpf_object *obj)
+static int guess_prog_type_by_ctx_name(const char *ctx_name,
+ enum bpf_prog_type *prog_type,
+ enum bpf_attach_type *attach_type)
+{
+ /* We need to guess the program type based on its declared context
+ * type. The guess can't be perfect, as many different program types
+ * might share the same context type, so we can only make a
+ * reasonable guess and hope to get lucky.
+ *
+ * Just in case, we support both UAPI-side type names and
+ * kernel-internal names.
+ */
+ static struct {
+ const char *uapi_name;
+ const char *kern_name;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type attach_type;
+ } ctx_map[] = {
+ /* __sk_buff is the most ambiguous; assume a TC program */
+ { "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS },
+ { "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND },
+ { "bpf_sock_addr", "bpf_sock_addr_kern", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND },
+ { "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS },
+ { "sk_msg_md", "sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT },
+ { "bpf_cgroup_dev_ctx", "bpf_cgroup_dev_ctx", BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE },
+ { "bpf_sysctl", "bpf_sysctl_kern", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL },
+ { "bpf_sockopt", "bpf_sockopt_kern", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT },
+ { "sk_reuseport_md", "sk_reuseport_kern", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE },
+ { "bpf_sk_lookup", "bpf_sk_lookup_kern", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP },
+ { "xdp_md", "xdp_buff", BPF_PROG_TYPE_XDP, BPF_XDP },
+ /* tracing types with no expected attach type */
+ { "bpf_user_pt_regs_t", "pt_regs", BPF_PROG_TYPE_KPROBE },
+ { "bpf_perf_event_data", "bpf_perf_event_data_kern", BPF_PROG_TYPE_PERF_EVENT },
+ /* raw_tp programs use u64[] on the kernel side; we probably
+ * don't want to match on that, so use NULL for the kernel-side
+ * type name
+ */
+ { "bpf_raw_tracepoint_args", NULL, BPF_PROG_TYPE_RAW_TRACEPOINT },
+ };
+ int i;
+
+ if (!ctx_name)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(ctx_map); i++) {
+ if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 ||
+ (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) {
+ *prog_type = ctx_map[i].prog_type;
+ *attach_type = ctx_map[i].attach_type;
+ return 0;
+ }
+ }
+
+ return -ESRCH;
+}
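
A hypothetical call site for the helper above, illustrating its in/out contract (a sketch, not part of the patch):

	/* Hypothetical usage of guess_prog_type_by_ctx_name(). */
	enum bpf_prog_type ptype;
	enum bpf_attach_type atype;

	if (guess_prog_type_by_ctx_name("xdp_md", &ptype, &atype) == 0) {
		/* ptype is BPF_PROG_TYPE_XDP, atype is BPF_XDP */
	} else {
		/* -EINVAL for a NULL name, -ESRCH for an unknown context type */
	}
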
+
+static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
{
struct bpf_map *map;
@@ -792,18 +888,75 @@ static void fixup_obj(struct bpf_object *obj)
bpf_map__set_max_entries(map, 1);
}
}
+
+ /* SEC(freplace) programs can't be loaded with veristat as is,
+ * but we can try to guess their target program's expected type by
+ * looking at the type of the program's first argument and
+ * substituting the corresponding program type
+ */
+ if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) {
+ const struct btf *btf = bpf_object__btf(obj);
+ const char *prog_name = bpf_program__name(prog);
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type attach_type;
+ const struct btf_type *t;
+ const char *ctx_name;
+ int id;
+
+ if (!btf)
+ goto skip_freplace_fixup;
+
+ id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC);
+ t = btf__type_by_id(btf, id);
+ t = btf__type_by_id(btf, t->type);
+ if (!btf_is_func_proto(t) || btf_vlen(t) != 1)
+ goto skip_freplace_fixup;
+
+ /* context argument is a pointer to a struct/typedef */
+ t = btf__type_by_id(btf, btf_params(t)[0].type);
+ while (t && btf_is_mod(t))
+ t = btf__type_by_id(btf, t->type);
+ if (!t || !btf_is_ptr(t))
+ goto skip_freplace_fixup;
+ t = btf__type_by_id(btf, t->type);
+ while (t && btf_is_mod(t))
+ t = btf__type_by_id(btf, t->type);
+ if (!t)
+ goto skip_freplace_fixup;
+
+ ctx_name = btf__name_by_offset(btf, t->name_off);
+
+ if (guess_prog_type_by_ctx_name(ctx_name, &prog_type, &attach_type) == 0) {
+ bpf_program__set_type(prog, prog_type);
+ bpf_program__set_expected_attach_type(prog, attach_type);
+
+ if (!env.quiet) {
+ printf("Using guessed program type '%s' for %s/%s...\n",
+ libbpf_bpf_prog_type_str(prog_type),
+ filename, prog_name);
+ }
+ } else {
+ if (!env.quiet) {
+ printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
+ ctx_name, filename, prog_name);
+ }
+ }
+ }
+skip_freplace_fixup:
+ return;
}
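
The BTF walk in the fixup above repeatedly strips type modifiers before and after following the pointer. A condensed sketch of that resolution step, assuming only libbpf's <bpf/btf.h> (helper name is illustrative):

	/* Sketch: resolve the struct name behind a possibly
	 * const/volatile-qualified pointer parameter.
	 */
	#include <bpf/btf.h>

	static const char *ptr_target_name(const struct btf *btf, __u32 type_id)
	{
		const struct btf_type *t = btf__type_by_id(btf, type_id);

		while (t && btf_is_mod(t))	/* skip const/volatile/restrict */
			t = btf__type_by_id(btf, t->type);
		if (!t || !btf_is_ptr(t))
			return NULL;
		t = btf__type_by_id(btf, t->type);	/* follow the pointer */
		while (t && btf_is_mod(t))
			t = btf__type_by_id(btf, t->type);
		return t ? btf__name_by_offset(btf, t->name_off) : NULL;
	}
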
static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
{
const char *prog_name = bpf_program__name(prog);
- size_t buf_sz = sizeof(verif_log_buf);
- char *buf = verif_log_buf;
+ const char *base_filename = basename(filename);
+ char *buf;
+ int buf_sz, log_level;
struct verif_stats *stats;
int err = 0;
void *tmp;
- if (!should_process_file_prog(basename(filename), bpf_program__name(prog))) {
+ if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
env.progs_skipped++;
return 0;
}
@@ -816,25 +969,30 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
memset(stats, 0, sizeof(*stats));
if (env.verbose) {
- buf_sz = 16 * 1024 * 1024;
+ buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024;
buf = malloc(buf_sz);
if (!buf)
return -ENOMEM;
- bpf_program__set_log_buf(prog, buf, buf_sz);
- bpf_program__set_log_level(prog, env.log_level | 4); /* stats + log */
+ /* ensure we always request stats */
+ log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
} else {
- bpf_program__set_log_buf(prog, buf, buf_sz);
- bpf_program__set_log_level(prog, 4); /* only verifier stats */
+ buf = verif_log_buf;
+ buf_sz = sizeof(verif_log_buf);
+ /* request only verifier stats */
+ log_level = 4 | (env.log_fixed ? 8 : 0);
}
verif_log_buf[0] = '\0';
+ bpf_program__set_log_buf(prog, buf, buf_sz);
+ bpf_program__set_log_level(prog, log_level);
+
/* increase chances of successful BPF object loading */
- fixup_obj(obj);
+ fixup_obj(obj, prog, base_filename);
err = bpf_object__load(obj);
env.progs_processed++;
- stats->file_name = strdup(basename(filename));
+ stats->file_name = strdup(base_filename);
stats->prog_name = strdup(bpf_program__name(prog));
stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
parse_verif_log(buf, buf_sz, stats);
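
As background, the log_level bits composed above appear to map as: 1 and 2 select increasingly verbose verifier logs, 4 requests verifier stats, and 8 selects the fixed (non-rotating) log mode; these values are read off this diff rather than re-derived from the UAPI header. A fragment mirroring the composition:

	/* Sketch: stats plus a fixed-mode verbose log, as in process_prog(). */
	int log_level = 1 /* verbose */ | 4 /* stats */ | 8 /* fixed log */;

	bpf_program__set_log_buf(prog, buf, buf_sz);
	bpf_program__set_log_level(prog, log_level);
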
@@ -913,6 +1071,7 @@ static int process_obj(const char *filename)
goto cleanup;
}
+ lprog = NULL;
bpf_object__for_each_program(tprog, tobj) {
const char *tprog_name = bpf_program__name(tprog);
@@ -1691,18 +1850,22 @@ static int handle_comparison_mode(void)
join->stats_b = comp;
i++;
j++;
- } else if (comp == &fallback_stats || r < 0) {
+ } else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) {
join->file_name = base->file_name;
join->prog_name = base->prog_name;
join->stats_a = base;
join->stats_b = NULL;
i++;
- } else {
+ } else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) {
join->file_name = comp->file_name;
join->prog_name = comp->prog_name;
join->stats_a = NULL;
join->stats_b = comp;
j++;
+ } else {
+ fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i",
+ __FILE__, __LINE__, i, j);
+ return -EINVAL;
}
env.join_stat_cnt += 1;
}
@@ -1723,6 +1886,7 @@ static int handle_comparison_mode(void)
one_more_time:
output_comp_headers(cur_fmt);
+ last_idx = -1;
for (i = 0; i < env.join_stat_cnt; i++) {
const struct verif_stats_join *join = &env.join_stats[i];
@@ -1872,6 +2036,11 @@ int main(int argc, char **argv)
if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
return 1;
+ if (env.show_version) {
+ printf("%s\n", argp_program_version);
+ return 0;
+ }
+
if (env.verbose && env.quiet) {
fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c
index fce12165213b..b449788fbd39 100644
--- a/tools/testing/selftests/bpf/xdp_features.c
+++ b/tools/testing/selftests/bpf/xdp_features.c
@@ -25,6 +25,7 @@
static struct env {
bool verbosity;
+ char ifname[IF_NAMESIZE];
int ifindex;
bool is_tester;
struct {
@@ -151,20 +152,26 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 'D':
if (make_sockaddr(AF_INET6, arg, DUT_ECHO_PORT,
&env.dut_addr, NULL)) {
- fprintf(stderr, "Invalid DUT address: %s\n", arg);
+ fprintf(stderr,
+ "Invalid address assigned to the Device Under Test: %s\n",
+ arg);
return ARGP_ERR_UNKNOWN;
}
break;
case 'C':
if (make_sockaddr(AF_INET6, arg, DUT_CTRL_PORT,
&env.dut_ctrl_addr, NULL)) {
- fprintf(stderr, "Invalid DUT CTRL address: %s\n", arg);
+ fprintf(stderr,
+ "Invalid address assigned to the Device Under Test: %s\n",
+ arg);
return ARGP_ERR_UNKNOWN;
}
break;
case 'T':
if (make_sockaddr(AF_INET6, arg, 0, &env.tester_addr, NULL)) {
- fprintf(stderr, "Invalid Tester address: %s\n", arg);
+ fprintf(stderr,
+ "Invalid address assigned to the Tester device: %s\n",
+ arg);
return ARGP_ERR_UNKNOWN;
}
break;
@@ -179,7 +186,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
env.ifindex = if_nametoindex(arg);
if (!env.ifindex)
env.ifindex = strtoul(arg, NULL, 0);
- if (!env.ifindex) {
+ if (!env.ifindex || !if_indextoname(env.ifindex, env.ifname)) {
fprintf(stderr,
"Bad interface index or name (%d): %s\n",
errno, strerror(errno));
@@ -205,6 +212,7 @@ static void set_env_default(void)
env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT;
env.feature.action = -EINVAL;
env.ifindex = -ENODEV;
+ strcpy(env.ifname, "unknown");
make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_CTRL_PORT,
&env.dut_ctrl_addr, NULL);
make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_ECHO_PORT,
@@ -248,15 +256,18 @@ static int dut_run_echo_thread(pthread_t *t, int *sockfd)
sockfd = start_reuseport_server(AF_INET6, SOCK_DGRAM, NULL,
DUT_ECHO_PORT, 0, 1);
if (!sockfd) {
- fprintf(stderr, "Failed to create echo socket\n");
+ fprintf(stderr,
+ "Failed creating data UDP socket on device %s\n",
+ env.ifname);
return -errno;
}
/* start echo channel */
err = pthread_create(t, NULL, dut_echo_thread, sockfd);
if (err) {
- fprintf(stderr, "Failed creating dut_echo thread: %s\n",
- strerror(-err));
+ fprintf(stderr,
+ "Failed creating data UDP thread on device %s: %s\n",
+ env.ifname, strerror(-err));
free_fds(sockfd, 1);
return -EINVAL;
}
@@ -320,9 +331,8 @@ static int dut_attach_xdp_prog(struct xdp_features *skel, int flags)
err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL);
if (err)
- fprintf(stderr,
- "Failed to attach XDP program to ifindex %d\n",
- env.ifindex);
+ fprintf(stderr, "Failed attaching XDP program to device %s\n",
+ env.ifname);
return err;
}
@@ -358,13 +368,16 @@ static int dut_run(struct xdp_features *skel)
sockfd = start_reuseport_server(AF_INET6, SOCK_STREAM, NULL,
DUT_CTRL_PORT, 0, 1);
if (!sockfd) {
- fprintf(stderr, "Failed to create DUT socket\n");
+ fprintf(stderr,
+ "Failed creating control socket on device %s\n", env.ifname);
return -errno;
}
ctrl_sockfd = accept(*sockfd, (struct sockaddr *)&ctrl_addr, &addrlen);
if (ctrl_sockfd < 0) {
- fprintf(stderr, "Failed to accept connection on DUT socket\n");
+ fprintf(stderr,
+ "Failed accepting connections on device %s control socket\n",
+ env.ifname);
free_fds(sockfd, 1);
return -errno;
}
@@ -422,8 +435,8 @@ static int dut_run(struct xdp_features *skel)
&opts);
if (err) {
fprintf(stderr,
- "Failed to query XDP cap for ifindex %d\n",
- env.ifindex);
+ "Failed querying XDP cap for device %s\n",
+ env.ifname);
goto end_thread;
}
@@ -447,7 +460,8 @@ static int dut_run(struct xdp_features *skel)
&key, sizeof(key),
&val, sizeof(val), 0);
if (err) {
- fprintf(stderr, "bpf_map_lookup_elem failed\n");
+ fprintf(stderr,
+ "bpf_map_lookup_elem failed (%d)\n", err);
goto end_thread;
}
@@ -489,7 +503,7 @@ static bool tester_collect_detected_cap(struct xdp_features *skel,
err = bpf_map__lookup_elem(skel->maps.stats, &key, sizeof(key),
&val, sizeof(val), 0);
if (err) {
- fprintf(stderr, "bpf_map_lookup_elem failed\n");
+ fprintf(stderr, "bpf_map_lookup_elem failed (%d)\n", err);
return false;
}
@@ -540,7 +554,9 @@ static int send_echo_msg(void)
sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
if (sockfd < 0) {
- fprintf(stderr, "Failed to create echo socket\n");
+ fprintf(stderr,
+ "Failed creating data UDP socket on device %s\n",
+ env.ifname);
return -errno;
}
@@ -565,7 +581,8 @@ static int tester_run(struct xdp_features *skel)
sockfd = socket(AF_INET6, SOCK_STREAM, 0);
if (sockfd < 0) {
- fprintf(stderr, "Failed to create tester socket\n");
+ fprintf(stderr,
+ "Failed creating tester service control socket\n");
return -errno;
}
@@ -575,7 +592,8 @@ static int tester_run(struct xdp_features *skel)
err = connect(sockfd, (struct sockaddr *)&env.dut_ctrl_addr,
sizeof(env.dut_ctrl_addr));
if (err) {
- fprintf(stderr, "Failed to connect to the DUT\n");
+ fprintf(stderr,
+ "Failed connecting to the Device Under Test control socket\n");
return -errno;
}
@@ -596,8 +614,8 @@ static int tester_run(struct xdp_features *skel)
err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL);
if (err) {
- fprintf(stderr, "Failed to attach XDP program to ifindex %d\n",
- env.ifindex);
+ fprintf(stderr, "Failed attaching XDP program to device %s\n",
+ env.ifname);
goto out;
}
@@ -653,7 +671,7 @@ int main(int argc, char **argv)
return err;
if (env.ifindex < 0) {
- fprintf(stderr, "Invalid ifindex\n");
+ fprintf(stderr, "Invalid device name %s\n", env.ifname);
return -ENODEV;
}
@@ -684,11 +702,12 @@ int main(int argc, char **argv)
if (env.is_tester) {
/* Tester */
- fprintf(stdout, "Starting tester on device %d\n", env.ifindex);
+ fprintf(stdout, "Starting tester service on device %s\n",
+ env.ifname);
err = tester_run(skel);
} else {
/* DUT */
- fprintf(stdout, "Starting DUT on device %d\n", env.ifindex);
+ fprintf(stdout, "Starting test on device %s\n", env.ifname);
err = dut_run(skel);
}
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 1c8acb68b977..987cf0db5ebc 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -141,7 +141,11 @@ static void verify_xdp_metadata(void *data)
meta = data - sizeof(*meta);
printf("rx_timestamp: %llu\n", meta->rx_timestamp);
- printf("rx_hash: %u\n", meta->rx_hash);
+ if (meta->rx_hash_err < 0)
+ printf("No rx_hash, err=%d\n", meta->rx_hash_err);
+ else
+ printf("rx_hash: 0x%X with RSS type: 0x%X\n",
+ meta->rx_hash, meta->rx_hash_type);
}
static void verify_skb_metadata(int fd)
@@ -212,7 +216,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd)
while (true) {
errno = 0;
ret = poll(fds, rxq + 1, 1000);
- printf("poll: %d (%d)\n", ret, errno);
+ printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
+ ret, errno, bpf_obj->bss->pkts_skip,
+ bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
if (ret < 0)
break;
if (ret == 0)
diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h
index f6780fbb0a21..0c4624dc6f2f 100644
--- a/tools/testing/selftests/bpf/xdp_metadata.h
+++ b/tools/testing/selftests/bpf/xdp_metadata.h
@@ -12,4 +12,8 @@
struct xdp_meta {
__u64 rx_timestamp;
__u32 rx_hash;
+ union {
+ __u32 rx_hash_type;
+ __s32 rx_hash_err;
+ };
};
diff --git a/tools/testing/selftests/bpf/xsk_xdp_metadata.h b/tools/testing/selftests/bpf/xsk_xdp_metadata.h
new file mode 100644
index 000000000000..943133da378a
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk_xdp_metadata.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+struct xdp_info {
+ __u64 count;
+} __attribute__((aligned(32)));
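
To show how a struct like this is typically produced, here is a hedged sketch of an XDP program reserving and filling the metadata area that is_metadata_correct() later reads at data - sizeof(struct xdp_info). The program name is illustrative; the selftest's real producer is the xsk_xdp_populate_metadata program referenced further down:

	/* Sketch: XDP-side producer for struct xdp_info metadata. */
	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>
	#include "xsk_xdp_metadata.h"	/* struct xdp_info, as added above */

	SEC("xdp")
	int populate_count_meta(struct xdp_md *ctx)
	{
		struct xdp_info *meta;

		/* grow the metadata area in front of the packet data */
		if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)))
			return XDP_DROP;

		meta = (void *)(long)ctx->data_meta;
		/* the verifier requires an explicit bounds check before use */
		if ((void *)(meta + 1) > (void *)(long)ctx->data)
			return XDP_DROP;

		meta->count = 0;
		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";
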
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index a17655107a94..5a9691e942de 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -69,6 +69,7 @@
*/
#define _GNU_SOURCE
+#include <assert.h>
#include <fcntl.h>
#include <errno.h>
#include <getopt.h>
@@ -103,6 +104,7 @@
#include <bpf/bpf.h>
#include <linux/filter.h>
#include "../kselftest.h"
+#include "xsk_xdp_metadata.h"
static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
@@ -464,6 +466,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
ifobj->use_fill_ring = true;
ifobj->release_rx = true;
ifobj->validation_func = NULL;
+ ifobj->use_metadata = false;
if (i == 0) {
ifobj->rx_on = false;
@@ -631,7 +634,6 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
if (!pkt_stream)
exit_with_error(ENOMEM);
- pkt_stream->nb_pkts = nb_pkts;
for (i = 0; i < nb_pkts; i++) {
pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size,
pkt_len);
@@ -798,6 +800,20 @@ static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt
return false;
}
+static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
+{
+ void *data = xsk_umem__get_data(buffer, addr);
+ struct xdp_info *meta = data - sizeof(struct xdp_info);
+
+ if (meta->count != pkt->payload) {
+ ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n",
+ __func__, pkt->payload, meta->count);
+ return false;
+ }
+
+ return true;
+}
+
static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
{
void *data = xsk_umem__get_data(buffer, addr);
@@ -959,7 +975,8 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
addr = xsk_umem__add_offset_to_addr(addr);
if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) ||
- !is_offset_correct(umem, pkt_stream, addr, pkt->addr))
+ !is_offset_correct(umem, pkt_stream, addr, pkt->addr) ||
+ (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr)))
return TEST_FAILURE;
if (ifobj->use_fill_ring)
@@ -1124,7 +1141,14 @@ static int validate_rx_dropped(struct ifobject *ifobject)
if (err)
return TEST_FAILURE;
- if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2)
+ /* The receiver calls getsockopt after receiving the last (valid)
+ * packet which is not the final packet sent in this test (valid and
+ * invalid packets are sent in alternating fashion with the final
+ * packet being invalid). Since the last packet may or may not have
+ * been dropped already, both outcomes must be allowed.
+ */
+ if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 ||
+ stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 - 1)
return TEST_PASS;
return TEST_FAILURE;
@@ -1635,6 +1659,7 @@ static void testapp_single_pkt(struct test_spec *test)
static void testapp_invalid_desc(struct test_spec *test)
{
+ u64 umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
struct pkt pkts[] = {
/* Zero packet address allowed */
{0, PKT_SIZE, 0, true},
@@ -1644,10 +1669,12 @@ static void testapp_invalid_desc(struct test_spec *test)
{-2, PKT_SIZE, 0, false},
/* Packet too large */
{0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
+ /* Up to end of umem allowed */
+ {umem_size - PKT_SIZE, PKT_SIZE, 0, true},
/* After umem ends */
- {UMEM_SIZE, PKT_SIZE, 0, false},
+ {umem_size, PKT_SIZE, 0, false},
/* Straddle the end of umem */
- {UMEM_SIZE - PKT_SIZE / 2, PKT_SIZE, 0, false},
+ {umem_size - PKT_SIZE / 2, PKT_SIZE, 0, false},
/* Straddle a page boundary */
{0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false},
/* Straddle a 2K boundary */
@@ -1657,16 +1684,17 @@ static void testapp_invalid_desc(struct test_spec *test)
if (test->ifobj_tx->umem->unaligned_mode) {
/* Crossing a page boundary allowed */
- pkts[6].valid = true;
+ pkts[7].valid = true;
}
if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
/* Crossing a 2K frame size boundary not allowed */
- pkts[7].valid = false;
+ pkts[8].valid = false;
}
if (test->ifobj_tx->shared_umem) {
- pkts[4].addr += UMEM_SIZE;
- pkts[5].addr += UMEM_SIZE;
+ pkts[4].addr += umem_size;
+ pkts[5].addr += umem_size;
+ pkts[6].addr += umem_size;
}
pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
@@ -1686,6 +1714,30 @@ static void testapp_xdp_drop(struct test_spec *test)
testapp_validate_traffic(test);
}
+static void testapp_xdp_metadata_count(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+ struct bpf_map *data_map;
+ int count = 0;
+ int key = 0;
+
+ test_spec_set_name(test, "XDP_METADATA_COUNT");
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
+ skel_tx->progs.xsk_xdp_populate_metadata,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+ test->ifobj_rx->use_metadata = true;
+
+ data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
+ if (!data_map || !bpf_map__is_internal(data_map))
+ exit_with_error(ENOMEM);
+
+ if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY))
+ exit_with_error(errno);
+
+ testapp_validate_traffic(test);
+}
+
static void testapp_poll_txq_tmout(struct test_spec *test)
{
test_spec_set_name(test, "POLL_TXQ_FULL");
@@ -1825,6 +1877,29 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
test->ifobj_rx->umem->unaligned_mode = true;
testapp_invalid_desc(test);
break;
+ case TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME: {
+ u64 page_size, umem_size;
+
+ if (!hugepages_present(test->ifobj_tx)) {
+ ksft_test_result_skip("No 2M huge pages present.\n");
+ return;
+ }
+ test_spec_set_name(test, "UNALIGNED_INV_DESC_4K1_FRAME_SIZE");
+ /* Odd frame size so the UMEM doesn't end near a page boundary. */
+ test->ifobj_tx->umem->frame_size = 4001;
+ test->ifobj_rx->umem->frame_size = 4001;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ /* This test exercises descriptors that straddle the end of
+ * the UMEM but not a page boundary.
+ */
+ page_size = sysconf(_SC_PAGESIZE);
+ umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
+ assert(umem_size % page_size > PKT_SIZE);
+ assert(umem_size % page_size < page_size - PKT_SIZE);
+ testapp_invalid_desc(test);
+ break;
+ }
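
A worked check of the arithmetic behind the two asserts, assuming the default 1024 UMEM frames (DEFAULT_UMEM_BUFFERS) and 4 KiB pages; the test computes these at run time, the numbers here are only illustrative:

	/* Standalone check of the 4001-byte frame-size math. */
	#include <assert.h>

	int main(void)
	{
		const unsigned long num_frames = 1024;	/* DEFAULT_UMEM_BUFFERS */
		const unsigned long frame_size = 4001;
		const unsigned long page_size = 4096;
		unsigned long umem_size = num_frames * frame_size;	/* 4097024 */

		/* The UMEM ends 1024 bytes into its final page, so a small
		 * packet can straddle the end of the UMEM without crossing a
		 * page boundary, which is what the asserts above require.
		 */
		assert(umem_size % page_size == 1024);
		return 0;
	}
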
case TEST_TYPE_UNALIGNED:
if (!testapp_unaligned(test))
return;
@@ -1835,6 +1910,9 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
case TEST_TYPE_XDP_DROP_HALF:
testapp_xdp_drop(test);
break;
+ case TEST_TYPE_XDP_METADATA_COUNT:
+ testapp_xdp_metadata_count(test);
+ break;
default:
break;
}
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 3e8ec7d8ec32..919327807a4e 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -53,7 +53,6 @@
#define THREAD_TMOUT 3
#define DEFAULT_PKT_CNT (4 * 1024)
#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
-#define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE)
#define RX_FULL_RXQSIZE 32
#define UMEM_HEADROOM_TEST_SIZE 128
#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
@@ -79,6 +78,7 @@ enum test_type {
TEST_TYPE_ALIGNED_INV_DESC,
TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME,
TEST_TYPE_UNALIGNED_INV_DESC,
+ TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME,
TEST_TYPE_HEADROOM,
TEST_TYPE_TEARDOWN,
TEST_TYPE_BIDI,
@@ -88,6 +88,7 @@ enum test_type {
TEST_TYPE_STATS_FILL_EMPTY,
TEST_TYPE_BPF_RES,
TEST_TYPE_XDP_DROP_HALF,
+ TEST_TYPE_XDP_METADATA_COUNT,
TEST_TYPE_MAX
};
@@ -158,6 +159,7 @@ struct ifobject {
bool use_fill_ring;
bool release_rx;
bool shared_umem;
+ bool use_metadata;
u8 dst_mac[ETH_ALEN];
u8 src_mac[ETH_ALEN];
};
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index a39bb2560d9b..03f92d7aeb19 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -8,11 +8,12 @@ TEST_PROGS := \
dev_addr_lists.sh \
mode-1-recovery-updelay.sh \
mode-2-recovery-updelay.sh \
- option_prio.sh \
+ bond_options.sh \
bond-eth-type-change.sh
TEST_FILES := \
lag_lib.sh \
+ bond_topo_3d1c.sh \
net_forwarding_lib.sh
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
new file mode 100755
index 000000000000..db29a3146a86
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -0,0 +1,264 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bonding options with modes 1, 5 and 6
+
+ALL_TESTS="
+ prio
+ arp_validate
+"
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source ${lib_dir}/net_forwarding_lib.sh
+source ${lib_dir}/bond_topo_3d1c.sh
+
+skip_prio()
+{
+ local skip=1
+
+ # check if iproute supports the prio option
+ ip -n ${s_ns} link set eth0 type bond_slave prio 10
+ [[ $? -ne 0 ]] && skip=0
+
+ # check if the kernel supports the prio option
+ ip -n ${s_ns} -d link show eth0 | grep -q "prio 10"
+ [[ $? -ne 0 ]] && skip=0
+
+ return $skip
+}
+
+skip_ns()
+{
+ local skip=1
+
+ # check if iproute supports the ns_ip6_target option
+ ip -n ${s_ns} link add bond1 type bond ns_ip6_target ${g_ip6}
+ [[ $? -ne 0 ]] && skip=0
+
+ # check if the kernel supports the ns_ip6_target option
+ ip -n ${s_ns} -d link show bond1 | grep -q "ns_ip6_target ${g_ip6}"
+ [[ $? -ne 0 ]] && skip=0
+
+ ip -n ${s_ns} link del bond1
+
+ return $skip
+}
+
+active_slave=""
+check_active_slave()
+{
+ local target_active_slave=$1
+ active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ test "$active_slave" = "$target_active_slave"
+ check_err $? "Current active slave is $active_slave but not $target_active_slave"
+}
+
+
+# Test bonding prio option
+prio_test()
+{
+ local param="$1"
+ RET=0
+
+ # create bond
+ bond_reset "${param}"
+
+ # check bonding member prio value
+ ip -n ${s_ns} link set eth0 type bond_slave prio 0
+ ip -n ${s_ns} link set eth1 type bond_slave prio 10
+ ip -n ${s_ns} link set eth2 type bond_slave prio 11
+ cmd_jq "ip -n ${s_ns} -d -j link show eth0" \
+ ".[].linkinfo.info_slave_data | select (.prio == 0)" "-e" &> /dev/null
+ check_err $? "eth0 prio is not 0"
+ cmd_jq "ip -n ${s_ns} -d -j link show eth1" \
+ ".[].linkinfo.info_slave_data | select (.prio == 10)" "-e" &> /dev/null
+ check_err $? "eth1 prio is not 10"
+ cmd_jq "ip -n ${s_ns} -d -j link show eth2" \
+ ".[].linkinfo.info_slave_data | select (.prio == 11)" "-e" &> /dev/null
+ check_err $? "eth2 prio is not 11"
+
+ bond_check_connection "setup"
+
+ # active slave should be the primary slave
+ check_active_slave eth1
+
+ # active slave should be the higher prio slave
+ ip -n ${s_ns} link set $active_slave down
+ bond_check_connection "fail over"
+ check_active_slave eth2
+
+ # when only 1 slave is up
+ ip -n ${s_ns} link set $active_slave down
+ bond_check_connection "only 1 slave up"
+ check_active_slave eth0
+
+ # when a higher prio slave comes back up
+ ip -n ${s_ns} link set eth2 up
+ bond_check_connection "higher prio slave up"
+ case $primary_reselect in
+ "0")
+ check_active_slave "eth2"
+ ;;
+ "1")
+ check_active_slave "eth0"
+ ;;
+ "2")
+ check_active_slave "eth0"
+ ;;
+ esac
+ local pre_active_slave=$active_slave
+
+ # when the primary slave comes back up
+ ip -n ${s_ns} link set eth1 up
+ bond_check_connection "primary slave up"
+ case $primary_reselect in
+ "0")
+ check_active_slave "eth1"
+ ;;
+ "1")
+ check_active_slave "$pre_active_slave"
+ ;;
+ "2")
+ check_active_slave "$pre_active_slave"
+ ip -n ${s_ns} link set $active_slave down
+ bond_check_connection "pre_active slave down"
+ check_active_slave "eth1"
+ ;;
+ esac
+
+ # Test changing bond slave prio
+ if [[ "$primary_reselect" == "0" ]];then
+ ip -n ${s_ns} link set eth0 type bond_slave prio 1000000
+ ip -n ${s_ns} link set eth1 type bond_slave prio 0
+ ip -n ${s_ns} link set eth2 type bond_slave prio -50
+ ip -n ${s_ns} -d link show eth0 | grep -q 'prio 1000000'
+ check_err $? "eth0 prio is not 1000000"
+ ip -n ${s_ns} -d link show eth1 | grep -q 'prio 0'
+ check_err $? "eth1 prio is not 0"
+ ip -n ${s_ns} -d link show eth2 | grep -q 'prio -50'
+ check_err $? "eth3 prio is not -50"
+ check_active_slave "eth1"
+
+ ip -n ${s_ns} link set $active_slave down
+ bond_check_connection "change slave prio"
+ check_active_slave "eth0"
+ fi
+}
+
+prio_miimon()
+{
+ local primary_reselect
+ local mode=$1
+
+ for primary_reselect in 0 1 2; do
+ prio_test "mode $mode miimon 100 primary eth1 primary_reselect $primary_reselect"
+ log_test "prio" "$mode miimon primary_reselect $primary_reselect"
+ done
+}
+
+prio_arp()
+{
+ local primary_reselect
+ local mode=$1
+
+ for primary_reselect in 0 1 2; do
+ prio_test "mode active-backup arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect"
+ log_test "prio" "$mode arp_ip_target primary_reselect $primary_reselect"
+ done
+}
+
+prio_ns()
+{
+ local primary_reselect
+ local mode=$1
+
+ if skip_ns; then
+ log_test_skip "prio ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'."
+ return 0
+ fi
+
+ for primary_reselect in 0 1 2; do
+ prio_test "mode active-backup arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect"
+ log_test "prio" "$mode ns_ip6_target primary_reselect $primary_reselect"
+ done
+}
+
+prio()
+{
+ local mode modes="active-backup balance-tlb balance-alb"
+
+ if skip_prio; then
+ log_test_skip "prio" "Current iproute or kernel doesn't support bond option 'prio'."
+ return 0
+ fi
+
+ for mode in $modes; do
+ prio_miimon $mode
+ prio_arp $mode
+ prio_ns $mode
+ done
+}
+
+arp_validate_test()
+{
+ local param="$1"
+ RET=0
+
+ # create bond
+ bond_reset "${param}"
+
+ bond_check_connection
+ [ $RET -ne 0 ] && log_test "arp_validate" "$retmsg"
+
+ # wait for a while to make sure the mii status is stable
+ sleep 5
+ for i in $(seq 0 2); do
+ mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status")
+ if [ ${mii_status} != "UP" ]; then
+ RET=1
+ log_test "arp_validate" "interface eth$i mii_status $mii_status"
+ fi
+ done
+}
+
+arp_validate_arp()
+{
+ local mode=$1
+ local val
+ for val in $(seq 0 6); do
+ arp_validate_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} arp_validate $val"
+ log_test "arp_validate" "$mode arp_ip_target arp_validate $val"
+ done
+}
+
+arp_validate_ns()
+{
+ local mode=$1
+ local val
+
+ if skip_ns; then
+ log_test_skip "arp_validate ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'."
+ return 0
+ fi
+
+ for val in $(seq 0 6); do
+ arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} arp_validate $val"
+ log_test "arp_validate" "$mode ns_ip6_target arp_validate $val"
+ done
+}
+
+arp_validate()
+{
+ arp_validate_arp "active-backup"
+ arp_validate_ns "active-backup"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
new file mode 100644
index 000000000000..4045ca97fb22
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Topology for bond modes 1, 5 and 6 testing
+#
+# +-------------------------------------+
+# | bond0 |
+# | + | Server
+# | eth0 | eth1 eth2 | 192.0.2.1/24
+# | +-------------------+ | 2001:db8::1/24
+# | | | | |
+# +-------------------------------------+
+# | | |
+# +-------------------------------------+
+# | | | | |
+# | +---+---------+---------+---+ | Gateway
+# | | br0 | | 192.0.2.254/24
+# | +-------------+-------------+ | 2001:db8::254/24
+# | | |
+# +-------------------------------------+
+# |
+# +-------------------------------------+
+# | | | Client
+# | + | 192.0.2.10/24
+# | eth0 | 2001:db8::10/24
+# +-------------------------------------+
+
+s_ns="s-$(mktemp -u XXXXXX)"
+c_ns="c-$(mktemp -u XXXXXX)"
+g_ns="g-$(mktemp -u XXXXXX)"
+s_ip4="192.0.2.1"
+c_ip4="192.0.2.10"
+g_ip4="192.0.2.254"
+s_ip6="2001:db8::1"
+c_ip6="2001:db8::10"
+g_ip6="2001:db8::254"
+
+gateway_create()
+{
+ ip netns add ${g_ns}
+ ip -n ${g_ns} link add br0 type bridge
+ ip -n ${g_ns} link set br0 up
+ ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
+ ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
+}
+
+gateway_destroy()
+{
+ ip -n ${g_ns} link del br0
+ ip netns del ${g_ns}
+}
+
+server_create()
+{
+ ip netns add ${s_ns}
+ ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
+
+ for i in $(seq 0 2); do
+ ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+
+ ip -n ${g_ns} link set s${i} up
+ ip -n ${g_ns} link set s${i} master br0
+ ip -n ${s_ns} link set eth${i} master bond0
+ done
+
+ ip -n ${s_ns} link set bond0 up
+ ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+ ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+ sleep 2
+}
+
+# Reset bond with new mode and options
+bond_reset()
+{
+ local param="$1"
+
+ ip -n ${s_ns} link set bond0 down
+ ip -n ${s_ns} link del bond0
+
+ ip -n ${s_ns} link add bond0 type bond $param
+ for i in $(seq 0 2); do
+ ip -n ${s_ns} link set eth$i master bond0
+ done
+
+ ip -n ${s_ns} link set bond0 up
+ ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+ ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+ sleep 2
+}
+
+server_destroy()
+{
+ for i in $(seq 0 2); do
+ ip -n ${s_ns} link del eth${i}
+ done
+ ip netns del ${s_ns}
+}
+
+client_create()
+{
+ ip netns add ${c_ns}
+ ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
+
+ ip -n ${g_ns} link set c0 up
+ ip -n ${g_ns} link set c0 master br0
+
+ ip -n ${c_ns} link set eth0 up
+ ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
+ ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
+}
+
+client_destroy()
+{
+ ip -n ${c_ns} link del eth0
+ ip netns del ${c_ns}
+}
+
+setup_prepare()
+{
+ gateway_create
+ server_create
+ client_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ client_destroy
+ server_destroy
+ gateway_destroy
+}
+
+bond_check_connection()
+{
+ local msg=${1:-"check connection"}
+
+ sleep 2
+ ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
+ check_err $? "${msg}: ping failed"
+ ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
+ check_err $? "${msg}: ping6 failed"
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/option_prio.sh b/tools/testing/selftests/drivers/net/bonding/option_prio.sh
deleted file mode 100755
index c32eebff5005..000000000000
--- a/tools/testing/selftests/drivers/net/bonding/option_prio.sh
+++ /dev/null
@@ -1,245 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test bonding option prio
-#
-
-ALL_TESTS="
- prio_arp_ip_target_test
- prio_miimon_test
-"
-
-REQUIRE_MZ=no
-REQUIRE_JQ=no
-NUM_NETIFS=0
-lib_dir=$(dirname "$0")
-source "$lib_dir"/net_forwarding_lib.sh
-
-destroy()
-{
- ip link del bond0 &>/dev/null
- ip link del br0 &>/dev/null
- ip link del veth0 &>/dev/null
- ip link del veth1 &>/dev/null
- ip link del veth2 &>/dev/null
- ip netns del ns1 &>/dev/null
- ip link del veth3 &>/dev/null
-}
-
-cleanup()
-{
- pre_cleanup
-
- destroy
-}
-
-skip()
-{
- local skip=1
- ip link add name bond0 type bond mode 1 miimon 100 &>/dev/null
- ip link add name veth0 type veth peer name veth0_p
- ip link set veth0 master bond0
-
- # check if iproute support prio option
- ip link set dev veth0 type bond_slave prio 10
- [[ $? -ne 0 ]] && skip=0
-
- # check if bonding supports the prio option
- ip -d link show veth0 | grep -q "prio 10"
- [[ $? -ne 0 ]] && skip=0
-
- ip link del bond0 &>/dev/null
- ip link del veth0
-
- return $skip
-}
-
-active_slave=""
-check_active_slave()
-{
- local target_active_slave=$1
- active_slave="$(cat /sys/class/net/bond0/bonding/active_slave)"
- test "$active_slave" = "$target_active_slave"
- check_err $? "Current active slave is $active_slave but not $target_active_slave"
-}
-
-
-# Test bonding prio option with mode=$mode monitor=$monitor
-# and primary_reselect=$primary_reselect
-prio_test()
-{
- RET=0
-
- local monitor=$1
- local mode=$2
- local primary_reselect=$3
-
- local bond_ip4="192.169.1.2"
- local peer_ip4="192.169.1.1"
- local bond_ip6="2009:0a:0b::02"
- local peer_ip6="2009:0a:0b::01"
-
-
- # create veths
- ip link add name veth0 type veth peer name veth0_p
- ip link add name veth1 type veth peer name veth1_p
- ip link add name veth2 type veth peer name veth2_p
-
- # create bond
- if [[ "$monitor" == "miimon" ]];then
- ip link add name bond0 type bond mode $mode miimon 100 primary veth1 primary_reselect $primary_reselect
- elif [[ "$monitor" == "arp_ip_target" ]];then
- ip link add name bond0 type bond mode $mode arp_interval 1000 arp_ip_target $peer_ip4 primary veth1 primary_reselect $primary_reselect
- elif [[ "$monitor" == "ns_ip6_target" ]];then
- ip link add name bond0 type bond mode $mode arp_interval 1000 ns_ip6_target $peer_ip6 primary veth1 primary_reselect $primary_reselect
- fi
- ip link set bond0 up
- ip link set veth0 master bond0
- ip link set veth1 master bond0
- ip link set veth2 master bond0
- # check bonding member prio value
- ip link set dev veth0 type bond_slave prio 0
- ip link set dev veth1 type bond_slave prio 10
- ip link set dev veth2 type bond_slave prio 11
- ip -d link show veth0 | grep -q 'prio 0'
- check_err $? "veth0 prio is not 0"
- ip -d link show veth1 | grep -q 'prio 10'
- check_err $? "veth0 prio is not 10"
- ip -d link show veth2 | grep -q 'prio 11'
- check_err $? "veth0 prio is not 11"
-
- ip link set veth0 up
- ip link set veth1 up
- ip link set veth2 up
- ip link set veth0_p up
- ip link set veth1_p up
- ip link set veth2_p up
-
- # prepare ping target
- ip link add name br0 type bridge
- ip link set br0 up
- ip link set veth0_p master br0
- ip link set veth1_p master br0
- ip link set veth2_p master br0
- ip link add name veth3 type veth peer name veth3_p
- ip netns add ns1
- ip link set veth3_p master br0 up
- ip link set veth3 netns ns1 up
- ip netns exec ns1 ip addr add $peer_ip4/24 dev veth3
- ip netns exec ns1 ip addr add $peer_ip6/64 dev veth3
- ip addr add $bond_ip4/24 dev bond0
- ip addr add $bond_ip6/64 dev bond0
- sleep 5
-
- ping $peer_ip4 -c5 -I bond0 &>/dev/null
- check_err $? "ping failed 1."
- ping6 $peer_ip6 -c5 -I bond0 &>/dev/null
- check_err $? "ping6 failed 1."
-
- # active slave should be the primary slave
- check_active_slave veth1
-
- # active slave should be the higher prio slave
- ip link set $active_slave down
- ping $peer_ip4 -c5 -I bond0 &>/dev/null
- check_err $? "ping failed 2."
- check_active_slave veth2
-
- # when only 1 slave is up
- ip link set $active_slave down
- ping $peer_ip4 -c5 -I bond0 &>/dev/null
- check_err $? "ping failed 3."
- check_active_slave veth0
-
- # when a higher prio slave comes back up
- ip link set veth2 up
- ping $peer_ip4 -c5 -I bond0 &>/dev/null
- check_err $? "ping failed 4."
- case $primary_reselect in
- "0")
- check_active_slave "veth2"
- ;;
- "1")
- check_active_slave "veth0"
- ;;
- "2")
- check_active_slave "veth0"
- ;;
- esac
- local pre_active_slave=$active_slave
-
- # when the primary slave comes back up
- ip link set veth1 up
- ping $peer_ip4 -c5 -I bond0 &>/dev/null
- check_err $? "ping failed 5."
- case $primary_reselect in
- "0")
- check_active_slave "veth1"
- ;;
- "1")
- check_active_slave "$pre_active_slave"
- ;;
- "2")
- check_active_slave "$pre_active_slave"
- ip link set $active_slave down
- ping $peer_ip4 -c5 -I bond0 &>/dev/null
- check_err $? "ping failed 6."
- check_active_slave "veth1"
- ;;
- esac
-
- # Test changing bond slave prio
- if [[ "$primary_reselect" == "0" ]];then
- ip link set dev veth0 type bond_slave prio 1000000
- ip link set dev veth1 type bond_slave prio 0
- ip link set dev veth2 type bond_slave prio -50
- ip -d link show veth0 | grep -q 'prio 1000000'
- check_err $? "veth0 prio is not 1000000"
- ip -d link show veth1 | grep -q 'prio 0'
- check_err $? "veth1 prio is not 0"
- ip -d link show veth2 | grep -q 'prio -50'
- check_err $? "veth3 prio is not -50"
- check_active_slave "veth1"
-
- ip link set $active_slave down
- ping $peer_ip4 -c5 -I bond0 &>/dev/null
- check_err $? "ping failed 7."
- check_active_slave "veth0"
- fi
-
- cleanup
-
- log_test "prio_test" "Test bonding option 'prio' with mode=$mode monitor=$monitor and primary_reselect=$primary_reselect"
-}
-
-prio_miimon_test()
-{
- local mode
- local primary_reselect
-
- for mode in 1 5 6; do
- for primary_reselect in 0 1 2; do
- prio_test "miimon" $mode $primary_reselect
- done
- done
-}
-
-prio_arp_ip_target_test()
-{
- local primary_reselect
-
- for primary_reselect in 0 1 2; do
- prio_test "arp_ip_target" 1 $primary_reselect
- done
-}
-
-if skip;then
- log_test_skip "option_prio.sh" "Current iproute doesn't support 'prio'."
- exit 0
-fi
-
-trap cleanup EXIT
-
-tests_run
-
-exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
index 3569ff45f7d5..88162b4027c0 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -18,7 +18,6 @@ lib_dir=$(dirname $0)/../../../net/forwarding
NUM_NETIFS=0
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
-source qos_lib.sh
swp=$NETIF_NO_CABLE
@@ -371,7 +370,7 @@ test_tc_int_buf()
tc qdisc delete dev $swp root
}
-bail_on_lldpad
+bail_on_lldpad "configure DCB" "configure Qdiscs"
trap cleanup EXIT
setup_wait
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
index faa51012cdac..5ad092b9bf10 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -54,31 +54,3 @@ measure_rate()
echo $ir $er
return $ret
}
-
-bail_on_lldpad()
-{
- if systemctl is-active --quiet lldpad; then
-
- cat >/dev/stderr <<-EOF
- WARNING: lldpad is running
-
- lldpad will likely configure DCB, and this test will
- configure Qdiscs. mlxsw does not support both at the
- same time, one of them is arbitrarily going to overwrite
- the other. That will cause spurious failures (or,
- unlikely, passes) of this test.
- EOF
-
- if [[ -z $ALLOW_LLDPAD ]]; then
- cat >/dev/stderr <<-EOF
-
- If you want to run the test anyway, please set
- an environment variable ALLOW_LLDPAD to a
- non-empty string.
- EOF
- exit 1
- else
- return
- fi
- fi
-}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
index f9858e221996..42ce602d8d49 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -79,7 +79,6 @@ lib_dir=$(dirname $0)/../../../net/forwarding
NUM_NETIFS=6
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
-source qos_lib.sh
_1KB=1000
_100KB=$((100 * _1KB))
@@ -393,7 +392,7 @@ test_qos_pfc()
log_test "PFC"
}
-bail_on_lldpad
+bail_on_lldpad "configure DCB" "configure Qdiscs"
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
index ceaa76b17a43..139175fd03e7 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -5,7 +5,6 @@
lib_dir=$(dirname $0)/../../../net/forwarding
source $lib_dir/sch_ets_core.sh
source $lib_dir/devlink_lib.sh
-source qos_lib.sh
ALL_TESTS="
ping_ipv4
@@ -78,5 +77,5 @@ collect_stats()
done
}
-bail_on_lldpad
+bail_on_lldpad "configure DCB" "configure Qdiscs"
ets_run
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index 45b41b8f3232..299e06a5808c 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -74,7 +74,6 @@ lib_dir=$(dirname $0)/../../../net/forwarding
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
source mlxsw_lib.sh
-source qos_lib.sh
ipaddr()
{
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index 0d01c7cd82a1..8ecddafa79b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -166,7 +166,7 @@ ecn_mirror_test()
uninstall_qdisc
}
-bail_on_lldpad
+bail_on_lldpad "configure DCB" "configure Qdiscs"
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
index 860205338e6f..159108d02895 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -73,7 +73,7 @@ red_mirror_test()
uninstall_qdisc
}
-bail_on_lldpad
+bail_on_lldpad "configure DCB" "configure Qdiscs"
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
index c6ce0b448bf3..ecc3664376b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
@@ -1,8 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source qos_lib.sh
-bail_on_lldpad
+sch_tbf_pre_hook()
+{
+ bail_on_lldpad "configure DCB" "configure Qdiscs"
+}
lib_dir=$(dirname $0)/../../../net/forwarding
TCFLAGS=skip_sw
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
index 8d245f331619..2e0a4efb1703 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
@@ -1,8 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source qos_lib.sh
-bail_on_lldpad
+sch_tbf_pre_hook()
+{
+ bail_on_lldpad "configure DCB" "configure Qdiscs"
+}
lib_dir=$(dirname $0)/../../../net/forwarding
TCFLAGS=skip_sw
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
index 013886061f15..6679a338dfc4 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
@@ -1,8 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source qos_lib.sh
-bail_on_lldpad
+sch_tbf_pre_hook()
+{
+ bail_on_lldpad "configure DCB" "configure Qdiscs"
+}
lib_dir=$(dirname $0)/../../../net/forwarding
TCFLAGS=skip_sw
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 582669ca38e9..c6a8c732b802 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -18,6 +18,7 @@
#include <grp.h>
#include <stdbool.h>
#include <stdarg.h>
+#include <linux/mount.h>
#include "../kselftest_harness.h"
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 1de34ec99290..c12df57d5539 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -83,6 +83,7 @@ TEST_GEN_FILES += nat6to4.o
TEST_GEN_FILES += ip_local_port_range
TEST_GEN_FILES += bind_wildcard
TEST_PROGS += test_vxlan_mdb.sh
+TEST_PROGS += test_bridge_neigh_suppress.sh
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 4c7ce07afa2f..d1d421ec10a3 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -49,3 +49,4 @@ CONFIG_IPV6_IOAM6_LWTUNNEL=y
CONFIG_CRYPTO_SM4_GENERIC=y
CONFIG_AMT=m
CONFIG_VXLAN=m
+CONFIG_IP_SCTP=m
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 236f6b796a52..a474c60fe348 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -15,6 +15,7 @@ TEST_PROGS = bridge_igmp.sh \
custom_multipath_hash.sh \
dual_vxlan_bridge.sh \
ethtool_extended_state.sh \
+ ethtool_mm.sh \
ethtool.sh \
gre_custom_multipath_hash.sh \
gre_inner_v4_multipath.sh \
diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
new file mode 100755
index 000000000000..c580ad623848
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
@@ -0,0 +1,288 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ manual_with_verification_h1_to_h2
+ manual_with_verification_h2_to_h1
+ manual_without_verification_h1_to_h2
+ manual_without_verification_h2_to_h1
+ manual_failed_verification_h1_to_h2
+ manual_failed_verification_h2_to_h1
+ lldp
+"
+
+NUM_NETIFS=2
+REQUIRE_MZ=no
+PREEMPTIBLE_PRIO=0
+source lib.sh
+
+traffic_test()
+{
+ local if=$1; shift
+ local src=$1; shift
+ local num_pkts=10000
+ local before=
+ local after=
+ local delta=
+
+ before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
+
+ $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO
+
+ after=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
+
+ delta=$((after - before))
+
+ # Allow an extra 1% tolerance for random packets sent by the stack
+ [ $delta -ge $num_pkts ] && [ $delta -le $((num_pkts + 100)) ]
+}
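+
+# For reference, the ethtool_std_stats_get call above expands to roughly the
+# following raw command (hypothetical interface name):
+#   ethtool --json -S eth0 --groups eth-mac -- --src pmac | \
+#           jq '.[]."eth-mac"."FramesTransmittedOK"'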
+
+manual_with_verification()
+{
+ local tx=$1; shift
+ local rx=$1; shift
+
+ RET=0
+
+ # It isn't completely clear from IEEE 802.3-2018 Figure 99-5: Transmit
+ # Processing state diagram whether the "send_r" variable (send response
+ # to verification frame) should be taken into consideration while the
+ # MAC Merge TX direction is disabled. That being said, at least the
+ # NXP ENETC does not, and requires tx-enabled on in order to respond to
+ # the link partner's verification frames.
+ ethtool --set-mm $rx tx-enabled on
+ ethtool --set-mm $tx verify-enabled on tx-enabled on
+
+ # Wait for verification to finish
+ sleep 1
+
+ ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+ grep -q 'SUCCEEDED'
+ check_err "$?" "Verification did not succeed"
+
+ ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+ check_err "$?" "pMAC TX is not active"
+
+ traffic_test $tx "pmac"
+ check_err "$?" "Traffic did not get sent through $tx's pMAC"
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled off
+ ethtool --set-mm $rx tx-enabled off
+
+ log_test "Manual configuration with verification: $tx to $rx"
+}
+
+manual_with_verification_h1_to_h2()
+{
+ manual_with_verification $h1 $h2
+}
+
+manual_with_verification_h2_to_h1()
+{
+ manual_with_verification $h2 $h1
+}
+
+manual_without_verification()
+{
+ local tx=$1; shift
+ local rx=$1; shift
+
+ RET=0
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled on
+
+ ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+ grep -q 'DISABLED'
+ check_err "$?" "Verification is not disabled"
+
+ ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+ check_err "$?" "pMAC TX is not active"
+
+ traffic_test $tx "pmac"
+ check_err "$?" "Traffic did not get sent through $tx's pMAC"
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled off
+
+ log_test "Manual configuration without verification: $tx to $rx"
+}
+
+manual_without_verification_h1_to_h2()
+{
+ manual_without_verification $h1 $h2
+}
+
+manual_without_verification_h2_to_h1()
+{
+ manual_without_verification $h2 $h1
+}
+
+manual_failed_verification()
+{
+ local tx=$1; shift
+ local rx=$1; shift
+
+ RET=0
+
+ ethtool --set-mm $rx pmac-enabled off
+ ethtool --set-mm $tx verify-enabled on tx-enabled on
+
+ # Wait for verification to time out
+ sleep 1
+
+ ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+ grep -q 'SUCCEEDED'
+ check_fail "$?" "Verification succeeded when it shouldn't have"
+
+ ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+ check_fail "$?" "pMAC TX is active when it shouldn't have"
+
+ traffic_test $tx "emac"
+ check_err "$?" "Traffic did not get sent through $tx's eMAC"
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled off
+ ethtool --set-mm $rx pmac-enabled on
+
+ log_test "Manual configuration with failed verification: $tx to $rx"
+}
+
+manual_failed_verification_h1_to_h2()
+{
+ manual_failed_verification $h1 $h2
+}
+
+manual_failed_verification_h2_to_h1()
+{
+ manual_failed_verification $h2 $h1
+}
+
+lldp_change_add_frag_size()
+{
+ local add_frag_size=$1
+
+ lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null
+ # Wait for TLVs to be received
+ sleep 2
+ lldptool -i $h2 -t -n -V addEthCaps | \
+ grep -q "Additional fragment size: $add_frag_size"
+}
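+
+# Per IEEE 802.3br, addFragSize N advertises a minimum non-final fragment
+# size of 64 * (1 + N) octets, so the values 3..0 exercised below correspond
+# to 256, 192, 128 and 64 byte minimum fragments.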
+
+lldp()
+{
+ RET=0
+
+ systemctl start lldpad
+
+ # Configure the interfaces to receive and transmit LLDPDUs
+ lldptool -L -i $h1 adminStatus=rxtx >/dev/null
+ lldptool -L -i $h2 adminStatus=rxtx >/dev/null
+
+ # Enable the transmission of Additional Ethernet Capabilities TLV
+ lldptool -T -i $h1 -V addEthCaps enableTx=yes >/dev/null
+ lldptool -T -i $h2 -V addEthCaps enableTx=yes >/dev/null
+
+ # Wait for TLVs to be received
+ sleep 2
+
+ lldptool -i $h1 -t -n -V addEthCaps | \
+ grep -q "Preemption capability active"
+ check_err "$?" "$h1 pMAC TX is not active"
+
+ lldptool -i $h2 -t -n -V addEthCaps | \
+ grep -q "Preemption capability active"
+ check_err "$?" "$h2 pMAC TX is not active"
+
+ lldp_change_add_frag_size 3
+ check_err "$?" "addFragSize 3"
+
+ lldp_change_add_frag_size 2
+ check_err "$?" "addFragSize 2"
+
+ lldp_change_add_frag_size 1
+ check_err "$?" "addFragSize 1"
+
+ lldp_change_add_frag_size 0
+ check_err "$?" "addFragSize 0"
+
+ traffic_test $h1 "pmac"
+ check_err "$?" "Traffic did not get sent through $h1's pMAC"
+
+ traffic_test $h2 "pmac"
+ check_err "$?" "Traffic did not get sent through $h2's pMAC"
+
+ systemctl stop lldpad
+
+ log_test "LLDP"
+}
+
+h1_create()
+{
+ ip link set dev $h1 up
+
+ tc qdisc add dev $h1 root mqprio num_tc 4 map 0 1 2 3 \
+ queues 1@0 1@1 1@2 1@3 \
+ fp P E E E \
+ hw 1
+
+ ethtool --set-mm $h1 pmac-enabled on tx-enabled off verify-enabled off
+}
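+
+# In the mqprio "fp" map above, "P" marks a traffic class preemptible and
+# "E" express; with "map 0 1 2 3", priority $PREEMPTIBLE_PRIO lands in TC0,
+# the only preemptible class, which is what traffic_test() relies on.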
+
+h2_create()
+{
+ ip link set dev $h2 up
+
+ ethtool --set-mm $h2 pmac-enabled on tx-enabled off verify-enabled off
+
+ tc qdisc add dev $h2 root mqprio num_tc 4 map 0 1 2 3 \
+ queues 1@0 1@1 1@2 1@3 \
+ fp P E E E \
+ hw 1
+}
+
+h1_destroy()
+{
+ ethtool --set-mm $h1 pmac-enabled off tx-enabled off verify-enabled off
+
+ tc qdisc del dev $h1 root
+
+ ip link set dev $h1 down
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 root
+
+ ethtool --set-mm $h2 pmac-enabled off tx-enabled off verify-enabled off
+
+ ip link set dev $h2 down
+}
+
+setup_prepare()
+{
+ check_ethtool_mm_support
+ check_tc_fp_support
+ require_command lldptool
+ bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
+
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
index 9c1f76e108af..432fe8469851 100755
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
@@ -319,6 +319,19 @@ trap cleanup EXIT
setup_prepare
setup_wait
-tests_run
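+# ip -j stats reports, per subgroup, whether the L3 HW stats binding is
+# actually in use; veth has no counter offload, so a veth run is treated as
+# a skip rather than a failure.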
+used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info |
+ jq '.[].info.l3_stats.used')
+kind=$(ip -j -d link show dev $rp1 |
+ jq -r '.[].linkinfo.info_kind')
+if [[ $used != true ]]; then
+ if [[ $kind == veth ]]; then
+ log_test_skip "l3_stats not offloaded on veth interface"
+ EXIT_STATUS=$ksft_skip
+ else
+ RET=1 log_test "l3_stats not offloaded"
+ fi
+else
+ tests_run
+fi
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index d47499ba81c7..057c3d0ad620 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -120,6 +120,15 @@ check_tc_action_hw_stats_support()
fi
}
+check_tc_fp_support()
+{
+ tc qdisc add dev lo mqprio help 2>&1 | grep -q "fp "
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing frame preemption support"
+ exit $ksft_skip
+ fi
+}
+
check_ethtool_lanes_support()
{
ethtool --help 2>&1| grep lanes &> /dev/null
@@ -129,6 +138,15 @@ check_ethtool_lanes_support()
fi
}
+check_ethtool_mm_support()
+{
+ ethtool --help 2>&1| grep -- '--show-mm' &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: ethtool too old; it is missing MAC Merge layer support"
+ exit $ksft_skip
+ fi
+}
+
check_locked_port_support()
{
if ! bridge -d link show | grep -q " locked"; then
@@ -787,6 +805,17 @@ ethtool_stats_get()
ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
}
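+
+# Read one counter from ethtool's standard stats groups, optionally scoped
+# to a source MAC (emac/pmac), e.g.:
+#   ethtool_std_stats_get swp1 "eth-mac" "FramesTransmittedOK" "pmac"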
+ethtool_std_stats_get()
+{
+ local dev=$1; shift
+ local grp=$1; shift
+ local name=$1; shift
+ local src=$1; shift
+
+ ethtool --json -S $dev --groups $grp -- --src $src | \
+ jq '.[]."'"$grp"'"."'$name'"'
+}
+
qdisc_stats_get()
{
local dev=$1; shift
@@ -1887,3 +1916,34 @@ mldv1_done_get()
payload_template_expand_checksum "$hbh$icmpv6" $checksum
}
+
+bail_on_lldpad()
+{
+ local reason1="$1"; shift
+ local reason2="$1"; shift
+
+ if systemctl is-active --quiet lldpad; then
+
+ cat >/dev/stderr <<-EOF
+ WARNING: lldpad is running
+
+ lldpad will likely $reason1, and this test will
+ $reason2. The two are not supported at the same time;
+ one of them will arbitrarily overwrite the other,
+ causing spurious failures (or, less likely, spurious
+ passes) of this test.
+ EOF
+
+ if [[ -z $ALLOW_LLDPAD ]]; then
+ cat >/dev/stderr <<-EOF
+
+ If you want to run the test anyway, please set
+ an environment variable ALLOW_LLDPAD to a
+ non-empty string.
+ EOF
+ exit 1
+ else
+ return
+ fi
+ fi
+}
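+
+# Typical call, as used by the mlxsw Qdisc tests in this series:
+#   bail_on_lldpad "configure DCB" "configure Qdiscs"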
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
index 75a37c189ef3..df9bcd6a811a 100644
--- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
@@ -57,6 +57,10 @@ tbf_root_test()
tc qdisc del dev $swp2 root
}
+if type -t sch_tbf_pre_hook >/dev/null; then
+ sch_tbf_pre_hook
+fi
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
index 72aa21ba88c7..96c997be0d03 100755
--- a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
@@ -23,6 +23,10 @@ tbf_test()
tc qdisc del dev $swp2 root
}
+if type -t sch_tbf_pre_hook >/dev/null; then
+ sch_tbf_pre_hook
+fi
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index b25a31445ded..c7f9ebeebc2c 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -106,8 +106,8 @@ static struct cfg_sockopt_types cfg_sockopt_types;
static void die_usage(void)
{
fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] "
- "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-j] [-l] [-r num] "
- "[-s MPTCP|TCP] [-S num] [-r num] [-t num] [-T num] [-u] [-w sec] connect_address\n");
+ "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-r num] [-R num] "
+ "[-s MPTCP|TCP] [-S num] [-t num] [-T num] [-w sec] connect_address\n");
fprintf(stderr, "\t-6 use ipv6\n");
fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount "
@@ -126,13 +126,13 @@ static void die_usage(void)
fprintf(stderr, "\t-p num -- use port num\n");
fprintf(stderr,
"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
- fprintf(stderr, "\t-t num -- set poll timeout to num\n");
- fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes "
"-- for remove addr tests\n");
fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
+ fprintf(stderr, "\t-t num -- set poll timeout to num\n");
+ fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
exit(1);
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index fafd19ec7e1f..26310c17b4c6 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -6,6 +6,10 @@
# address all other issues detected by shellcheck.
#shellcheck disable=SC2086
+# ShellCheck incorrectly believes that most of the code here is unreachable
+# because it's invoked by variable name; see how the "tests" array is used.
+#shellcheck disable=SC2317
+
ret=0
sin=""
sinfail=""
@@ -371,8 +375,9 @@ check_transfer()
local line
if [ -n "$bytes" ]; then
+ local out_size
# when truncating we must check the size explicitly
- local out_size=$(wc -c $out | awk '{print $1}')
+ out_size=$(wc -c $out | awk '{print $1}')
if [ $out_size -ne $bytes ]; then
echo "[ FAIL ] $what output file has wrong size ($out_size, $bytes)"
fail_test
@@ -500,6 +505,7 @@ kill_events_pids()
kill_tests_wait()
{
+ #shellcheck disable=SC2046
kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1)
wait
}
@@ -1703,7 +1709,7 @@ chk_subflow_nr()
cnt1=$(ss -N $ns1 -tOni | grep -c token)
cnt2=$(ss -N $ns2 -tOni | grep -c token)
- if [ "$cnt1" != "$subflow_nr" -o "$cnt2" != "$subflow_nr" ]; then
+ if [ "$cnt1" != "$subflow_nr" ] || [ "$cnt2" != "$subflow_nr" ]; then
echo "[fail] got $cnt1:$cnt2 subflows expected $subflow_nr"
fail_test
dump_stats=1
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 48e52f995a98..b1eb7bce599d 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -913,6 +913,7 @@ test_listener()
$client4_port > /dev/null 2>&1 &
local listener_pid=$!
+ sleep 0.5
verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port
# ADD_ADDR from client to server machine reusing the subflow port
@@ -928,6 +929,7 @@ test_listener()
# Delete the listener from the client ns, if one was created
kill_wait $listener_pid
+ sleep 0.5
verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
}
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index 7ce46700a3ae..3117a4be0cd0 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -11,7 +11,8 @@ VERBOSE=0
TRACING=0
tests="
- netlink_checks ovsnl: validate netlink attrs and settings"
+ netlink_checks ovsnl: validate netlink attrs and settings
+ upcall_interfaces ovs: test the upcall interfaces"
info() {
[ $VERBOSE = 0 ] || echo $*
@@ -70,6 +71,62 @@ ovs_add_dp () {
on_exit "ovs_sbx $sbxname python3 $ovs_base/ovs-dpctl.py del-dp $1;"
}
+ovs_add_if () {
+ info "Adding IF to DP: br:$2 if:$3"
+ if [ "$4" != "-u" ]; then
+ ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if "$2" "$3" \
+ || return 1
+ else
+ python3 $ovs_base/ovs-dpctl.py add-if \
+ -u "$2" "$3" >$ovs_dir/$3.out 2>$ovs_dir/$3.err &
+ pid=$!
+ on_exit "ovs_sbx $1 kill -TERM $pid 2>/dev/null"
+ fi
+}
+
+ovs_del_if () {
+ info "Deleting IF from DP: br:$2 if:$3"
+ ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-if "$2" "$3" || return 1
+}
+
+ovs_netns_spawn_daemon() {
+ sbx=$1
+ shift
+ netns=$1
+ shift
+ info "spawning cmd: $*"
+ ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr &
+ pid=$!
+ ovs_sbx "$sbx" on_exit "kill -TERM $pid 2>/dev/null"
+}
+
+ovs_add_netns_and_veths () {
+ info "Adding netns attached: sbx:$1 dp:$2 {$3, $4, $5}"
+ ovs_sbx "$1" ip netns add "$3" || return 1
+ on_exit "ovs_sbx $1 ip netns del $3"
+ ovs_sbx "$1" ip link add "$4" type veth peer name "$5" || return 1
+ on_exit "ovs_sbx $1 ip link del $4 >/dev/null 2>&1"
+ ovs_sbx "$1" ip link set "$4" up || return 1
+ ovs_sbx "$1" ip link set "$5" netns "$3" || return 1
+ ovs_sbx "$1" ip netns exec "$3" ip link set "$5" up || return 1
+
+ if [ "$6" != "" ]; then
+ ovs_sbx "$1" ip netns exec "$3" ip addr add "$6" dev "$5" \
+ || return 1
+ fi
+
+ if [ "$7" != "-u" ]; then
+ ovs_add_if "$1" "$2" "$4" || return 1
+ else
+ ovs_add_if "$1" "$2" "$4" -u || return 1
+ fi
+
+ [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$3" \
+ tcpdump -i any -s 65535
+
+ return 0
+}
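+
+# e.g. (from test_upcall_interfaces below):
+#   ovs_add_netns_and_veths "test_upcall_interfaces" ui0 upc left0 l0 \
+#       172.31.110.1/24 -u
+# The optional 6th argument assigns an address inside the netns; a trailing
+# -u attaches the port in upcall mode.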
+
usage() {
echo
echo "$0 [OPTIONS] [TEST]..."
@@ -101,6 +158,36 @@ test_netlink_checks () {
return 1
fi
+ ovs_add_netns_and_veths "test_netlink_checks" nv0 left left0 l0 || \
+ return 1
+ ovs_add_netns_and_veths "test_netlink_checks" nv0 right right0 r0 || \
+ return 1
+ [ $(python3 $ovs_base/ovs-dpctl.py show nv0 | grep port | \
+ wc -l) == 3 ] || \
+ return 1
+ ovs_del_if "test_netlink_checks" nv0 right0 || return 1
+ [ $(python3 $ovs_base/ovs-dpctl.py show nv0 | grep port | \
+ wc -l) == 2 ] || \
+ return 1
+
+ return 0
+}
+
+test_upcall_interfaces() {
+ sbx_add "test_upcall_interfaces" || return 1
+
+ info "setting up new DP"
+ ovs_add_dp "test_upcall_interfaces" ui0 -V 2:1 || return 1
+
+ ovs_add_netns_and_veths "test_upcall_interfaces" ui0 upc left0 l0 \
+ 172.31.110.1/24 -u || return 1
+
+ sleep 1
+ info "sending arping"
+ ip netns exec upc arping -I l0 172.31.110.20 -c 1 \
+ >$ovs_dir/arping.stdout 2>$ovs_dir/arping.stderr
+
+ grep -E "MISS upcall\[0/yes\]: .*arp\(sip=172.31.110.1,tip=172.31.110.20,op=1,sha=" $ovs_dir/left0.out >/dev/null 2>&1 || return 1
return 0
}
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 3243c90d449e..1c8b36bc15d4 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -6,15 +6,23 @@
import argparse
import errno
+import ipaddress
+import logging
+import multiprocessing
+import struct
import sys
+import time
try:
from pyroute2 import NDB
+ from pyroute2.netlink import NLA_F_NESTED
from pyroute2.netlink import NLM_F_ACK
+ from pyroute2.netlink import NLM_F_DUMP
from pyroute2.netlink import NLM_F_REQUEST
from pyroute2.netlink import genlmsg
from pyroute2.netlink import nla
+ from pyroute2.netlink import nlmsg_atoms
from pyroute2.netlink.exceptions import NetlinkError
from pyroute2.netlink.generic import GenericNetlinkSocket
except ModuleNotFoundError:
@@ -40,6 +48,36 @@ OVS_VPORT_CMD_DEL = 2
OVS_VPORT_CMD_GET = 3
OVS_VPORT_CMD_SET = 4
+OVS_FLOW_CMD_NEW = 1
+OVS_FLOW_CMD_DEL = 2
+OVS_FLOW_CMD_GET = 3
+OVS_FLOW_CMD_SET = 4
+
+
+def macstr(mac):
+ outstr = ":".join(["%02X" % i for i in mac])
+ return outstr
+
+
+def convert_mac(mac_str, mask=False):
+ if mac_str is None or mac_str == "":
+ mac_str = "00:00:00:00:00:00"
+ if mask is True and mac_str != "00:00:00:00:00:00":
+ mac_str = "FF:FF:FF:FF:FF:FF"
+ mac_split = mac_str.split(":")
+ ret = bytearray([int(i, 16) for i in mac_split])
+ return bytes(ret)
+
+
+def convert_ipv4(ip, mask=False):
+ if ip is None:
+ ip = 0
+ if mask is True:
+ if ip != 0:
+ ip = int(ipaddress.IPv4Address(ip)) & 0xFFFFFFFF
+
+ return int(ipaddress.IPv4Address(ip))
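+
+# For illustration (hypothetical values):
+#   macstr(b"\x00\x11\x22\x33\x44\x55") == "00:11:22:33:44:55"
+#   convert_ipv4("192.0.2.1") == 3221225985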
+
class ovs_dp_msg(genlmsg):
# include the OVS version
@@ -49,8 +87,893 @@ class ovs_dp_msg(genlmsg):
fields = genlmsg.fields + (("dpifindex", "I"),)
-class OvsDatapath(GenericNetlinkSocket):
+class ovsactions(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_ACTION_ATTR_UNSPEC", "none"),
+ ("OVS_ACTION_ATTR_OUTPUT", "uint32"),
+ ("OVS_ACTION_ATTR_USERSPACE", "userspace"),
+ ("OVS_ACTION_ATTR_SET", "none"),
+ ("OVS_ACTION_ATTR_PUSH_VLAN", "none"),
+ ("OVS_ACTION_ATTR_POP_VLAN", "flag"),
+ ("OVS_ACTION_ATTR_SAMPLE", "none"),
+ ("OVS_ACTION_ATTR_RECIRC", "uint32"),
+ ("OVS_ACTION_ATTR_HASH", "none"),
+ ("OVS_ACTION_ATTR_PUSH_MPLS", "none"),
+ ("OVS_ACTION_ATTR_POP_MPLS", "flag"),
+ ("OVS_ACTION_ATTR_SET_MASKED", "none"),
+ ("OVS_ACTION_ATTR_CT", "ctact"),
+ ("OVS_ACTION_ATTR_TRUNC", "uint32"),
+ ("OVS_ACTION_ATTR_PUSH_ETH", "none"),
+ ("OVS_ACTION_ATTR_POP_ETH", "flag"),
+ ("OVS_ACTION_ATTR_CT_CLEAR", "flag"),
+ ("OVS_ACTION_ATTR_PUSH_NSH", "none"),
+ ("OVS_ACTION_ATTR_POP_NSH", "flag"),
+ ("OVS_ACTION_ATTR_METER", "none"),
+ ("OVS_ACTION_ATTR_CLONE", "none"),
+ ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"),
+ ("OVS_ACTION_ATTR_ADD_MPLS", "none"),
+ ("OVS_ACTION_ATTR_DEC_TTL", "none"),
+ )
+
+ class ctact(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_CT_ATTR_NONE", "none"),
+ ("OVS_CT_ATTR_COMMIT", "flag"),
+ ("OVS_CT_ATTR_ZONE", "uint16"),
+ ("OVS_CT_ATTR_MARK", "none"),
+ ("OVS_CT_ATTR_LABELS", "none"),
+ ("OVS_CT_ATTR_HELPER", "asciiz"),
+ ("OVS_CT_ATTR_NAT", "natattr"),
+ ("OVS_CT_ATTR_FORCE_COMMIT", "flag"),
+ ("OVS_CT_ATTR_EVENTMASK", "uint32"),
+ ("OVS_CT_ATTR_TIMEOUT", "asciiz"),
+ )
+
+ class natattr(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_NAT_ATTR_NONE", "none"),
+ ("OVS_NAT_ATTR_SRC", "flag"),
+ ("OVS_NAT_ATTR_DST", "flag"),
+ ("OVS_NAT_ATTR_IP_MIN", "ipaddr"),
+ ("OVS_NAT_ATTR_IP_MAX", "ipaddr"),
+ ("OVS_NAT_ATTR_PROTO_MIN", "uint16"),
+ ("OVS_NAT_ATTR_PROTO_MAX", "uint16"),
+ ("OVS_NAT_ATTR_PERSISTENT", "flag"),
+ ("OVS_NAT_ATTR_PROTO_HASH", "flag"),
+ ("OVS_NAT_ATTR_PROTO_RANDOM", "flag"),
+ )
+
+ def dpstr(self, more=False):
+ print_str = "nat("
+
+ if self.get_attr("OVS_NAT_ATTR_SRC"):
+ print_str += "src"
+ elif self.get_attr("OVS_NAT_ATTR_DST"):
+ print_str += "dst"
+ else:
+ print_str += "XXX-unknown-nat"
+
+ if self.get_attr("OVS_NAT_ATTR_IP_MIN") or self.get_attr(
+ "OVS_NAT_ATTR_IP_MAX"
+ ):
+ if self.get_attr("OVS_NAT_ATTR_IP_MIN"):
+ print_str += "=%s," % str(
+ self.get_attr("OVS_NAT_ATTR_IP_MIN")
+ )
+
+ if self.get_attr("OVS_NAT_ATTR_IP_MAX"):
+ print_str += "-%s," % str(
+ self.get_attr("OVS_NAT_ATTR_IP_MAX")
+ )
+ else:
+ print_str += ","
+
+ if self.get_attr("OVS_NAT_ATTR_PROTO_MIN"):
+ print_str += "proto_min=%d," % self.get_attr(
+ "OVS_NAT_ATTR_PROTO_MIN"
+ )
+
+ if self.get_attr("OVS_NAT_ATTR_PROTO_MAX"):
+ print_str += "proto_max=%d," % self.get_attr(
+ "OVS_NAT_ATTR_PROTO_MAX"
+ )
+
+ if self.get_attr("OVS_NAT_ATTR_PERSISTENT"):
+ print_str += "persistent,"
+ if self.get_attr("OVS_NAT_ATTR_HASH"):
+ print_str += "hash,"
+ if self.get_attr("OVS_NAT_ATTR_RANDOM"):
+ print_str += "random"
+ print_str += ")"
+ return print_str
+
+ def dpstr(self, more=False):
+ print_str = "ct("
+
+ if self.get_attr("OVS_CT_ATTR_COMMIT") is not None:
+ print_str += "commit,"
+ if self.get_attr("OVS_CT_ATTR_ZONE") is not None:
+ print_str += "zone=%d," % self.get_attr("OVS_CT_ATTR_ZONE")
+ if self.get_attr("OVS_CT_ATTR_HELPER") is not None:
+ print_str += "helper=%s," % self.get_attr("OVS_CT_ATTR_HELPER")
+ if self.get_attr("OVS_CT_ATTR_NAT") is not None:
+ print_str += self.get_attr("OVS_CT_ATTR_NAT").dpstr(more)
+ print_str += ","
+ if self.get_attr("OVS_CT_ATTR_FORCE_COMMIT") is not None:
+ print_str += "force,"
+ if self.get_attr("OVS_CT_ATTR_EVENTMASK") is not None:
+ print_str += "emask=0x%X," % self.get_attr(
+ "OVS_CT_ATTR_EVENTMASK"
+ )
+ if self.get_attr("OVS_CT_ATTR_TIMEOUT") is not None:
+ print_str += "timeout=%s" % self.get_attr(
+ "OVS_CT_ATTR_TIMEOUT"
+ )
+ print_str += ")"
+ return print_str
+
+ class userspace(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_USERSPACE_ATTR_UNUSED", "none"),
+ ("OVS_USERSPACE_ATTR_PID", "uint32"),
+ ("OVS_USERSPACE_ATTR_USERDATA", "array(uint8)"),
+ ("OVS_USERSPACE_ATTR_EGRESS_TUN_PORT", "uint32"),
+ )
+
+ def dpstr(self, more=False):
+ print_str = "userspace("
+ if self.get_attr("OVS_USERSPACE_ATTR_PID") is not None:
+ print_str += "pid=%d," % self.get_attr(
+ "OVS_USERSPACE_ATTR_PID"
+ )
+ if self.get_attr("OVS_USERSPACE_ATTR_USERDATA") is not None:
+ print_str += "userdata="
+ for f in self.get_attr("OVS_USERSPACE_ATTR_USERDATA"):
+ print_str += "%x." % f
+ if self.get_attr("OVS_USERSPACE_ATTR_TUN_PORT") is not None:
+ print_str += "egress_tun_port=%d" % self.get_attr(
+ "OVS_USERSPACE_ATTR_TUN_PORT"
+ )
+ print_str += ")"
+ return print_str
+
+ def dpstr(self, more=False):
+ print_str = ""
+
+ for field in self.nla_map:
+ if field[1] == "none" or self.get_attr(field[0]) is None:
+ continue
+ if print_str != "":
+ print_str += ","
+
+ if field[1] == "uint32":
+ if field[0] == "OVS_ACTION_ATTR_OUTPUT":
+ print_str += "%d" % int(self.get_attr(field[0]))
+ elif field[0] == "OVS_ACTION_ATTR_RECIRC":
+ print_str += "recirc(0x%x)" % int(self.get_attr(field[0]))
+ elif field[0] == "OVS_ACTION_ATTR_TRUNC":
+ print_str += "trunc(%d)" % int(self.get_attr(field[0]))
+ elif field[1] == "flag":
+ if field[0] == "OVS_ACTION_ATTR_CT_CLEAR":
+ print_str += "ct_clear"
+ elif field[0] == "OVS_ACTION_ATTR_POP_VLAN":
+ print_str += "pop_vlan"
+ elif field[0] == "OVS_ACTION_ATTR_POP_ETH":
+ print_str += "pop_eth"
+ elif field[0] == "OVS_ACTION_ATTR_POP_NSH":
+ print_str += "pop_nsh"
+ elif field[0] == "OVS_ACTION_ATTR_POP_MPLS":
+ print_str += "pop_mpls"
+ else:
+ datum = self.get_attr(field[0])
+ print_str += datum.dpstr(more)
+
+ return print_str
+
+
+class ovskey(nla):
+ nla_flags = NLA_F_NESTED
+ nla_map = (
+ ("OVS_KEY_ATTR_UNSPEC", "none"),
+ ("OVS_KEY_ATTR_ENCAP", "none"),
+ ("OVS_KEY_ATTR_PRIORITY", "uint32"),
+ ("OVS_KEY_ATTR_IN_PORT", "uint32"),
+ ("OVS_KEY_ATTR_ETHERNET", "ethaddr"),
+ ("OVS_KEY_ATTR_VLAN", "uint16"),
+ ("OVS_KEY_ATTR_ETHERTYPE", "be16"),
+ ("OVS_KEY_ATTR_IPV4", "ovs_key_ipv4"),
+ ("OVS_KEY_ATTR_IPV6", "ovs_key_ipv6"),
+ ("OVS_KEY_ATTR_TCP", "ovs_key_tcp"),
+ ("OVS_KEY_ATTR_UDP", "ovs_key_udp"),
+ ("OVS_KEY_ATTR_ICMP", "ovs_key_icmp"),
+ ("OVS_KEY_ATTR_ICMPV6", "ovs_key_icmpv6"),
+ ("OVS_KEY_ATTR_ARP", "ovs_key_arp"),
+ ("OVS_KEY_ATTR_ND", "ovs_key_nd"),
+ ("OVS_KEY_ATTR_SKB_MARK", "uint32"),
+ ("OVS_KEY_ATTR_TUNNEL", "none"),
+ ("OVS_KEY_ATTR_SCTP", "ovs_key_sctp"),
+ ("OVS_KEY_ATTR_TCP_FLAGS", "be16"),
+ ("OVS_KEY_ATTR_DP_HASH", "uint32"),
+ ("OVS_KEY_ATTR_RECIRC_ID", "uint32"),
+ ("OVS_KEY_ATTR_MPLS", "array(ovs_key_mpls)"),
+ ("OVS_KEY_ATTR_CT_STATE", "uint32"),
+ ("OVS_KEY_ATTR_CT_ZONE", "uint16"),
+ ("OVS_KEY_ATTR_CT_MARK", "uint32"),
+ ("OVS_KEY_ATTR_CT_LABELS", "none"),
+ ("OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4", "ovs_key_ct_tuple_ipv4"),
+ ("OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6", "ovs_key_ct_tuple_ipv6"),
+ ("OVS_KEY_ATTR_NSH", "none"),
+ ("OVS_KEY_ATTR_PACKET_TYPE", "none"),
+ ("OVS_KEY_ATTR_ND_EXTENSIONS", "none"),
+ ("OVS_KEY_ATTR_TUNNEL_INFO", "none"),
+ ("OVS_KEY_ATTR_IPV6_EXTENSIONS", "none"),
+ )
+
+ class ovs_key_proto(nla):
+ fields = (
+ ("src", "!H"),
+ ("dst", "!H"),
+ )
+
+ fields_map = (
+ ("src", "src", "%d", lambda x: int(x) if x is not None else 0),
+ ("dst", "dst", "%d", lambda x: int(x) if x is not None else 0),
+ )
+
+ def __init__(
+ self,
+ protostr,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ self.proto_str = protostr
+ nla.__init__(
+ self,
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ def dpstr(self, masked=None, more=False):
+ outstr = self.proto_str + "("
+ first = False
+ for f in self.fields_map:
+ if first:
+ outstr += ","
+ if masked is None:
+ outstr += "%s=" % f[0]
+ if isinstance(f[2], str):
+ outstr += f[2] % self[f[1]]
+ else:
+ outstr += f[2](self[f[1]])
+ first = True
+ elif more or f[3](masked[f[1]]) != 0:
+ outstr += "%s=" % f[0]
+ if isinstance(f[2], str):
+ outstr += f[2] % self[f[1]]
+ else:
+ outstr += f[2](self[f[1]])
+ outstr += "/"
+ if isinstance(f[2], str):
+ outstr += f[2] % masked[f[1]]
+ else:
+ outstr += f[2](masked[f[1]])
+ first = True
+ outstr += ")"
+ return outstr
+
+ class ethaddr(ovs_key_proto):
+ fields = (
+ ("src", "!6s"),
+ ("dst", "!6s"),
+ )
+
+ fields_map = (
+ (
+ "src",
+ "src",
+ macstr,
+ lambda x: int.from_bytes(x, "big"),
+ convert_mac,
+ ),
+ (
+ "dst",
+ "dst",
+ macstr,
+ lambda x: int.from_bytes(x, "big"),
+ convert_mac,
+ ),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "eth",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_ipv4(ovs_key_proto):
+ fields = (
+ ("src", "!I"),
+ ("dst", "!I"),
+ ("proto", "B"),
+ ("tos", "B"),
+ ("ttl", "B"),
+ ("frag", "B"),
+ )
+
+ fields_map = (
+ (
+ "src",
+ "src",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
+ convert_ipv4,
+ ),
+ (
+ "dst",
+ "dst",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
+ convert_ipv4,
+ ),
+ ("proto", "proto", "%d", lambda x: int(x) if x is not None else 0),
+ ("tos", "tos", "%d", lambda x: int(x) if x is not None else 0),
+ ("ttl", "ttl", "%d", lambda x: int(x) if x is not None else 0),
+ ("frag", "frag", "%d", lambda x: int(x) if x is not None else 0),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "ipv4",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_ipv6(ovs_key_proto):
+ fields = (
+ ("src", "!16s"),
+ ("dst", "!16s"),
+ ("label", "!I"),
+ ("proto", "B"),
+ ("tclass", "B"),
+ ("hlimit", "B"),
+ ("frag", "B"),
+ )
+
+ fields_map = (
+ (
+ "src",
+ "src",
+ lambda x: str(ipaddress.IPv6Address(x)),
+ lambda x: int.from_bytes(x, "big"),
+ lambda x: ipaddress.IPv6Address(x),
+ ),
+ (
+ "dst",
+ "dst",
+ lambda x: str(ipaddress.IPv6Address(x)),
+ lambda x: int.from_bytes(x, "big"),
+ lambda x: ipaddress.IPv6Address(x),
+ ),
+ ("label", "label", "%d", int),
+ ("proto", "proto", "%d", int),
+ ("tclass", "tclass", "%d", int),
+ ("hlimit", "hlimit", "%d", int),
+ ("frag", "frag", "%d", int),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "ipv6",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_tcp(ovs_key_proto):
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "tcp",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_udp(ovs_key_proto):
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "udp",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_sctp(ovs_key_proto):
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "sctp",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_icmp(ovs_key_proto):
+ fields = (
+ ("type", "B"),
+ ("code", "B"),
+ )
+
+ fields_map = (
+ ("type", "type", "%d", int),
+ ("code", "code", "%d", int),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "icmp",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_icmpv6(ovs_key_icmp):
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "icmpv6",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_arp(ovs_key_proto):
+ fields = (
+ ("sip", "!I"),
+ ("tip", "!I"),
+ ("op", "!H"),
+ ("sha", "!6s"),
+ ("tha", "!6s"),
+ ("pad", "xx"),
+ )
+
+ fields_map = (
+ (
+ "sip",
+ "sip",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
+ convert_ipv4,
+ ),
+ (
+ "tip",
+ "tip",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
+ convert_ipv4,
+ ),
+ ("op", "op", "%d", lambda x: int(x) if x is not None else 0),
+ (
+ "sha",
+ "sha",
+ macstr,
+ lambda x: int.from_bytes(x, "big"),
+ convert_mac,
+ ),
+ (
+ "tha",
+ "tha",
+ macstr,
+ lambda x: int.from_bytes(x, "big"),
+ convert_mac,
+ ),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "arp",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_nd(ovs_key_proto):
+ fields = (
+ ("target", "!16s"),
+ ("sll", "!6s"),
+ ("tll", "!6s"),
+ )
+
+ fields_map = (
+ (
+ "target",
+ "target",
+ lambda x: str(ipaddress.IPv6Address(x)),
+ lambda x: int.from_bytes(x, "big"),
+ ),
+ ("sll", "sll", macstr, lambda x: int.from_bytes(x, "big")),
+ ("tll", "tll", macstr, lambda x: int.from_bytes(x, "big")),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "nd",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_ct_tuple_ipv4(ovs_key_proto):
+ fields = (
+ ("src", "!I"),
+ ("dst", "!I"),
+ ("tp_src", "!H"),
+ ("tp_dst", "!H"),
+ ("proto", "B"),
+ )
+
+ fields_map = (
+ (
+ "src",
+ "src",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
+ ),
+ (
+ "dst",
+ "dst",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
+ ),
+ ("tp_src", "tp_src", "%d", int),
+ ("tp_dst", "tp_dst", "%d", int),
+ ("proto", "proto", "%d", int),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "ct_tuple4",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_ct_tuple_ipv6(ovs_key_proto):
+ fields = (
+ ("src", "!16s"),
+ ("dst", "!16s"),
+ ("tp_src", "!H"),
+ ("tp_dst", "!H"),
+ ("proto", "B"),
+ )
+
+ fields_map = (
+ (
+ "src",
+ "src",
+ lambda x: str(ipaddress.IPv6Address(x)),
+ lambda x: int.from_bytes(x, "big", convertmac),
+ ),
+ (
+ "dst",
+ "dst",
+ lambda x: str(ipaddress.IPv6Address(x)),
+ lambda x: int.from_bytes(x, "big"),
+ ),
+ ("tp_src", "tp_src", "%d", int),
+ ("tp_dst", "tp_dst", "%d", int),
+ ("proto", "proto", "%d", int),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "ct_tuple6",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_mpls(nla):
+ fields = (("lse", ">I"),)
+
+ def dpstr(self, mask=None, more=False):
+ print_str = ""
+
+ for field in (
+ (
+ "OVS_KEY_ATTR_PRIORITY",
+ "skb_priority",
+ "%d",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_SKB_MARK",
+ "skb_mark",
+ "%d",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_RECIRC_ID",
+ "recirc_id",
+ "0x%08X",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_DP_HASH",
+ "dp_hash",
+ "0x%08X",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_CT_STATE",
+ "ct_state",
+ "0x%04x",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_CT_ZONE",
+ "ct_zone",
+ "0x%04x",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_CT_MARK",
+ "ct_mark",
+ "0x%08x",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4",
+ None,
+ None,
+ False,
+ False,
+ ),
+ (
+ "OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6",
+ None,
+ None,
+ False,
+ False,
+ ),
+ (
+ "OVS_KEY_ATTR_IN_PORT",
+ "in_port",
+ "%d",
+ lambda x: True,
+ True,
+ ),
+ ("OVS_KEY_ATTR_ETHERNET", None, None, False, False),
+ (
+ "OVS_KEY_ATTR_ETHERTYPE",
+ "eth_type",
+ "0x%04x",
+ lambda x: int(x) == 0xFFFF,
+ True,
+ ),
+ ("OVS_KEY_ATTR_IPV4", None, None, False, False),
+ ("OVS_KEY_ATTR_IPV6", None, None, False, False),
+ ("OVS_KEY_ATTR_ARP", None, None, False, False),
+ ("OVS_KEY_ATTR_TCP", None, None, False, False),
+ (
+ "OVS_KEY_ATTR_TCP_FLAGS",
+ "tcp_flags",
+ "0x%04x",
+ lambda x: False,
+ True,
+ ),
+ ("OVS_KEY_ATTR_UDP", None, None, False, False),
+ ("OVS_KEY_ATTR_SCTP", None, None, False, False),
+ ("OVS_KEY_ATTR_ICMP", None, None, False, False),
+ ("OVS_KEY_ATTR_ICMPV6", None, None, False, False),
+ ("OVS_KEY_ATTR_ND", None, None, False, False),
+ ):
+ v = self.get_attr(field[0])
+ if v is not None:
+ m = None if mask is None else mask.get_attr(field[0])
+ if field[4] is False:
+ print_str += v.dpstr(m, more)
+ print_str += ","
+ else:
+ if m is None or field[3](m):
+ print_str += field[1] + "("
+ print_str += field[2] % v
+ print_str += "),"
+ elif more or m != 0:
+ print_str += field[1] + "("
+ print_str += (field[2] % v) + "/" + (field[2] % m)
+ print_str += "),"
+
+ return print_str
+
+
+class OvsPacket(GenericNetlinkSocket):
+ OVS_PACKET_CMD_MISS = 1 # Flow table miss
+ OVS_PACKET_CMD_ACTION = 2 # USERSPACE action
+ OVS_PACKET_CMD_EXECUTE = 3 # Apply actions to packet
+
+ class ovs_packet_msg(ovs_dp_msg):
+ nla_map = (
+ ("OVS_PACKET_ATTR_UNSPEC", "none"),
+ ("OVS_PACKET_ATTR_PACKET", "array(uint8)"),
+ ("OVS_PACKET_ATTR_KEY", "ovskey"),
+ ("OVS_PACKET_ATTR_ACTIONS", "ovsactions"),
+ ("OVS_PACKET_ATTR_USERDATA", "none"),
+ ("OVS_PACKET_ATTR_EGRESS_TUN_KEY", "none"),
+ ("OVS_PACKET_ATTR_UNUSED1", "none"),
+ ("OVS_PACKET_ATTR_UNUSED2", "none"),
+ ("OVS_PACKET_ATTR_PROBE", "none"),
+ ("OVS_PACKET_ATTR_MRU", "uint16"),
+ ("OVS_PACKET_ATTR_LEN", "uint32"),
+ ("OVS_PACKET_ATTR_HASH", "uint64"),
+ )
+
+ def __init__(self):
+ GenericNetlinkSocket.__init__(self)
+ self.bind(OVS_PACKET_FAMILY, OvsPacket.ovs_packet_msg)
+
+ def upcall_handler(self, up=None):
+ print("listening on upcall packet handler:", self.epid)
+ while True:
+ try:
+ msgs = self.get()
+ for msg in msgs:
+ if not up:
+ continue
+ if msg["cmd"] == OvsPacket.OVS_PACKET_CMD_MISS:
+ up.miss(msg)
+ elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_ACTION:
+ up.action(msg)
+ elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE:
+ up.execute(msg)
+ else:
+ print("Unkonwn cmd: %d" % msg["cmd"])
+ except NetlinkError as ne:
+ raise ne
+
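+# Any object exposing miss(), action() and execute() methods can be passed
+# as the handler to upcall_handler() above; OvsFlow, defined further down,
+# implements exactly that interface and simply prints each upcall.
+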
+class OvsDatapath(GenericNetlinkSocket):
OVS_DP_F_VPORT_PIDS = 1 << 1
OVS_DP_F_DISPATCH_UPCALL_PER_CPU = 1 << 3
@@ -62,7 +985,7 @@ class OvsDatapath(GenericNetlinkSocket):
nla_map = (
("OVS_DP_ATTR_UNSPEC", "none"),
("OVS_DP_ATTR_NAME", "asciiz"),
- ("OVS_DP_ATTR_UPCALL_PID", "uint32"),
+ ("OVS_DP_ATTR_UPCALL_PID", "array(uint32)"),
("OVS_DP_ATTR_STATS", "dpstats"),
("OVS_DP_ATTR_MEGAFLOW_STATS", "megaflowstats"),
("OVS_DP_ATTR_USER_FEATURES", "uint32"),
@@ -113,7 +1036,9 @@ class OvsDatapath(GenericNetlinkSocket):
return reply
- def create(self, dpname, shouldUpcall=False, versionStr=None):
+ def create(
+ self, dpname, shouldUpcall=False, versionStr=None, p=OvsPacket()
+ ):
msg = OvsDatapath.dp_cmd_msg()
msg["cmd"] = OVS_DP_CMD_NEW
if versionStr is None:
@@ -128,11 +1053,18 @@ class OvsDatapath(GenericNetlinkSocket):
if versionStr is not None and versionStr.find(":") != -1:
dpfeatures = int(versionStr.split(":")[1], 0)
else:
- dpfeatures = OvsDatapath.OVS_DP_F_VPORT_PIDS
+ dpfeatures = OvsDatapath.OVS_DP_F_DISPATCH_UPCALL_PER_CPU
+ nproc = multiprocessing.cpu_count()
+ procarray = []
+ for i in range(1, nproc):
+ procarray += [int(p.epid)]
+ msg["attrs"].append(["OVS_DP_ATTR_UPCALL_PID", procarray])
msg["attrs"].append(["OVS_DP_ATTR_USER_FEATURES", dpfeatures])
if not shouldUpcall:
- msg["attrs"].append(["OVS_DP_ATTR_UPCALL_PID", 0])
+ msg["attrs"].append(["OVS_DP_ATTR_UPCALL_PID", [0]])
try:
reply = self.nlm_request(
@@ -170,6 +1102,12 @@ class OvsDatapath(GenericNetlinkSocket):
class OvsVport(GenericNetlinkSocket):
+ OVS_VPORT_TYPE_NETDEV = 1
+ OVS_VPORT_TYPE_INTERNAL = 2
+ OVS_VPORT_TYPE_GRE = 3
+ OVS_VPORT_TYPE_VXLAN = 4
+ OVS_VPORT_TYPE_GENEVE = 5
+
class ovs_vport_msg(ovs_dp_msg):
nla_map = (
("OVS_VPORT_ATTR_UNSPEC", "none"),
@@ -197,21 +1135,35 @@ class OvsVport(GenericNetlinkSocket):
)
def type_to_str(vport_type):
- if vport_type == 1:
+ if vport_type == OvsVport.OVS_VPORT_TYPE_NETDEV:
return "netdev"
- elif vport_type == 2:
+ elif vport_type == OvsVport.OVS_VPORT_TYPE_INTERNAL:
return "internal"
- elif vport_type == 3:
+ elif vport_type == OvsVport.OVS_VPORT_TYPE_GRE:
return "gre"
- elif vport_type == 4:
+ elif vport_type == OvsVport.OVS_VPORT_TYPE_VXLAN:
return "vxlan"
- elif vport_type == 5:
+ elif vport_type == OvsVport.OVS_VPORT_TYPE_GENEVE:
return "geneve"
- return "unknown:%d" % vport_type
+ raise ValueError("Unknown vport type:%d" % vport_type)
- def __init__(self):
+ def str_to_type(vport_type):
+ if vport_type == "netdev":
+ return OvsVport.OVS_VPORT_TYPE_NETDEV
+ elif vport_type == "internal":
+ return OvsVport.OVS_VPORT_TYPE_INTERNAL
+ elif vport_type == "gre":
+ return OvsVport.OVS_VPORT_TYPE_GRE
+ elif vport_type == "vxlan":
+ return OvsVport.OVS_VPORT_TYPE_VXLAN
+ elif vport_type == "geneve":
+ return OvsVport.OVS_VPORT_TYPE_GENEVE
+ raise ValueError("Unknown vport type: '%s'" % vport_type)
+
+ def __init__(self, packet=OvsPacket()):
GenericNetlinkSocket.__init__(self)
self.bind(OVS_VPORT_FAMILY, OvsVport.ovs_vport_msg)
+ self.upcall_packet = packet
def info(self, vport_name, dpifindex=0, portno=None):
msg = OvsVport.ovs_vport_msg()
@@ -238,8 +1190,231 @@ class OvsVport(GenericNetlinkSocket):
raise ne
return reply
+ def attach(self, dpindex, vport_ifname, ptype):
+ msg = OvsVport.ovs_vport_msg()
+
+ msg["cmd"] = OVS_VPORT_CMD_NEW
+ msg["version"] = OVS_DATAPATH_VERSION
+ msg["reserved"] = 0
+ msg["dpifindex"] = dpindex
+ port_type = OvsVport.str_to_type(ptype)
+
+ msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type])
+ msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname])
+ msg["attrs"].append(
+ ["OVS_VPORT_ATTR_UPCALL_PID", [self.upcall_packet.epid]]
+ )
+
+ try:
+ reply = self.nlm_request(
+ msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ if ne.code == errno.EEXIST:
+ reply = None
+ else:
+ raise ne
+ return reply
+
+ def reset_upcall(self, dpindex, vport_ifname, p=None):
+ msg = OvsVport.ovs_vport_msg()
+
+ msg["cmd"] = OVS_VPORT_CMD_SET
+ msg["version"] = OVS_DATAPATH_VERSION
+ msg["reserved"] = 0
+ msg["dpifindex"] = dpindex
+ msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname])
+
+ if p is None:
+ p = self.upcall_packet
+ else:
+ self.upcall_packet = p
+
+ msg["attrs"].append(["OVS_VPORT_ATTR_UPCALL_PID", [p.epid]])
+
+ try:
+ reply = self.nlm_request(
+ msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ raise ne
+ return reply
+
+ def detach(self, dpindex, vport_ifname):
+ msg = OvsVport.ovs_vport_msg()
+
+ msg["cmd"] = OVS_VPORT_CMD_DEL
+ msg["version"] = OVS_DATAPATH_VERSION
+ msg["reserved"] = 0
+ msg["dpifindex"] = dpindex
+ msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname])
+
+ try:
+ reply = self.nlm_request(
+ msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ if ne.code == errno.ENODEV:
+ reply = None
+ else:
+ raise ne
+ return reply
+
+ def upcall_handler(self, handler=None):
+ self.upcall_packet.upcall_handler(handler)
+
+
+class OvsFlow(GenericNetlinkSocket):
+ class ovs_flow_msg(ovs_dp_msg):
+ nla_map = (
+ ("OVS_FLOW_ATTR_UNSPEC", "none"),
+ ("OVS_FLOW_ATTR_KEY", "ovskey"),
+ ("OVS_FLOW_ATTR_ACTIONS", "ovsactions"),
+ ("OVS_FLOW_ATTR_STATS", "flowstats"),
+ ("OVS_FLOW_ATTR_TCP_FLAGS", "uint8"),
+ ("OVS_FLOW_ATTR_USED", "uint64"),
+ ("OVS_FLOW_ATTR_CLEAR", "none"),
+ ("OVS_FLOW_ATTR_MASK", "ovskey"),
+ ("OVS_FLOW_ATTR_PROBE", "none"),
+ ("OVS_FLOW_ATTR_UFID", "array(uint32)"),
+ ("OVS_FLOW_ATTR_UFID_FLAGS", "uint32"),
+ )
+
+ class flowstats(nla):
+ fields = (
+ ("packets", "=Q"),
+ ("bytes", "=Q"),
+ )
+
+ def dpstr(self, more=False):
+ ufid = self.get_attr("OVS_FLOW_ATTR_UFID")
+ ufid_str = ""
+ if ufid is not None:
+ ufid_str = (
+ "ufid:{:08x}-{:04x}-{:04x}-{:04x}-{:04x}{:08x}".format(
+ ufid[0],
+ ufid[1] >> 16,
+ ufid[1] & 0xFFFF,
+ ufid[2] >> 16,
+ ufid[2] & 0,
+ ufid[3],
+ )
+ )
+
+ key_field = self.get_attr("OVS_FLOW_ATTR_KEY")
+ keymsg = None
+ if key_field is not None:
+ keymsg = key_field
+
+ mask_field = self.get_attr("OVS_FLOW_ATTR_MASK")
+ maskmsg = None
+ if mask_field is not None:
+ maskmsg = mask_field
+
+ acts_field = self.get_attr("OVS_FLOW_ATTR_ACTIONS")
+ actsmsg = None
+ if acts_field is not None:
+ actsmsg = acts_field
+
+ print_str = ""
+
+ if more:
+ print_str += ufid_str + ","
+
+ if keymsg is not None:
+ print_str += keymsg.dpstr(maskmsg, more)
+
+ stats = self.get_attr("OVS_FLOW_ATTR_STATS")
+ if stats is None:
+ print_str += " packets:0, bytes:0,"
+ else:
+ print_str += " packets:%d, bytes:%d," % (
+ stats["packets"],
+ stats["bytes"],
+ )
+
+ used = self.get_attr("OVS_FLOW_ATTR_USED")
+ print_str += " used:"
+ if used is None:
+ print_str += "never,"
+ else:
+ used_time = int(used)
+ cur_time_sec = time.clock_gettime(time.CLOCK_MONOTONIC)
+ used_time = (cur_time_sec * 1000) - used_time
+ print_str += "{}s,".format(used_time / 1000)
+
+ print_str += " actions:"
+ if (
+ actsmsg is None
+ or "attrs" not in actsmsg
+ or len(actsmsg["attrs"]) == 0
+ ):
+ print_str += "drop"
+ else:
+ print_str += actsmsg.dpstr(more)
+
+ return print_str
+
+ def __init__(self):
+ GenericNetlinkSocket.__init__(self)
+
+ self.bind(OVS_FLOW_FAMILY, OvsFlow.ovs_flow_msg)
+
+ def dump(self, dpifindex, flowspec=None):
+ """
+ Returns a list of messages containing flows.
+
+ dpifindex should be a valid datapath ifindex obtained by
+ calling into the OvsDatapath lookup
+
+ flowspec is a string which represents a flow in the dpctl
+ format.
+ """
+ msg = OvsFlow.ovs_flow_msg()
+ msg["cmd"] = OVS_FLOW_CMD_GET
+ msg["version"] = OVS_DATAPATH_VERSION
+ msg["reserved"] = 0
+ msg["dpifindex"] = dpifindex
+
+ msg_flags = NLM_F_REQUEST | NLM_F_ACK
+ if flowspec is None:
+ msg_flags |= NLM_F_DUMP
+ rep = None
+
+ try:
+ rep = self.nlm_request(
+ msg,
+ msg_type=self.prid,
+ msg_flags=msg_flags,
+ )
+ except NetlinkError as ne:
+ raise ne
+ return rep
+
+ def miss(self, packetmsg):
+ seq = packetmsg["header"]["sequence_number"]
+ keystr = "(none)"
+ key_field = packetmsg.get_attr("OVS_PACKET_ATTR_KEY")
+ if key_field is not None:
+ keystr = key_field.dpstr(None, True)
+
+ pktdata = packetmsg.get_attr("OVS_PACKET_ATTR_PACKET")
+ pktpres = "yes" if pktdata is not None else "no"
+
+ print("MISS upcall[%d/%s]: %s" % (seq, pktpres, keystr), flush=True)
+
+ def execute(self, packetmsg):
+ print("userspace execute command")
+
+ def action(self, packetmsg):
+ print("userspace action command")
+
+
-def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB()):
+def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()):
dp_name = dp_lookup_rep.get_attr("OVS_DP_ATTR_NAME")
base_stats = dp_lookup_rep.get_attr("OVS_DP_ATTR_STATS")
megaflow_stats = dp_lookup_rep.get_attr("OVS_DP_ATTR_MEGAFLOW_STATS")
@@ -265,7 +1440,6 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB()):
print(" features: 0x%X" % user_features)
# port print out
- vpl = OvsVport()
for iface in ndb.interfaces:
rep = vpl.info(iface.ifname, ifindex)
if rep is not None:
@@ -280,12 +1454,16 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB()):
def main(argv):
+ nlmsg_atoms.ovskey = ovskey
+ nlmsg_atoms.ovsactions = ovsactions
+
parser = argparse.ArgumentParser()
parser.add_argument(
"-v",
"--verbose",
action="count",
help="Increment 'verbose' output counter.",
+ default=0,
)
subparsers = parser.add_subparsers()
@@ -312,9 +1490,40 @@ def main(argv):
deldpcmd = subparsers.add_parser("del-dp")
deldpcmd.add_argument("deldp", help="Datapath Name")
+ addifcmd = subparsers.add_parser("add-if")
+ addifcmd.add_argument("dpname", help="Datapath Name")
+ addifcmd.add_argument("addif", help="Interface name for adding")
+ addifcmd.add_argument(
+ "-u",
+ "--upcall",
+ action="store_true",
+ help="Leave open a reader for upcalls",
+ )
+ addifcmd.add_argument(
+ "-t",
+ "--ptype",
+ type=str,
+ default="netdev",
+ choices=["netdev", "internal"],
+ help="Interface type (default netdev)",
+ )
+ delifcmd = subparsers.add_parser("del-if")
+ delifcmd.add_argument("dpname", help="Datapath Name")
+ delifcmd.add_argument("delif", help="Interface name for adding")
+
+ dumpflcmd = subparsers.add_parser("dump-flows")
+ dumpflcmd.add_argument("dumpdp", help="Datapath Name")
+
args = parser.parse_args()
+ if args.verbose > 0:
+ if args.verbose > 1:
+ logging.basicConfig(level=logging.DEBUG)
+
+ ovspk = OvsPacket()
ovsdp = OvsDatapath()
+ ovsvp = OvsVport(ovspk)
+ ovsflow = OvsFlow()
ndb = NDB()
if hasattr(args, "showdp"):
@@ -328,7 +1537,7 @@ def main(argv):
if rep is not None:
found = True
- print_ovsdp_full(rep, iface.index, ndb)
+ print_ovsdp_full(rep, iface.index, ndb, ovsvp)
if not found:
msg = "No DP found"
@@ -336,13 +1545,50 @@ def main(argv):
msg += ":'%s'" % args.showdp
print(msg)
elif hasattr(args, "adddp"):
- rep = ovsdp.create(args.adddp, args.upcall, args.versioning)
+ rep = ovsdp.create(args.adddp, args.upcall, args.versioning, ovspk)
if rep is None:
print("DP '%s' already exists" % args.adddp)
else:
print("DP '%s' added" % args.adddp)
+ if args.upcall:
+ ovspk.upcall_handler(ovsflow)
elif hasattr(args, "deldp"):
ovsdp.destroy(args.deldp)
+ elif hasattr(args, "addif"):
+ rep = ovsdp.info(args.dpname, 0)
+ if rep is None:
+ print("DP '%s' not found." % args.dpname)
+ return 1
+ dpindex = rep["dpifindex"]
+ rep = ovsvp.attach(rep["dpifindex"], args.addif, args.ptype)
+ msg = "vport '%s'" % args.addif
+ if rep and rep["header"]["error"] is None:
+ msg += " added."
+ else:
+ msg += " failed to add."
+ if args.upcall:
+ if rep is None:
+ rep = ovsvp.reset_upcall(dpindex, args.addif, ovspk)
+ ovsvp.upcall_handler(ovsflow)
+ elif hasattr(args, "delif"):
+ rep = ovsdp.info(args.dpname, 0)
+ if rep is None:
+ print("DP '%s' not found." % args.dpname)
+ return 1
+ rep = ovsvp.detach(rep["dpifindex"], args.delif)
+ msg = "vport '%s'" % args.delif
+ if rep and rep["header"]["error"] is None:
+ msg += " removed."
+ else:
+ msg += " failed to remove."
+ elif hasattr(args, "dumpdp"):
+ rep = ovsdp.info(args.dumpdp, 0)
+ if rep is None:
+ print("DP '%s' not found." % args.dumpdp)
+ return 1
+ rep = ovsflow.dump(rep["dpifindex"])
+ for flow in rep:
+ print(flow.dpstr(True if args.verbose > 0 else False))
return 0
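
Taken together, main() now wires up vport and flow management alongside the existing datapath commands. A minimal sketch of a session with the new subcommands, assuming the script is run as root; the names test0 and veth0 are purely illustrative:

    # Hypothetical session with the new subcommands (names illustrative;
    # veth0 must already exist as a network interface).
    ./ovs-dpctl.py add-dp test0
    ./ovs-dpctl.py add-if test0 veth0      # -t netdev is the default type
    ./ovs-dpctl.py dump-flows test0        # prints one flow per line
    ./ovs-dpctl.py del-if test0 veth0
    ./ovs-dpctl.py del-dp test0
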
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index 0fd0d2db3abc..a26c5624429f 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -60,6 +60,7 @@ ip link set dev $VETH up
ip -n $NETNS link set dev $VETH up
chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3
chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0
+ip link del dev $VETH
ip netns del $NETNS
setup
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index 607cc9ad8d1b..6e59b1461dcc 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -208,7 +208,7 @@ void *child_thread(void *arg)
memset(&zc, 0, sizeof(zc));
zc.address = (__u64)((unsigned long)addr);
- zc.length = min(chunk_size, FILE_SZ - lu);
+ zc.length = min(chunk_size, FILE_SZ - total);
res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
&zc, &zc_len);
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
new file mode 100755
index 000000000000..d80f2cd87614
--- /dev/null
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -0,0 +1,862 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking bridge neighbor suppression functionality. The
+# topology consists of two bridges (VTEPs) connected using VXLAN. A single
+# host is connected to each bridge over multiple VLANs. The test checks that
+# ARP/NS messages from the first host are suppressed on the VXLAN port when
+# they should be.
+#
+# +-----------------------+ +------------------------+
+# | h1 | | h2 |
+# | | | |
+# | + eth0.10 | | + eth0.10 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 |
+# | | | | | |
+# | | + eth0.20 | | | + eth0.20 |
+# | \ | 192.0.2.17/28 | | \ | 192.0.2.18/28 |
+# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 |
+# | \| | | \| |
+# | + eth0 | | + eth0 |
+# +----|------------------+ +----|-------------------+
+# | |
+# | |
+# +----|-------------------------------+ +----|-------------------------------+
+# | + swp1 + vx0 | | + swp1 + vx0 |
+# | | | | | | | |
+# | | br0 | | | | | |
+# | +------------+-----------+ | | +------------+-----------+ |
+# | | | | | |
+# | | | | | |
+# | +---+---+ | | +---+---+ |
+# | | | | | | | |
+# | | | | | | | |
+# | + + | | + + |
+# | br0.10 br0.20 | | br0.10 br0.20 |
+# | | | |
+# | 192.0.2.33 | | 192.0.2.34 |
+# | + lo | | + lo |
+# | | | |
+# | | | |
+# | 192.0.2.49/28 | | 192.0.2.50/28 |
+# | veth0 +-------+ veth0 |
+# | | | |
+# | sw1 | | sw2 |
+# +------------------------------------+ +------------------------------------+
+
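
In iproute2 terms, the knobs the tests below toggle are the per-port and per-VLAN neighbor suppression attributes of the VXLAN bridge port. A sketch of the three forms follows; the script itself runs them inside the sw1 namespace via "bridge -n", and vx0 is the VXLAN port created in setup_sw_common:

    # Per-port suppression, per-{port,VLAN} mode, and the per-VLAN knob:
    bridge link set dev vx0 neigh_suppress on
    bridge link set dev vx0 neigh_vlan_suppress on
    bridge vlan set vid 10 dev vx0 neigh_suppress on
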
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# All tests in this script. Can be overridden with -t option.
+TESTS="
+ neigh_suppress_arp
+ neigh_suppress_ns
+ neigh_vlan_suppress_arp
+ neigh_vlan_suppress_ns
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+################################################################################
+# Utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "$VERBOSE" = "1" ]; then
+ echo " rc=$rc, expected $expected"
+ fi
+
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+tc_check_packets()
+{
+ local ns=$1; shift
+ local id=$1; shift
+ local handle=$1; shift
+ local count=$1; shift
+ local pkts
+
+ sleep 0.1
+ pkts=$(tc -n $ns -j -s filter show $id \
+ | jq ".[] | select(.options.handle == $handle) | \
+ .options.actions[0].stats.packets")
+ [[ $pkts == $count ]]
+}
+
+################################################################################
+# Setup
+
+setup_topo_ns()
+{
+ local ns=$1; shift
+
+ ip netns add $ns
+ ip -n $ns link set dev lo up
+
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+setup_topo()
+{
+ local ns
+
+ for ns in h1 h2 sw1 sw2; do
+ setup_topo_ns $ns
+ done
+
+ ip link add name veth0 type veth peer name veth1
+ ip link set dev veth0 netns h1 name eth0
+ ip link set dev veth1 netns sw1 name swp1
+
+ ip link add name veth0 type veth peer name veth1
+ ip link set dev veth0 netns sw1 name veth0
+ ip link set dev veth1 netns sw2 name veth0
+
+ ip link add name veth0 type veth peer name veth1
+ ip link set dev veth0 netns h2 name eth0
+ ip link set dev veth1 netns sw2 name swp1
+}
+
+setup_host_common()
+{
+ local ns=$1; shift
+ local v4addr1=$1; shift
+ local v4addr2=$1; shift
+ local v6addr1=$1; shift
+ local v6addr2=$1; shift
+
+ ip -n $ns link set dev eth0 up
+ ip -n $ns link add link eth0 name eth0.10 up type vlan id 10
+ ip -n $ns link add link eth0 name eth0.20 up type vlan id 20
+
+ ip -n $ns address add $v4addr1 dev eth0.10
+ ip -n $ns address add $v4addr2 dev eth0.20
+ ip -n $ns address add $v6addr1 dev eth0.10
+ ip -n $ns address add $v6addr2 dev eth0.20
+}
+
+setup_h1()
+{
+ local ns=h1
+ local v4addr1=192.0.2.1/28
+ local v4addr2=192.0.2.17/28
+ local v6addr1=2001:db8:1::1/64
+ local v6addr2=2001:db8:2::1/64
+
+ setup_host_common $ns $v4addr1 $v4addr2 $v6addr1 $v6addr2
+}
+
+setup_h2()
+{
+ local ns=h2
+ local v4addr1=192.0.2.2/28
+ local v4addr2=192.0.2.18/28
+ local v6addr1=2001:db8:1::2/64
+ local v6addr2=2001:db8:2::2/64
+
+ setup_host_common $ns $v4addr1 $v4addr2 $v6addr1 $v6addr2
+}
+
+setup_sw_common()
+{
+ local ns=$1; shift
+ local local_addr=$1; shift
+ local remote_addr=$1; shift
+ local veth_addr=$1; shift
+ local gw_addr=$1; shift
+
+ ip -n $ns address add $local_addr/32 dev lo
+
+ ip -n $ns link set dev veth0 up
+ ip -n $ns address add $veth_addr/28 dev veth0
+ ip -n $ns route add default via $gw_addr
+
+ ip -n $ns link add name br0 up type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+
+ ip -n $ns link add link br0 name br0.10 up type vlan id 10
+ bridge -n $ns vlan add vid 10 dev br0 self
+
+ ip -n $ns link add link br0 name br0.20 up type vlan id 20
+ bridge -n $ns vlan add vid 20 dev br0 self
+
+ ip -n $ns link set dev swp1 up master br0
+ bridge -n $ns vlan add vid 10 dev swp1
+ bridge -n $ns vlan add vid 20 dev swp1
+
+ ip -n $ns link add name vx0 up master br0 type vxlan \
+ local $local_addr dstport 4789 nolearning external
+ bridge -n $ns fdb add 00:00:00:00:00:00 dev vx0 self static \
+ dst $remote_addr src_vni 10010
+ bridge -n $ns fdb add 00:00:00:00:00:00 dev vx0 self static \
+ dst $remote_addr src_vni 10020
+ bridge -n $ns link set dev vx0 vlan_tunnel on learning off
+
+ bridge -n $ns vlan add vid 10 dev vx0
+ bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010
+
+ bridge -n $ns vlan add vid 20 dev vx0
+ bridge -n $ns vlan add vid 20 dev vx0 tunnel_info id 10020
+}
+
+setup_sw1()
+{
+ local ns=sw1
+ local local_addr=192.0.2.33
+ local remote_addr=192.0.2.34
+ local veth_addr=192.0.2.49
+ local gw_addr=192.0.2.50
+
+ setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr
+}
+
+setup_sw2()
+{
+ local ns=sw2
+ local local_addr=192.0.2.34
+ local remote_addr=192.0.2.33
+ local veth_addr=192.0.2.50
+ local gw_addr=192.0.2.49
+
+ setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr
+}
+
+setup()
+{
+ set -e
+
+ setup_topo
+ setup_h1
+ setup_h2
+ setup_sw1
+ setup_sw2
+
+ sleep 5
+
+ set +e
+}
+
+cleanup()
+{
+ local ns
+
+ for ns in h1 h2 sw1 sw2; do
+ ip netns del $ns &> /dev/null
+ done
+}
+
+################################################################################
+# Tests
+
+neigh_suppress_arp_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local tip=$1; shift
+ local h2_mac
+
+ echo
+ echo "Per-port ARP suppression - VLAN $vid"
+ echo "----------------------------------"
+
+ run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass"
+
+ # Initial state - check that ARP requests are not suppressed and that
+ # ARP replies are received.
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 0 "arping"
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "ARP suppression"
+
+ # Enable neighbor suppression and check that nothing changes compared
+ # to the initial state.
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 0 "arping"
+ tc_check_packets sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "ARP suppression"
+
+ # Install an FDB entry for the remote host and check that nothing
+ # changes compared to the initial state.
+ h2_mac=$(ip -n h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n sw1 fdb replace $h2_mac dev vx0 master static vlan $vid"
+ log_test $? 0 "FDB entry installation"
+
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 0 "arping"
+ tc_check_packets sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "ARP suppression"
+
+ # Install a neighbor on the matching SVI interface and check that ARP
+ # requests are suppressed.
+ run_cmd "ip -n sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid"
+ log_test $? 0 "Neighbor entry installation"
+
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 0 "arping"
+ tc_check_packets sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "ARP suppression"
+
+ # Take the second host down and check that ARP requests are suppressed
+ # and that ARP replies are received.
+ run_cmd "ip -n h2 link set dev eth0.$vid down"
+ log_test $? 0 "H2 down"
+
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 0 "arping"
+ tc_check_packets sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "ARP suppression"
+
+ run_cmd "ip -n h2 link set dev eth0.$vid up"
+ log_test $? 0 "H2 up"
+
+ # Disable neighbor suppression and check that ARP requests are no
+ # longer suppressed.
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off"
+
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 0 "arping"
+ tc_check_packets sw1 "dev vx0 egress" 101 4
+ log_test $? 0 "ARP suppression"
+
+ # Take the second host down and check that ARP requests are not
+ # suppressed and that ARP replies are not received.
+ run_cmd "ip -n h2 link set dev eth0.$vid down"
+ log_test $? 0 "H2 down"
+
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 1 "arping"
+ tc_check_packets sw1 "dev vx0 egress" 101 5
+ log_test $? 0 "ARP suppression"
+}
+
+neigh_suppress_arp()
+{
+ local vid=10
+ local sip=192.0.2.1
+ local tip=192.0.2.2
+
+ neigh_suppress_arp_common $vid $sip $tip
+
+ vid=20
+ sip=192.0.2.17
+ tip=192.0.2.18
+ neigh_suppress_arp_common $vid $sip $tip
+}
+
+neigh_suppress_ns_common()
+{
+ local vid=$1; shift
+ local saddr=$1; shift
+ local daddr=$1; shift
+ local maddr=$1; shift
+ local h2_mac
+
+ echo
+ echo "Per-port NS suppression - VLAN $vid"
+ echo "---------------------------------"
+
+ run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr type 135 code 0 action pass"
+
+ # Initial state - check that NS messages are not suppressed and that ND
+ # messages are received.
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 0 "ndisc6"
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "NS suppression"
+
+ # Enable neighbor suppression and check that nothing changes compared
+ # to the initial state.
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 0 "ndisc6"
+ tc_check_packets sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "NS suppression"
+
+ # Install an FDB entry for the remote host and check that nothing
+ # changes compared to the initial state.
+ h2_mac=$(ip -n h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n sw1 fdb replace $h2_mac dev vx0 master static vlan $vid"
+ log_test $? 0 "FDB entry installation"
+
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 0 "ndisc6"
+ tc_check_packets sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "NS suppression"
+
+ # Install a neighbor on the matching SVI interface and check that NS
+ # messages are suppressed.
+ run_cmd "ip -n sw1 neigh replace $daddr lladdr $h2_mac nud permanent dev br0.$vid"
+ log_test $? 0 "Neighbor entry installation"
+
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 0 "ndisc6"
+ tc_check_packets sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "NS suppression"
+
+ # Take the second host down and check that NS messages are suppressed
+ # and that ND messages are received.
+ run_cmd "ip -n h2 link set dev eth0.$vid down"
+ log_test $? 0 "H2 down"
+
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 0 "ndisc6"
+ tc_check_packets sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "NS suppression"
+
+ run_cmd "ip -n h2 link set dev eth0.$vid up"
+ log_test $? 0 "H2 up"
+
+ # Disable neighbor suppression and check that NS messages are no longer
+ # suppressed.
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off"
+
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 0 "ndisc6"
+ tc_check_packets sw1 "dev vx0 egress" 101 4
+ log_test $? 0 "NS suppression"
+
+ # Take the second host down and check that NS messages are not
+ # suppressed and that ND messages are not received.
+ run_cmd "ip -n h2 link set dev eth0.$vid down"
+ log_test $? 0 "H2 down"
+
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 2 "ndisc6"
+ tc_check_packets sw1 "dev vx0 egress" 101 5
+ log_test $? 0 "NS suppression"
+}
+
+neigh_suppress_ns()
+{
+ local vid=10
+ local saddr=2001:db8:1::1
+ local daddr=2001:db8:1::2
+ local maddr=ff02::1:ff00:2
+
+ neigh_suppress_ns_common $vid $saddr $daddr $maddr
+
+ vid=20
+ saddr=2001:db8:2::1
+ daddr=2001:db8:2::2
+ maddr=ff02::1:ff00:2
+
+ neigh_suppress_ns_common $vid $saddr $daddr $maddr
+}
+
+neigh_vlan_suppress_arp()
+{
+ local vid1=10
+ local vid2=20
+ local sip1=192.0.2.1
+ local sip2=192.0.2.17
+ local tip1=192.0.2.2
+ local tip2=192.0.2.18
+ local h2_mac1
+ local h2_mac2
+
+ echo
+ echo "Per-{Port, VLAN} ARP suppression"
+ echo "--------------------------------"
+
+ run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip1 arp_sip $sip1 arp_op request action pass"
+ run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $tip2 arp_sip $sip2 arp_op request action pass"
+
+ h2_mac1=$(ip -n h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]')
+ h2_mac2=$(ip -n h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]')
+ run_cmd "bridge -n sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1"
+ run_cmd "bridge -n sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2"
+ run_cmd "ip -n sw1 neigh replace $tip1 lladdr $h2_mac1 nud permanent dev br0.$vid1"
+ run_cmd "ip -n sw1 neigh replace $tip2 lladdr $h2_mac2 nud permanent dev br0.$vid2"
+
+ # Enable per-{Port, VLAN} neighbor suppression and check that ARP
+ # requests are not suppressed and that ARP replies are received.
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress on"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\""
+ log_test $? 0 "\"neigh_vlan_suppress\" is on"
+
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ log_test $? 0 "arping (VLAN $vid1)"
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ log_test $? 0 "arping (VLAN $vid2)"
+
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "ARP suppression (VLAN $vid1)"
+ tc_check_packets sw1 "dev vx0 egress" 102 1
+ log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+ # Enable neighbor suppression on VLAN 10 and check that ARP requests
+ # are suppressed only on this VLAN.
+ run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress on"
+ run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)"
+ run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)"
+
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ log_test $? 0 "arping (VLAN $vid1)"
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ log_test $? 0 "arping (VLAN $vid2)"
+
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "ARP suppression (VLAN $vid1)"
+ tc_check_packets sw1 "dev vx0 egress" 102 2
+ log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+ # Enable neighbor suppression on the port and check that it has no
+ # effect compared to previous state.
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ log_test $? 0 "arping (VLAN $vid1)"
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ log_test $? 0 "arping (VLAN $vid2)"
+
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "ARP suppression (VLAN $vid1)"
+ tc_check_packets sw1 "dev vx0 egress" 102 3
+ log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+ # Disable neighbor suppression on the port and check that it has no
+ # effect compared to previous state.
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off"
+
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ log_test $? 0 "arping (VLAN $vid1)"
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ log_test $? 0 "arping (VLAN $vid2)"
+
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "ARP suppression (VLAN $vid1)"
+ tc_check_packets sw1 "dev vx0 egress" 102 4
+ log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+ # Disable neighbor suppression on VLAN 10 and check that ARP requests
+ # are no longer suppressed on this VLAN.
+ run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress off"
+ run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)"
+
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ log_test $? 0 "arping (VLAN $vid1)"
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ log_test $? 0 "arping (VLAN $vid2)"
+
+ tc_check_packets sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "ARP suppression (VLAN $vid1)"
+ tc_check_packets sw1 "dev vx0 egress" 102 5
+ log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+ # Disable per-{Port, VLAN} neighbor suppression, enable neighbor
+ # suppression on the port and check that on both VLANs ARP requests are
+ # suppressed.
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress off"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\""
+ log_test $? 0 "\"neigh_vlan_suppress\" is off"
+
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ log_test $? 0 "arping (VLAN $vid1)"
+ run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ log_test $? 0 "arping (VLAN $vid2)"
+
+ tc_check_packets sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "ARP suppression (VLAN $vid1)"
+ tc_check_packets sw1 "dev vx0 egress" 102 5
+ log_test $? 0 "ARP suppression (VLAN $vid2)"
+}
+
+neigh_vlan_suppress_ns()
+{
+ local vid1=10
+ local vid2=20
+ local saddr1=2001:db8:1::1
+ local saddr2=2001:db8:2::1
+ local daddr1=2001:db8:1::2
+ local daddr2=2001:db8:2::2
+ local maddr=ff02::1:ff00:2
+ local h2_mac1
+ local h2_mac2
+
+ echo
+ echo "Per-{Port, VLAN} NS suppression"
+ echo "-------------------------------"
+
+ run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr1 type 135 code 0 action pass"
+ run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr2 type 135 code 0 action pass"
+
+ h2_mac1=$(ip -n h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]')
+ h2_mac2=$(ip -n h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]')
+ run_cmd "bridge -n sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1"
+ run_cmd "bridge -n sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2"
+ run_cmd "ip -n sw1 neigh replace $daddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1"
+ run_cmd "ip -n sw1 neigh replace $daddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2"
+
+ # Enable per-{Port, VLAN} neighbor suppression and check that NS
+ # messages are not suppressed and that ND messages are received.
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress on"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\""
+ log_test $? 0 "\"neigh_vlan_suppress\" is on"
+
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ log_test $? 0 "ndisc6 (VLAN $vid1)"
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "NS suppression (VLAN $vid1)"
+ tc_check_packets sw1 "dev vx0 egress" 102 1
+ log_test $? 0 "NS suppression (VLAN $vid2)"
+
+ # Enable neighbor suppression on VLAN 10 and check that NS messages
+ # are suppressed only on this VLAN.
+ run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress on"
+ run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)"
+ run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)"
+
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ log_test $? 0 "ndisc6 (VLAN $vid1)"
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "NS suppression (VLAN $vid1)"
+ tc_check_packets sw1 "dev vx0 egress" 102 2
+ log_test $? 0 "NS suppression (VLAN $vid2)"
+
+ # Enable neighbor suppression on the port and check that it has no
+ # effect compared to previous state.
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ log_test $? 0 "ndisc6 (VLAN $vid1)"
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "NS suppression (VLAN $vid1)"
+ tc_check_packets sw1 "dev vx0 egress" 102 3
+ log_test $? 0 "NS suppression (VLAN $vid2)"
+
+ # Disable neighbor suppression on the port and check that it has no
+ # effect compared to previous state.
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off"
+
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ log_test $? 0 "ndisc6 (VLAN $vid1)"
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+ tc_check_packets sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "NS suppression (VLAN $vid1)"
+ tc_check_packets sw1 "dev vx0 egress" 102 4
+ log_test $? 0 "NS suppression (VLAN $vid2)"
+
+ # Disable neighbor suppression on VLAN 10 and check that NS messages
+ # are no longer suppressed on this VLAN.
+ run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress off"
+ run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)"
+
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ log_test $? 0 "ndisc6 (VLAN $vid1)"
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+ tc_check_packets sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "NS suppression (VLAN $vid1)"
+ tc_check_packets sw1 "dev vx0 egress" 102 5
+ log_test $? 0 "NS suppression (VLAN $vid2)"
+
+ # Disable per-{Port, VLAN} neighbor suppression, enable neighbor
+ # suppression on the port and check that on both VLANs NS messages are
+ # suppressed.
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress off"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\""
+ log_test $? 0 "\"neigh_vlan_suppress\" is off"
+
+ run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ log_test $? 0 "ndisc6 (VLAN $vid1)"
+ run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+ tc_check_packets sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "NS suppression (VLAN $vid1)"
+ tc_check_packets sw1 "dev vx0 egress" 102 5
+ log_test $? 0 "NS suppression (VLAN $vid2)"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v Verbose mode (show commands and output)
+EOF
+}
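
For example, to run only the per-port ARP test with commands echoed (a usage sketch):

    ./test_bridge_neigh_suppress.sh -t neigh_suppress_arp -v
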
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:pPvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+ echo "SKIP: Could not run test without bridge tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tc)" ]; then
+ echo "SKIP: Could not run test without tc tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v arping)" ]; then
+ echo "SKIP: Could not run test without arping tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ndisc6)" ]; then
+ echo "SKIP: Could not run test without ndisc6 tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test without jq tool"
+ exit $ksft_skip
+fi
+
+bridge link help 2>&1 | grep -q "neigh_vlan_suppress"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support"
+ exit $ksft_skip
+fi
+
+# Start clean.
+cleanup
+
+for t in $TESTS
+do
+ setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README
index b64845b823ab..4fb9368bf751 100644
--- a/tools/virtio/virtio-trace/README
+++ b/tools/virtio/virtio-trace/README
@@ -61,7 +61,7 @@ and
id=channel0,name=agent-ctl-path\
##data path##
-chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\
- -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel0,\
+ -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,\
id=channel1,name=trace-path-cpu0\
...
diff --git a/usr/gen_init_cpio.c b/usr/gen_init_cpio.c
index ee01e40e8bc6..61230532fef1 100644
--- a/usr/gen_init_cpio.c
+++ b/usr/gen_init_cpio.c
@@ -353,6 +353,12 @@ static int cpio_mkfile(const char *name, const char *location,
buf.st_mtime = 0xffffffff;
}
+ if (buf.st_mtime < 0) {
+ fprintf(stderr, "%s: Timestamp negative, clipping.\n",
+ location);
+ buf.st_mtime = 0;
+ }
+
if (buf.st_size > 0xffffffff) {
fprintf(stderr, "%s: Size exceeds maximum cpio file size\n",
location);
@@ -602,10 +608,10 @@ int main (int argc, char *argv[])
/*
* Timestamps after 2106-02-07 06:28:15 UTC have an ascii hex time_t
* representation that exceeds 8 chars and breaks the cpio header
- * specification.
+ * specification. Negative timestamps similarly exceed 8 chars.
*/
- if (default_mtime > 0xffffffff) {
- fprintf(stderr, "ERROR: Timestamp too large for cpio format\n");
+ if (default_mtime > 0xffffffff || default_mtime < 0) {
+ fprintf(stderr, "ERROR: Timestamp out of range for cpio format\n");
exit(1);
}
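
The 8-character limit is easy to confirm from a shell, assuming GNU coreutils date: 0xffffffff is the last value that fits, and a negative time_t sign-extends well past eight hex digits on a 64-bit system:

    $ printf '%08lx\n' 4294967295   # 0xffffffff: exactly 8 hex chars
    ffffffff
    $ date -u -d @4294967295        # the corresponding cutoff date
    Sun Feb  7 06:28:15 UTC 2106
    $ printf '%08lx\n' -1           # negative time_t overflows the field
    ffffffffffffffff
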
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 2a3ed401ce46..b0af834ffa95 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -55,6 +55,15 @@ irqfd_inject(struct work_struct *work)
irqfd->gsi, 1, false);
}
+static void irqfd_resampler_notify(struct kvm_kernel_irqfd_resampler *resampler)
+{
+ struct kvm_kernel_irqfd *irqfd;
+
+ list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
+ srcu_read_lock_held(&resampler->kvm->irq_srcu))
+ eventfd_signal(irqfd->resamplefd, 1);
+}
+
/*
* Since resampler irqfds share an IRQ source ID, we de-assert once
* then notify all of the resampler irqfds using this GSI. We can't
@@ -65,7 +74,6 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
struct kvm_kernel_irqfd_resampler *resampler;
struct kvm *kvm;
- struct kvm_kernel_irqfd *irqfd;
int idx;
resampler = container_of(kian,
@@ -76,11 +84,7 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
resampler->notifier.gsi, 0, false);
idx = srcu_read_lock(&kvm->irq_srcu);
-
- list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
- srcu_read_lock_held(&kvm->irq_srcu))
- eventfd_signal(irqfd->resamplefd, 1);
-
+ irqfd_resampler_notify(resampler);
srcu_read_unlock(&kvm->irq_srcu, idx);
}
@@ -96,8 +100,12 @@ irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
synchronize_srcu(&kvm->irq_srcu);
if (list_empty(&resampler->list)) {
- list_del(&resampler->link);
+ list_del_rcu(&resampler->link);
kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
+ /*
+ * synchronize_srcu(&kvm->irq_srcu) already called
+ * in kvm_unregister_irq_ack_notifier().
+ */
kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
resampler->notifier.gsi, 0, false);
kfree(resampler);
@@ -369,7 +377,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
resampler->notifier.irq_acked = irqfd_resampler_ack;
INIT_LIST_HEAD(&resampler->link);
- list_add(&resampler->link, &kvm->irqfds.resampler_list);
+ list_add_rcu(&resampler->link, &kvm->irqfds.resampler_list);
kvm_register_irq_ack_notifier(kvm,
&resampler->notifier);
irqfd->resampler = resampler;
@@ -644,6 +652,31 @@ void kvm_irq_routing_update(struct kvm *kvm)
spin_unlock_irq(&kvm->irqfds.lock);
}
+bool kvm_notify_irqfd_resampler(struct kvm *kvm,
+ unsigned int irqchip,
+ unsigned int pin)
+{
+ struct kvm_kernel_irqfd_resampler *resampler;
+ int gsi, idx;
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+ if (gsi != -1) {
+ list_for_each_entry_srcu(resampler,
+ &kvm->irqfds.resampler_list, link,
+ srcu_read_lock_held(&kvm->irq_srcu)) {
+ if (resampler->notifier.gsi == gsi) {
+ irqfd_resampler_notify(resampler);
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+ return true;
+ }
+ }
+ }
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+
+ return false;
+}
+
/*
* create a host-wide workqueue for issuing deferred shutdown requests
* aggregated from all vm* instances. We need our own isolated
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d255964ec331..b1679d08a216 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4479,7 +4479,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
#endif
#ifdef CONFIG_HAVE_KVM_IRQFD
case KVM_CAP_IRQFD:
- case KVM_CAP_IRQFD_RESAMPLE:
#endif
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
case KVM_CAP_CHECK_EXTENSION_VM: